From: Mark Huang Date: Wed, 2 Jun 2004 20:45:41 +0000 (+0000) Subject: patch-2.6.6-vs1.9.0 X-Git-Tag: vserver-2_6_6-vs1_9_0~1 X-Git-Url: http://git.onelab.eu/?p=linux-2.6.git;a=commitdiff_plain;h=a8e794ca871505c8ea96cc102f4ad555c5231d7f patch-2.6.6-vs1.9.0 --- diff --git a/Makefile b/Makefile index 4d8697026..1511e96ae 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 6 -EXTRAVERSION = +EXTRAVERSION = -vs1.9.0 NAME=Zonked Quokka # *DOCUMENTATION* diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 16328adb9..57ae7a429 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -692,6 +692,8 @@ config DEBUG_INFO endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index aa654cc85..5d9aae6be 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -287,6 +287,8 @@ do_sys_ptrace(long request, long pid, long addr, long data, read_unlock(&tasklist_lock); if (!child) goto out_notsk; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out; if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 8e28e0382..088cecf1d 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -291,7 +291,7 @@ sys_call_table: .quad alpha_ni_syscall /* 270 */ .quad alpha_ni_syscall .quad alpha_ni_syscall - .quad alpha_ni_syscall + .quad sys_vserver /* 273 sys_vserver */ .quad alpha_ni_syscall .quad alpha_ni_syscall /* 275 */ .quad alpha_ni_syscall diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 588fd8b7c..266427ef9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -801,6 +801,8 @@ config DEBUG_S3C2410_UART endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index d4058edc5..4dcf99afd 100644 --- 
a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -754,6 +754,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index ce96fd34b..b40758b9f 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -327,6 +327,8 @@ config DEBUG_LL endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c index 57b9fb1e2..78cd8931d 100644 --- a/arch/arm26/kernel/ptrace.c +++ b/arch/arm26/kernel/ptrace.c @@ -691,6 +691,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 86a05dbff..9b0b719bf 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -214,6 +214,8 @@ config PROFILE_SHIFT endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 556863890..3822a609f 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -368,6 +368,8 @@ config CONFIG_BLKDEV_RESERVE_ADDRESS BLKDEV start address. 
endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 0843013d1..cdbfe9e5c 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -80,6 +80,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 954887332..7d4f69caa 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1296,6 +1296,8 @@ config X86_MPPARSE endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index afa02ea35..499552841 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -881,7 +881,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_ni_syscall /* sys_mbind */ .long sys_ni_syscall /* 275 sys_get_mempolicy */ .long sys_ni_syscall /* sys_set_mempolicy */ diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 9f9b32a3f..f96549542 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -258,6 +258,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index 0c36130af..fc8001a81 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -217,7 +217,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name) if (!name) return -EFAULT; 
down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -225,6 +225,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name) asmlinkage int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -233,15 +234,16 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name) down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 6e2fa1f39..31aba6a0f 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -43,7 +43,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struc { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) { 
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -83,7 +84,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -222,7 +224,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, page = pte_page(pte); put_page(page); } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 991916a4a..ff8970d74 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -491,6 +491,8 @@ config SYSVIPC_COMPAT default y endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 679e68afd..ad67b0454 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -151,7 +151,7 @@ ia64_elf32_init (struct pt_regs *regs) int ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -168,7 +168,10 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -191,7 +194,9 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) mpnt->vm_file = NULL; mpnt->vm_private_data = 0; 
insert_vm_struct(current->mm, mpnt); - current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + // current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + vx_vmpages_sub(current->mm, current->mm->total_vm - + ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT)); } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index ae2eb13f9..42caeae9b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2350,7 +2350,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; + // mm->total_vm += size >> PAGE_SHIFT; + vx_vmpages_add(mm, size >> PAGE_SHIFT); up_write(&task->mm->mmap_sem); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 0432abe56..eace1b77c 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1310,6 +1310,9 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! 
*/ goto out_tsk; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index d823ff897..20d11f4d5 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -36,10 +36,14 @@ expand_backing_store (struct vm_area_struct *vma, unsigned long address) if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)) return -ENOMEM; + if (!vx_vmpages_avail(vma->vm_mm, grow)) + return -ENOMEM; vma->vm_end += PAGE_SIZE; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); return 0; } diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 114e3d96c..eaae371d4 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -65,7 +65,8 @@ set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma, { pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) { entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); @@ -108,7 +109,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -251,7 +253,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig put_page(page); pte_clear(pte); } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 503a32d23..5cac8410e 100644 --- a/arch/m68k/Kconfig +++ 
b/arch/m68k/Kconfig @@ -690,6 +690,8 @@ config DEBUG_INFO endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index 45644b89e..ebc08fc00 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -635,7 +635,8 @@ static inline void unswap_pte(struct vm_area_struct * vma, unsigned long set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); swap_free(entry); get_page(page); - ++vma->vm_mm->rss; + // ++vma->vm_mm->rss; + vx_rsspages_inc(vma->vm_mm); } static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir, diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 666b3ae01..540f49cc8 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -140,6 +140,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index f11317725..b8f71e760 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -566,6 +566,8 @@ config BDM_DISABLE endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 4f3df6d67..0c57f873a 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -124,6 +124,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 61fa43fa4..5f2b2771c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1600,6 +1600,8 @@ config 
DEBUG_HIGHMEM endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index 6a2b36236..b6de9b834 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -686,7 +686,8 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Do this so that we can load the interpreter, if need be. We will * change some of these later. */ - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); setup_arg_pages(bprm, EXSTACK_DEFAULT); current->mm->start_stack = bprm->p; diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 06813fe22..c7b19aa70 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -1728,7 +1728,7 @@ asmlinkage long sys32_newuname(struct new_utsname * name) int ret = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) ret = -EFAULT; up_read(&uts_sem); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 1036abe1c..76611289b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -76,6 +76,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 7e1eca973..5785d06bc 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -209,7 +209,7 @@ out: */ asmlinkage int sys_uname(struct old_utsname * name) { - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) return 0; return -EFAULT; } @@ -220,21 +220,23 @@ asmlinkage int sys_uname(struct 
old_utsname * name) asmlinkage int sys_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) return -EFAULT; - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); error = error ? 
-EFAULT : 0; @@ -260,10 +262,10 @@ asmlinkage int _sys_sysmips(int cmd, long arg1, int arg2, int arg3) return -EFAULT; down_write(&uts_sem); - strncpy(system_utsname.nodename, nodename, len); + strncpy(vx_new_uts(nodename), nodename, len); nodename[__NEW_UTS_LEN] = '\0'; - strlcpy(system_utsname.nodename, nodename, - sizeof(system_utsname.nodename)); + strlcpy(vx_new_uts(nodename), nodename, + sizeof(vx_new_uts(nodename))); up_write(&uts_sem); return 0; } diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 1b10357ab..8aa6e23c4 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ -577,7 +577,8 @@ asmlinkage int irix_brk(unsigned long brk) /* * Check if we have enough memory.. */ - if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, (newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 751f3ac76..aa723ca74 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -224,6 +224,8 @@ config DEBUG_INFO endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 514e8b5f8..d45980cb5 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -110,6 +110,9 @@ long sys_ptrace(long request, pid_t pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; + ret = -EPERM; if (pid == 1) /* no messing around with init! 
*/ goto out_tsk; diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 7159953b2..cd0d1bd2d 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -1211,6 +1211,7 @@ asmlinkage int sys32_sysinfo(struct sysinfo32 *info) do { seq = read_seqbegin(&xtime_lock); + /* requires vx virtualization */ val.uptime = jiffies / HZ; val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 8de9f39a1..9326f2720 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -1245,6 +1245,8 @@ config OCP endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 134b74be7..7a0b48b60 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1374,7 +1374,7 @@ _GLOBAL(sys_call_table) .long ppc_fadvise64_64 .long sys_ni_syscall /* 255 - rtas (used on ppc64) */ .long sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .long sys_ni_syscall /* 257 reserved for vserver */ + .long sys_vserver .long sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .long sys_ni_syscall /* 259 reserved for new sys_mbind */ .long sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ diff --git a/arch/ppc/kernel/ptrace.c b/arch/ppc/kernel/ptrace.c index 2ddfb1a37..f9120785b 100644 --- a/arch/ppc/kernel/ptrace.c +++ b/arch/ppc/kernel/ptrace.c @@ -197,6 +197,8 @@ int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/ppc/kernel/syscalls.c b/arch/ppc/kernel/syscalls.c index 7f2531d12..d33e63520 100644 --- a/arch/ppc/kernel/syscalls.c +++ b/arch/ppc/kernel/syscalls.c @@ -229,7 +229,7 @@ int sys_uname(struct old_utsname __user * name) int err = -EFAULT; 
down_read(&uts_sem); - if (name && !copy_to_user(name, &system_utsname, sizeof (*name))) + if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name))) err = 0; up_read(&uts_sem); return err; @@ -238,6 +238,7 @@ int sys_uname(struct old_utsname __user * name) int sys_olduname(struct oldold_utsname __user * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -245,15 +246,16 @@ int sys_olduname(struct oldold_utsname __user * name) return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 9b2f319d0..584c8a175 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -397,6 +397,8 @@ config DEBUG_INFO endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index a59c0408a..68c167ae7 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ 
-829,7 +829,7 @@ _GLOBAL(sys_call_table32) .llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */ .llong .ppc_rtas /* 255 */ .llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */ - .llong .sys_ni_syscall /* 257 reserved for vserver */ + .llong .sys_vserver .llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */ .llong .sys_ni_syscall /* 259 reserved for new sys_mbind */ .llong .sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */ diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 6afe71a7d..a8fc80f98 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -76,6 +76,8 @@ int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 7e4bc8355..7b5af0b16 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -2456,6 +2456,7 @@ asmlinkage long sys32_time(compat_time_t* tloc) int sys32_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -2463,15 +2464,16 @@ int sys32_olduname(struct oldold_utsname * name) return -EFAULT; down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); error -= 
__put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); + error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN); error = __put_user(0,name->machine+__OLD_UTS_LEN); up_read(&uts_sem); diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index 3ac622c60..d17c903e5 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -126,7 +126,8 @@ static void setup_huge_pte(struct mm_struct *mm, struct page *page, hugepte_t entry; int i; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); entry = mk_hugepte(page, write_access); for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) set_hugepte(ptep+i, entry); @@ -288,7 +289,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, /* This is the first hugepte in a batch */ ptepage = hugepte_page(entry); get_page(ptepage); - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); } set_hugepte(dst_pte, entry); @@ -408,7 +410,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, put_page(page); } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); } int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eab5d1e96..c0b72b438 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -425,6 +425,8 @@ config DEBUG_SPINLOCK_SLEEP endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/s390/kernel/compat_exec.c 
b/arch/s390/kernel/compat_exec.c index 162deb2bb..3f43a36e2 100644 --- a/arch/s390/kernel/compat_exec.c +++ b/arch/s390/kernel/compat_exec.c @@ -39,7 +39,7 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -56,7 +56,10 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack) if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + grow = (STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -75,7 +78,9 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack) INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + vx_vmpages_sub(mm, mm->total_vm - + ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT)); } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1176b4f5b..fa7875fba 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -680,9 +680,11 @@ sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = do_ptrace(child, request, addr, data); - +out_tsk: put_task_struct(child); out: unlock_kernel(); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index da7ba3e3c..beae3b189 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) 
SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys_vserver) SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8efb8e02a..e774424d6 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -792,6 +792,8 @@ config FRAME_POINTER endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c index 602f6c570..732afaeaf 100644 --- a/arch/sh/kernel/ptrace.c +++ b/arch/sh/kernel/ptrace.c @@ -108,6 +108,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 5309f67f6..9002fb0e9 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -62,7 +62,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long i; pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) entry = pte_mkwrite(pte_mkdirty(mk_pte(page, @@ -115,7 +116,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, pte_val(entry) += PAGE_SIZE; dst_pte++; } - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -206,7 +208,8 
@@ void unmap_hugepage_range(struct vm_area_struct *vma, pte++; } } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 79d52a1dd..4b3dd85b5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -450,6 +450,8 @@ config DEBUG_BUGVERBOSE endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c index b1b6cdac7..d13bf734a 100644 --- a/arch/sparc/kernel/ptrace.c +++ b/arch/sparc/kernel/ptrace.c @@ -319,6 +319,10 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c index 925efe992..f0720b82a 100644 --- a/arch/sparc/kernel/sys_sparc.c +++ b/arch/sparc/kernel/sys_sparc.c @@ -467,13 +467,13 @@ asmlinkage int sys_getdomainname(char __user *name, int len) down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index fd452a6d3..56c8c878d 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -72,7 +72,7 @@ sys_call_table: /*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl /*255*/ .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, 
sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall /*280*/ .long sys_ni_syscall, sys_ni_syscall, sys_ni_syscall diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index ff9718e10..d79d5f2b6 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -707,6 +707,8 @@ config FRAME_POINTER endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 4ba5d4801..609b6ed06 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -239,7 +239,8 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index d935eb602..dc477139e 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -168,6 +168,10 @@ asmlinkage void do_ptrace(struct pt_regs *regs) pt_error_return(regs, ESRCH); goto out; } + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) { + pt_error_return(regs, ESRCH); + goto out_tsk; + } if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH) || (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) { diff --git 
a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 4c75f6137..a6b29c92f 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c @@ -459,13 +459,13 @@ asmlinkage int sys_getdomainname(char __user *name, int len) down_read(&uts_sem); - nlen = strlen(system_utsname.domainname) + 1; + nlen = strlen(vx_new_uts(domainname)) + 1; if (nlen < len) len = nlen; if (len > __NEW_UTS_LEN) goto done; - if (copy_to_user(name, system_utsname.domainname, len)) + if (copy_to_user(name, vx_new_uts(domainname), len)) goto done; err = 0; done: diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 3c7ef2da1..6148e76e1 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -72,7 +72,7 @@ sys_call_table32: /*250*/ .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, sys32_nfsservctl .word sys_ni_syscall, compat_clock_settime, compat_clock_gettime, compat_clock_getres, compat_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_timer_settime, compat_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys32_timer_create, sys_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink .word sys32_mq_timedsend, sys32_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, sys_ni_syscall /*280*/ .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall @@ -136,7 +136,7 @@ sys_call_table: /*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl .word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, 
sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall /*280*/ .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 1df587bdd..cb6968c41 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -59,7 +59,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long i; pte_t entry; - mm->rss += (HPAGE_SIZE / PAGE_SIZE); + // mm->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE); if (write_access) entry = pte_mkwrite(pte_mkdirty(mk_pte(page, @@ -112,7 +113,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, pte_val(entry) += PAGE_SIZE; dst_pte++; } - dst->rss += (HPAGE_SIZE / PAGE_SIZE); + // dst->rss += (HPAGE_SIZE / PAGE_SIZE); + vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } return 0; @@ -203,7 +205,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, pte++; } } - mm->rss -= (end - start) >> PAGE_SHIFT; + // mm->rss -= (end - start) >> PAGE_SHIFT; + vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT); flush_tlb_range(vma, start, end); } diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 47fbaf3f4..7ef333e57 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -188,6 +188,8 @@ source "net/Kconfig" source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index c68c937f6..90f8d2861 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -58,6 +58,8 @@ int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if 
(!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c index 2af5fc2e5..f3a72d2d8 100644 --- a/arch/um/kernel/syscall_kern.c +++ b/arch/um/kernel/syscall_kern.c @@ -264,7 +264,7 @@ int sys_uname(struct old_utsname * name) if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); return err?-EFAULT:0; } @@ -272,6 +272,7 @@ int sys_uname(struct old_utsname * name) int sys_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -280,19 +281,20 @@ int sys_olduname(struct oldold_utsname * name) down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname, + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname, __OLD_UTS_LEN); error |= __put_user(0,name->sysname+__OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename,&system_utsname.nodename, + error |= __copy_to_user(&name->nodename,ptr->nodename, __OLD_UTS_LEN); error |= __put_user(0,name->nodename+__OLD_UTS_LEN); - error |= __copy_to_user(&name->release,&system_utsname.release, + error |= __copy_to_user(&name->release,ptr->release, __OLD_UTS_LEN); error |= __put_user(0,name->release+__OLD_UTS_LEN); - error |= __copy_to_user(&name->version,&system_utsname.version, + error |= __copy_to_user(&name->version,ptr->version, __OLD_UTS_LEN); error |= __put_user(0,name->version+__OLD_UTS_LEN); - error |= __copy_to_user(&name->machine,&system_utsname.machine, + error |= __copy_to_user(&name->machine,ptr->machine, __OLD_UTS_LEN); error |= __put_user(0,name->machine+__OLD_UTS_LEN); diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 8665652e2..994e4f859 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -334,6 +334,8 @@ config NO_KERNEL_MSG 
endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c index fc06058f2..7d2554774 100644 --- a/arch/v850/kernel/ptrace.c +++ b/arch/v850/kernel/ptrace.c @@ -138,6 +138,8 @@ int sys_ptrace(long request, long pid, long addr, long data) read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; rval = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index cfe5b0f8d..e76374914 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -490,6 +490,8 @@ config IOMMU_LEAK endmenu +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 040adf699..b7b698c68 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -308,7 +308,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = TASK_UNMAPPED_BASE; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 92817f18e..1b70ef920 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -327,7 +327,7 @@ static void elf32_init(struct pt_regs *regs) int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) { - unsigned long stack_base; + unsigned long stack_base, grow; struct vm_area_struct *mpnt; struct mm_struct *mm = current->mm; int i; @@ -344,7 +344,10 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) 
bprm->p))>>PAGE_SHIFT)) { + grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p)) + >> PAGE_SHIFT; + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(mm, grow)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -368,7 +371,9 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + vx_vmpages_sub(mm, mm->total_vm - + ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT)); } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 47c23419f..c60544fe7 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -1258,6 +1258,7 @@ asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, asmlinkage long sys32_olduname(struct oldold_utsname * name) { int error; + struct new_utsname *ptr; if (!name) return -EFAULT; @@ -1266,13 +1267,14 @@ asmlinkage long sys32_olduname(struct oldold_utsname * name) down_read(&uts_sem); - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); + ptr = vx_new_utsname(); + error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN); __put_user(0,name->sysname+__OLD_UTS_LEN); - __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); + __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN); __put_user(0,name->nodename+__OLD_UTS_LEN); - __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); + __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN); __put_user(0,name->release+__OLD_UTS_LEN); - __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); + __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN); __put_user(0,name->version+__OLD_UTS_LEN); { char *arch = "x86_64"; @@ -1295,7 +1297,7 @@ long 
sys32_uname(struct old_utsname * name) if (!name) return -EFAULT; down_read(&uts_sem); - err=copy_to_user(name, &system_utsname, sizeof (*name)); + err=copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index a97bee6f2..87ab1e27f 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -208,6 +208,8 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data read_unlock(&tasklist_lock); if (!child) goto out; + if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) + goto out_tsk; ret = -EPERM; if (pid == 1) /* you may not mess with init */ diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index e518c1f29..752d84075 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -146,7 +146,7 @@ asmlinkage long sys_uname(struct new_utsname * name) { int err; down_read(&uts_sem); - err = copy_to_user(name, &system_utsname, sizeof (*name)); + err = copy_to_user(name, vx_new_utsname(), sizeof (*name)); up_read(&uts_sem); if (personality(current->personality) == PER_LINUX32) err |= copy_to_user(&name->machine, "i686", 5); diff --git a/fs/attr.c b/fs/attr.c index d63350cfc..5bb63a855 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -62,6 +62,24 @@ error: EXPORT_SYMBOL(inode_change_ok); +int inode_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = inode->i_flags; + newflags = oldflags & ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= S_IMMUTABLE; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= S_IUNLINK; + if (flags & ATTR_FLAG_BARRIER) + newflags |= S_BARRIER; + + if (oldflags ^ newflags) + inode->i_flags = newflags; + return 0; +} + int inode_setattr(struct inode * inode, struct iattr * attr) { 
unsigned int ia_valid = attr->ia_valid; @@ -98,6 +116,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr) mode &= ~S_ISGID; inode->i_mode = mode; } + if (ia_valid & ATTR_ATTR_FLAG) + inode_setattr_flags(inode, attr->ia_attr_flags); mark_inode_dirty(inode); out: return error; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 7827c1255..5552d2795 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -309,7 +309,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) (current->mm->start_brk = N_BSSADDR(ex)); current->mm->free_area_cache = TASK_UNMAPPED_BASE; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->mmap = NULL; compute_creds(bprm); current->flags &= ~PF_FORKNOEXEC; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a67e6f586..fee8d14d9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -695,7 +695,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); current->mm->free_area_cache = TASK_UNMAPPED_BASE; retval = setup_arg_pages(bprm, executable_stack); if (retval < 0) { diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index ddbc3904c..c8113cc23 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -651,7 +651,8 @@ static int load_flat_file(struct linux_binprm * bprm, current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); } if (flags & FLAT_FLAG_KTRACE) diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index cabf3ccc0..ac64fa572 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -259,7 +259,8 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) create_som_tables(bprm); current->mm->start_stack = bprm->p; - current->mm->rss = 0; + // current->mm->rss = 0; + vx_rsspages_sub(current->mm, current->mm->rss); #if 0 printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1d49ef4af..17b686a82 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "xattr.h" #define DEVPTS_SUPER_MAGIC 0x1cd1 @@ -134,11 +135,21 @@ static struct dentry *get_node(int num) return lookup_one_len(s, root, sprintf(s, "%d", num)); } +static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + int ret = -EACCES; + + if (vx_check(inode->i_xid, VX_IDENT)) + ret = vfs_permission(inode, mask); + return ret; +} + static struct inode_operations devpts_file_inode_operations = { .setxattr = devpts_setxattr, .getxattr = devpts_getxattr, .listxattr = devpts_listxattr, .removexattr = devpts_removexattr, + .permission = 
devpts_permission, }; int devpts_pty_new(struct tty_struct *tty) @@ -162,6 +173,7 @@ int devpts_pty_new(struct tty_struct *tty) inode->i_gid = config.setgid ? config.gid : current->fsgid; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; init_special_inode(inode, S_IFCHR|config.mode, device); + inode->i_xid = vx_current_xid(); inode->i_op = &devpts_file_inode_operations; inode->u.generic_ip = tty; diff --git a/fs/exec.c b/fs/exec.c index f73d2c4cc..c10a7d9c6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -327,7 +327,8 @@ void put_dirty_page(struct task_struct *tsk, struct page *page, set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); - tsk->mm->rss++; + // tsk->mm->rss++; + vx_rsspages_inc(tsk->mm); spin_unlock(&tsk->mm->page_table_lock); /* no need for flush_tlb */ @@ -409,7 +410,8 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) if (!mpnt) return -ENOMEM; - if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) { + if (security_vm_enough_memory(arg_size >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, arg_size >> PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -441,7 +443,9 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + vx_vmpages_sub(mm, mm->total_vm - + ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT)); } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index cbd6ae899..a2ea40cab 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -579,7 +579,8 @@ got: inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; + ei->i_flags 
= EXT2_I(dir)->i_flags & + ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); /* dirsync is only applied to directories */ diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e7cc85d8b..9b8aeac5d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ext2.h" #include "acl.h" @@ -65,6 +66,8 @@ void ext2_put_inode(struct inode *inode) ext2_discard_prealloc(inode); } +static void ext2_truncate_nocheck (struct inode * inode); + /* * Called at the last iput() if i_nlink is zero. */ @@ -78,7 +81,7 @@ void ext2_delete_inode (struct inode * inode) inode->i_size = 0; if (inode->i_blocks) - ext2_truncate (inode); + ext2_truncate_nocheck(inode); ext2_free_inode (inode); return; @@ -877,7 +880,7 @@ static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth) ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void ext2_truncate_nocheck(struct inode * inode) { u32 *i_data = EXT2_I(inode)->i_data; int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); @@ -894,8 +897,6 @@ void ext2_truncate (struct inode * inode) return; if (ext2_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext2_discard_prealloc(inode); @@ -1017,6 +1018,13 @@ Egdp: return ERR_PTR(-EIO); } +void ext2_truncate (struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext2_truncate_nocheck(inode); +} + void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; @@ -1028,6 +1036,10 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_APPEND; if (flags & EXT2_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT2_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT2_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT2_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT2_DIRSYNC_FL) @@ 
-1040,6 +1052,8 @@ void ext2_read_inode (struct inode * inode) ino_t ino = inode->i_ino; struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + uid_t uid; + gid_t gid; int n; #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -1050,12 +1064,17 @@ void ext2_read_inode (struct inode * inode) goto bad_inode; inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(uid, gid); + inode->i_gid = INOXID_GID(uid, gid); + if (inode->i_sb->s_flags & MS_TAGXID) + inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -1148,8 +1167,8 @@ static int ext2_update_inode(struct inode * inode, int do_sync) struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1184,6 +1203,9 @@ static int ext2_update_inode(struct inode * inode, int do_sync) raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_GID32 + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = 
cpu_to_le32(inode->i_size); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -1261,6 +1283,27 @@ int ext2_sync_inode(struct inode *inode) return sync_inode(inode, &wbc); } +int ext2_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = EXT2_I(inode)->i_flags; + newflags = oldflags & + ~(EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | EXT2_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= EXT2_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= EXT2_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= EXT2_BARRIER_FL; + + if (oldflags ^ newflags) { + EXT2_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + } + return 0; +} + int ext2_setattr(struct dentry *dentry, struct iattr *iattr) { struct inode *inode = dentry->d_inode; @@ -1275,6 +1318,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) if (error) return error; } + if (iattr->ia_valid & ATTR_ATTR_FLAG) + ext2_setattr_flags(inode, iattr->ia_attr_flags); + inode_setattr(inode, iattr); if (iattr->ia_valid & ATTR_MODE) error = ext2_acl_chmod(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a06830a5..3e54fbeac 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -270,7 +270,7 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_tagxid, Opt_ignore, Opt_err, }; @@ -299,6 +299,7 @@ static match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_tagxid, "tagxid"}, {Opt_ignore, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, @@ -362,6 +363,11 @@ static int parse_options (char * options, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef 
CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAG_XID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT2_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -646,6 +652,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (!parse_options ((char *) data, sbi)) goto failed_mount; + if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAG_XID) + sb->s_flags |= MS_TAGXID; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index ac238b2fa..624f0bb8b 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -567,7 +567,8 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT3_I(dir)->i_flags & + ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL); if (S_ISLNK(mode)) ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); /* dirsync only applies to directories */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index b76259ede..c1508b263 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "xattr.h" #include "acl.h" @@ -189,6 +190,8 @@ void ext3_put_inode(struct inode *inode) ext3_discard_prealloc(inode); } +static void ext3_truncate_nocheck (struct inode *inode); + /* * Called at the last iput() if i_nlink is zero. */ @@ -214,7 +217,7 @@ void ext3_delete_inode (struct inode * inode) handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext3_truncate_nocheck(inode); /* * Kill off the orphan record which ext3_truncate created. * AKPM: I think this can be inside the above `if'. @@ -2112,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * ext3_truncate() run will find them and release them. 
*/ -void ext3_truncate(struct inode * inode) +void ext3_truncate_nocheck(struct inode * inode) { handle_t *handle; struct ext3_inode_info *ei = EXT3_I(inode); @@ -2133,8 +2136,6 @@ void ext3_truncate(struct inode * inode) return; if (ext3_inode_is_fast_symlink(inode)) return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; ext3_discard_prealloc(inode); @@ -2441,6 +2442,13 @@ has_buffer: return 0; } +void ext3_truncate(struct inode * inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + ext3_truncate_nocheck(inode); +} + void ext3_set_inode_flags(struct inode *inode) { unsigned int flags = EXT3_I(inode)->i_flags; @@ -2452,6 +2460,10 @@ void ext3_set_inode_flags(struct inode *inode) inode->i_flags |= S_APPEND; if (flags & EXT3_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_IUNLINK_FL) + inode->i_flags |= S_IUNLINK; + if (flags & EXT3_BARRIER_FL) + inode->i_flags |= S_BARRIER; if (flags & EXT3_NOATIME_FL) inode->i_flags |= S_NOATIME; if (flags & EXT3_DIRSYNC_FL) @@ -2465,6 +2477,8 @@ void ext3_read_inode(struct inode * inode) struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh; int block; + uid_t uid; + gid_t gid; #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; @@ -2475,12 +2489,17 @@ void ext3_read_inode(struct inode * inode) bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if(!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } + inode->i_uid = INOXID_UID(uid, gid); + inode->i_gid = INOXID_GID(uid, gid); + if 
(inode->i_sb->s_flags & MS_TAGXID) + inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); @@ -2588,6 +2607,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; + uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2597,29 +2618,32 @@ static int ext3_do_update_inode(handle_t *handle, raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid)); /* * Fix up interoperability with old kernels. 
Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ if(!ei->i_dtime) { raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); + cpu_to_le16(high_16_bits(uid)); raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(inode->i_gid)); + cpu_to_le16(high_16_bits(gid)); } else { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } } else { raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(inode->i_uid)); + cpu_to_le16(fs_high2lowuid(uid)); raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(inode->i_gid)); + cpu_to_le16(fs_high2lowgid(gid)); raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; } +#ifdef CONFIG_INOXID_GID32 + raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid); +#endif raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); @@ -2741,6 +2765,44 @@ void ext3_write_inode(struct inode *inode, int wait) ext3_force_commit(inode->i_sb); } +int ext3_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + int err = 0; + + oldflags = EXT3_I(inode)->i_flags; + newflags = oldflags & + ~(EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= EXT3_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= EXT3_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= EXT3_BARRIER_FL; + + if (oldflags ^ newflags) { + handle_t *handle; + struct ext3_iloc iloc; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + EXT3_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + flags_err: + ext3_journal_stop(handle); + } + return err; +} + /* * ext3_setattr() * @@ -2812,6 +2874,12 @@ int ext3_setattr(struct 
dentry *dentry, struct iattr *attr) ext3_journal_stop(handle); } + if (ia_valid & ATTR_ATTR_FLAG) { + rc = ext3_setattr_flags(inode, attr->ia_attr_flags); + if (!error) + error = rc; + } + rc = inode_setattr(inode, attr); /* If inode_setattr's call to ext3_truncate failed to get a diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 3681474e5..82c325f55 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -150,6 +151,38 @@ flags_err: remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); return ret; } +#endif +#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE) + case EXT3_IOC_SETXID: { + handle_t *handle; + struct ext3_iloc iloc; + int xid; + int err; + + /* fixme: if stealth, return -ENOTTY */ + if (!capable(CAP_CONTEXT)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + return -ENOSYS; + if (get_user(xid, (int *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; + + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + ext3_journal_stop(handle); + return err; + } #endif default: return -ENOTTY; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 4bbb5a078..09d603256 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -583,7 +583,7 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, - Opt_ignore, Opt_err, + Opt_tagxid, Opt_ignore, Opt_err, }; static match_table_t tokens = { @@ -624,6 +624,7 @@ static match_table_t tokens = { {Opt_grpjquota, "grpjquota=%s"}, {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_tagxid, "tagxid"}, {Opt_ignore, "grpquota"}, {Opt_ignore, 
"noquota"}, {Opt_ignore, "quota"}, @@ -717,6 +718,11 @@ static int parse_options (char * options, struct super_block *sb, case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); break; +#ifndef CONFIG_INOXID_NONE + case Opt_tagxid: + set_opt (sbi->s_mount_opt, TAG_XID); + break; +#endif case Opt_check: #ifdef CONFIG_EXT3_CHECK set_opt (sbi->s_mount_opt, CHECK); @@ -1287,6 +1293,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (!parse_options ((char *) data, sb, &journal_inum, 0)) goto failed_mount; + if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAG_XID) + sb->s_flags |= MS_TAGXID; sb->s_flags |= MS_ONE_SECOND; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); diff --git a/fs/fcntl.c b/fs/fcntl.c index abad0aa00..c8c39b75c 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -107,6 +107,8 @@ repeat: error = -EMFILE; if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + if (!vx_files_avail(1)) + goto out; error = expand_files(files, newfd); if (error < 0) @@ -139,6 +141,7 @@ static int dupfd(struct file *file, unsigned int start) FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -186,6 +189,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) FD_SET(newfd, files->open_fds); FD_CLR(newfd, files->close_on_exec); spin_unlock(&files->file_lock); + vx_openfd_inc(newfd); if (tofree) filp_close(tofree, files); diff --git a/fs/file_table.c b/fs/file_table.c index 5d56ec5db..1894e3b34 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -87,6 +87,7 @@ static int old_max; f->f_owner.lock = RW_LOCK_UNLOCKED; /* f->f_version: 0 */ INIT_LIST_HEAD(&f->f_list); + vx_files_inc(f); return f; } } @@ -184,6 +185,7 @@ void fastcall __fput(struct file *file) fops_put(file->f_op); if (file->f_mode & FMODE_WRITE) put_write_access(inode); + 
vx_files_dec(file); file_kill(file); file->f_dentry = NULL; file->f_vfsmnt = NULL; diff --git a/fs/inode.c b/fs/inode.c index 282d86aed..1ad2a7133 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -114,6 +114,11 @@ static struct inode *alloc_inode(struct super_block *sb) struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; + if (sb->s_flags & MS_TAGXID) + inode->i_xid = current->xid; + else + inode->i_xid = 0; /* maybe xid -1 would be better? */ + // inode->i_dqh = dqhget(sb->s_dqh); inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -133,6 +138,7 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; + // inode->i_xid = 0; /* maybe not too wise ... */ inode->i_security = NULL; inode->dirtied_when = 0; if (security_inode_alloc(inode)) { diff --git a/fs/ioctl.c b/fs/ioctl.c index 9737a0fa8..f84ba41b2 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -9,10 +9,18 @@ #include #include #include +#include +#include +#include #include #include +#ifdef CONFIG_VSERVER_LEGACY +extern int vx_proc_ioctl(struct inode *, struct file *, + unsigned int, unsigned long); +#endif + static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg) { int error; @@ -119,6 +127,48 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) else error = -ENOTTY; break; +#ifdef CONFIG_VSERVER_LEGACY +#ifndef CONFIG_INOXID_NONE + case FIOC_GETXID: { + struct inode *inode = filp->f_dentry->d_inode; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (capable(CAP_CONTEXT)) + error = put_user(inode->i_xid, (int *) arg); + break; + } + case FIOC_SETXID: { + struct inode *inode = filp->f_dentry->d_inode; + int xid; + + /* fixme: if stealth, return -ENOTTY */ + error = -EPERM; + if (!capable(CAP_CONTEXT)) + break; + error = -EROFS; + if (IS_RDONLY(inode)) + break; + error = -ENOSYS; + if (!(inode->i_sb->s_flags & MS_TAGXID)) + 
break; + error = -EFAULT; + if (get_user(xid, (int *) arg)) + break; + error = 0; + inode->i_xid = (xid & 0xFFFF); + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + break; + } +#endif + case FIOC_GETXFLG: + case FIOC_SETXFLG: + error = -ENOTTY; + if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC) + error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + break; +#endif default: error = -ENOTTY; if (S_ISREG(filp->f_dentry->d_inode->i_mode)) diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 865334f6c..848fbc8ba 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -3098,14 +3099,21 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; jfs_ip->fileset = le32_to_cpu(dip->di_fileset); jfs_ip->mode2 = le32_to_cpu(dip->di_mode); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + + uid = le32_to_cpu(dip->di_uid); + gid = le32_to_cpu(dip->di_gid); + ip->i_uid = INOXID_UID(uid, gid); + ip->i_gid = INOXID_GID(uid, gid); + ip->i_xid = INOXID_XID(uid, gid, 0); + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3156,6 +3164,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + uid_t uid; + gid_t gid; dip->di_fileset = cpu_to_le32(jfs_ip->fileset); dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); @@ -3164,8 +3174,11 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) dip->di_size = 
cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + + uid = XIDINO_UID(ip->i_uid, ip->i_xid); + gid = XIDINO_GID(ip->i_gid, ip->i_xid); + dip->di_uid = cpu_to_le32(uid); + dip->di_gid = cpu_to_le32(gid); /* * mode2 is only needed for storing the higher order bits. * Trust i_mode for the lower order ones diff --git a/fs/namei.c b/fs/namei.c index d2cab643c..45b138d0a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -163,6 +163,9 @@ int vfs_permission(struct inode * inode, int mask) { umode_t mode = inode->i_mode; + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH)) + return -EACCES; + if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. @@ -208,6 +211,15 @@ int vfs_permission(struct inode * inode, int mask) return -EACCES; } +static inline int xid_permission(struct inode *inode) +{ + if (inode->i_xid == 0) + return 0; + if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + return 0; + return -EACCES; +} + int permission(struct inode * inode,int mask, struct nameidata *nd) { int retval; @@ -216,6 +228,8 @@ int permission(struct inode * inode,int mask, struct nameidata *nd) /* Ordinary permission routines do not understand MAY_APPEND. 
*/ submask = mask & ~MAY_APPEND; + if ((retval = xid_permission(inode))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -1039,7 +1053,7 @@ static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) if (IS_APPEND(dir)) return -EPERM; if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| - IS_IMMUTABLE(victim->d_inode)) + IS_IXORUNLINK(victim->d_inode)) return -EPERM; if (isdir) { if (!S_ISDIR(victim->d_inode->i_mode)) @@ -1833,7 +1847,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de /* * A link to an append-only or immutable file cannot be created. */ - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (IS_APPEND(inode) || IS_IXORUNLINK(inode)) return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; diff --git a/fs/namespace.c b/fs/namespace.c index fb0a3ab58..dfeac2193 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -232,6 +232,9 @@ static int show_vfsmnt(struct seq_file *m, void *v) }; struct proc_fs_info *fs_infop; + if (vx_flags(VXF_HIDE_MOUNT, 0)) + return 0; + mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); seq_putc(m, ' '); seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); @@ -273,18 +276,10 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -void umount_tree(struct vfsmount *mnt) +static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill) { - struct vfsmount *p; - LIST_HEAD(kill); - - for (p = mnt; p; p = next_mnt(p, mnt)) { - list_del(&p->mnt_list); - list_add(&p->mnt_list, &kill); - } - - while (!list_empty(&kill)) { - mnt = list_entry(kill.next, struct vfsmount, mnt_list); + while (!list_empty(kill)) { + mnt = list_entry(kill->next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); @@ -299,6 +294,32 @@ void umount_tree(struct vfsmount *mnt) } } +void umount_tree(struct vfsmount *mnt) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_tree(mnt, &kill); +} + +void umount_unused(struct vfsmount *mnt, struct fs_struct *fs) +{ + struct vfsmount *p; + LIST_HEAD(kill); + + for (p = mnt; p; p = next_mnt(p, mnt)) { + if (p == fs->rootmnt || p == fs->pwdmnt) + continue; + list_del(&p->mnt_list); + list_add(&p->mnt_list, &kill); + } + __umount_tree(mnt, &kill); +} + static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; @@ -396,7 +417,7 @@ asmlinkage long sys_umount(char __user * name, int flags) goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) goto dput_and_out; retval = do_umount(nd.mnt, flags); @@ -419,6 +440,8 @@ static int mount_is_safe(struct nameidata *nd) { if (capable(CAP_SYS_ADMIN)) return 0; + if (vx_ccaps(VXC_SECURE_MOUNT)) + return 0; return -EPERM; #ifdef notyet if (S_ISLNK(nd->dentry->d_inode->i_mode)) @@ -779,6 +802,9 @@ long do_mount(char * dev_name, char * dir_name, char *type_page, mnt_flags |= 
MNT_NOEXEC; flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); + if (vx_ccaps(VXC_SECURE_MOUNT)) + mnt_flags |= MNT_NODEV; + /* ... and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); if (retval) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 864615916..a561819b6 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -306,7 +306,7 @@ static int __init root_nfs_name(char *name) /* Override them by options set on kernel command-line */ root_nfs_parse(name, buf); - cp = system_utsname.nodename; + cp = vx_new_uts(nodename); if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); return -1; diff --git a/fs/open.c b/fs/open.c index 66a77f563..1ded0f798 100644 --- a/fs/open.c +++ b/fs/open.c @@ -22,6 +22,7 @@ #include #include #include +#include int vfs_statfs(struct super_block *sb, struct kstatfs *buf) { @@ -605,6 +606,9 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) dentry = file->f_dentry; inode = dentry->d_inode; + err = -EPERM; + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) + goto out_putf; err = -EROFS; if (IS_RDONLY(inode)) goto out_putf; @@ -637,6 +641,10 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode) goto out; inode = nd.dentry->d_inode; + error = -EPERM; + if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) + goto dput_and_out; + error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; @@ -676,14 +684,15 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; + newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; - newattrs.ia_uid = user; + newattrs.ia_uid = vx_map_uid(user); } if (group != (gid_t) -1) { newattrs.ia_valid |= ATTR_GID; - newattrs.ia_gid = group; + newattrs.ia_gid = vx_map_gid(group); } if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; @@ -878,6 +887,7 
@@ repeat: FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); files->next_fd = fd + 1; + vx_openfd_inc(fd); #if 1 /* Sanity check */ if (files->fd[fd] != NULL) { @@ -1032,6 +1042,7 @@ asmlinkage long sys_close(unsigned int fd) FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); + vx_openfd_dec(fd); return filp_close(filp, files); out_unlock: diff --git a/fs/proc/array.c b/fs/proc/array.c index 6bdd15bfb..4a2cce73e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,8 @@ static const char *task_state_array[] = { "D (disk sleep)", /* 2 */ "T (stopped)", /* 4 */ "Z (zombie)", /* 8 */ - "X (dead)" /* 16 */ + "X (dead)", /* 16 */ + "H (on hold)" /* 32 */ }; static inline const char * get_task_state(struct task_struct *tsk) @@ -137,7 +139,8 @@ static inline const char * get_task_state(struct task_struct *tsk) TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE | TASK_ZOMBIE | - TASK_STOPPED); + TASK_STOPPED | + TASK_ONHOLD); const char **p = &task_state_array[0]; while (state) { @@ -150,8 +153,10 @@ static inline const char * get_task_state(struct task_struct *tsk) static inline char * task_state(struct task_struct *p, char *buffer) { int g; + pid_t ppid; read_lock(&tasklist_lock); + ppid = vx_map_tgid(current->vx_info, p->real_parent->pid); buffer += sprintf(buffer, "State:\t%s\n" "SleepAVG:\t%lu%%\n" @@ -164,7 +169,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), p->tgid, - p->pid, p->pid ? p->real_parent->pid : 0, + p->pid, p->pid ? ppid : 0, p->pid && p->ptrace ? 
p->parent->pid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -275,6 +280,10 @@ extern char *task_mem(struct mm_struct *, char *); int proc_pid_status(struct task_struct *task, char * buffer) { char * orig = buffer; +#ifdef CONFIG_VSERVER_LEGACY + struct vx_info *vxi; + struct nx_info *nxi; +#endif struct mm_struct *mm = get_task_mm(task); buffer = task_name(task, buffer); @@ -286,6 +295,41 @@ int proc_pid_status(struct task_struct *task, char * buffer) } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + +#ifdef CONFIG_VSERVER_LEGACY + buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task)); + vxi = task_get_vx_info(task); + if (vxi) { + buffer += sprintf (buffer,"ctxflags: %08llx\n" + ,vxi->vx_flags); + buffer += sprintf (buffer,"initpid: %d\n" + ,vxi->vx_initpid); + } else { + buffer += sprintf (buffer,"ctxflags: none\n"); + buffer += sprintf (buffer,"initpid: none\n"); + } + put_vx_info(vxi); + nxi = task_get_nx_info(task); + if (nxi) { + int i; + + buffer += sprintf (buffer,"ipv4root:"); + for (i=0; inbipv4; i++){ + buffer += sprintf (buffer," %08x/%08x" + ,nxi->ipv4[i] + ,nxi->mask[i]); + } + *buffer++ = '\n'; + buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" + ,nxi->v4_bcast); + buffer += sprintf (buffer,"ipv4root_refcnt: %d\n" + ,atomic_read(&nxi->nx_refcount)); + } else { + buffer += sprintf (buffer,"ipv4root: 0\n"); + buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); + } + put_nx_info(nxi); +#endif #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif @@ -297,6 +341,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer) { unsigned long vsize, eip, esp, wchan; long priority, nice; + unsigned long long bias_jiffies; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; @@ -308,7 +353,16 @@ int proc_pid_stat(struct task_struct *task, char * buffer) state = *get_task_state(task); vsize = eip = esp = 0; + bias_jiffies = INITIAL_JIFFIES; + 
task_lock(task); + if (__vx_task_flags(task, VXF_VIRT_UPTIME, 0)) { + bias_jiffies = task->vx_info->cvirt.bias_jiffies; + /* hmm, do we need that? */ + if (bias_jiffies > task->start_time) + bias_jiffies = task->start_time; + } + mm = task->mm; if(mm) mm = mmgrab(mm); @@ -352,7 +406,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer) read_unlock(&tasklist_lock); /* Temporary variable needed for gcc-2.96 */ - start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES); + start_time = jiffies_64_to_clock_t(task->start_time - bias_jiffies); res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ diff --git a/fs/proc/base.c b/fs/proc/base.c index 7ff742cec..353407b4c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * For hysterical raisins we keep the same inumbers as in the old procfs. @@ -67,6 +68,8 @@ enum pid_directory_inos { PROC_TGID_ATTR_EXEC, PROC_TGID_ATTR_FSCREATE, #endif + PROC_TGID_VX_INFO, + PROC_TGID_IP_INFO, PROC_TGID_FD_DIR, PROC_TID_INO, PROC_TID_STATUS, @@ -90,6 +93,8 @@ enum pid_directory_inos { PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, #endif + PROC_TID_VX_INFO, + PROC_TID_IP_INFO, PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -123,6 +128,8 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_KALLSYMS E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif + E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; static struct pid_entry tid_base_stuff[] = { @@ -145,6 +152,8 @@ static struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_KALLSYMS E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif + E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO), + E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO), {0,0,NULL,0} }; @@ -954,6 +963,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 
task->euid; inode->i_gid = task->egid; } + inode->i_xid = vx_task_xid(task); security_task_to_inode(task, inode); out: @@ -979,6 +989,11 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = proc_task(inode); + + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out_drop; + /* discard wrong fakeinit */ + if (pid_alive(task)) { if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { inode->i_uid = task->euid; @@ -990,6 +1005,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) security_task_to_inode(task, inode); return 1; } +out_drop: d_drop(dentry); return 0; } @@ -1375,6 +1391,16 @@ static struct dentry *proc_pident_lookup(struct inode *dir, ei->op.proc_read = proc_pid_wchan; break; #endif + case PROC_TID_VX_INFO: + case PROC_TGID_VX_INFO: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_vx_info; + break; + case PROC_TID_IP_INFO: + case PROC_TGID_IP_INFO: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_nx_info; + break; default: printk("procfs: impossible type (%d)",p->type); iput(inode); @@ -1555,7 +1581,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct d_add(dentry, inode); return NULL; } - tgid = name_to_int(dentry); + tgid = vx_rmap_tgid(current->vx_info, name_to_int(dentry)); if (tgid == ~0U) goto out; @@ -1567,8 +1593,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (!task) goto out; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); - + inode = NULL; + if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); if (!inode) { put_task_struct(task); @@ -1610,10 +1637,12 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry struct inode *inode; unsigned tid; - tid = 
name_to_int(dentry); + tid = vx_rmap_tgid(current->vx_info, name_to_int(dentry)); if (tid == ~0U) goto out; +/* handle fakeinit */ + read_lock(&tasklist_lock); task = find_task_by_pid(tid); if (task) @@ -1624,8 +1653,9 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (leader->tgid != task->tgid) goto out_drop_task; - inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); - + inode = NULL; + if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); if (!inode) goto out_drop_task; @@ -1676,11 +1706,14 @@ static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) for ( ; p != &init_task; p = next_task(p)) { int tgid = p->pid; + if (!pid_alive(p)) continue; + if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT)) + continue; if (--index >= 0) continue; - tgids[nr_tgids] = tgid; + tgids[nr_tgids] = vx_map_tgid(current->vx_info, tgid); nr_tgids++; if (nr_tgids >= PROC_MAXPIDS) break; @@ -1710,9 +1743,11 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir) if (pid_alive(task)) do { int tid = task->pid; + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + continue; if (--index >= 0) continue; - tids[nr_tids] = tid; + tids[nr_tids] = vx_map_tgid(current->vx_info, tid); nr_tids++; if (nr_tids >= PROC_MAXPIDS) break; @@ -1766,11 +1801,14 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi unsigned int nr_tids, i; struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct task_struct *task = proc_task(inode); int retval = -ENOENT; ino_t ino; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ - if (!pid_alive(proc_task(inode))) + if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT)) + goto out; + if (!pid_alive(task)) goto out; retval = 0; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index d2c88ebb1..21e06c9aa 100644 --- a/fs/proc/generic.c +++ 
b/fs/proc/generic.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -348,8 +350,15 @@ static int proc_delete_dentry(struct dentry * dentry) return 1; } +static int proc_revalidate_dentry(struct dentry *de, struct nameidata *nd) +{ + /* maybe add a check if it's really necessary? */ + return 0; +} + static struct dentry_operations proc_dentry_operations = { + .d_revalidate = proc_revalidate_dentry, .d_delete = proc_delete_dentry, }; @@ -369,6 +378,8 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) continue; + if (!vx_hide_check(0, de->vx_flags)) + continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino = de->low_ino; @@ -445,9 +456,12 @@ int proc_readdir(struct file * filp, } do { + if (!vx_hide_check(0, de->vx_flags)) + goto skip; if (filldir(dirent, de->name, de->namelen, filp->f_pos, de->low_ino, de->mode >> 12) < 0) goto out; + skip: filp->f_pos++; de = de->next; } while (de); @@ -559,6 +573,7 @@ static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, ent->namelen = len; ent->mode = mode; ent->nlink = nlink; + ent->vx_flags = IATTR_PROC_DEFAULT; out: return ent; } @@ -579,7 +594,8 @@ struct proc_dir_entry *proc_symlink(const char *name, kfree(ent->data); kfree(ent); ent = NULL; - } + } else + ent->vx_flags = IATTR_PROC_SYMLINK; } else { kfree(ent); ent = NULL; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 2d38f02c9..bf090daaf 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -211,6 +211,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, inode->i_uid = de->uid; inode->i_gid = de->gid; } + if (de->vx_flags) + PROC_I(inode)->vx_flags = de->vx_flags; if (de->size) inode->i_size = de->size; if (de->nlink) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index d6b65c00e..0b4de431c 100644 --- a/fs/proc/proc_misc.c +++ 
b/fs/proc/proc_misc.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,9 @@ static int uptime_read_proc(char *page, char **start, off_t off, do_posix_clock_monotonic_gettime(&uptime); jiffies_to_timespec(idle_jiffies, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); + len = sprintf(page,"%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), diff --git a/fs/proc/root.c b/fs/proc/root.c index bf4b5d299..c84e88fe0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -23,6 +23,9 @@ struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver; #ifdef CONFIG_SYSCTL struct proc_dir_entry *proc_sys_root; #endif +struct proc_dir_entry *proc_virtual; + +extern void proc_vx_init(void); static struct super_block *proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -75,6 +78,7 @@ void __init proc_root_init(void) proc_device_tree_init(); #endif proc_bus = proc_mkdir("bus", 0); + proc_vx_init(); } static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6fa949e2a..7eba8fd37 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -97,10 +97,35 @@ static int reiserfs_sync_file( return ( n_err < 0 ) ? 
-EIO : 0; } -static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { +int reiserfs_setattr_flags(struct inode *inode, unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = REISERFS_I(inode)->i_flags; + newflags = oldflags & ~(REISERFS_IMMUTABLE_FL | + REISERFS_IUNLINK_FL | REISERFS_BARRIER_FL); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= REISERFS_IMMUTABLE_FL; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= REISERFS_IUNLINK_FL; + if (flags & ATTR_FLAG_BARRIER) + newflags |= REISERFS_BARRIER_FL; + + if (oldflags ^ newflags) { + REISERFS_I(inode)->i_flags = newflags; + inode->i_ctime = CURRENT_TIME; + } + return 0; +} + +int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode ; int error ; + reiserfs_write_lock(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + goto is_dir; + if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate @@ -133,7 +158,12 @@ static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { goto out; } +is_dir: error = inode_change_ok(inode, attr) ; + + if (!error && attr->ia_valid & ATTR_ATTR_FLAG) + reiserfs_setattr_flags(inode, attr->ia_attr_flags); + if (!error) inode_setattr(inode, attr) ; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6aded3b89..ce9c3d805 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include extern int reiserfs_default_io_size; /* default io size devuned in super.c */ @@ -957,6 +958,8 @@ static void init_inode (struct inode * inode, struct path * path) struct buffer_head * bh; struct item_head * ih; __u32 rdev; + uid_t uid; + gid_t gid; //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER (path); @@ -977,12 +980,13 @@ static void init_inode (struct inode * inode, struct path * path) struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); unsigned long blocks; + uid = 
sd_v1_uid(sd); + gid = sd_v1_gid(sd); + set_inode_item_key_version (inode, KEY_FORMAT_3_5); set_inode_sd_version (inode, STAT_DATA_V1); inode->i_mode = sd_v1_mode(sd); inode->i_nlink = sd_v1_nlink(sd); - inode->i_uid = sd_v1_uid(sd); - inode->i_gid = sd_v1_gid(sd); inode->i_size = sd_v1_size(sd); inode->i_atime.tv_sec = sd_v1_atime(sd); inode->i_mtime.tv_sec = sd_v1_mtime(sd); @@ -1014,11 +1018,12 @@ static void init_inode (struct inode * inode, struct path * path) // (directories and symlinks) struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih); + uid = sd_v2_uid(sd); + gid = sd_v2_gid(sd); + inode->i_mode = sd_v2_mode(sd); inode->i_nlink = sd_v2_nlink(sd); - inode->i_uid = sd_v2_uid(sd); inode->i_size = sd_v2_size(sd); - inode->i_gid = sd_v2_gid(sd); inode->i_mtime.tv_sec = sd_v2_mtime(sd); inode->i_atime.tv_sec = sd_v2_atime(sd); inode->i_ctime.tv_sec = sd_v2_ctime(sd); @@ -1043,6 +1048,9 @@ static void init_inode (struct inode * inode, struct path * path) REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); } + inode->i_uid = INOXID_UID(uid, gid); + inode->i_gid = INOXID_GID(uid, gid); + inode->i_xid = INOXID_XID(uid, gid, 0); pathrelse (path); if (S_ISREG (inode->i_mode)) { @@ -1066,13 +1074,15 @@ static void init_inode (struct inode * inode, struct path * path) static void inode2sd (void * sd, struct inode * inode) { struct stat_data * sd_v2 = (struct stat_data *)sd; + uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); __u16 flags; + set_sd_v2_uid(sd_v2, uid ); + set_sd_v2_gid(sd_v2, gid ); set_sd_v2_mode(sd_v2, inode->i_mode ); set_sd_v2_nlink(sd_v2, inode->i_nlink ); - set_sd_v2_uid(sd_v2, inode->i_uid ); set_sd_v2_size(sd_v2, inode->i_size ); - set_sd_v2_gid(sd_v2, inode->i_gid ); set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec ); set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec ); set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec ); @@ -2326,6 +2336,14 @@ 
void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ) inode -> i_flags |= S_IMMUTABLE; else inode -> i_flags &= ~S_IMMUTABLE; + if( sd_attrs & REISERFS_IUNLINK_FL ) + inode -> i_flags |= S_IUNLINK; + else + inode -> i_flags &= ~S_IUNLINK; + if( sd_attrs & REISERFS_BARRIER_FL ) + inode -> i_flags |= S_BARRIER; + else + inode -> i_flags &= ~S_BARRIER; if( sd_attrs & REISERFS_APPEND_FL ) inode -> i_flags |= S_APPEND; else @@ -2348,6 +2366,14 @@ void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) *sd_attrs |= REISERFS_IMMUTABLE_FL; else *sd_attrs &= ~REISERFS_IMMUTABLE_FL; + if( inode -> i_flags & S_IUNLINK ) + *sd_attrs |= REISERFS_IUNLINK_FL; + else + *sd_attrs &= ~REISERFS_IUNLINK_FL; + if( inode -> i_flags & S_BARRIER ) + *sd_attrs |= REISERFS_BARRIER_FL; + else + *sd_attrs &= ~REISERFS_BARRIER_FL; if( inode -> i_flags & S_SYNC ) *sd_attrs |= REISERFS_SYNC_FL; else diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index ec59e0744..eaddaf9ce 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -20,7 +20,7 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { - unsigned int flags; + unsigned int flags, oldflags; switch (cmd) { case REISERFS_IOC_UNPACK: @@ -36,6 +36,7 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, case REISERFS_IOC_GETFLAGS: flags = REISERFS_I(inode) -> i_attrs; i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); + flags &= REISERFS_FL_USER_VISIBLE; return put_user(flags, (int *) arg); case REISERFS_IOC_SETFLAGS: { if (IS_RDONLY(inode)) @@ -47,7 +48,9 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if (get_user(flags, (int *) arg)) return -EFAULT; - if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && + oldflags = REISERFS_I(inode) -> i_attrs; + if ( ( ( flags ^ oldflags) & + ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) && 
!capable( CAP_LINUX_IMMUTABLE ) ) return -EPERM; @@ -59,6 +62,9 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if( result ) return result; } + + flags = flags & REISERFS_FL_USER_MODIFYABLE; + flags |= oldflags & ~REISERFS_FL_USER_MODIFYABLE; sd_attrs_to_i_attrs( flags, inode ); REISERFS_I(inode) -> i_attrs = flags; inode->i_ctime = CURRENT_TIME; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5dae18f5b..827b64f67 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1318,5 +1318,6 @@ struct inode_operations reiserfs_dir_inode_operations = { .rmdir = reiserfs_rmdir, .mknod = reiserfs_mknod, .rename = reiserfs_rename, + .setattr = reiserfs_setattr, }; diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c index d6402d746..df6330c25 100644 --- a/fs/xfs/linux/xfs_ioctl.c +++ b/fs/xfs/linux/xfs_ioctl.c @@ -1017,6 +1017,8 @@ xfs_ioc_fsgeometry( #define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */ #define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */ #define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */ +#define LINUX_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define LINUX_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */ STATIC unsigned int xfs_merge_ioc_xflags( @@ -1062,6 +1064,7 @@ xfs_ioc_xattr( int error; int attr_flags; unsigned int flags; + unsigned int old_flags; switch (cmd) { case XFS_IOC_FSGETXATTR: { @@ -1086,7 +1089,7 @@ xfs_ioc_xattr( attr_flags = 0; if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; - + va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE; va.va_xflags = fa.fsx_xflags; va.va_extsize = fa.fsx_extsize; @@ -1114,15 +1117,17 @@ xfs_ioc_xattr( case XFS_IOC_GETXFLAGS: { flags = 0; - if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE) + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) flags |= LINUX_XFLAG_IMMUTABLE; - if (ip->i_d.di_flags & XFS_XFLAG_APPEND) + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + flags |= LINUX_XFLAG_IUNLINK; + 
if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) flags |= LINUX_XFLAG_APPEND; - if (ip->i_d.di_flags & XFS_XFLAG_SYNC) + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) flags |= LINUX_XFLAG_SYNC; - if (ip->i_d.di_flags & XFS_XFLAG_NOATIME) + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) flags |= LINUX_XFLAG_NOATIME; - if (ip->i_d.di_flags & XFS_XFLAG_NODUMP) + if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP) flags |= LINUX_XFLAG_NODUMP; if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags))) return -XFS_ERROR(EFAULT); @@ -1142,8 +1147,16 @@ xfs_ioc_xattr( if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= ATTR_NONBLOCK; + old_flags = 0; + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + old_flags |= LINUX_XFLAG_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + old_flags |= LINUX_XFLAG_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + old_flags |= LINUX_XFLAG_BARRIER; + va.va_mask = XFS_AT_XFLAGS; - va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags); + va.va_xflags = xfs_merge_ioc_xflags(flags, old_flags); VOP_SETATTR(vp, &va, attr_flags, NULL, error); if (!error) diff --git a/fs/xfs/linux/xfs_iops.c b/fs/xfs/linux/xfs_iops.c index 4b3e61d6c..dcc86827d 100644 --- a/fs/xfs/linux/xfs_iops.c +++ b/fs/xfs/linux/xfs_iops.c @@ -490,6 +490,28 @@ linvfs_getattr( return 0; } +STATIC int +linvfs_setattr_flags( + vattr_t *vap, + unsigned int flags) +{ + unsigned int oldflags, newflags; + + oldflags = vap->va_xflags; + newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE | + XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER); + if (flags & ATTR_FLAG_IMMUTABLE) + newflags |= XFS_XFLAG_IMMUTABLE; + if (flags & ATTR_FLAG_IUNLINK) + newflags |= XFS_XFLAG_IUNLINK; + if (flags & ATTR_FLAG_BARRIER) + newflags |= XFS_XFLAG_BARRIER; + + if (oldflags ^ newflags) + vap->va_xflags = newflags; + return 0; +} + STATIC int linvfs_setattr( struct dentry *dentry, @@ -541,6 +563,11 @@ linvfs_setattr( flags |= ATTR_NONBLOCK; #endif + if (ia_valid & ATTR_ATTR_FLAG) { + vattr.va_mask |= XFS_AT_XFLAGS; + 
linvfs_setattr_flags(&vattr, attr->ia_attr_flags); + } + VOP_SETATTR(vp, &vattr, flags, NULL, error); if (error) return(-error); /* Positive error up from XFS */ diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c index bbaf61bee..b0efce8e4 100644 --- a/fs/xfs/linux/xfs_super.c +++ b/fs/xfs/linux/xfs_super.c @@ -189,6 +189,14 @@ xfs_revalidate_inode( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; else diff --git a/fs/xfs/linux/xfs_vnode.c b/fs/xfs/linux/xfs_vnode.c index 9240efb2b..44ba5e5f4 100644 --- a/fs/xfs/linux/xfs_vnode.c +++ b/fs/xfs/linux/xfs_vnode.c @@ -217,6 +217,14 @@ vn_revalidate( inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; + if (va.va_xflags & XFS_XFLAG_IUNLINK) + inode->i_flags |= S_IUNLINK; + else + inode->i_flags &= ~S_IUNLINK; + if (va.va_xflags & XFS_XFLAG_BARRIER) + inode->i_flags |= S_BARRIER; + else + inode->i_flags &= ~S_BARRIER; if (va.va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e0b529b33..f8f6e0fe6 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -456,6 +456,9 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp); #define XFS_DIFLAG_SYNC_BIT 5 /* inode is written synchronously */ #define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */ #define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */ +#define XFS_DIFLAG_BARRIER_BIT 10 /* chroot() barrier */ +#define XFS_DIFLAG_IUNLINK_BIT 11 /* inode has iunlink */ + #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT) @@ -464,5 +467,8 @@ 
xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp); #define XFS_DIFLAG_SYNC (1 << XFS_DIFLAG_SYNC_BIT) #define XFS_DIFLAG_NOATIME (1 << XFS_DIFLAG_NOATIME_BIT) #define XFS_DIFLAG_NODUMP (1 << XFS_DIFLAG_NODUMP_BIT) +#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT) +#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT) + #endif /* __XFS_DINODE_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 1ed650e90..7e1b82c4f 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -76,6 +76,8 @@ struct fsxattr { #define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ #define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */ #define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */ +#define XFS_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */ #define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index dd20a0a26..550986f61 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -255,6 +255,10 @@ xfs_getattr( vap->va_xflags |= XFS_XFLAG_PREALLOC; if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) vap->va_xflags |= XFS_XFLAG_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK) + vap->va_xflags |= XFS_XFLAG_IUNLINK; + if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER) + vap->va_xflags |= XFS_XFLAG_BARRIER; if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) vap->va_xflags |= XFS_XFLAG_APPEND; if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) @@ -850,6 +854,10 @@ xfs_setattr( } if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE; + if (vap->va_xflags & XFS_XFLAG_IUNLINK) + ip->i_d.di_flags |= XFS_DIFLAG_IUNLINK; + if (vap->va_xflags & XFS_XFLAG_BARRIER) + ip->i_d.di_flags |= XFS_DIFLAG_BARRIER; if (vap->va_xflags & XFS_XFLAG_APPEND) ip->i_d.di_flags |= XFS_DIFLAG_APPEND; if (vap->va_xflags & XFS_XFLAG_SYNC) diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index 
7e65aa4f2..a898e27de 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -233,6 +233,7 @@ #define __NR_osf_memcntl 260 /* not implemented */ #define __NR_osf_fdatasync 261 /* not implemented */ +#define __NR_vserver 273 /* * Linux-specific system calls begin at 300 diff --git a/include/asm-arm/tlb.h b/include/asm-arm/tlb.h index ab3cad4fb..a21e6a01e 100644 --- a/include/asm-arm/tlb.h +++ b/include/asm-arm/tlb.h @@ -58,7 +58,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); if (freed) { flush_tlb_mm(mm); diff --git a/include/asm-arm26/tlb.h b/include/asm-arm26/tlb.h index ee6d11d86..214185e68 100644 --- a/include/asm-arm26/tlb.h +++ b/include/asm-arm26/tlb.h @@ -38,7 +38,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); if (freed) { flush_tlb_mm(mm); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 1358c5136..aa7d3c093 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -15,6 +15,7 @@ #include #include +#include #include /* @@ -91,7 +92,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); tlb_flush_mmu(tlb, start, end); /* keep the page table cache within bounds */ diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h index 5d714b206..f1ff70964 100644 --- a/include/asm-ia64/tlb.h +++ b/include/asm-ia64/tlb.h @@ -163,7 +163,8 @@ tlb_finish_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end) if (rss < freed) freed = rss; - mm->rss = rss - freed; + // mm->rss = rss - freed; + vx_rsspages_sub(mm, freed); /* * Note: tlb->nr may be 0 at this point, 
so we can't rely on tlb->start_addr and * tlb->end_addr. diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index 0cc0b9f40..429e61718 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -239,7 +239,9 @@ #define __NR_fremovexattr 234 #define __NR_futex 235 -#define NR_syscalls 236 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -124: see */ diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index c58c9790c..c6a520832 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -221,7 +221,9 @@ #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 -#define NR_syscalls 256 +#define __NR_vserver 273 + +#define NR_syscalls 274 /* user-visible error numbers are in the range -1 - -122: see */ diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index 8fe42c70e..bdf0c3dcd 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -289,10 +289,12 @@ #define __NR_tgkill (__NR_Linux + 266) #define __NR_utimes (__NR_Linux + 267) +#define __NR_vserver (__NR_Linux + 273) + /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 267 +#define __NR_Linux_syscalls 273 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ diff --git a/include/asm-parisc/unistd.h b/include/asm-parisc/unistd.h index d810f2ba5..c4a36aab6 100644 --- a/include/asm-parisc/unistd.h +++ b/include/asm-parisc/unistd.h @@ -722,8 +722,9 @@ #define __NR_remap_file_pages (__NR_Linux + 227) #define __NR_semtimedop (__NR_Linux + 228) +#define __NR_vserver (__NR_Linux + 273) -#define __NR_Linux_syscalls 228 +#define __NR_Linux_syscalls 273 #define HPUX_GATEWAY_ADDR 0xC0000004 #define LINUX_GATEWAY_ADDR 0x100 diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h index 6ef6c95f0..aa190772b 100644 --- a/include/asm-ppc/unistd.h +++ b/include/asm-ppc/unistd.h @@ -261,7 +261,7 @@ #define __NR_fadvise64_64 254 #define 
__NR_rtas 255 /* Number 256 is reserved for sys_debug_setcontext */ -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* Number 258 is reserved for new sys_remap_file_pages */ /* Number 259 is reserved for new sys_mbind */ /* Number 260 is reserved for new sys_get_mempolicy */ diff --git a/include/asm-ppc64/unistd.h b/include/asm-ppc64/unistd.h index 8b3e5d562..6604ae781 100644 --- a/include/asm-ppc64/unistd.h +++ b/include/asm-ppc64/unistd.h @@ -267,7 +267,7 @@ #define __NR_fadvise64_64 254 #define __NR_rtas 255 /* Number 256 is reserved for sys_debug_setcontext */ -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 /* Number 258 is reserved for new sys_remap_file_pages */ /* Number 259 is reserved for new sys_mbind */ /* Number 260 is reserved for new sys_get_mempolicy */ diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index c15e6d75c..27b83840c 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -255,7 +255,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_fadvise64_64 264 #define __NR_statfs64 265 #define __NR_fstatfs64 266 diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 4c0c9fbc3..a28b62117 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -283,7 +283,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index 751f7de1c..fa7c78a16 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -285,7 +285,7 @@ #define __NR_timer_getoverrun 264 #define 
__NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff --git a/include/linux/capability.h b/include/linux/capability.h index c96e7b624..7798d2c26 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -235,6 +235,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -284,6 +285,11 @@ typedef __u32 kernel_cap_t; #define CAP_LEASE 28 +/* Allow context manipulations */ +/* Allow changing context info on files */ + +#define CAP_CONTEXT 29 + #ifdef __KERNEL__ /* * Bounding set diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index d701ba88c..7c6f650c9 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -192,6 +192,8 @@ struct ext2_group_desc #define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT2_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ @@ -240,7 +242,7 @@ struct ext2_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __u16 l_i_uid_high; /* these 2 fields */ __u16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -272,6 +274,7 @@ struct ext2_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid 
osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #endif @@ -312,6 +315,7 @@ struct ext2_inode { #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ #define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +#define EXT2_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */ #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt #define set_opt(o, opt) o |= EXT2_MOUNT_##opt diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index d90013eb3..27449a4a2 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -185,6 +185,8 @@ struct ext3_group_desc #define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */ +#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ @@ -208,6 +210,9 @@ struct ext3_group_desc #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif +#ifdef CONFIG_VSERVER_LEGACY +#define EXT3_IOC_SETXID FIOC_SETXIDJ +#endif /* * Structure of an inode on the disk @@ -244,7 +249,7 @@ struct ext3_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __u16 l_i_xid; /* LRU Context */ __u16 l_i_uid_high; /* these 2 fields */ __u16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -276,6 +281,7 @@ struct ext3_inode { #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high #define i_gid_high osd2.linux2.l_i_gid_high +#define i_raw_xid osd2.linux2.l_i_xid #define i_reserved2 osd2.linux2.l_i_reserved2 #elif defined(__GNU__) @@ -324,6 +330,7 @@ struct ext3_inode { #define EXT3_MOUNT_NO_UID32 0x2000 /* 
Disable 32-bit UIDs */ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff --git a/include/linux/fs.h b/include/linux/fs.h index 8971ae34d..5b70fb85d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -116,6 +116,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time; #define MS_VERBOSE 32768 #define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ #define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */ +#define MS_TAGXID (1<<24) /* tag inodes with context information */ #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) @@ -142,6 +143,8 @@ extern int leases_enable, dir_notify_enable, lease_break_time; #define S_NOQUOTA 64 /* Inode is not counted to quota */ #define S_DIRSYNC 128 /* Directory modifications are synchronous */ #define S_NOCMTIME 256 /* Do not update file c/mtime */ +#define S_BARRIER 512 /* Barrier for chroot() */ +#define S_IUNLINK 1024 /* Immutable unlink */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -169,11 +172,14 @@ extern int leases_enable, dir_notify_enable, lease_break_time; #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK) +#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? 
S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode)) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) #define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL) #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) +#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER)) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) @@ -280,6 +286,9 @@ struct iattr { #define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ #define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ +#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */ +#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */ + /* * Includes for diskquotas. */ @@ -415,6 +424,7 @@ struct inode { unsigned int i_nlink; uid_t i_uid; gid_t i_gid; + xid_t i_xid; dev_t i_rdev; loff_t i_size; struct timespec i_atime; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 29189706e..3ba25d682 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -112,6 +112,10 @@ extern struct group_info init_groups; .proc_lock = SPIN_LOCK_UNLOCKED, \ .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ + .xid = 0, \ + .nid = 0, \ + .vx_info = NULL, \ + .nx_info = NULL, \ } diff --git a/include/linux/ip.h b/include/linux/ip.h index ab799b48b..5ea190d1b 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -111,6 +111,7 @@ struct inet_opt { /* Socket demultiplex comparisons on incoming packets. 
*/ __u32 daddr; /* Foreign IPv4 addr */ __u32 rcv_saddr; /* Bound local IPv4 addr */ + __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */ __u16 dport; /* Destination port */ __u16 num; /* Local port */ __u32 saddr; /* Sending source */ diff --git a/include/linux/ipc.h b/include/linux/ipc.h index b29118973..079c2feb3 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -66,6 +66,7 @@ struct kern_ipc_perm mode_t mode; unsigned long seq; void *security; + xid_t xid; }; #endif /* __KERNEL__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 46c59a68a..1de4294b2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ diff --git a/include/linux/namespace.h b/include/linux/namespace.h index fdd8abb07..ae9c17d6e 100644 --- a/include/linux/namespace.h +++ b/include/linux/namespace.h @@ -13,6 +13,7 @@ struct namespace { }; extern void umount_tree(struct vfsmount *); +extern void umount_unused(struct vfsmount *, struct fs_struct *); extern int copy_namespace(int, struct task_struct *); void __put_namespace(struct namespace *namespace); diff --git a/include/linux/net.h b/include/linux/net.h index 3d2cd0cec..6293dd300 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -60,6 +60,8 @@ typedef enum { #define SOCK_ASYNC_NOSPACE 0 #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 +#define SOCK_PASS_CRED 16 +#define SOCK_USER_SOCKET 17 /** * struct socket - general BSD socket @@ -82,7 +84,6 @@ struct socket { struct sock *sk; wait_queue_head_t wait; short type; - unsigned char passcred; }; struct vm_area_struct; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2d439a839..3c3226075 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -55,6 +55,7 @@ struct proc_dir_entry { nlink_t nlink; uid_t uid; gid_t gid; + int vx_flags; unsigned long size; struct inode_operations * 
proc_iops; struct file_operations * proc_fops; @@ -237,9 +238,11 @@ extern struct kcore_list *kclist_del(void *); struct proc_inode { struct task_struct *task; int type; + int vx_flags; union { int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **); int (*proc_read)(struct task_struct *task, char *page); + int (*proc_vid_read)(int vid, char *page); } op; struct proc_dir_entry *pde; struct inode vfs_inode; diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index dfb46b513..f64e252d1 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -888,6 +888,13 @@ struct stat_data_v1 #define REISERFS_COMPR_FL EXT2_COMPR_FL #define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL +/* unfortunately reiserfs sdattr is only 16 bit */ +#define REISERFS_BARRIER_FL (EXT2_BARRIER_FL >> 16) +#define REISERFS_IUNLINK_FL (EXT2_IUNLINK_FL >> 16) + +#define REISERFS_FL_USER_VISIBLE 0x80FF +#define REISERFS_FL_USER_MODIFYABLE 0x80FF + /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ @@ -1957,6 +1964,7 @@ int reiserfs_new_inode (struct reiserfs_transaction_handle *th, struct dentry *dentry, struct inode *inode); int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); +int reiserfs_setattr ( struct dentry *dentry, struct iattr *attr); void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ); void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ); diff --git a/include/linux/sched.h b/include/linux/sched.h index 73d7127e3..27ba7dd23 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -102,6 +102,7 @@ extern unsigned long nr_iowait(void); #include #include +#include #define TASK_RUNNING 0 #define TASK_INTERRUPTIBLE 1 @@ -109,6 +110,7 @@ extern unsigned long nr_iowait(void); #define TASK_STOPPED 4 
#define TASK_ZOMBIE 8 #define TASK_DEAD 16 +#define TASK_ONHOLD 32 #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -221,6 +223,7 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; + struct vx_info *mm_vx_info; /* coredumping support */ int core_waiters; @@ -315,9 +318,10 @@ struct user_struct { /* Hash table maintenance information */ struct list_head uidhash_list; uid_t uid; + xid_t xid; }; -extern struct user_struct *find_user(uid_t); +extern struct user_struct *find_user(xid_t, uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -481,6 +485,14 @@ struct task_struct { void *security; struct audit_context *audit_context; +/* vserver context data */ + xid_t xid; + struct vx_info *vx_info; + +/* vserver network data */ + nid_t nid; + struct nx_info *nx_info; + /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; @@ -600,7 +612,7 @@ extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(xid_t, uid_t); extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index d8929c671..64753c544 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -133,6 +133,7 @@ enum KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ KERN_HZ_TIMER=65, /* int: hz timer on or off */ + KERN_VSHELPER=66, /* string: path to vshelper policy agent */ }; diff --git a/include/linux/types.h b/include/linux/types.h index 23c414f11..288ab653e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -36,6 +36,8 @@ typedef __kernel_uid32_t uid_t; typedef __kernel_gid32_t gid_t; typedef __kernel_uid16_t uid16_t; typedef __kernel_gid16_t gid16_t; +typedef unsigned int xid_t; +typedef unsigned int nid_t; #ifdef CONFIG_UID16 /* This is defined by include/asm-{arch}/posix_types.h */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index be877d75c..b47489bfc 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -11,9 +11,9 @@ extern rwlock_t unix_table_lock; extern atomic_t unix_tot_inflight; -static inline struct sock *first_unix_socket(int *i) +static inline struct sock *next_unix_socket_table(int *i) { - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { + for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { if (!hlist_empty(&unix_socket_table[*i])) return __sk_head(&unix_socket_table[*i]); } @@ -22,16 +22,19 @@ static inline struct sock *first_unix_socket(int *i) static inline struct sock *next_unix_socket(int *i, struct sock *s) { - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. 
*/ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; + do { + if (s) + s = sk_next(s); + if (!s) + s = next_unix_socket_table(i); + } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH)); + return s; +} + +static inline struct sock *first_unix_socket(int *i) +{ + *i = 0; + return next_unix_socket(i, NULL); } #define forall_unix_sockets(i, s) \ diff --git a/include/net/route.h b/include/net/route.h index 7a851de92..4cbac7da3 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -146,6 +147,59 @@ static inline char rt_tos2priority(u8 tos) return ip_tos2prio[IPTOS_TOS(tos)>>1]; } +#define IPI_LOOPBACK 0x0100007f + +static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl) +{ + int err; + int i, n = nxi->nbipv4; + u32 ipv4root = nxi->ipv4[0]; + + if (ipv4root == 0) + return 0; + + if (fl->fl4_src == 0) { + if (n > 1) { + u32 foundsrc; + + err = __ip_route_output_key(rp, fl); + if (err) { + fl->fl4_src = ipv4root; + err = __ip_route_output_key(rp, fl); + } + if (err) + return err; + + foundsrc = (*rp)->rt_src; + ip_rt_put(*rp); + + for (i=0; i<n; i++) { + u32 mask = nxi->mask[i]; + u32 ipv4 = nxi->ipv4[i]; + u32 net4 = ipv4 & mask; + + if (foundsrc == ipv4) { + fl->fl4_src = ipv4; + break; + } + if (!fl->fl4_src && (foundsrc & mask) == net4) + fl->fl4_src = ipv4; + } + } + if (fl->fl4_src == 0) + fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK) + ?
IPI_LOOPBACK : ipv4root; + } else { + for (i=0; i<n; i++) { + if (nxi->ipv4[i] == fl->fl4_src) + break; + } + if (i == n) + return -EPERM; + } + return 0; +} + static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif, u8 protocol, u16 sport, u16 dport, struct sock *sk) @@ -160,7 +214,22 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, .dport = dport } } }; int err; - if (!dst || !src) { + struct nx_info *nx_info = current->nx_info; + + if (sk) + nx_info = sk->sk_nx_info; + vxdprintk("ip_route_connect(%p) %p,%p;%lx\n", + sk, nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (nx_info) { + err = ip_find_src(nx_info, rp, &fl); + if (err) + return err; + if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + fl.fl4_dst = nx_info->ipv4[0]; + } + if (!fl.fl4_dst || !fl.fl4_src) { err = __ip_route_output_key(rp, &fl); if (err) return err; diff --git a/include/net/scm.h b/include/net/scm.h index b7ba74dbc..30e9fbb98 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -51,13 +51,13 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg, { if (!msg->msg_control) { - if (sock->passcred || scm->fp) + if (test_bit(SOCK_PASS_CRED, &sock->flags) || scm->fp) msg->msg_flags |= MSG_CTRUNC; scm_destroy(scm); return; } - if (sock->passcred) + if (test_bit(SOCK_PASS_CRED, &sock->flags)) put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds); if (!scm->fp) diff --git a/include/net/sock.h b/include/net/sock.h index e01e61768..276d8a612 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -50,6 +50,7 @@ #include #include +#include #include #include @@ -109,6 +110,10 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; + struct nx_info *skc_nx_info; }; /** @@ -186,6 +191,10 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node
__sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_xid __sk_common.skc_xid +#define sk_vx_info __sk_common.skc_vx_info +#define sk_nid __sk_common.skc_nid +#define sk_nx_info __sk_common.skc_nx_info volatile unsigned char sk_zapped; unsigned char sk_shutdown; unsigned char sk_use_write_queue; diff --git a/include/net/tcp.h b/include/net/tcp.h index cba1f701e..946d29e14 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -195,6 +195,10 @@ struct tcp_tw_bucket { #define tw_node __tw_common.skc_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt +#define tw_xid __tw_common.skc_xid +#define tw_vx_info __tw_common.skc_vx_info +#define tw_nid __tw_common.skc_nid +#define tw_nx_info __tw_common.skc_nx_info volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; __u16 tw_sport; diff --git a/ipc/msg.c b/ipc/msg.c index 37e2d3bb1..12c9367c8 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -96,6 +96,7 @@ static int newque (key_t key, int msgflg) msq->q_perm.mode = (msgflg & S_IRWXUGO); msq->q_perm.key = key; + msq->q_perm.xid = current->xid; msq->q_perm.security = NULL; retval = security_msg_queue_alloc(msq); @@ -788,7 +789,11 @@ static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int l for(i = 0; i <= msg_ids.max_id; i++) { struct msg_queue * msq; msq = msg_lock(i); - if(msq != NULL) { + if (msq) { + if (!vx_check(msq->q_perm.xid, VX_IDENT)) { + msg_unlock(msq); + continue; + } len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", msq->q_perm.key, msg_buildid(i,msq->q_perm.seq), diff --git a/ipc/sem.c b/ipc/sem.c index 372567355..11ce9673a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -175,6 +175,7 @@ static int newary (key_t key, int nsems, int semflg) sma->sem_perm.mode = (semflg & S_IRWXUGO); sma->sem_perm.key = key; + sma->sem_perm.xid = current->xid; sma->sem_perm.security = NULL; retval = security_sem_alloc(sma); @@ -1296,7 
+1297,11 @@ static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int l for(i = 0; i <= sem_ids.max_id; i++) { struct sem_array *sma; sma = sem_lock(i); - if(sma) { + if (sma) { + if (!vx_check(sma->sem_perm.xid, VX_IDENT)) { + sem_unlock(sma); + continue; + } len += sprintf(buffer + len, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", sma->sem_perm.key, sem_buildid(i,sma->sem_perm.seq), diff --git a/ipc/shm.c b/ipc/shm.c index 714933b14..ebe5fa4e0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -185,6 +185,7 @@ static int newseg (key_t key, int shmflg, size_t size) return -ENOMEM; shp->shm_perm.key = key; + shp->shm_perm.xid = current->xid; shp->shm_flags = (shmflg & S_IRWXUGO); shp->shm_perm.security = NULL; @@ -843,11 +844,15 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l struct shmid_kernel* shp; shp = shm_lock(i); - if(shp!=NULL) { + if (shp) { #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" char *format; + if (!vx_check(shp->shm_perm.xid, VX_IDENT)) { + shm_unlock(shp); + continue; + } if (sizeof(size_t) <= sizeof(int)) format = SMALL_STRING; else diff --git a/ipc/util.c b/ipc/util.c index f74c5eef5..310ccc5ec 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -103,8 +103,10 @@ int ipc_findkey(struct ipc_ids* ids, key_t key) */ for (id = 0; id <= max_id; id++) { p = ids->entries[id].p; - if(p==NULL) + if (p==NULL) continue; + if (!vx_check(p->xid, VX_IDENT)) + continue; if (key == p->key) return id; } @@ -367,6 +369,8 @@ int ipcperms (struct kern_ipc_perm *ipcp, short flag) { /* flag will most probably be 0 or S_...UGO from */ int requested_mode, granted_mode; + if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? 
*/ + return -1; requested_mode = (flag >> 6) | (flag >> 3) | flag; granted_mode = ipcp->mode; if (current->euid == ipcp->cuid || current->euid == ipcp->uid) diff --git a/kernel/Makefile b/kernel/Makefile index 238c65f60..81849958d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,6 +9,11 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ kthread.o +# mod-subdirs := vserver + +subdir-y += vserver +obj-y += vserver/vserver.o + obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o diff --git a/kernel/exit.c b/kernel/exit.c index 96d7394cc..9b652d862 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -35,6 +35,11 @@ int getrusage(struct task_struct *, int, struct rusage *); static void __unhash_process(struct task_struct *p) { nr_threads--; + /* tasklist_lock is held, is this sufficient? */ + if (p->vx_info) { + atomic_dec(&p->vx_info->cacct.nr_threads); + atomic_dec(&p->vx_info->limit.res[RLIMIT_NPROC]); + } detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_TGID); if (thread_group_leader(p)) { @@ -234,6 +239,7 @@ void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ REMOVE_LINKS(current); + /* FIXME handle vchild_reaper/initpid */ current->parent = child_reaper; current->real_parent = child_reaper; SET_LINKS(current); @@ -378,6 +384,7 @@ static inline void close_files(struct files_struct * files) struct file * file = xchg(&files->fd[i], NULL); if (file) filp_close(file, files); + vx_openfd_dec(fd); } i++; set >>= 1; @@ -597,6 +604,7 @@ static inline void forget_original_parent(struct task_struct * father) struct task_struct *p, *reaper = father; struct list_head *_p, *_n; + /* FIXME handle vchild_reaper/initpid */ reaper = father->group_leader; if (reaper == father) reaper = child_reaper; diff --git a/kernel/fork.c b/kernel/fork.c index 68597bc34..4336cf0ba 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -33,6 +33,8 @@ 
#include #include #include +#include +#include #include #include @@ -75,6 +77,8 @@ static kmem_cache_t *task_struct_cachep; static void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); + clr_vx_info(&tsk->vx_info); + clr_nx_info(&tsk->nx_info); free_task_struct(tsk); } @@ -405,6 +409,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm) if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + set_vx_info(&mm->mm_vx_info, current->vx_info); return mm; } free_mm(mm); @@ -436,6 +441,7 @@ void fastcall __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + clr_vx_info(&mm->mm_vx_info); free_mm(mm); } @@ -550,6 +556,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) /* Copy the current MM stuff.. */ memcpy(mm, oldmm, sizeof(*mm)); + mm->mm_vx_info = NULL; if (!mm_init(mm)) goto fail_nomem; @@ -861,6 +868,8 @@ struct task_struct *copy_process(unsigned long clone_flags, { int retval; struct task_struct *p = NULL; + struct vx_info *vxi; + struct nx_info *nxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -885,11 +894,31 @@ struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; retval = -ENOMEM; + p = dup_task_struct(current); if (!p) goto fork_out; + vxi = get_vx_info(current->vx_info); + nxi = get_nx_info(current->nx_info); + + /* check vserver memory */ + if (p->mm && !(clone_flags & CLONE_VM)) { + if (vx_vmpages_avail(p->mm, p->mm->total_vm)) + vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); + else + goto bad_fork_free; + } + if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { + if (!vx_rsspages_avail(p->mm, p->mm->rss)) + goto bad_fork_free; + } + retval = -EAGAIN; + if (vxi && (atomic_read(&vxi->limit.res[RLIMIT_NPROC]) + >= vxi->limit.rlim[RLIMIT_NPROC])) + goto bad_fork_free; + if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && 
!capable(CAP_SYS_RESOURCE) && @@ -1074,6 +1103,10 @@ struct task_struct *copy_process(unsigned long clone_flags, link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); nr_threads++; + if (vxi) { + atomic_inc(&vxi->cacct.nr_threads); + atomic_inc(&vxi->limit.res[RLIMIT_NPROC]); + } write_unlock_irq(&tasklist_lock); retval = 0; diff --git a/kernel/pid.c b/kernel/pid.c index 6ed44f56c..a8b7eb3c2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -224,7 +224,8 @@ void fastcall detach_pid(task_t *task, enum pid_type type) task_t *find_task_by_pid(int nr) { - struct pid *pid = find_pid(PIDTYPE_PID, nr); + struct pid *pid = find_pid(PIDTYPE_PID, + vx_rmap_tgid(current->vx_info, nr)); if (!pid) return NULL; diff --git a/kernel/printk.c b/kernel/printk.c index 3b7468818..bb5ac824d 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -247,7 +247,10 @@ int do_syslog(int type, char __user * buf, int len) unsigned long i, j, limit, count; int do_clear = 0; char c; - int error = 0; + int error = -EPERM; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return error; error = security_syslog(type); if (error) diff --git a/kernel/sched.c b/kernel/sched.c index 1493acff5..a9f143b5f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NUMA #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu)) @@ -214,6 +216,8 @@ struct runqueue { #endif task_t *migration_thread; struct list_head migration_queue; + struct list_head hold_queue; + int idle_tokens; atomic_t nr_iowait; }; @@ -373,6 +377,9 @@ static int effective_prio(task_t *p) bonus = CURRENT_BONUS(p) - MAX_BONUS / 2; prio = p->static_prio - bonus; + if (__vx_task_flags(p, VXF_SCHED_PRIO, 0)) + prio += effective_vavavoom(p, MAX_USER_PRIO); + if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -1503,6 +1510,9 @@ void scheduler_tick(int user_ticks, int sys_ticks) } if (p == rq->idle) { + if (!--rq->idle_tokens && 
!list_empty(&rq->hold_queue)) + set_need_resched(); + if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait += sys_ticks; else @@ -1545,7 +1555,7 @@ void scheduler_tick(int user_ticks, int sys_ticks) } goto out_unlock; } - if (!--p->time_slice) { + if (vx_need_resched(p)) { dequeue_task(p, rq->active); set_tsk_need_resched(p); p->prio = effective_prio(p); @@ -1606,6 +1616,10 @@ asmlinkage void __sched schedule(void) struct list_head *queue; unsigned long long now; unsigned long run_time; +#ifdef CONFIG_VSERVER_HARDCPU + struct vx_info *vxi; + int maxidle = -HZ; +#endif int idx; /* @@ -1656,6 +1670,37 @@ need_resched: deactivate_task(prev, rq); } +#ifdef CONFIG_VSERVER_HARDCPU + if (!list_empty(&rq->hold_queue)) { + struct list_head *l, *n; + int ret; + + vxi = NULL; + list_for_each_safe(l, n, &rq->hold_queue) { + next = list_entry(l, task_t, run_list); + if (vxi == next->vx_info) + continue; + + vxi = next->vx_info; + ret = vx_tokens_recalc(vxi); + // tokens = vx_tokens_avail(next); + + if (ret > 0) { + list_del(&next->run_list); + next->state &= ~TASK_ONHOLD; + recalc_task_prio(next, now); + __activate_task(next, rq); + // printk("··· unhold %p\n", next); + break; + } + if ((ret < 0) && (maxidle < ret)) + maxidle = ret; + } + } + rq->idle_tokens = -maxidle; + +pick_next: +#endif if (unlikely(!rq->nr_running)) { #ifdef CONFIG_SMP load_balance(rq, 1, cpu_to_node_mask(smp_processor_id())); @@ -1683,6 +1728,23 @@ need_resched: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); +#ifdef CONFIG_VSERVER_HARDCPU + vxi = next->vx_info; + if (vxi && __vx_flags(vxi->vx_flags, + VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) { + int ret = vx_tokens_recalc(vxi); + + if (unlikely(ret <= 0)) { + if (ret && (rq->idle_tokens > -ret)) + rq->idle_tokens = -ret; + deactivate_task(next, rq); + list_add_tail(&next->run_list, &rq->hold_queue); + next->state |= TASK_ONHOLD; + goto pick_next; + } + } +#endif + if (!rt_task(next) && next->activated > 0) { unsigned long 
long delta = now - next->timestamp; @@ -2954,6 +3016,7 @@ void __init sched_init(void) spin_lock_init(&rq->lock); INIT_LIST_HEAD(&rq->migration_queue); + INIT_LIST_HEAD(&rq->hold_queue); atomic_set(&rq->nr_iowait, 0); nr_running_init(rq); diff --git a/kernel/signal.c b/kernel/signal.c index a9181552a..7a9912a0c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1050,6 +1050,9 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) unsigned long flags; int ret; + if (!vx_check(vx_task_xid(p), VX_ADMIN|VX_WATCH|VX_IDENT)) + return -ESRCH; + ret = check_kill_permission(sig, info, p); if (!ret && sig && p->sighand) { spin_lock_irqsave(&p->sighand->siglock, flags); diff --git a/kernel/sys.c b/kernel/sys.c index 4d414d925..952321323 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -339,7 +340,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -398,7 +399,7 @@ asmlinkage long sys_getpriority(int which, int who) if (!who) user = current->user; else - user = find_user(who); + user = find_user(vx_current_xid(), who); if (!user) goto out_unlock; @@ -418,6 +419,72 @@ out_unlock: return retval; } +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... 
+ * argv [2] = context identifier + * argv [3] = additional argument (restart2) + * + * envp [*] = type-specific parameters + */ +char vshelper_path[255] = "/sbin/vshelper"; + +long vs_reboot(unsigned int cmd, void * arg) +{ + char id_buf[8], cmd_buf[32]; + char uid_buf[32], pid_buf[32]; + char buffer[256]; + + char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0}; + char *envp[] = {"HOME=/", "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + uid_buf, pid_buf, cmd_buf, 0}; + + snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid()); + + snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd); + snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid); + snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid); + + switch (cmd) { + case LINUX_REBOOT_CMD_RESTART: + argv[1] = "restart"; + break; + + case LINUX_REBOOT_CMD_HALT: + argv[1] = "halt"; + break; + + case LINUX_REBOOT_CMD_POWER_OFF: + argv[1] = "poweroff"; + break; + + case LINUX_REBOOT_CMD_SW_SUSPEND: + argv[1] = "swsusp"; + break; + + case LINUX_REBOOT_CMD_RESTART2: + if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) + return -EFAULT; + argv[3] = buffer; + default: + argv[1] = "restart2"; + break; + } + + /* maybe we should wait ? 
*/ + if (call_usermodehelper(*argv, argv, envp, 0)) { + printk( KERN_WARNING + "vs_reboot(): failed to exec (%s %s %s %s)\n", + vshelper_path, argv[1], argv[2], argv[3]); + return -EPERM; + } + return 0; +} /* * Reboot system call: for obvious reasons only root may call it, @@ -443,6 +510,9 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user magic2 != LINUX_REBOOT_MAGIC2C)) return -EINVAL; + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return vs_reboot(cmd, arg); + lock_kernel(); switch (cmd) { case LINUX_REBOOT_CMD_RESTART: @@ -637,7 +707,7 @@ static int set_user(uid_t new_ruid, int dumpclear) { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(vx_current_xid(), new_ruid); if (!new_user) return -EAGAIN; @@ -1376,7 +1446,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name) int errno = 0; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (copy_to_user(name, vx_new_utsname(), sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1387,15 +1457,17 @@ asmlinkage long sys_sethostname(char __user *name, int len) int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.nodename, tmp, len); - system_utsname.nodename[len] = 0; + char *ptr = vx_new_uts(nodename); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1405,15 +1477,17 @@ asmlinkage long sys_sethostname(char __user *name, int len) asmlinkage long sys_gethostname(char __user *name, int len) { int i, errno; + char *ptr; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + ptr = vx_new_uts(nodename); + i = 1 + strlen(ptr); if (i > len) i = len; errno = 0; - if (copy_to_user(name, 
system_utsname.nodename, i)) + if (copy_to_user(name, ptr, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1428,7 +1502,7 @@ asmlinkage long sys_setdomainname(char __user *name, int len) int errno; char tmp[__NEW_UTS_LEN]; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; @@ -1436,8 +1510,10 @@ asmlinkage long sys_setdomainname(char __user *name, int len) down_write(&uts_sem); errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { - memcpy(system_utsname.domainname, tmp, len); - system_utsname.domainname[len] = 0; + char *ptr = vx_new_uts(domainname); + + memcpy(ptr, tmp, len); + ptr[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1489,7 +1565,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) old_rlim = current->rlim + resource; if (((new_rlim.rlim_cur > old_rlim->rlim_max) || (new_rlim.rlim_max > old_rlim->rlim_max)) && - !capable(CAP_SYS_RESOURCE)) + !capable(CAP_SYS_RESOURCE) && vx_ccaps(VXC_SET_RLIMIT)) return -EPERM; if (resource == RLIMIT_NOFILE) { if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 79e7c0906..c26f5f46c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -77,6 +77,7 @@ extern char modprobe_path[]; #ifdef CONFIG_HOTPLUG extern char hotplug_path[]; #endif +extern char vshelper_path[]; #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif @@ -409,6 +410,15 @@ static ctl_table kern_table[] = { .strategy = &sysctl_string, }, #endif + { + .ctl_name = KERN_VSHELPER, + .procname = "vshelper", + .data = &vshelper_path, + .maxlen = 256, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, #ifdef CONFIG_CHR_DEV_SG { .ctl_name = KERN_SG_BIG_BUFF, diff --git a/kernel/timer.c b/kernel/timer.c index 08cec6ae7..88636e614 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -31,6 +31,8 @@ #include #include 
#include +#include +#include #include #include @@ -961,7 +963,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds) */ asmlinkage long sys_getpid(void) { - return current->tgid; + return vx_map_tgid(current->vx_info, current->tgid); } /* @@ -1005,7 +1007,7 @@ asmlinkage long sys_getppid(void) #endif break; } - return pid; + return vx_map_tgid(current->vx_info, pid); } asmlinkage long sys_getuid(void) @@ -1214,6 +1216,8 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; tp.tv_sec++; } + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&tp, NULL); val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); @@ -1223,6 +1227,9 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) val.procs = nr_threads; } while (read_seqretry(&xtime_lock, seq)); +/* if (vx_flags(VXF_VIRT_CPU, 0)) + vx_vsi_cpu(val); +*/ si_meminfo(&val); si_swapinfo(&val); diff --git a/kernel/user.c b/kernel/user.c index f5c9d425a..75faf3d47 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,8 +20,8 @@ #define UIDHASH_BITS 8 #define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) static kmem_cache_t *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; @@ -46,7 +46,7 @@ static inline void uid_hash_remove(struct user_struct *up) list_del(&up->uidhash_list); } -static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) +static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) { struct list_head *up; @@ -55,7 +55,7 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has user = 
list_entry(up, struct user_struct, uidhash_list); - if(user->uid == uid) { + if(user->uid == uid && user->xid == xid) { atomic_inc(&user->__count); return user; } @@ -64,9 +64,9 @@ static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *has return NULL; } -struct user_struct *find_user(uid_t uid) +struct user_struct *find_user(xid_t xid, uid_t uid) { - return uid_hash_find(uid, uidhashentry(uid)); + return uid_hash_find(xid, uid, uidhashentry(xid, uid)); } void free_uid(struct user_struct *up) @@ -78,13 +78,13 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(xid_t xid, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(xid, uid); struct user_struct *up; spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); spin_unlock(&uidhash_lock); if (!up) { @@ -94,6 +94,7 @@ struct user_struct * alloc_uid(uid_t uid) if (!new) return NULL; new->uid = uid; + new->xid = xid; atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); atomic_set(&new->files, 0); @@ -103,7 +104,7 @@ struct user_struct * alloc_uid(uid_t uid) * on adding the same user already.. 
*/ spin_lock(&uidhash_lock); - up = uid_hash_find(uid, hashent); + up = uid_hash_find(xid, uid, hashent); if (up) { kmem_cache_free(uid_cachep, new); } else { @@ -148,7 +149,7 @@ static int __init uid_cache_init(void) /* Insert the root user immediately (init already runs as root) */ spin_lock(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(0,0)); spin_unlock(&uidhash_lock); return 0; diff --git a/mm/fremap.c b/mm/fremap.c index 2c8abe6d1..1b891a1bf 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -38,7 +38,8 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, set_page_dirty(page); page_remove_rmap(page, ptep); page_cache_release(page); - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); } } } else { @@ -68,6 +69,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); + if (!vx_rsspages_avail(mm, 1)) + goto err_unlock; + pmd = pmd_alloc(mm, pgd, addr); if (!pmd) goto err_unlock; @@ -78,7 +82,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, zap_pte(mm, vma, addr, pte); - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); pte_chain = page_add_rmap(page, pte, pte_chain); diff --git a/mm/memory.c b/mm/memory.c index 5ae7c99ae..576d4c97e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -285,6 +285,10 @@ skip_copy_pte_range: struct page *page; unsigned long pfn; + if (!vx_rsspages_avail(dst, 1)) { + spin_unlock(&src->page_table_lock); + goto nomem; + } /* copy_one_pte */ if (pte_none(pte)) @@ -328,7 +332,8 @@ skip_copy_pte_range: pte = pte_mkclean(pte); pte = pte_mkold(pte); get_page(page); - dst->rss++; + // dst->rss++; + vx_rsspages_inc(dst); set_pte(dst_pte, pte); pte_chain = page_add_rmap(page, dst_pte, @@ -1124,7 +1129,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, page_table = 
pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) - ++mm->rss; + // ++mm->rss; + vx_rsspages_inc(mm); page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); pte_chain = page_add_rmap(new_page, page_table, pte_chain); @@ -1343,6 +1349,10 @@ static int do_swap_page(struct mm_struct * mm, inc_page_state(pgmajfault); } + if (!vx_rsspages_avail(mm, 1)) { + ret = VM_FAULT_OOM; + goto out; + } mark_page_accessed(page); pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) { @@ -1372,7 +1382,8 @@ static int do_swap_page(struct mm_struct * mm, if (vm_swap_full()) remove_exclusive_swap_page(page); - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -1406,6 +1417,11 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, struct pte_chain *pte_chain; int ret; + if (!vx_rsspages_avail(mm, 1)) { + spin_unlock(&mm->page_table_lock); + return VM_FAULT_OOM; + } + pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN); if (!pte_chain) { pte_unmap(page_table); @@ -1441,7 +1457,8 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ret = VM_FAULT_MINOR; goto out; } - mm->rss++; + // mm->rss++; + vx_rsspages_inc(mm); entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)), vma); @@ -1509,6 +1526,8 @@ retry: return VM_FAULT_SIGBUS; if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; + if (!vx_rsspages_avail(mm, 1)) + return VM_FAULT_OOM; pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) @@ -1556,7 +1575,8 @@ retry: /* Only go through if we didn't race with anybody else... 
*/ if (pte_none(*page_table)) { if (!PageReserved(new_page)) - ++mm->rss; + // ++mm->rss; + vx_rsspages_inc(mm); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff --git a/mm/mlock.c b/mm/mlock.c index 0cf446b5f..2b4bf4d42 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -100,7 +100,7 @@ static int do_mlock(unsigned long start, size_t len, int on) asmlinkage long sys_mlock(unsigned long start, size_t len) { - unsigned long locked; + unsigned long locked, grow; unsigned long lock_limit; int error = -ENOMEM; @@ -108,8 +108,10 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; + grow = len >> PAGE_SHIFT; + if (!vx_vmlocked_avail(current->mm, grow)) + goto out; + locked = current->mm->locked_vm + grow; lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; @@ -117,6 +119,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) /* check against resource limits */ if (locked <= lock_limit) error = do_mlock(start, len, 1); +out: up_write(&current->mm->mmap_sem); return error; } @@ -174,6 +177,9 @@ asmlinkage long sys_mlockall(int flags) lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + if (!vx_vmlocked_avail(current->mm, current->mm->total_vm)) + goto out; + /* check vserver lock limits? */ if (current->mm->total_vm <= lock_limit) ret = do_mlockall(flags); out: diff --git a/mm/mmap.c b/mm/mmap.c index bbdcb91d4..f7990f9d3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -614,6 +614,10 @@ munmap_back: > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; + /* check context space, maybe only Private writable mapping? 
*/ + if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT)) + return -ENOMEM; + if (accountable && (!(flags & MAP_NORESERVE) || sysctl_overcommit_memory > 1)) { if (vm_flags & VM_SHARED) { @@ -708,9 +712,11 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + // mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + // mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -935,7 +941,8 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address) grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (security_vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(vma->vm_mm, grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -947,10 +954,13 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address) vm_unacct_memory(grow); return -ENOMEM; } + vma->vm_end = address; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); spin_unlock(&vma->vm_mm->page_table_lock); return 0; } @@ -989,7 +999,8 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address) grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. 
*/ - if (security_vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow) || + !vx_vmpages_avail(vma->vm_mm, grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1001,11 +1012,14 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address) vm_unacct_memory(grow); return -ENOMEM; } + vma->vm_start = address; vma->vm_pgoff -= grow; - vma->vm_mm->total_vm += grow; + // vma->vm_mm->total_vm += grow; + vx_vmpages_add(vma->vm_mm, grow); if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm += grow; + // vma->vm_mm->locked_vm += grow; + vx_vmlocked_add(vma->vm_mm, grow); spin_unlock(&vma->vm_mm->page_table_lock); return 0; } @@ -1108,9 +1122,12 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) { size_t len = area->vm_end - area->vm_start; - area->vm_mm->total_vm -= len >> PAGE_SHIFT; + // area->vm_mm->total_vm -= len >> PAGE_SHIFT; + vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT); + if (area->vm_flags & VM_LOCKED) - area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + // area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT); /* * Is this a new hole at the lowest possible address? */ @@ -1365,6 +1382,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; + /* vserver checks ? 
*/ } /* @@ -1386,7 +1404,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (mm->map_count > sysctl_max_map_count) return -ENOMEM; - if (security_vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT) || + !vx_vmpages_avail(mm, len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; @@ -1419,9 +1438,11 @@ unsigned long do_brk(unsigned long addr, unsigned long len) vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + // mm->total_vm += len >> PAGE_SHIFT; + vx_vmpages_add(mm, len >> PAGE_SHIFT); if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + // mm->locked_vm += len >> PAGE_SHIFT; + vx_vmlocked_add(mm, len >> PAGE_SHIFT); make_pages_present(addr, addr + len); } return addr; @@ -1455,9 +1476,12 @@ void exit_mmap(struct mm_struct *mm) vma = mm->mmap; mm->mmap = mm->mmap_cache = NULL; mm->mm_rb = RB_ROOT; - mm->rss = 0; - mm->total_vm = 0; - mm->locked_vm = 0; + // mm->rss = 0; + vx_rsspages_sub(mm, mm->rss); + // mm->total_vm = 0; + vx_vmpages_sub(mm, mm->total_vm); + // mm->locked_vm = 0; + vx_vmlocked_sub(mm, mm->locked_vm); spin_unlock(&mm->page_table_lock); diff --git a/mm/mremap.c b/mm/mremap.c index 904307691..82ceecde9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -238,9 +238,11 @@ static unsigned long move_vma(struct vm_area_struct *vma, vma->vm_next->vm_flags |= VM_ACCOUNT; } - mm->total_vm += new_len >> PAGE_SHIFT; + // mm->total_vm += new_len >> PAGE_SHIFT; + vx_vmpages_add(mm, new_len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { - mm->locked_vm += new_len >> PAGE_SHIFT; + // mm->locked_vm += new_len >> PAGE_SHIFT; + vx_vmlocked_add(mm, new_len >> PAGE_SHIFT); if (new_len > old_len) make_pages_present(new_addr + old_len, new_addr + new_len); @@ -349,6 +351,9 @@ unsigned long do_mremap(unsigned long addr, if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) > current->rlim[RLIMIT_AS].rlim_cur) goto out; + /* 
check context space, maybe only Private writable mapping? */ + if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT)) + goto out; if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; @@ -371,9 +376,11 @@ unsigned long do_mremap(unsigned long addr, spin_lock(&vma->vm_mm->page_table_lock); vma->vm_end = addr + new_len; spin_unlock(&vma->vm_mm->page_table_lock); - current->mm->total_vm += pages; + // current->mm->total_vm += pages; + vx_vmpages_add(current->mm, pages); if (vma->vm_flags & VM_LOCKED) { - current->mm->locked_vm += pages; + // current->mm->locked_vm += pages; + vx_vmlocked_add(current->mm, pages); make_pages_present(addr + old_len, addr + new_len); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ed0cee4e6..2f1529049 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -54,6 +54,7 @@ static int badness(struct task_struct *p) * The memory size of the process is the basis for the badness. */ points = p->mm->total_vm; + /* add vserver badness ;) */ /* * CPU time is in seconds and run time is in minutes. 
There is no diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8d3f6f461..ff2ffbe03 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -982,6 +982,8 @@ void si_meminfo(struct sysinfo *val) val->freehigh = 0; #endif val->mem_unit = PAGE_SIZE; + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_meminfo(val); } EXPORT_SYMBOL(si_meminfo); diff --git a/mm/rmap.c b/mm/rmap.c index 5577805ae..f23ff9c4c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -374,7 +374,8 @@ static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr) if (pte_dirty(pte)) set_page_dirty(page); - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); page_cache_release(page); ret = SWAP_SUCCESS; diff --git a/mm/swapfile.c b/mm/swapfile.c index 9ca222bc0..321b09c83 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -459,7 +459,8 @@ static void unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) { - vma->vm_mm->rss++; + // vma->vm_mm->rss++; + vx_rsspages_inc(vma->vm_mm); get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); *pte_chainp = page_add_rmap(page, dir, *pte_chainp); @@ -1595,6 +1596,8 @@ void si_swapinfo(struct sysinfo *val) val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + nr_to_be_unused; swap_list_unlock(); + if (vx_flags(VXF_VIRT_MEM, 0)) + vx_vsi_swapinfo(val); } /* diff --git a/net/core/dev.c b/net/core/dev.c index 43c273e74..b6dc21353 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1969,6 +1969,8 @@ static int dev_ifconf(char __user *arg) total = 0; for (dev = dev_base; dev; dev = dev->next) { + if (!dev_in_nx_info(dev, current->nx_info)) + continue; for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2029,6 +2031,10 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { + struct nx_info *nxi = current->nx_info; + + if (!dev_in_nx_info(dev, nxi)) 
+ return; if (dev->get_stats) { struct net_device_stats *stats = dev->get_stats(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 26e3577c0..f577cba67 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -224,6 +224,8 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; + if (!dev_in_nx_info(dev, current->nx_info)) + continue; if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0) break; } @@ -309,6 +311,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int size = NLMSG_GOODSIZE; + if (!dev_in_nx_info(dev, current->nx_info)) + return; skb = alloc_skb(size, GFP_KERNEL); if (!skb) return; diff --git a/net/core/sock.c b/net/core/sock.c index 92bc77ad6..afdc91865 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -323,7 +323,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - sock->passcred = valbool; + if (valbool) + set_bit(SOCK_PASS_CRED, &sock->flags); + else + clear_bit(SOCK_PASS_CRED, &sock->flags); break; case SO_TIMESTAMP: @@ -546,7 +549,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - v.val = sock->passcred; + v.val = test_bit(SOCK_PASS_CRED, &sock->flags)?1:0; break; case SO_PEERCRED: @@ -621,6 +624,8 @@ struct sock *sk_alloc(int family, int priority, int zero_it, kmem_cache_t *slab) sock_lock_init(sk); } sk->sk_slab = slab; + sock_vx_init(sk); + sock_nx_init(sk); if (security_sk_alloc(sk, family, priority)) { kmem_cache_free(slab, sk); @@ -651,6 +656,10 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + BUG_ON(sk->sk_vx_info); + BUG_ON(sk->sk_nx_info); +/* clr_vx_info(&sk->sk_vx_info); + clr_nx_info(&sk->sk_nx_info); */ kmem_cache_free(sk->sk_slab, sk); module_put(owner); } @@ 
-1142,6 +1151,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp.tv_sec = -1L; sk->sk_stamp.tv_usec = -1L; + sk->sk_vx_info = NULL; + sk->sk_xid = 0; + sk->sk_nx_info = NULL; + sk->sk_nid = 0; + atomic_set(&sk->sk_refcnt, 1); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edf457090..358b7728b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -158,6 +158,9 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); + + BUG_ON(sk->sk_nx_info); + BUG_ON(sk->sk_vx_info); dst_release(sk->sk_dst_cache); #ifdef INET_REFCNT_DEBUG atomic_dec(&inet_sock_nr); @@ -397,6 +400,11 @@ static int inet_create(struct socket *sock, int protocol) sk->sk_family = PF_INET; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + + set_vx_info(&sk->sk_vx_info, current->vx_info); + sk->sk_xid = vx_current_xid(); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_nid = nx_current_nid(); inet->uc_ttl = -1; inet->mc_loop = 1; @@ -421,8 +429,13 @@ static int inet_create(struct socket *sock, int protocol) if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) - inet_sock_release(sk); + if (err) { +/* sk->sk_vx_info = NULL; + put_vx_info(current->vx_info); + sk->sk_nx_info = NULL; + put_nx_info(current->nx_info); +*/ inet_sock_release(sk); + } } out: return err; @@ -460,6 +473,8 @@ int inet_release(struct socket *sock) !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; sock->sk = NULL; + clr_vx_info(&sk->sk_vx_info); + clr_nx_info(&sk->sk_nx_info); sk->sk_prot->close(sk, timeout); } return 0; @@ -476,6 +491,10 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* Address used for validation */ + __u32 s_addr1; + __u32 s_addr2 = 0xffffffffl; /* Optional address of the socket */ + struct nx_info *nxi = sk->sk_nx_info; /* If the socket has its own bind function then use it. 
(RAW) */ if (sk->sk_prot->bind) { @@ -486,7 +505,36 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = s_addr1 = addr->sin_addr.s_addr; + nxdprintk("inet_bind(%p) %p,%p;%lx\n", + sk, nx_info, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + if (nxi) { + __u32 v4_bcast = nxi->v4_bcast; + __u32 ipv4root = nxi->ipv4[0]; + int nbipv4 = nxi->nbipv4; + if (s_addr == 0) { + s_addr = ipv4root; + if (nbipv4 > 1) + s_addr1 = 0; + else { + s_addr1 = ipv4root; + } + s_addr2 = v4_bcast; + } else if (s_addr == 0x0100007f) { + s_addr = s_addr1 = ipv4root; + } else if (s_addr != v4_bcast) { + int i; + for (i=0; i<nbipv4; i++) { + if (s_addr == nxi->ipv4[i]) + break; + } + if (i == nbipv4) { + return -EADDRNOTAVAIL; + } + } + } + chk_addr_ret = inet_addr_type(s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -498,7 +546,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -523,7 +571,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE || inet->num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->rcv_saddr = inet->saddr = s_addr1; + inet->rcv_saddr2 = s_addr2; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) inet->saddr = 0; /* Use device */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c9cd619cf..b97526d8c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -488,6 +488,33 @@ static __inline__ int inet_abc_len(u32 addr) return rc; } +/* + Check that a device is not member of the ipv4root 
assigned to the process + Return true if this is the case + + If the process is not bound to specific IP, then it returns 0 (all + interface are fine). +*/ +static inline int devinet_notiproot (struct in_ifaddr *ifa) +{ + int ret = 0; + struct nx_info *nxi; + + if ((nxi = current->nx_info)) { + int i; + int nbip = nxi->nbipv4; + __u32 addr = ifa->ifa_local; + ret = 1; + for (i=0; i<nbip; i++) { + if (nxi->ipv4[i] == addr) { + ret = 0; + break; + } + } + } + return ret; +} + int devinet_ioctl(unsigned int cmd, void *arg) { @@ -595,6 +622,8 @@ int devinet_ioctl(unsigned int cmd, void *arg) ret = -EADDRNOTAVAIL; if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) goto done; + if (!ifa_in_nx_info(ifa, current->nx_info)) + goto done; switch(cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -724,6 +753,8 @@ static int inet_gifconf(struct net_device *dev, char *buf, int len) goto out; for (; ifa; ifa = ifa->ifa_next) { + if (!ifa_in_nx_info(ifa, current->nx_info)) + continue; if (!buf) { done += sizeof(ifr); continue; @@ -1059,6 +1090,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) read_lock(&in_dev->lock); for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { + if (!ifa_in_nx_info(ifa, current->nx_info)) + continue; if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 2316dfa91..3b4f8e35e 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -1010,6 +1010,8 @@ static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi return flags; } +extern int dev_in_nx_info(struct net_device *, struct nx_info *); + /* * This outputs /proc/net/route. 
* @@ -1039,7 +1041,7 @@ static int fib_seq_show(struct seq_file *seq, void *v) mask = FZ_MASK(iter->zone); flags = fib_flag_trans(f->fn_type, f->fn_state & FN_S_ZOMBIE, mask, fi); - if (fi) + if (fi && dev_in_nx_info(fi->fib_dev, current->nx_info)) snprintf(bf, sizeof(bf), "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d7b9238d1..a0650881a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -102,6 +102,38 @@ static void raw_v4_unhash(struct sock *sk) write_unlock_bh(&raw_v4_lock); } + +/* + Check if an address is in the list +*/ +static inline int raw_addr_in_list ( + u32 rcv_saddr1, + u32 rcv_saddr2, + u32 loc_addr, + struct nx_info *nx_info) +{ + int ret = 0; + if (loc_addr != 0 && + (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr)) + ret = 1; + else if (rcv_saddr1 == 0) { + /* Accept any address or only the one in the list */ + if (nx_info == NULL) + ret = 1; + else { + int n = nx_info->nbipv4; + int i; + for (i=0; i<n; i++) { + if (nx_info->ipv4[i] == loc_addr) { + ret = 1; + break; + } + } + } + } + return ret; +} + struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif) @@ -113,7 +145,8 @@ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + raw_addr_in_list(inet->rcv_saddr, inet->rcv_saddr2, + laddr, sk->sk_nx_info) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -687,7 +720,8 @@ static struct sock *raw_get_first(struct seq_file *seq) struct hlist_node *node; sk_for_each(sk, node, &raw_v4_htable[state->bucket]) - if (sk->sk_family == PF_INET) + if (sk->sk_family == PF_INET && + vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) goto found; } sk = NULL; @@ -703,7 +737,8 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); 
try_again: ; - } while (sk && sk->sk_family != PF_INET); + } while (sk && (sk->sk_family != PF_INET || + !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ac9211504..208233eed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -179,9 +179,63 @@ void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, tcp_sk(sk)->bind_hash = tb; } +/* + Return 1 if addr match the socket IP list + or the socket is INADDR_ANY +*/ +static inline int tcp_in_list(struct sock *sk, u32 addr) +{ + struct nx_info *nxi = sk->sk_nx_info; + + vxdprintk("tcp_in_list(%p) %p,%p;%lx\n", + sk, nxi, sk->sk_socket, + (sk->sk_socket?sk->sk_socket->flags:0)); + + if (nxi) { + int n = nxi->nbipv4; + int i; + + for (i=0; i<n; i++) + if (nxi->ipv4[i] == addr) + return 1; + } + else if (!tcp_v4_rcv_saddr(sk) || tcp_v4_rcv_saddr(sk) == addr) + return 1; + return 0; +} + +/* + Check if the addresses in sk1 conflict with those in sk2 +*/ +int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2) +{ + if (sk1 && sk2) + nxdprintk("inet_bind(%p,%p) %p,%p;%lx %p,%p;%lx\n", + sk1, sk2, + sk1->sk_nx_info, sk1->sk_socket, + (sk1->sk_socket?sk1->sk_socket->flags:0), + sk2->sk_nx_info, sk2->sk_socket, + (sk2->sk_socket?sk2->sk_socket->flags:0)); + + if (tcp_v4_rcv_saddr(sk1)) { + /* Bind to one address only */ + return tcp_in_list (sk2, tcp_v4_rcv_saddr(sk1)); + } else if (sk1->sk_nx_info) { + /* A restricted bind(any) */ + struct nx_info *nxi = sk1->sk_nx_info; + int n = nxi->nbipv4; + int i; + + for (i=0; i<n; i++) + if (tcp_in_list (sk2, nxi->ipv4[i])) + return 1; + } else /* A bind(any) do not allow other bind on the same port */ + return 1; + return 0; +} + static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -194,9 +248,7 @@ static inline int 
tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + if (tcp_ipv4_addr_conflict(sk, sk2)) break; } } @@ -405,6 +457,34 @@ void tcp_unhash(struct sock *sk) wake_up(&tcp_lhash_wait); } +/* + Check if an address is in the list +*/ +static inline int tcp_addr_in_list( + u32 rcv_saddr, + u32 daddr, + struct nx_info *nx_info) +{ + if (rcv_saddr == daddr) + return 1; + else if (rcv_saddr == 0) { + /* Accept any address or check the list */ + if (!nx_info) + return 1; + else { + int n = nx_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (nx_info->ipv4[i] == daddr) + return 1; + } + } + return 0; +} + + + /* Don't inline this cruft. Here are some nice properties to * exploit here. The BSD API does not allow a listening TCP * to specify the remote port nor the remote address for the @@ -426,11 +506,10 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, __u32 rcv_saddr = inet->rcv_saddr; score = (sk->sk_family == PF_INET ? 
1 : 0); - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_in_list(rcv_saddr, daddr, sk->sk_nx_info)) score+=2; - } + else + continue; if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) continue; @@ -460,8 +539,8 @@ inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, struct inet_opt *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + tcp_addr_in_list(inet->rcv_saddr, daddr, sk->sk_nx_info) && !sk->sk_bound_dev_if) goto sherry_cache; sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); @@ -2159,6 +2238,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur) req = req->dl_next; while (1) { while (req) { + if (!vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (req->class->family == st->family) { cur = req; goto out; @@ -2177,6 +2258,8 @@ get_req: sk = sk_next(sk); get_sk: sk_for_each_from(sk, node) { + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) { cur = sk; goto out; @@ -2224,18 +2307,20 @@ static void *established_get_first(struct seq_file *seq) read_lock(&tcp_ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { - if (sk->sk_family != st->family) { + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; + if (sk->sk_family != st->family) continue; - } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; tw_for_each(tw, node, &tcp_ehash[st->bucket + tcp_ehash_size].chain) { - if (tw->tw_family != st->family) { + if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) + continue; + if (tw->tw_family != st->family) continue; - } rc = tw; goto out; } @@ -2259,7 +2344,8 @@ static void *established_get_next(struct seq_file *seq, void *cur) tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family) { + while (tw && tw->tw_family != st->family && + 
!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) { tw = tw_next(tw); } if (tw) { @@ -2279,6 +2365,8 @@ get_tw: sk = sk_next(sk); sk_for_each_from(sk, node) { + if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) + continue; if (sk->sk_family == st->family) goto found; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d4c0d84d1..a0a8422cd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -362,6 +362,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ts_recent_stamp = tp->ts_recent_stamp; tw_dead_node_init(tw); + tw->tw_xid = sk->sk_xid; + tw->tw_vx_info = NULL; + tw->tw_nid = sk->sk_nid; + tw->tw_nx_info = NULL; + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -697,6 +702,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newsk->sk_state = TCP_SYN_RECV; /* SANITY */ + sock_vx_init(newsk); + sock_nx_init(newsk); sk_node_init(&newsk->sk_node); tcp_sk(newsk)->bind_hash = NULL; @@ -798,6 +805,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newsk->sk_err = 0; newsk->sk_priority = 0; atomic_set(&newsk->sk_refcnt, 2); + + /* hmm, maybe from socket? */ + set_vx_info(&newsk->sk_vx_info, current->vx_info); + set_nx_info(&newsk->sk_nx_info, current->nx_info); #ifdef INET_REFCNT_DEBUG atomic_inc(&inet_sock_nr); #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 05dcb54d5..8ddc29930 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,6 +120,8 @@ rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED; /* Shared by v4/v6 udp. 
*/ int udp_port_rover; +int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2); + static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; @@ -179,9 +181,7 @@ gotit: (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && + tcp_ipv4_addr_conflict(sk2, sk) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } @@ -216,6 +216,17 @@ static void udp_v4_unhash(struct sock *sk) write_unlock_bh(&udp_hash_lock); } +static inline int udp_in_list(struct nx_info *nx_info, u32 addr) +{ + int n = nx_info->nbipv4; + int i; + + for (i=0; i<n; i++) + if (nx_info->ipv4[i] == addr) + return 1; + return 0; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -235,6 +246,11 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, i if (inet->rcv_saddr != daddr) continue; score+=2; + } else if (sk->sk_nx_info) { + if (udp_in_list(sk->sk_nx_info, daddr)) + score+=2; + else + continue; } if (inet->daddr) { if (inet->daddr != saddr) @@ -290,7 +306,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, if (inet->num != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr && + inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) || ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; @@ -599,6 +616,15 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + struct nx_info *nxi = sk->sk_nx_info; + + if (nxi) { + err = ip_find_src(nxi, &rt, &fl); + if (err) + goto out; + if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN)) + daddr = fl.fl4_dst = nxi->ipv4[0]; + } err = ip_route_output_flow(&rt, &fl, 
sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -1374,8 +1400,10 @@ static struct sock *udp_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; + sk_for_each(sk, node, &udp_hash[state->bucket]) { - if (sk->sk_family == state->family) + if (sk->sk_family == state->family && + vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)) goto found; } } @@ -1392,7 +1420,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sk->sk_family != state->family || + !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(&udp_hash[state->bucket]); diff --git a/net/socket.c b/net/socket.c index bee094985..3f6b90aab 100644 --- a/net/socket.c +++ b/net/socket.c @@ -285,7 +285,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei->socket.ops = NULL; ei->socket.sk = NULL; ei->socket.file = NULL; - ei->socket.passcred = 0; + ei->socket.flags = 0; return &ei->vfs_inode; } @@ -529,7 +529,7 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; + int err, len; si->sock = sock; si->scm = NULL; @@ -540,7 +540,20 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (err) return err; - return sock->ops->sendmsg(iocb, sock, msg, size); + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk("__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d\n", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + size, len); + return len; } int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -559,7 +572,7 @@ int 
sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - int err; + int err, len; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -572,7 +585,16 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk("__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d\n", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + size, len); + return len; } int sock_recvmsg(struct socket *sock, struct msghdr *msg, @@ -1018,6 +1040,10 @@ static int __sock_create(int family, int type, int protocol, struct socket **res if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* disable IPv6 inside vservers for now */ + if (family == PF_INET6 && !vx_check(0, VX_ADMIN)) + return -EAFNOSUPPORT; + /* Compatibility. 
This uglymoron is moved from INET layer to here to avoid @@ -1126,6 +1152,7 @@ asmlinkage long sys_socket(int family, int type, int protocol) if (retval < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock->flags); retval = sock_map_fd(sock); if (retval < 0) goto out_release; @@ -1156,10 +1183,12 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *u err = sock_create(family, type, protocol, &sock1); if (err < 0) goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); err = sock_create(family, type, protocol, &sock2); if (err < 0) goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); err = sock1->ops->socketpair(sock1, sock2); if (err < 0) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89fb7eb9e..7bd7423d5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -405,6 +405,8 @@ static int unix_release_sock (struct sock *sk, int embrion) mntput(mnt); } + clr_vx_info(&sk->sk_vx_info); + clr_nx_info(&sk->sk_nx_info); sock_put(sk); /* ---- Socket is dead now and most probably destroyed ---- */ @@ -559,6 +561,10 @@ static struct sock * unix_create1(struct socket *sock) sock_init_data(sock,sk); sk_set_owner(sk, THIS_MODULE); + set_vx_info(&sk->sk_vx_info, current->vx_info); + set_nx_info(&sk->sk_nx_info, current->nx_info); + sk->sk_xid = vx_current_xid(); + sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; @@ -870,7 +876,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, goto out; alen = err; - if (sock->passcred && !unix_sk(sk)->addr && + if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; @@ -961,7 +967,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; addr_len = err; - if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASS_CRED, &sock->flags) + && !u->addr && (err = 
unix_autobind(sock)) != 0) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1295,7 +1302,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASS_CRED, &sock->flags) + && !u->addr && (err = unix_autobind(sock)) != 0) goto out; err = -EMSGSIZE; diff --git a/security/commoncap.c b/security/commoncap.c index f40fc7370..9b8cd2d33 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -120,7 +120,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) /* Derived from fs/exec.c:compute_creds. */ kernel_cap_t new_permitted, working; - new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); + new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps()); working = cap_intersect (bprm->cap_inheritable, current->cap_inheritable); new_permitted = cap_combine (new_permitted, working);