VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 6
-EXTRAVERSION =
+EXTRAVERSION = -vs1.9.0
NAME=Zonked Quokka
# *DOCUMENTATION*
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out_notsk;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
.quad alpha_ni_syscall /* 270 */
.quad alpha_ni_syscall
.quad alpha_ni_syscall
- .quad alpha_ni_syscall
+ .quad sys_vserver /* 273 sys_vserver */
.quad alpha_ni_syscall
.quad alpha_ni_syscall /* 275 */
.quad alpha_ni_syscall
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
BLKDEV start address.
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
.long sys_tgkill /* 270 */
.long sys_utimes
.long sys_fadvise64_64
- .long sys_ni_syscall /* sys_vserver */
+ .long sys_vserver
.long sys_ni_syscall /* sys_mbind */
.long sys_ni_syscall /* 275 sys_get_mempolicy */
.long sys_ni_syscall /* sys_set_mempolicy */
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
asmlinkage int sys_olduname(struct oldold_utsname __user * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error |= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error |= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
error |= __put_user(0,name->release+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error |= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
error |= __put_user(0,name->version+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error |= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
error |= __put_user(0,name->machine+__OLD_UTS_LEN);
up_read(&uts_sem);
{
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access) {
entry =
pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
ptepage = pte_page(entry);
get_page(ptepage);
set_pte(dst_pte, entry);
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
page = pte_page(pte);
put_page(page);
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_range(vma, start, end);
}
default y
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
int
ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
{
- unsigned long stack_base;
+ unsigned long stack_base, grow;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i;
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+ grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+ >> PAGE_SHIFT;
+ if (security_vm_enough_memory(grow) ||
+ !vx_vmpages_avail(mm, grow)) {
kmem_cache_free(vm_area_cachep, mpnt);
return -ENOMEM;
}
mpnt->vm_file = NULL;
mpnt->vm_private_data = 0;
insert_vm_struct(current->mm, mpnt);
- current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ // current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ vx_vmpages_sub(current->mm, current->mm->total_vm -
+ ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
}
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
*/
insert_vm_struct(mm, vma);
- mm->total_vm += size >> PAGE_SHIFT;
+ // mm->total_vm += size >> PAGE_SHIFT;
+ vx_vmpages_add(mm, size >> PAGE_SHIFT);
up_write(&task->mm->mmap_sem);
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
+
ret = -EPERM;
if (pid == 1) /* no messing around with init! */
goto out_tsk;
if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur
|| (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur))
return -ENOMEM;
+ if (!vx_vmpages_avail(vma->vm_mm, grow))
+ return -ENOMEM;
vma->vm_end += PAGE_SIZE;
- vma->vm_mm->total_vm += grow;
+ // vma->vm_mm->total_vm += grow;
+ vx_vmpages_add(vma->vm_mm, grow);
if (vma->vm_flags & VM_LOCKED)
- vma->vm_mm->locked_vm += grow;
+ // vma->vm_mm->locked_vm += grow;
+ vx_vmlocked_add(vma->vm_mm, grow);
return 0;
}
{
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access) {
entry =
pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
ptepage = pte_page(entry);
get_page(ptepage);
set_pte(dst_pte, entry);
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
put_page(page);
pte_clear(pte);
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_range(vma, start, end);
}
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
get_page(page);
- ++vma->vm_mm->rss;
+ // ++vma->vm_mm->rss;
+ vx_rsspages_inc(vma->vm_mm);
}
static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir,
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
/* Do this so that we can load the interpreter, if need be. We will
* change some of these later.
*/
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
setup_arg_pages(bprm, EXSTACK_DEFAULT);
current->mm->start_stack = bprm->p;
int ret = 0;
down_read(&uts_sem);
- if (copy_to_user(name,&system_utsname,sizeof *name))
+ if (copy_to_user(name, vx_new_utsname(), sizeof *name))
ret = -EFAULT;
up_read(&uts_sem);
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
*/
asmlinkage int sys_uname(struct old_utsname * name)
{
- if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+ if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
return 0;
return -EFAULT;
}
asmlinkage int sys_olduname(struct oldold_utsname * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
return -EFAULT;
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
error -= __put_user(0,name->release+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
error -= __put_user(0,name->version+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
error = __put_user(0,name->machine+__OLD_UTS_LEN);
error = error ? -EFAULT : 0;
return -EFAULT;
down_write(&uts_sem);
- strncpy(system_utsname.nodename, nodename, len);
+ strncpy(vx_new_uts(nodename), nodename, len);
nodename[__NEW_UTS_LEN] = '\0';
- strlcpy(system_utsname.nodename, nodename,
- sizeof(system_utsname.nodename));
+ strlcpy(vx_new_uts(nodename), nodename,
+ sizeof(vx_new_uts(nodename)));
up_write(&uts_sem);
return 0;
}
/*
* Check if we have enough memory..
*/
- if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
+ if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT) ||
+ !vx_vmpages_avail(mm, (newbrk-oldbrk) >> PAGE_SHIFT)) {
ret = -ENOMEM;
goto out;
}
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
+
ret = -EPERM;
if (pid == 1) /* no messing around with init! */
goto out_tsk;
do {
seq = read_seqbegin(&xtime_lock);
+ /* requires vx virtualization */
val.uptime = jiffies / HZ;
val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
.long ppc_fadvise64_64
.long sys_ni_syscall /* 255 - rtas (used on ppc64) */
.long sys_ni_syscall /* 256 reserved for sys_debug_setcontext */
- .long sys_ni_syscall /* 257 reserved for vserver */
+ .long sys_vserver
.long sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */
.long sys_ni_syscall /* 259 reserved for new sys_mbind */
.long sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
int err = -EFAULT;
down_read(&uts_sem);
- if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+ if (name && !copy_to_user(name, vx_new_utsname(), sizeof (*name)))
err = 0;
up_read(&uts_sem);
return err;
int sys_olduname(struct oldold_utsname __user * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
error -= __put_user(0,name->release+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
error -= __put_user(0,name->version+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
error = __put_user(0,name->machine+__OLD_UTS_LEN);
up_read(&uts_sem);
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
.llong .ppc32_fadvise64_64 /* 32bit only fadvise64_64 */
.llong .ppc_rtas /* 255 */
.llong .sys_ni_syscall /* 256 reserved for sys_debug_setcontext */
- .llong .sys_ni_syscall /* 257 reserved for vserver */
+ .llong .sys_vserver
.llong .sys_ni_syscall /* 258 reserved for new sys_remap_file_pages */
.llong .sys_ni_syscall /* 259 reserved for new sys_mbind */
.llong .sys_ni_syscall /* 260 reserved for new sys_get_mempolicy */
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
int sys32_olduname(struct oldold_utsname * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
error -= __put_user(0,name->release+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
error -= __put_user(0,name->version+__OLD_UTS_LEN);
- error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
+ error -= __copy_to_user(&name->machine,ptr->machine,__OLD_UTS_LEN);
error = __put_user(0,name->machine+__OLD_UTS_LEN);
up_read(&uts_sem);
hugepte_t entry;
int i;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
entry = mk_hugepte(page, write_access);
for (i = 0; i < HUGEPTE_BATCH_SIZE; i++)
set_hugepte(ptep+i, entry);
/* This is the first hugepte in a batch */
ptepage = hugepte_page(entry);
get_page(ptepage);
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
}
set_hugepte(dst_pte, entry);
put_page(page);
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
}
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack)
{
- unsigned long stack_base;
+ unsigned long stack_base, grow;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i;
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+ grow = (STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+ >> PAGE_SHIFT;
+ if (security_vm_enough_memory(grow) ||
+ !vx_vmpages_avail(mm, grow)) {
kmem_cache_free(vm_area_cachep, mpnt);
return -ENOMEM;
}
INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt);
- mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ vx_vmpages_sub(mm, mm->total_vm -
+ ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
}
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = do_ptrace(child, request, addr, data);
-
+out_tsk:
put_task_struct(child);
out:
unlock_kernel();
SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */
SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper)
SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper)
-NI_SYSCALL /* reserved for vserver */
+SYSCALL(sys_vserver,sys_vserver,sys_vserver)
SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper)
SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper)
SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper)
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
unsigned long i;
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access)
entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
pte_val(entry) += PAGE_SIZE;
dst_pte++;
}
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
pte++;
}
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_range(vma, start, end);
}
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
pt_error_return(regs, ESRCH);
goto out;
}
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
+ pt_error_return(regs, ESRCH);
+ goto out_tsk;
+ }
if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
|| (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
down_read(&uts_sem);
- nlen = strlen(system_utsname.domainname) + 1;
+ nlen = strlen(vx_new_uts(domainname)) + 1;
if (nlen < len)
len = nlen;
if (len > __NEW_UTS_LEN)
goto done;
- if (copy_to_user(name, system_utsname.domainname, len))
+ if (copy_to_user(name, vx_new_uts(domainname), len))
goto done;
err = 0;
done:
/*250*/ .long sparc_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
/*255*/ .long sys_nis_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
/*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
-/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
+/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
/*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
/*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall
/*280*/ .long sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
current->mm->mmap = NULL;
compute_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
pt_error_return(regs, ESRCH);
goto out;
}
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT)) {
+ pt_error_return(regs, ESRCH);
+ goto out_tsk;
+ }
if ((current->personality == PER_SUNOS && request == PTRACE_SUNATTACH)
|| (current->personality != PER_SUNOS && request == PTRACE_ATTACH)) {
down_read(&uts_sem);
- nlen = strlen(system_utsname.domainname) + 1;
+ nlen = strlen(vx_new_uts(domainname)) + 1;
if (nlen < len)
len = nlen;
if (len > __NEW_UTS_LEN)
goto done;
- if (copy_to_user(name, system_utsname.domainname, len))
+ if (copy_to_user(name, vx_new_uts(domainname), len))
goto done;
err = 0;
done:
/*250*/ .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, sys32_nfsservctl
.word sys_ni_syscall, compat_clock_settime, compat_clock_gettime, compat_clock_getres, compat_clock_nanosleep
/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_timer_settime, compat_timer_gettime, sys_timer_getoverrun
- .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
+ .word sys_timer_delete, sys32_timer_create, sys_vserver, compat_sys_io_setup, sys_io_destroy
/*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink
.word sys32_mq_timedsend, sys32_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, sys_ni_syscall
/*280*/ .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
/*250*/ .word sys64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
.word sys_ni_syscall, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
/*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
- .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
+ .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy
/*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
.word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_ni_syscall
/*280*/ .word sys_ni_syscall, sys_ni_syscall, sys_ni_syscall
unsigned long i;
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access)
entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
pte_val(entry) += PAGE_SIZE;
dst_pte++;
}
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
pte++;
}
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_range(vma, start, end);
}
source "fs/Kconfig"
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
up_read(&uts_sem);
return err?-EFAULT:0;
}
int sys_olduname(struct oldold_utsname * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,
__OLD_UTS_LEN);
error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+ error |= __copy_to_user(&name->nodename,ptr->nodename,
__OLD_UTS_LEN);
error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->release,&system_utsname.release,
+ error |= __copy_to_user(&name->release,ptr->release,
__OLD_UTS_LEN);
error |= __put_user(0,name->release+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->version,&system_utsname.version,
+ error |= __copy_to_user(&name->version,ptr->version,
__OLD_UTS_LEN);
error |= __put_user(0,name->version+__OLD_UTS_LEN);
- error |= __copy_to_user(&name->machine,&system_utsname.machine,
+ error |= __copy_to_user(&name->machine,ptr->machine,
__OLD_UTS_LEN);
error |= __put_user(0,name->machine+__OLD_UTS_LEN);
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
rval = -EPERM;
if (pid == 1) /* you may not mess with init */
endmenu
+source "kernel/vserver/Kconfig"
+
source "security/Kconfig"
source "crypto/Kconfig"
(current->mm->start_brk = N_BSSADDR(ex));
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
current->mm->mmap = NULL;
compute_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
{
- unsigned long stack_base;
+ unsigned long stack_base, grow;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i;
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
+ grow = (IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
+ >> PAGE_SHIFT;
+ if (security_vm_enough_memory(grow) ||
+ !vx_vmpages_avail(mm, grow)) {
kmem_cache_free(vm_area_cachep, mpnt);
return -ENOMEM;
}
INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt);
- mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ vx_vmpages_sub(mm, mm->total_vm -
+ ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
}
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
asmlinkage long sys32_olduname(struct oldold_utsname * name)
{
int error;
+ struct new_utsname *ptr;
if (!name)
return -EFAULT;
down_read(&uts_sem);
- error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+ ptr = vx_new_utsname();
+ error = __copy_to_user(&name->sysname,ptr->sysname,__OLD_UTS_LEN);
__put_user(0,name->sysname+__OLD_UTS_LEN);
- __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+ __copy_to_user(&name->nodename,ptr->nodename,__OLD_UTS_LEN);
__put_user(0,name->nodename+__OLD_UTS_LEN);
- __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+ __copy_to_user(&name->release,ptr->release,__OLD_UTS_LEN);
__put_user(0,name->release+__OLD_UTS_LEN);
- __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+ __copy_to_user(&name->version,ptr->version,__OLD_UTS_LEN);
__put_user(0,name->version+__OLD_UTS_LEN);
{
char *arch = "x86_64";
if (!name)
return -EFAULT;
down_read(&uts_sem);
- err=copy_to_user(name, &system_utsname, sizeof (*name));
+ err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
up_read(&uts_sem);
if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
read_unlock(&tasklist_lock);
if (!child)
goto out;
+ if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+ goto out_tsk;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
{
int err;
down_read(&uts_sem);
- err = copy_to_user(name, &system_utsname, sizeof (*name));
+ err = copy_to_user(name, vx_new_utsname(), sizeof (*name));
up_read(&uts_sem);
if (personality(current->personality) == PER_LINUX32)
err |= copy_to_user(&name->machine, "i686", 5);
EXPORT_SYMBOL(inode_change_ok);
+int inode_setattr_flags(struct inode *inode, unsigned int flags)
+{
+ unsigned int oldflags, newflags;
+
+ oldflags = inode->i_flags;
+ newflags = oldflags & ~(S_IMMUTABLE | S_IUNLINK | S_BARRIER);
+ if (flags & ATTR_FLAG_IMMUTABLE)
+ newflags |= S_IMMUTABLE;
+ if (flags & ATTR_FLAG_IUNLINK)
+ newflags |= S_IUNLINK;
+ if (flags & ATTR_FLAG_BARRIER)
+ newflags |= S_BARRIER;
+
+ if (oldflags ^ newflags)
+ inode->i_flags = newflags;
+ return 0;
+}
+
int inode_setattr(struct inode * inode, struct iattr * attr)
{
unsigned int ia_valid = attr->ia_valid;
mode &= ~S_ISGID;
inode->i_mode = mode;
}
+ if (ia_valid & ATTR_ATTR_FLAG)
+ inode_setattr_flags(inode, attr->ia_attr_flags);
mark_inode_dirty(inode);
out:
return error;
(current->mm->start_brk = N_BSSADDR(ex));
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
current->mm->mmap = NULL;
compute_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
retval = setup_arg_pages(bprm, executable_stack);
if (retval < 0) {
current->mm->start_brk = datapos + data_len + bss_len;
current->mm->brk = (current->mm->start_brk + 3) & ~3;
current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len;
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
}
if (flags & FLAT_FLAG_KTRACE)
create_som_tables(bprm);
current->mm->start_stack = bprm->p;
- current->mm->rss = 0;
+ // current->mm->rss = 0;
+ vx_rsspages_sub(current->mm, current->mm->rss);
#if 0
printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);
#include <linux/mount.h>
#include <linux/tty.h>
#include <linux/devpts_fs.h>
+#include <linux/vinline.h>
#include "xattr.h"
#define DEVPTS_SUPER_MAGIC 0x1cd1
return lookup_one_len(s, root, sprintf(s, "%d", num));
}
+static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ int ret = -EACCES;
+
+ if (vx_check(inode->i_xid, VX_IDENT))
+ ret = vfs_permission(inode, mask);
+ return ret;
+}
+
static struct inode_operations devpts_file_inode_operations = {
.setxattr = devpts_setxattr,
.getxattr = devpts_getxattr,
.listxattr = devpts_listxattr,
.removexattr = devpts_removexattr,
+ .permission = devpts_permission,
};
int devpts_pty_new(struct tty_struct *tty)
inode->i_gid = config.setgid ? config.gid : current->fsgid;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
init_special_inode(inode, S_IFCHR|config.mode, device);
+ inode->i_xid = vx_current_xid();
inode->i_op = &devpts_file_inode_operations;
inode->u.generic_ip = tty;
set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
pte_chain = page_add_rmap(page, pte, pte_chain);
pte_unmap(pte);
- tsk->mm->rss++;
+ // tsk->mm->rss++;
+ vx_rsspages_inc(tsk->mm);
spin_unlock(&tsk->mm->page_table_lock);
/* no need for flush_tlb */
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
+ if (security_vm_enough_memory(arg_size >> PAGE_SHIFT) ||
+ !vx_vmpages_avail(mm, arg_size >> PAGE_SHIFT)) {
kmem_cache_free(vm_area_cachep, mpnt);
return -ENOMEM;
}
INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt);
- mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ // mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ vx_vmpages_sub(mm, mm->total_vm -
+ ((mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT));
}
for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
memset(ei->i_data, 0, sizeof(ei->i_data));
- ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
+ ei->i_flags = EXT2_I(dir)->i_flags &
+ ~(EXT2_BTREE_FL|EXT2_IUNLINK_FL|EXT2_BARRIER_FL);
if (S_ISLNK(mode))
ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
/* dirsync is only applied to directories */
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
+#include <linux/vserver/xid.h>
#include "ext2.h"
#include "acl.h"
ext2_discard_prealloc(inode);
}
+static void ext2_truncate_nocheck (struct inode * inode);
+
/*
* Called at the last iput() if i_nlink is zero.
*/
inode->i_size = 0;
if (inode->i_blocks)
- ext2_truncate (inode);
+ ext2_truncate_nocheck(inode);
ext2_free_inode (inode);
return;
ext2_free_data(inode, p, q);
}
-void ext2_truncate (struct inode * inode)
+static void ext2_truncate_nocheck(struct inode * inode)
{
u32 *i_data = EXT2_I(inode)->i_data;
int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb);
return;
if (ext2_inode_is_fast_symlink(inode))
return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
ext2_discard_prealloc(inode);
return ERR_PTR(-EIO);
}
+void ext2_truncate (struct inode * inode)
+{
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+ ext2_truncate_nocheck(inode);
+}
+
void ext2_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT2_I(inode)->i_flags;
inode->i_flags |= S_APPEND;
if (flags & EXT2_IMMUTABLE_FL)
inode->i_flags |= S_IMMUTABLE;
+ if (flags & EXT2_IUNLINK_FL)
+ inode->i_flags |= S_IUNLINK;
+ if (flags & EXT2_BARRIER_FL)
+ inode->i_flags |= S_BARRIER;
if (flags & EXT2_NOATIME_FL)
inode->i_flags |= S_NOATIME;
if (flags & EXT2_DIRSYNC_FL)
ino_t ino = inode->i_ino;
struct buffer_head * bh;
struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
+ uid_t uid;
+ gid_t gid;
int n;
#ifdef CONFIG_EXT2_FS_POSIX_ACL
goto bad_inode;
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
if (!(test_opt (inode->i_sb, NO_UID32))) {
- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
+ inode->i_uid = INOXID_UID(uid, gid);
+ inode->i_gid = INOXID_GID(uid, gid);
+ if (inode->i_sb->s_flags & MS_TAGXID)
+ inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid));
+
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
struct ext2_inode_info *ei = EXT2_I(inode);
struct super_block *sb = inode->i_sb;
ino_t ino = inode->i_ino;
- uid_t uid = inode->i_uid;
- gid_t gid = inode->i_gid;
+ uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+ gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
struct buffer_head * bh;
struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh);
int n;
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
+#ifdef CONFIG_INOXID_GID32
+ raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
+#endif
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(inode->i_size);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
return sync_inode(inode, &wbc);
}
+int ext2_setattr_flags(struct inode *inode, unsigned int flags)
+{
+ unsigned int oldflags, newflags;
+
+ oldflags = EXT2_I(inode)->i_flags;
+ newflags = oldflags &
+ ~(EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL | EXT2_BARRIER_FL);
+ if (flags & ATTR_FLAG_IMMUTABLE)
+ newflags |= EXT2_IMMUTABLE_FL;
+ if (flags & ATTR_FLAG_IUNLINK)
+ newflags |= EXT2_IUNLINK_FL;
+ if (flags & ATTR_FLAG_BARRIER)
+ newflags |= EXT2_BARRIER_FL;
+
+ if (oldflags ^ newflags) {
+ EXT2_I(inode)->i_flags = newflags;
+ inode->i_ctime = CURRENT_TIME;
+ }
+ return 0;
+}
+
int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
if (error)
return error;
}
+ if (iattr->ia_valid & ATTR_ATTR_FLAG)
+ ext2_setattr_flags(inode, iattr->ia_attr_flags);
+
inode_setattr(inode, iattr);
if (iattr->ia_valid & ATTR_MODE)
error = ext2_acl_chmod(inode);
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_tagxid,
Opt_ignore, Opt_err,
};
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
+ {Opt_tagxid, "tagxid"},
{Opt_ignore, "grpquota"},
{Opt_ignore, "noquota"},
{Opt_ignore, "quota"},
case Opt_nouid32:
set_opt (sbi->s_mount_opt, NO_UID32);
break;
+#ifndef CONFIG_INOXID_NONE
+ case Opt_tagxid:
+ set_opt (sbi->s_mount_opt, TAG_XID);
+ break;
+#endif
case Opt_check:
#ifdef CONFIG_EXT2_CHECK
set_opt (sbi->s_mount_opt, CHECK);
if (!parse_options ((char *) data, sbi))
goto failed_mount;
+ if (EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_TAG_XID)
+ sb->s_flags |= MS_TAGXID;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
MS_POSIXACL : 0);
ei->i_dir_start_lookup = 0;
ei->i_disksize = 0;
- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+ ei->i_flags = EXT3_I(dir)->i_flags &
+ ~(EXT3_INDEX_FL|EXT3_IUNLINK_FL|EXT3_BARRIER_FL);
if (S_ISLNK(mode))
ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
/* dirsync only applies to directories */
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/uio.h>
+#include <linux/vserver/xid.h>
#include "xattr.h"
#include "acl.h"
ext3_discard_prealloc(inode);
}
+static void ext3_truncate_nocheck (struct inode *inode);
+
/*
* Called at the last iput() if i_nlink is zero.
*/
handle->h_sync = 1;
inode->i_size = 0;
if (inode->i_blocks)
- ext3_truncate(inode);
+ ext3_truncate_nocheck(inode);
/*
* Kill off the orphan record which ext3_truncate created.
* AKPM: I think this can be inside the above `if'.
* ext3_truncate() run will find them and release them.
*/
-void ext3_truncate(struct inode * inode)
+static void ext3_truncate_nocheck(struct inode * inode)
{
handle_t *handle;
struct ext3_inode_info *ei = EXT3_I(inode);
return;
if (ext3_inode_is_fast_symlink(inode))
return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
ext3_discard_prealloc(inode);
return 0;
}
+void ext3_truncate(struct inode * inode)
+{
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+ ext3_truncate_nocheck(inode);
+}
+
void ext3_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT3_I(inode)->i_flags;
inode->i_flags |= S_APPEND;
if (flags & EXT3_IMMUTABLE_FL)
inode->i_flags |= S_IMMUTABLE;
+ if (flags & EXT3_IUNLINK_FL)
+ inode->i_flags |= S_IUNLINK;
+ if (flags & EXT3_BARRIER_FL)
+ inode->i_flags |= S_BARRIER;
if (flags & EXT3_NOATIME_FL)
inode->i_flags |= S_NOATIME;
if (flags & EXT3_DIRSYNC_FL)
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh;
int block;
+ uid_t uid;
+ gid_t gid;
#ifdef CONFIG_EXT3_FS_POSIX_ACL
ei->i_acl = EXT3_ACL_NOT_CACHED;
bh = iloc.bh;
raw_inode = ext3_raw_inode(&iloc);
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
if(!(test_opt (inode->i_sb, NO_UID32))) {
- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+ uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+ gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
+ inode->i_uid = INOXID_UID(uid, gid);
+ inode->i_gid = INOXID_GID(uid, gid);
+ if (inode->i_sb->s_flags & MS_TAGXID)
+ inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid));
+
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh = iloc->bh;
+ uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+ gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
int err = 0, rc, block;
/* For fields not not tracking in the in-memory inode,
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
if(!(test_opt(inode->i_sb, NO_UID32))) {
- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(gid));
/*
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
if(!ei->i_dtime) {
raw_inode->i_uid_high =
- cpu_to_le16(high_16_bits(inode->i_uid));
+ cpu_to_le16(high_16_bits(uid));
raw_inode->i_gid_high =
- cpu_to_le16(high_16_bits(inode->i_gid));
+ cpu_to_le16(high_16_bits(gid));
} else {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
} else {
raw_inode->i_uid_low =
- cpu_to_le16(fs_high2lowuid(inode->i_uid));
+ cpu_to_le16(fs_high2lowuid(uid));
raw_inode->i_gid_low =
- cpu_to_le16(fs_high2lowgid(inode->i_gid));
+ cpu_to_le16(fs_high2lowgid(gid));
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
+#ifdef CONFIG_INOXID_GID32
+ raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
+#endif
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
ext3_force_commit(inode->i_sb);
}
+int ext3_setattr_flags(struct inode *inode, unsigned int flags)
+{
+ unsigned int oldflags, newflags;
+ int err = 0;
+
+ oldflags = EXT3_I(inode)->i_flags;
+ newflags = oldflags &
+ ~(EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL | EXT3_BARRIER_FL);
+ if (flags & ATTR_FLAG_IMMUTABLE)
+ newflags |= EXT3_IMMUTABLE_FL;
+ if (flags & ATTR_FLAG_IUNLINK)
+ newflags |= EXT3_IUNLINK_FL;
+ if (flags & ATTR_FLAG_BARRIER)
+ newflags |= EXT3_BARRIER_FL;
+
+ if (oldflags ^ newflags) {
+ handle_t *handle;
+ struct ext3_iloc iloc;
+
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+ err = ext3_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ goto flags_err;
+
+ EXT3_I(inode)->i_flags = newflags;
+ inode->i_ctime = CURRENT_TIME;
+
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ flags_err:
+ ext3_journal_stop(handle);
+ }
+ return err;
+}
+
/*
* ext3_setattr()
*
ext3_journal_stop(handle);
}
+ if (ia_valid & ATTR_ATTR_FLAG) {
+ rc = ext3_setattr_flags(inode, attr->ia_attr_flags);
+ if (!error)
+ error = rc;
+ }
+
rc = inode_setattr(inode, attr);
/* If inode_setattr's call to ext3_truncate failed to get a
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
#include <linux/time.h>
+#include <linux/vserver/xid.h>
#include <asm/uaccess.h>
remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
return ret;
}
+#endif
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+ case EXT3_IOC_SETXID: {
+ handle_t *handle;
+ struct ext3_iloc iloc;
+ int xid;
+ int err;
+
+ /* fixme: if stealth, return -ENOTTY */
+ if (!capable(CAP_CONTEXT))
+ return -EPERM;
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (!(inode->i_sb->s_flags & MS_TAGXID))
+ return -ENOSYS;
+ if (get_user(xid, (int *) arg))
+ return -EFAULT;
+
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+		err = ext3_reserve_inode_write(handle, inode, &iloc);
+		if (err) {
+			ext3_journal_stop(handle);
+			return err;
+		}
+
+ inode->i_xid = (xid & 0xFFFF);
+ inode->i_ctime = CURRENT_TIME;
+
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ ext3_journal_stop(handle);
+ return err;
+ }
#endif
default:
return -ENOTTY;
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
- Opt_ignore, Opt_err,
+ Opt_tagxid, Opt_ignore, Opt_err,
};
static match_table_t tokens = {
{Opt_grpjquota, "grpjquota=%s"},
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
+ {Opt_tagxid, "tagxid"},
{Opt_ignore, "grpquota"},
{Opt_ignore, "noquota"},
{Opt_ignore, "quota"},
case Opt_nouid32:
set_opt (sbi->s_mount_opt, NO_UID32);
break;
+#ifndef CONFIG_INOXID_NONE
+ case Opt_tagxid:
+ set_opt (sbi->s_mount_opt, TAG_XID);
+ break;
+#endif
case Opt_check:
#ifdef CONFIG_EXT3_CHECK
set_opt (sbi->s_mount_opt, CHECK);
if (!parse_options ((char *) data, sb, &journal_inum, 0))
goto failed_mount;
+ if (EXT3_SB(sb)->s_mount_opt & EXT3_MOUNT_TAG_XID)
+ sb->s_flags |= MS_TAGXID;
sb->s_flags |= MS_ONE_SECOND;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
error = -EMFILE;
if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
+ if (!vx_files_avail(1))
+ goto out;
error = expand_files(files, newfd);
if (error < 0)
FD_SET(fd, files->open_fds);
FD_CLR(fd, files->close_on_exec);
spin_unlock(&files->file_lock);
+ vx_openfd_inc(fd);
fd_install(fd, file);
} else {
spin_unlock(&files->file_lock);
FD_SET(newfd, files->open_fds);
FD_CLR(newfd, files->close_on_exec);
spin_unlock(&files->file_lock);
+ vx_openfd_inc(newfd);
if (tofree)
filp_close(tofree, files);
f->f_owner.lock = RW_LOCK_UNLOCKED;
/* f->f_version: 0 */
INIT_LIST_HEAD(&f->f_list);
+ vx_files_inc(f);
return f;
}
}
fops_put(file->f_op);
if (file->f_mode & FMODE_WRITE)
put_write_access(inode);
+ vx_files_dec(file);
file_kill(file);
file->f_dentry = NULL;
file->f_vfsmnt = NULL;
struct address_space * const mapping = &inode->i_data;
inode->i_sb = sb;
+ if (sb->s_flags & MS_TAGXID)
+ inode->i_xid = current->xid;
+ else
+ inode->i_xid = 0; /* maybe xid -1 would be better? */
+ // inode->i_dqh = dqhget(sb->s_dqh);
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
atomic_set(&inode->i_count, 1);
inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_rdev = 0;
+ // inode->i_xid = 0; /* maybe not too wise ... */
inode->i_security = NULL;
inode->dirtied_when = 0;
if (security_inode_alloc(inode)) {
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/security.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/xid.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#ifdef CONFIG_VSERVER_LEGACY
+extern int vx_proc_ioctl(struct inode *, struct file *,
+ unsigned int, unsigned long);
+#endif
+
static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg)
{
int error;
else
error = -ENOTTY;
break;
+#ifdef CONFIG_VSERVER_LEGACY
+#ifndef CONFIG_INOXID_NONE
+ case FIOC_GETXID: {
+ struct inode *inode = filp->f_dentry->d_inode;
+
+ /* fixme: if stealth, return -ENOTTY */
+ error = -EPERM;
+ if (capable(CAP_CONTEXT))
+ error = put_user(inode->i_xid, (int *) arg);
+ break;
+ }
+ case FIOC_SETXID: {
+ struct inode *inode = filp->f_dentry->d_inode;
+ int xid;
+
+ /* fixme: if stealth, return -ENOTTY */
+ error = -EPERM;
+ if (!capable(CAP_CONTEXT))
+ break;
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ break;
+ error = -ENOSYS;
+ if (!(inode->i_sb->s_flags & MS_TAGXID))
+ break;
+ error = -EFAULT;
+ if (get_user(xid, (int *) arg))
+ break;
+ error = 0;
+ inode->i_xid = (xid & 0xFFFF);
+ inode->i_ctime = CURRENT_TIME;
+ mark_inode_dirty(inode);
+ break;
+ }
+#endif
+ case FIOC_GETXFLG:
+ case FIOC_SETXFLG:
+ error = -ENOTTY;
+ if (filp->f_dentry->d_inode->i_sb->s_magic == PROC_SUPER_MAGIC)
+ error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
+ break;
+#endif
default:
error = -ENOTTY;
if (S_ISREG(filp->f_dentry->d_inode->i_mode))
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
static int copy_from_dinode(struct dinode * dip, struct inode *ip)
{
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+ uid_t uid;
+ gid_t gid;
jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
ip->i_nlink = le32_to_cpu(dip->di_nlink);
- ip->i_uid = le32_to_cpu(dip->di_uid);
- ip->i_gid = le32_to_cpu(dip->di_gid);
+
+ uid = le32_to_cpu(dip->di_uid);
+ gid = le32_to_cpu(dip->di_gid);
+ ip->i_uid = INOXID_UID(uid, gid);
+ ip->i_gid = INOXID_GID(uid, gid);
+ ip->i_xid = INOXID_XID(uid, gid, 0);
+
ip->i_size = le64_to_cpu(dip->di_size);
ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
static void copy_to_dinode(struct dinode * dip, struct inode *ip)
{
struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+ uid_t uid;
+ gid_t gid;
dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp);
dip->di_size = cpu_to_le64(ip->i_size);
dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
dip->di_nlink = cpu_to_le32(ip->i_nlink);
- dip->di_uid = cpu_to_le32(ip->i_uid);
- dip->di_gid = cpu_to_le32(ip->i_gid);
+
+ uid = XIDINO_UID(ip->i_uid, ip->i_xid);
+ gid = XIDINO_GID(ip->i_gid, ip->i_xid);
+ dip->di_uid = cpu_to_le32(uid);
+ dip->di_gid = cpu_to_le32(gid);
/*
* mode2 is only needed for storing the higher order bits.
* Trust i_mode for the lower order ones
{
umode_t mode = inode->i_mode;
+ if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EACCES;
+
if (mask & MAY_WRITE) {
/*
* Nobody gets write access to a read-only fs.
return -EACCES;
}
+static inline int xid_permission(struct inode *inode)
+{
+ if (inode->i_xid == 0)
+ return 0;
+ if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))
+ return 0;
+ return -EACCES;
+}
+
int permission(struct inode * inode,int mask, struct nameidata *nd)
{
int retval;
/* Ordinary permission routines do not understand MAY_APPEND. */
submask = mask & ~MAY_APPEND;
+ if ((retval = xid_permission(inode)))
+ return retval;
if (inode->i_op && inode->i_op->permission)
retval = inode->i_op->permission(inode, submask, nd);
else
if (IS_APPEND(dir))
return -EPERM;
if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
- IS_IMMUTABLE(victim->d_inode))
+ IS_IXORUNLINK(victim->d_inode))
return -EPERM;
if (isdir) {
if (!S_ISDIR(victim->d_inode->i_mode))
/*
* A link to an append-only or immutable file cannot be created.
*/
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (IS_APPEND(inode) || IS_IXORUNLINK(inode))
return -EPERM;
if (!dir->i_op || !dir->i_op->link)
return -EPERM;
};
struct proc_fs_info *fs_infop;
+ if (vx_flags(VXF_HIDE_MOUNT, 0))
+ return 0;
+
mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
seq_putc(m, ' ');
seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
EXPORT_SYMBOL(may_umount);
-void umount_tree(struct vfsmount *mnt)
+static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill)
{
- struct vfsmount *p;
- LIST_HEAD(kill);
-
- for (p = mnt; p; p = next_mnt(p, mnt)) {
- list_del(&p->mnt_list);
- list_add(&p->mnt_list, &kill);
- }
-
- while (!list_empty(&kill)) {
- mnt = list_entry(kill.next, struct vfsmount, mnt_list);
+ while (!list_empty(kill)) {
+ mnt = list_entry(kill->next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
if (mnt->mnt_parent == mnt) {
spin_unlock(&vfsmount_lock);
}
}
+void umount_tree(struct vfsmount *mnt)
+{
+ struct vfsmount *p;
+ LIST_HEAD(kill);
+
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ list_del(&p->mnt_list);
+ list_add(&p->mnt_list, &kill);
+ }
+ __umount_tree(mnt, &kill);
+}
+
+void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
+{
+ struct vfsmount *p;
+ LIST_HEAD(kill);
+
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ if (p == fs->rootmnt || p == fs->pwdmnt)
+ continue;
+ list_del(&p->mnt_list);
+ list_add(&p->mnt_list, &kill);
+ }
+ __umount_tree(mnt, &kill);
+}
+
static int do_umount(struct vfsmount *mnt, int flags)
{
struct super_block * sb = mnt->mnt_sb;
goto dput_and_out;
retval = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT))
goto dput_and_out;
retval = do_umount(nd.mnt, flags);
{
if (capable(CAP_SYS_ADMIN))
return 0;
+ if (vx_ccaps(VXC_SECURE_MOUNT))
+ return 0;
return -EPERM;
#ifdef notyet
if (S_ISLNK(nd->dentry->d_inode->i_mode))
mnt_flags |= MNT_NOEXEC;
flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE);
+ if (vx_ccaps(VXC_SECURE_MOUNT))
+ mnt_flags |= MNT_NODEV;
+
/* ... and get the mountpoint */
retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
if (retval)
/* Override them by options set on kernel command-line */
root_nfs_parse(name, buf);
- cp = system_utsname.nodename;
+ cp = vx_new_uts(nodename);
if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
return -1;
#include <asm/uaccess.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
int vfs_statfs(struct super_block *sb, struct kstatfs *buf)
{
dentry = file->f_dentry;
inode = dentry->d_inode;
+ err = -EPERM;
+ if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
+ goto out_putf;
err = -EROFS;
if (IS_RDONLY(inode))
goto out_putf;
goto out;
inode = nd.dentry->d_inode;
+ error = -EPERM;
+ if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
+ goto dput_and_out;
+
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
+
newattrs.ia_valid = ATTR_CTIME;
if (user != (uid_t) -1) {
newattrs.ia_valid |= ATTR_UID;
- newattrs.ia_uid = user;
+ newattrs.ia_uid = vx_map_uid(user);
}
if (group != (gid_t) -1) {
newattrs.ia_valid |= ATTR_GID;
- newattrs.ia_gid = group;
+ newattrs.ia_gid = vx_map_gid(group);
}
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
FD_SET(fd, files->open_fds);
FD_CLR(fd, files->close_on_exec);
files->next_fd = fd + 1;
+ vx_openfd_inc(fd);
#if 1
/* Sanity check */
if (files->fd[fd] != NULL) {
FD_CLR(fd, files->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
+ vx_openfd_dec(fd);
return filp_close(filp, files);
out_unlock:
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/times.h>
+#include <linux/ninline.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
"D (disk sleep)", /* 2 */
"T (stopped)", /* 4 */
"Z (zombie)", /* 8 */
- "X (dead)" /* 16 */
+ "X (dead)", /* 16 */
+ "H (on hold)" /* 32 */
};
static inline const char * get_task_state(struct task_struct *tsk)
TASK_INTERRUPTIBLE |
TASK_UNINTERRUPTIBLE |
TASK_ZOMBIE |
- TASK_STOPPED);
+ TASK_STOPPED |
+ TASK_ONHOLD);
const char **p = &task_state_array[0];
while (state) {
static inline char * task_state(struct task_struct *p, char *buffer)
{
int g;
+ pid_t ppid;
read_lock(&tasklist_lock);
+ ppid = vx_map_tgid(current->vx_info, p->real_parent->pid);
buffer += sprintf(buffer,
"State:\t%s\n"
"SleepAVG:\t%lu%%\n"
get_task_state(p),
(p->sleep_avg/1024)*100/(1020000000/1024),
p->tgid,
- p->pid, p->pid ? p->real_parent->pid : 0,
+ p->pid, p->pid ? ppid : 0,
p->pid && p->ptrace ? p->parent->pid : 0,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
int proc_pid_status(struct task_struct *task, char * buffer)
{
char * orig = buffer;
+#ifdef CONFIG_VSERVER_LEGACY
+ struct vx_info *vxi;
+ struct nx_info *nxi;
+#endif
struct mm_struct *mm = get_task_mm(task);
buffer = task_name(task, buffer);
}
buffer = task_sig(task, buffer);
buffer = task_cap(task, buffer);
+
+#ifdef CONFIG_VSERVER_LEGACY
+ buffer += sprintf (buffer,"s_context: %d\n", vx_task_xid(task));
+ vxi = task_get_vx_info(task);
+ if (vxi) {
+ buffer += sprintf (buffer,"ctxflags: %08llx\n"
+ ,vxi->vx_flags);
+ buffer += sprintf (buffer,"initpid: %d\n"
+ ,vxi->vx_initpid);
+ } else {
+ buffer += sprintf (buffer,"ctxflags: none\n");
+ buffer += sprintf (buffer,"initpid: none\n");
+ }
+ put_vx_info(vxi);
+ nxi = task_get_nx_info(task);
+ if (nxi) {
+ int i;
+
+ buffer += sprintf (buffer,"ipv4root:");
+ for (i=0; i<nxi->nbipv4; i++){
+ buffer += sprintf (buffer," %08x/%08x"
+ ,nxi->ipv4[i]
+ ,nxi->mask[i]);
+ }
+ *buffer++ = '\n';
+ buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
+ ,nxi->v4_bcast);
+ buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
+ ,atomic_read(&nxi->nx_refcount));
+ } else {
+ buffer += sprintf (buffer,"ipv4root: 0\n");
+ buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
+ }
+ put_nx_info(nxi);
+#endif
#if defined(CONFIG_ARCH_S390)
buffer = task_show_regs(task, buffer);
#endif
{
unsigned long vsize, eip, esp, wchan;
long priority, nice;
+ unsigned long long bias_jiffies;
int tty_pgrp = -1, tty_nr = 0;
sigset_t sigign, sigcatch;
char state;
state = *get_task_state(task);
vsize = eip = esp = 0;
+ bias_jiffies = INITIAL_JIFFIES;
+
task_lock(task);
+ if (__vx_task_flags(task, VXF_VIRT_UPTIME, 0)) {
+ bias_jiffies = task->vx_info->cvirt.bias_jiffies;
+ /* hmm, do we need that? */
+ if (bias_jiffies > task->start_time)
+ bias_jiffies = task->start_time;
+ }
+
mm = task->mm;
if(mm)
mm = mmgrab(mm);
read_unlock(&tasklist_lock);
/* Temporary variable needed for gcc-2.96 */
- start_time = jiffies_64_to_clock_t(task->start_time - INITIAL_JIFFIES);
+ start_time = jiffies_64_to_clock_t(task->start_time - bias_jiffies);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
+#include <linux/ninline.h>
/*
* For hysterical raisins we keep the same inumbers as in the old procfs.
PROC_TGID_ATTR_EXEC,
PROC_TGID_ATTR_FSCREATE,
#endif
+ PROC_TGID_VX_INFO,
+ PROC_TGID_IP_INFO,
PROC_TGID_FD_DIR,
PROC_TID_INO,
PROC_TID_STATUS,
PROC_TID_ATTR_EXEC,
PROC_TID_ATTR_FSCREATE,
#endif
+ PROC_TID_VX_INFO,
+ PROC_TID_IP_INFO,
PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */
};
#ifdef CONFIG_KALLSYMS
E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO),
#endif
+ E(PROC_TGID_VX_INFO, "vinfo", S_IFREG|S_IRUGO),
+ E(PROC_TGID_IP_INFO, "ninfo", S_IFREG|S_IRUGO),
{0,0,NULL,0}
};
static struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_KALLSYMS
E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO),
#endif
+ E(PROC_TID_VX_INFO, "vinfo", S_IFREG|S_IRUGO),
+ E(PROC_TID_IP_INFO, "ninfo", S_IFREG|S_IRUGO),
{0,0,NULL,0}
};
inode->i_uid = task->euid;
inode->i_gid = task->egid;
}
+ inode->i_xid = vx_task_xid(task);
security_task_to_inode(task, inode);
out:
{
struct inode *inode = dentry->d_inode;
struct task_struct *task = proc_task(inode);
+
+ if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ goto out_drop;
+ /* discard wrong fakeinit */
+
if (pid_alive(task)) {
if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
inode->i_uid = task->euid;
security_task_to_inode(task, inode);
return 1;
}
+out_drop:
d_drop(dentry);
return 0;
}
ei->op.proc_read = proc_pid_wchan;
break;
#endif
+ case PROC_TID_VX_INFO:
+ case PROC_TGID_VX_INFO:
+ inode->i_fop = &proc_info_file_operations;
+ ei->op.proc_read = proc_pid_vx_info;
+ break;
+ case PROC_TID_IP_INFO:
+ case PROC_TGID_IP_INFO:
+ inode->i_fop = &proc_info_file_operations;
+ ei->op.proc_read = proc_pid_nx_info;
+ break;
default:
printk("procfs: impossible type (%d)",p->type);
iput(inode);
d_add(dentry, inode);
return NULL;
}
- tgid = name_to_int(dentry);
+ tgid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
if (tgid == ~0U)
goto out;
if (!task)
goto out;
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
-
+ inode = NULL;
+ if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
if (!inode) {
put_task_struct(task);
struct inode *inode;
unsigned tid;
- tid = name_to_int(dentry);
+ tid = vx_rmap_tgid(current->vx_info, name_to_int(dentry));
if (tid == ~0U)
goto out;
+/* handle fakeinit */
+
read_lock(&tasklist_lock);
task = find_task_by_pid(tid);
if (task)
if (leader->tgid != task->tgid)
goto out_drop_task;
- inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
-
+ inode = NULL;
+ if (vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
if (!inode)
goto out_drop_task;
for ( ; p != &init_task; p = next_task(p)) {
int tgid = p->pid;
+
if (!pid_alive(p))
continue;
+ if (!vx_check(vx_task_xid(p), VX_WATCH|VX_IDENT))
+ continue;
if (--index >= 0)
continue;
- tgids[nr_tgids] = tgid;
+ tgids[nr_tgids] = vx_map_tgid(current->vx_info, tgid);
nr_tgids++;
if (nr_tgids >= PROC_MAXPIDS)
break;
if (pid_alive(task)) do {
int tid = task->pid;
+ if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ continue;
if (--index >= 0)
continue;
- tids[nr_tids] = tid;
+ tids[nr_tids] = vx_map_tgid(current->vx_info, tid);
nr_tids++;
if (nr_tids >= PROC_MAXPIDS)
break;
unsigned int nr_tids, i;
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
+ struct task_struct *task = proc_task(inode);
int retval = -ENOENT;
ino_t ino;
unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */
- if (!pid_alive(proc_task(inode)))
+ if (!vx_check(vx_task_xid(task), VX_WATCH|VX_IDENT))
+ goto out;
+ if (!pid_alive(task))
goto out;
retval = 0;
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/idr.h>
+#include <linux/vinline.h>
+#include <linux/vserver/inode.h>
#include <asm/uaccess.h>
#include <asm/bitops.h>
return 1;
}
+static int proc_revalidate_dentry(struct dentry *de, struct nameidata *nd)
+{
+ /* maybe add a check if it's really necessary? */
+ return 0;
+}
+
static struct dentry_operations proc_dentry_operations =
{
+ .d_revalidate = proc_revalidate_dentry,
.d_delete = proc_delete_dentry,
};
for (de = de->subdir; de ; de = de->next) {
if (de->namelen != dentry->d_name.len)
continue;
+ if (!vx_hide_check(0, de->vx_flags))
+ continue;
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
unsigned int ino = de->low_ino;
}
do {
+ if (!vx_hide_check(0, de->vx_flags))
+ goto skip;
if (filldir(dirent, de->name, de->namelen, filp->f_pos,
de->low_ino, de->mode >> 12) < 0)
goto out;
+ skip:
filp->f_pos++;
de = de->next;
} while (de);
ent->namelen = len;
ent->mode = mode;
ent->nlink = nlink;
+ ent->vx_flags = IATTR_PROC_DEFAULT;
out:
return ent;
}
kfree(ent->data);
kfree(ent);
ent = NULL;
- }
+ } else
+ ent->vx_flags = IATTR_PROC_SYMLINK;
} else {
kfree(ent);
ent = NULL;
inode->i_uid = de->uid;
inode->i_gid = de->gid;
}
+ if (de->vx_flags)
+ PROC_I(inode)->vx_flags = de->vx_flags;
if (de->size)
inode->i_size = de->size;
if (de->nlink)
#include <linux/jiffies.h>
#include <linux/sysrq.h>
#include <linux/vmalloc.h>
+#include <linux/vinline.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
do_posix_clock_monotonic_gettime(&uptime);
jiffies_to_timespec(idle_jiffies, &idle);
+ if (vx_flags(VXF_VIRT_UPTIME, 0))
+ vx_vsi_uptime(&uptime, &idle);
+
len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
(unsigned long) uptime.tv_sec,
(uptime.tv_nsec / (NSEC_PER_SEC / 100)),
#ifdef CONFIG_SYSCTL
struct proc_dir_entry *proc_sys_root;
#endif
+struct proc_dir_entry *proc_virtual;
+
+extern void proc_vx_init(void);
static struct super_block *proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
proc_device_tree_init();
#endif
proc_bus = proc_mkdir("bus", 0);
+ proc_vx_init();
}
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
return ( n_err < 0 ) ? -EIO : 0;
}
-static int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
+int reiserfs_setattr_flags(struct inode *inode, unsigned int flags)
+{
+ unsigned int oldflags, newflags;
+
+ oldflags = REISERFS_I(inode)->i_flags;
+ newflags = oldflags & ~(REISERFS_IMMUTABLE_FL |
+ REISERFS_IUNLINK_FL | REISERFS_BARRIER_FL);
+ if (flags & ATTR_FLAG_IMMUTABLE)
+ newflags |= REISERFS_IMMUTABLE_FL;
+ if (flags & ATTR_FLAG_IUNLINK)
+ newflags |= REISERFS_IUNLINK_FL;
+ if (flags & ATTR_FLAG_BARRIER)
+ newflags |= REISERFS_BARRIER_FL;
+
+ if (oldflags ^ newflags) {
+ REISERFS_I(inode)->i_flags = newflags;
+ inode->i_ctime = CURRENT_TIME;
+ }
+ return 0;
+}
+
+int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
struct inode *inode = dentry->d_inode ;
int error ;
+
reiserfs_write_lock(inode->i_sb);
+ if (S_ISDIR(inode->i_mode))
+ goto is_dir;
+
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
goto out;
}
+is_dir:
error = inode_change_ok(inode, attr) ;
+
+ if (!error && attr->ia_valid & ATTR_ATTR_FLAG)
+ reiserfs_setattr_flags(inode, attr->ia_attr_flags);
+
if (!error)
inode_setattr(inode, attr) ;
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
+#include <linux/vserver/xid.h>
extern int reiserfs_default_io_size; /* default io size devuned in super.c */
struct buffer_head * bh;
struct item_head * ih;
__u32 rdev;
+ uid_t uid;
+ gid_t gid;
//int version = ITEM_VERSION_1;
bh = PATH_PLAST_BUFFER (path);
struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
unsigned long blocks;
+ uid = sd_v1_uid(sd);
+ gid = sd_v1_gid(sd);
+
set_inode_item_key_version (inode, KEY_FORMAT_3_5);
set_inode_sd_version (inode, STAT_DATA_V1);
inode->i_mode = sd_v1_mode(sd);
inode->i_nlink = sd_v1_nlink(sd);
- inode->i_uid = sd_v1_uid(sd);
- inode->i_gid = sd_v1_gid(sd);
inode->i_size = sd_v1_size(sd);
inode->i_atime.tv_sec = sd_v1_atime(sd);
inode->i_mtime.tv_sec = sd_v1_mtime(sd);
// (directories and symlinks)
struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
+ uid = sd_v2_uid(sd);
+ gid = sd_v2_gid(sd);
+
inode->i_mode = sd_v2_mode(sd);
inode->i_nlink = sd_v2_nlink(sd);
- inode->i_uid = sd_v2_uid(sd);
inode->i_size = sd_v2_size(sd);
- inode->i_gid = sd_v2_gid(sd);
inode->i_mtime.tv_sec = sd_v2_mtime(sd);
inode->i_atime.tv_sec = sd_v2_atime(sd);
inode->i_ctime.tv_sec = sd_v2_ctime(sd);
REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd );
sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
}
+ inode->i_uid = INOXID_UID(uid, gid);
+ inode->i_gid = INOXID_GID(uid, gid);
+ inode->i_xid = INOXID_XID(uid, gid, 0);
pathrelse (path);
if (S_ISREG (inode->i_mode)) {
static void inode2sd (void * sd, struct inode * inode)
{
struct stat_data * sd_v2 = (struct stat_data *)sd;
+ uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid);
+ gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid);
__u16 flags;
+ set_sd_v2_uid(sd_v2, uid );
+ set_sd_v2_gid(sd_v2, gid );
set_sd_v2_mode(sd_v2, inode->i_mode );
set_sd_v2_nlink(sd_v2, inode->i_nlink );
- set_sd_v2_uid(sd_v2, inode->i_uid );
set_sd_v2_size(sd_v2, inode->i_size );
- set_sd_v2_gid(sd_v2, inode->i_gid );
set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec );
set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec );
set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec );
inode -> i_flags |= S_IMMUTABLE;
else
inode -> i_flags &= ~S_IMMUTABLE;
+ if( sd_attrs & REISERFS_IUNLINK_FL )
+ inode -> i_flags |= S_IUNLINK;
+ else
+ inode -> i_flags &= ~S_IUNLINK;
+ if( sd_attrs & REISERFS_BARRIER_FL )
+ inode -> i_flags |= S_BARRIER;
+ else
+ inode -> i_flags &= ~S_BARRIER;
if( sd_attrs & REISERFS_APPEND_FL )
inode -> i_flags |= S_APPEND;
else
*sd_attrs |= REISERFS_IMMUTABLE_FL;
else
*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
+ if( inode -> i_flags & S_IUNLINK )
+ *sd_attrs |= REISERFS_IUNLINK_FL;
+ else
+ *sd_attrs &= ~REISERFS_IUNLINK_FL;
+ if( inode -> i_flags & S_BARRIER )
+ *sd_attrs |= REISERFS_BARRIER_FL;
+ else
+ *sd_attrs &= ~REISERFS_BARRIER_FL;
if( inode -> i_flags & S_SYNC )
*sd_attrs |= REISERFS_SYNC_FL;
else
int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
unsigned long arg)
{
- unsigned int flags;
+ unsigned int flags, oldflags;
switch (cmd) {
case REISERFS_IOC_UNPACK:
case REISERFS_IOC_GETFLAGS:
flags = REISERFS_I(inode) -> i_attrs;
i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags );
+ flags &= REISERFS_FL_USER_VISIBLE;
return put_user(flags, (int *) arg);
case REISERFS_IOC_SETFLAGS: {
if (IS_RDONLY(inode))
if (get_user(flags, (int *) arg))
return -EFAULT;
- if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) &&
+ oldflags = REISERFS_I(inode) -> i_attrs;
+ if ( ( ( flags ^ oldflags) &
+ ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) &&
!capable( CAP_LINUX_IMMUTABLE ) )
return -EPERM;
if( result )
return result;
}
+
+ flags = flags & REISERFS_FL_USER_MODIFYABLE;
+ flags |= oldflags & ~REISERFS_FL_USER_MODIFYABLE;
sd_attrs_to_i_attrs( flags, inode );
REISERFS_I(inode) -> i_attrs = flags;
inode->i_ctime = CURRENT_TIME;
.rmdir = reiserfs_rmdir,
.mknod = reiserfs_mknod,
.rename = reiserfs_rename,
+ .setattr = reiserfs_setattr,
};
#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
+#define LINUX_XFLAG_BARRIER 0x00004000 /* chroot() barrier */
+#define LINUX_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */
STATIC unsigned int
xfs_merge_ioc_xflags(
int error;
int attr_flags;
unsigned int flags;
+ unsigned int old_flags;
switch (cmd) {
case XFS_IOC_FSGETXATTR: {
attr_flags = 0;
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
attr_flags |= ATTR_NONBLOCK;
-
+
va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
va.va_xflags = fa.fsx_xflags;
va.va_extsize = fa.fsx_extsize;
case XFS_IOC_GETXFLAGS: {
flags = 0;
- if (ip->i_d.di_flags & XFS_XFLAG_IMMUTABLE)
+ if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
flags |= LINUX_XFLAG_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_XFLAG_APPEND)
+ if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+ flags |= LINUX_XFLAG_IUNLINK;
+ if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
flags |= LINUX_XFLAG_APPEND;
- if (ip->i_d.di_flags & XFS_XFLAG_SYNC)
+ if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
flags |= LINUX_XFLAG_SYNC;
- if (ip->i_d.di_flags & XFS_XFLAG_NOATIME)
+ if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
flags |= LINUX_XFLAG_NOATIME;
- if (ip->i_d.di_flags & XFS_XFLAG_NODUMP)
+ if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
flags |= LINUX_XFLAG_NODUMP;
if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
return -XFS_ERROR(EFAULT);
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
attr_flags |= ATTR_NONBLOCK;
+ old_flags = 0;
+ if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+ old_flags |= LINUX_XFLAG_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+ old_flags |= LINUX_XFLAG_IUNLINK;
+ if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+ old_flags |= LINUX_XFLAG_BARRIER;
+
va.va_mask = XFS_AT_XFLAGS;
- va.va_xflags = xfs_merge_ioc_xflags(flags, ip->i_d.di_flags);
+ va.va_xflags = xfs_merge_ioc_xflags(flags, old_flags);
VOP_SETATTR(vp, &va, attr_flags, NULL, error);
if (!error)
return 0;
}
+STATIC int
+linvfs_setattr_flags(
+ vattr_t *vap,
+ unsigned int flags)
+{
+ unsigned int oldflags, newflags;
+
+ oldflags = vap->va_xflags;
+ newflags = oldflags & ~(XFS_XFLAG_IMMUTABLE |
+ XFS_XFLAG_IUNLINK | XFS_XFLAG_BARRIER);
+ if (flags & ATTR_FLAG_IMMUTABLE)
+ newflags |= XFS_XFLAG_IMMUTABLE;
+ if (flags & ATTR_FLAG_IUNLINK)
+ newflags |= XFS_XFLAG_IUNLINK;
+ if (flags & ATTR_FLAG_BARRIER)
+ newflags |= XFS_XFLAG_BARRIER;
+
+ if (oldflags ^ newflags)
+ vap->va_xflags = newflags;
+ return 0;
+}
+
STATIC int
linvfs_setattr(
struct dentry *dentry,
flags |= ATTR_NONBLOCK;
#endif
+ if (ia_valid & ATTR_ATTR_FLAG) {
+ vattr.va_mask |= XFS_AT_XFLAGS;
+ linvfs_setattr_flags(&vattr, attr->ia_attr_flags);
+ }
+
VOP_SETATTR(vp, &vattr, flags, NULL, error);
if (error)
return(-error); /* Positive error up from XFS */
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+ inode->i_flags |= S_IUNLINK;
+ else
+ inode->i_flags &= ~S_IUNLINK;
+ if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+ inode->i_flags |= S_BARRIER;
+ else
+ inode->i_flags &= ~S_BARRIER;
if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
+ if (va.va_xflags & XFS_XFLAG_IUNLINK)
+ inode->i_flags |= S_IUNLINK;
+ else
+ inode->i_flags &= ~S_IUNLINK;
+ if (va.va_xflags & XFS_XFLAG_BARRIER)
+ inode->i_flags |= S_BARRIER;
+ else
+ inode->i_flags &= ~S_BARRIER;
if (va.va_xflags & XFS_XFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
#define XFS_DIFLAG_SYNC_BIT 5 /* inode is written synchronously */
#define XFS_DIFLAG_NOATIME_BIT 6 /* do not update atime */
#define XFS_DIFLAG_NODUMP_BIT 7 /* do not dump */
+#define XFS_DIFLAG_BARRIER_BIT 10 /* chroot() barrier */
+#define XFS_DIFLAG_IUNLINK_BIT 11 /* inode has iunlink */
+
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
#define XFS_DIFLAG_SYNC (1 << XFS_DIFLAG_SYNC_BIT)
#define XFS_DIFLAG_NOATIME (1 << XFS_DIFLAG_NOATIME_BIT)
#define XFS_DIFLAG_NODUMP (1 << XFS_DIFLAG_NODUMP_BIT)
+#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT)
+#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT)
+
#endif /* __XFS_DINODE_H__ */
#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */
#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */
+#define XFS_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/*
vap->va_xflags |= XFS_XFLAG_PREALLOC;
if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
vap->va_xflags |= XFS_XFLAG_IMMUTABLE;
+ if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
+ vap->va_xflags |= XFS_XFLAG_IUNLINK;
+ if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
+ vap->va_xflags |= XFS_XFLAG_BARRIER;
if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
vap->va_xflags |= XFS_XFLAG_APPEND;
if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
}
if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
ip->i_d.di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (vap->va_xflags & XFS_XFLAG_IUNLINK)
+ ip->i_d.di_flags |= XFS_DIFLAG_IUNLINK;
+ if (vap->va_xflags & XFS_XFLAG_BARRIER)
+ ip->i_d.di_flags |= XFS_DIFLAG_BARRIER;
if (vap->va_xflags & XFS_XFLAG_APPEND)
ip->i_d.di_flags |= XFS_DIFLAG_APPEND;
if (vap->va_xflags & XFS_XFLAG_SYNC)
#define __NR_osf_memcntl 260 /* not implemented */
#define __NR_osf_fdatasync 261 /* not implemented */
+#define __NR_vserver 273
/*
* Linux-specific system calls begin at 300
if (rss < freed)
freed = rss;
- mm->rss = rss - freed;
+ // mm->rss = rss - freed;
+ vx_rsspages_sub(mm, freed);
if (freed) {
flush_tlb_mm(mm);
if (rss < freed)
freed = rss;
- mm->rss = rss - freed;
+ // mm->rss = rss - freed;
+ vx_rsspages_sub(mm, freed);
if (freed) {
flush_tlb_mm(mm);
#include <linux/config.h>
#include <linux/swap.h>
+#include <linux/vinline.h>
#include <asm/tlbflush.h>
/*
if (rss < freed)
freed = rss;
- mm->rss = rss - freed;
+ // mm->rss = rss - freed;
+ vx_rsspages_sub(mm, freed);
tlb_flush_mmu(tlb, start, end);
/* keep the page table cache within bounds */
if (rss < freed)
freed = rss;
- mm->rss = rss - freed;
+ // mm->rss = rss - freed;
+ vx_rsspages_sub(mm, freed);
/*
* Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
* tlb->end_addr.
#define __NR_fremovexattr 234
#define __NR_futex 235
-#define NR_syscalls 236
+#define __NR_vserver 273
+
+#define NR_syscalls 274
/* user-visible error numbers are in the range -1 - -124: see
<asm-m68k/errno.h> */
#define __NR_setfsuid32 215
#define __NR_setfsgid32 216
-#define NR_syscalls 256
+#define __NR_vserver 273
+
+#define NR_syscalls 274
/* user-visible error numbers are in the range -1 - -122: see
<asm-m68k/errno.h> */
#define __NR_tgkill (__NR_Linux + 266)
#define __NR_utimes (__NR_Linux + 267)
+#define __NR_vserver (__NR_Linux + 273)
+
/*
* Offset of the last Linux o32 flavoured syscall
*/
-#define __NR_Linux_syscalls 267
+#define __NR_Linux_syscalls 273
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
#define __NR_remap_file_pages (__NR_Linux + 227)
#define __NR_semtimedop (__NR_Linux + 228)
+#define __NR_vserver (__NR_Linux + 273)
-#define __NR_Linux_syscalls 228
+#define __NR_Linux_syscalls 273
#define HPUX_GATEWAY_ADDR 0xC0000004
#define LINUX_GATEWAY_ADDR 0x100
#define __NR_fadvise64_64 254
#define __NR_rtas 255
/* Number 256 is reserved for sys_debug_setcontext */
-/* Number 257 is reserved for vserver */
+#define __NR_vserver 257
/* Number 258 is reserved for new sys_remap_file_pages */
/* Number 259 is reserved for new sys_mbind */
/* Number 260 is reserved for new sys_get_mempolicy */
#define __NR_fadvise64_64 254
#define __NR_rtas 255
/* Number 256 is reserved for sys_debug_setcontext */
-/* Number 257 is reserved for vserver */
+#define __NR_vserver 257
/* Number 258 is reserved for new sys_remap_file_pages */
/* Number 259 is reserved for new sys_mbind */
/* Number 260 is reserved for new sys_get_mempolicy */
#define __NR_clock_gettime (__NR_timer_create+6)
#define __NR_clock_getres (__NR_timer_create+7)
#define __NR_clock_nanosleep (__NR_timer_create+8)
-/* Number 263 is reserved for vserver */
+#define __NR_vserver 263
#define __NR_fadvise64_64 264
#define __NR_statfs64 265
#define __NR_fstatfs64 266
#define __NR_timer_getoverrun 264
#define __NR_timer_delete 265
#define __NR_timer_create 266
-/* #define __NR_vserver 267 Reserved for VSERVER */
+#define __NR_vserver 267
#define __NR_io_setup 268
#define __NR_io_destroy 269
#define __NR_io_submit 270
#define __NR_timer_getoverrun 264
#define __NR_timer_delete 265
#define __NR_timer_create 266
-/* #define __NR_vserver 267 Reserved for VSERVER */
+#define __NR_vserver 267
#define __NR_io_setup 268
#define __NR_io_destroy 269
#define __NR_io_submit 270
/* Allow enabling/disabling tagged queuing on SCSI controllers and sending
arbitrary SCSI commands */
/* Allow setting encryption key on loopback filesystem */
+/* Allow the selection of a security context */
#define CAP_SYS_ADMIN 21
#define CAP_LEASE 28
+/* Allow context manipulations */
+/* Allow changing context info on files */
+
+#define CAP_CONTEXT 29
+
#ifdef __KERNEL__
/*
* Bounding set
#define EXT2_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT2_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT2_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT2_BARRIER_FL 0x04000000 /* Barrier for chroot() */
+#define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */
#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
#define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
struct {
__u8 l_i_frag; /* Fragment number */
__u8 l_i_fsize; /* Fragment size */
- __u16 i_pad1;
+ __u16 l_i_xid; /* LRU Context */
__u16 l_i_uid_high; /* these 2 fields */
__u16 l_i_gid_high; /* were reserved2[0] */
__u32 l_i_reserved2;
#define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high
#define i_gid_high osd2.linux2.l_i_gid_high
+#define i_raw_xid osd2.linux2.l_i_xid
#define i_reserved2 osd2.linux2.l_i_reserved2
#endif
#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */
#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT2_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
+#define EXT2_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */
#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
#define set_opt(o, opt) o |= EXT2_MOUNT_##opt
#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT3_BARRIER_FL 0x04000000 /* Barrier for chroot() */
+#define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */
#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
#ifdef CONFIG_JBD_DEBUG
#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
#endif
+#ifdef CONFIG_VSERVER_LEGACY
+#define EXT3_IOC_SETXID FIOC_SETXIDJ
+#endif
/*
* Structure of an inode on the disk
struct {
__u8 l_i_frag; /* Fragment number */
__u8 l_i_fsize; /* Fragment size */
- __u16 i_pad1;
+ __u16 l_i_xid; /* LRU Context */
__u16 l_i_uid_high; /* these 2 fields */
__u16 l_i_gid_high; /* were reserved2[0] */
__u32 l_i_reserved2;
#define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high
#define i_gid_high osd2.linux2.l_i_gid_high
+#define i_raw_xid osd2.linux2.l_i_xid
#define i_reserved2 osd2.linux2.l_i_reserved2
#elif defined(__GNU__)
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
+#define EXT3_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define MS_VERBOSE 32768
#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
#define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */
+#define MS_TAGXID (1<<24) /* tag inodes with context information */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
#define S_NOQUOTA 64 /* Inode is not counted to quota */
#define S_DIRSYNC 128 /* Directory modifications are synchronous */
#define S_NOCMTIME 256 /* Do not update file c/mtime */
+#define S_BARRIER 512 /* Barrier for chroot() */
+#define S_IUNLINK 1024 /* Immutable unlink */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+#define IS_IUNLINK(inode) ((inode)->i_flags & S_IUNLINK)
+#define IS_IXORUNLINK(inode) ((IS_IUNLINK(inode) ? S_IMMUTABLE : 0) ^ IS_IMMUTABLE(inode))
#define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
#define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
#define IS_POSIXACL(inode) __IS_FLG(inode, MS_POSIXACL)
#define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND)
+#define IS_BARRIER(inode) (S_ISDIR((inode)->i_mode) && ((inode)->i_flags & S_BARRIER))
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */
#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */
+#define ATTR_FLAG_BARRIER 512 /* Barrier for chroot() */
+#define ATTR_FLAG_IUNLINK 1024 /* Immutable unlink */
+
/*
* Includes for diskquotas.
*/
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
+ xid_t i_xid;
dev_t i_rdev;
loff_t i_size;
struct timespec i_atime;
.proc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
+ .xid = 0, \
+ .nid = 0, \
+ .vx_info = NULL, \
+ .nx_info = NULL, \
}
/* Socket demultiplex comparisons on incoming packets. */
__u32 daddr; /* Foreign IPv4 addr */
__u32 rcv_saddr; /* Bound local IPv4 addr */
+ __u32 rcv_saddr2; /* Second bound ipv4 addr, for ipv4root */
__u16 dport; /* Destination port */
__u16 num; /* Local port */
__u32 saddr; /* Sending source */
mode_t mode;
unsigned long seq;
void *security;
+ xid_t xid;
};
#endif /* __KERNEL__ */
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
+#include <linux/vinline.h>
#include <linux/fs.h>
#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
};
extern void umount_tree(struct vfsmount *);
+extern void umount_unused(struct vfsmount *, struct fs_struct *);
extern int copy_namespace(int, struct task_struct *);
void __put_namespace(struct namespace *namespace);
#define SOCK_ASYNC_NOSPACE 0
#define SOCK_ASYNC_WAITDATA 1
#define SOCK_NOSPACE 2
+#define SOCK_PASS_CRED 16
+#define SOCK_USER_SOCKET 17
/**
* struct socket - general BSD socket
struct sock *sk;
wait_queue_head_t wait;
short type;
- unsigned char passcred;
};
struct vm_area_struct;
nlink_t nlink;
uid_t uid;
gid_t gid;
+ int vx_flags;
unsigned long size;
struct inode_operations * proc_iops;
struct file_operations * proc_fops;
struct proc_inode {
struct task_struct *task;
int type;
+ int vx_flags;
union {
int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
int (*proc_read)(struct task_struct *task, char *page);
+ int (*proc_vid_read)(int vid, char *page);
} op;
struct proc_dir_entry *pde;
struct inode vfs_inode;
#define REISERFS_COMPR_FL EXT2_COMPR_FL
#define REISERFS_NOTAIL_FL EXT2_NOTAIL_FL
+/* unfortunately reiserfs sdattr is only 16 bit */
+#define REISERFS_BARRIER_FL (EXT2_BARRIER_FL >> 16)
+#define REISERFS_IUNLINK_FL (EXT2_IUNLINK_FL >> 16)
+
+#define REISERFS_FL_USER_VISIBLE 0x80FF
+#define REISERFS_FL_USER_MODIFYABLE 0x80FF
+
/* persistent flags that file inherits from the parent directory */
#define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \
REISERFS_SYNC_FL | \
struct dentry *dentry, struct inode *inode);
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode);
void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode);
+int reiserfs_setattr ( struct dentry *dentry, struct iattr *attr);
void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode );
void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs );
#include <linux/timer.h>
#include <asm/processor.h>
+#include <linux/vserver/context.h>
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_STOPPED 4
#define TASK_ZOMBIE 8
#define TASK_DEAD 16
+#define TASK_ONHOLD 32
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
/* Architecture-specific MM context */
mm_context_t context;
+ struct vx_info *mm_vx_info;
/* coredumping support */
int core_waiters;
/* Hash table maintenance information */
struct list_head uidhash_list;
uid_t uid;
+ xid_t xid;
};
-extern struct user_struct *find_user(uid_t);
+extern struct user_struct *find_user(xid_t, uid_t);
extern struct user_struct root_user;
#define INIT_USER (&root_user)
void *security;
struct audit_context *audit_context;
+/* vserver context data */
+ xid_t xid;
+ struct vx_info *vx_info;
+
+/* vserver network data */
+ nid_t nid;
+ struct nx_info *nx_info;
+
/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
extern void __set_special_pids(pid_t session, pid_t pgrp);
/* per-UID process charging. */
-extern struct user_struct * alloc_uid(uid_t);
+extern struct user_struct * alloc_uid(xid_t, uid_t);
extern void free_uid(struct user_struct *);
extern void switch_uid(struct user_struct *);
KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */
KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
KERN_HZ_TIMER=65, /* int: hz timer on or off */
+ KERN_VSHELPER=66, /* string: path to vshelper policy agent */
};
typedef __kernel_gid32_t gid_t;
typedef __kernel_uid16_t uid16_t;
typedef __kernel_gid16_t gid16_t;
+typedef unsigned int xid_t;
+typedef unsigned int nid_t;
#ifdef CONFIG_UID16
/* This is defined by include/asm-{arch}/posix_types.h */
extern atomic_t unix_tot_inflight;
-static inline struct sock *first_unix_socket(int *i)
+static inline struct sock *next_unix_socket_table(int *i)
{
- for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
+ for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
if (!hlist_empty(&unix_socket_table[*i]))
return __sk_head(&unix_socket_table[*i]);
}
static inline struct sock *next_unix_socket(int *i, struct sock *s)
{
- struct sock *next = sk_next(s);
- /* More in this chain? */
- if (next)
- return next;
- /* Look for next non-empty chain. */
- for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
- if (!hlist_empty(&unix_socket_table[*i]))
- return __sk_head(&unix_socket_table[*i]);
- }
- return NULL;
+ do {
+ if (s)
+ s = sk_next(s);
+ if (!s)
+ s = next_unix_socket_table(i);
+ } while (s && !vx_check(s->sk_xid, VX_IDENT|VX_WATCH));
+ return s;
+}
+
+static inline struct sock *first_unix_socket(int *i)
+{
+ *i = 0;
+ return next_unix_socket(i, NULL);
}
#define forall_unix_sockets(i, s) \
#include <linux/route.h>
#include <linux/ip.h>
#include <linux/cache.h>
+#include <linux/ninline.h>
#ifndef __KERNEL__
#warning This file is not supposed to be used outside of kernel.
return ip_tos2prio[IPTOS_TOS(tos)>>1];
}
+#define IPI_LOOPBACK 0x0100007f
+
+static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
+{
+ int err;
+ int i, n = nxi->nbipv4;
+ u32 ipv4root = nxi->ipv4[0];
+
+ if (ipv4root == 0)
+ return 0;
+
+ if (fl->fl4_src == 0) {
+ if (n > 1) {
+ u32 foundsrc;
+
+ err = __ip_route_output_key(rp, fl);
+ if (err) {
+ fl->fl4_src = ipv4root;
+ err = __ip_route_output_key(rp, fl);
+ }
+ if (err)
+ return err;
+
+ foundsrc = (*rp)->rt_src;
+ ip_rt_put(*rp);
+
+ for (i=0; i<n; i++){
+ u32 mask = nxi->mask[i];
+ u32 ipv4 = nxi->ipv4[i];
+ u32 net4 = ipv4 & mask;
+
+ if (foundsrc == ipv4) {
+ fl->fl4_src = ipv4;
+ break;
+ }
+ if (!fl->fl4_src && (foundsrc & mask) == net4)
+ fl->fl4_src = ipv4;
+ }
+ }
+ if (fl->fl4_src == 0)
+ fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
+ ? IPI_LOOPBACK : ipv4root;
+ } else {
+ for (i=0; i<n; i++) {
+ if (nxi->ipv4[i] == fl->fl4_src)
+ break;
+ }
+ if (i == n)
+ return -EPERM;
+ }
+ return 0;
+}
+
static inline int ip_route_connect(struct rtable **rp, u32 dst,
u32 src, u32 tos, int oif, u8 protocol,
u16 sport, u16 dport, struct sock *sk)
.dport = dport } } };
int err;
- if (!dst || !src) {
+ struct nx_info *nx_info = current->nx_info;
+
+ if (sk)
+ nx_info = sk->sk_nx_info;
+ vxdprintk("ip_route_connect(%p) %p,%p;%lx\n",
+ sk, nx_info, sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+
+ if (nx_info) {
+ err = ip_find_src(nx_info, rp, &fl);
+ if (err)
+ return err;
+ if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+ fl.fl4_dst = nx_info->ipv4[0];
+ }
+ if (!fl.fl4_dst || !fl.fl4_src) {
err = __ip_route_output_key(rp, &fl);
if (err)
return err;
{
if (!msg->msg_control)
{
- if (sock->passcred || scm->fp)
+ if (test_bit(SOCK_PASS_CRED, &sock->flags) || scm->fp)
msg->msg_flags |= MSG_CTRUNC;
scm_destroy(scm);
return;
}
- if (sock->passcred)
+ if (test_bit(SOCK_PASS_CRED, &sock->flags))
put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
if (!scm->fp)
#include <linux/security.h>
#include <linux/filter.h>
+#include <linux/vinline.h>
#include <asm/atomic.h>
#include <net/dst.h>
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
+ xid_t skc_xid;
+ struct vx_info *skc_vx_info;
+ nid_t skc_nid;
+ struct nx_info *skc_nx_info;
};
/**
#define sk_node __sk_common.skc_node
#define sk_bind_node __sk_common.skc_bind_node
#define sk_refcnt __sk_common.skc_refcnt
+#define sk_xid __sk_common.skc_xid
+#define sk_vx_info __sk_common.skc_vx_info
+#define sk_nid __sk_common.skc_nid
+#define sk_nx_info __sk_common.skc_nx_info
volatile unsigned char sk_zapped;
unsigned char sk_shutdown;
unsigned char sk_use_write_queue;
#define tw_node __tw_common.skc_node
#define tw_bind_node __tw_common.skc_bind_node
#define tw_refcnt __tw_common.skc_refcnt
+#define tw_xid __tw_common.skc_xid
+#define tw_vx_info __tw_common.skc_vx_info
+#define tw_nid __tw_common.skc_nid
+#define tw_nx_info __tw_common.skc_nx_info
volatile unsigned char tw_substate;
unsigned char tw_rcv_wscale;
__u16 tw_sport;
msq->q_perm.mode = (msgflg & S_IRWXUGO);
msq->q_perm.key = key;
+ msq->q_perm.xid = current->xid;
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
for(i = 0; i <= msg_ids.max_id; i++) {
struct msg_queue * msq;
msq = msg_lock(i);
- if(msq != NULL) {
+ if (msq) {
+ if (!vx_check(msq->q_perm.xid, VX_IDENT)) {
+ msg_unlock(msq);
+ continue;
+ }
len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
msq->q_perm.key,
msg_buildid(i,msq->q_perm.seq),
sma->sem_perm.mode = (semflg & S_IRWXUGO);
sma->sem_perm.key = key;
+ sma->sem_perm.xid = current->xid;
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
for(i = 0; i <= sem_ids.max_id; i++) {
struct sem_array *sma;
sma = sem_lock(i);
- if(sma) {
+ if (sma) {
+ if (!vx_check(sma->sem_perm.xid, VX_IDENT)) {
+ sem_unlock(sma);
+ continue;
+ }
len += sprintf(buffer + len, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
sma->sem_perm.key,
sem_buildid(i,sma->sem_perm.seq),
return -ENOMEM;
shp->shm_perm.key = key;
+ shp->shm_perm.xid = current->xid;
shp->shm_flags = (shmflg & S_IRWXUGO);
shp->shm_perm.security = NULL;
struct shmid_kernel* shp;
shp = shm_lock(i);
- if(shp!=NULL) {
+ if (shp) {
#define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
#define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n"
char *format;
+ if (!vx_check(shp->shm_perm.xid, VX_IDENT)) {
+ shm_unlock(shp);
+ continue;
+ }
if (sizeof(size_t) <= sizeof(int))
format = SMALL_STRING;
else
*/
for (id = 0; id <= max_id; id++) {
p = ids->entries[id].p;
- if(p==NULL)
+ if (p==NULL)
continue;
+ if (!vx_check(p->xid, VX_IDENT))
+ continue;
if (key == p->key)
return id;
}
{ /* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
int requested_mode, granted_mode;
+ if (!vx_check(ipcp->xid, VX_ADMIN|VX_IDENT)) /* maybe just VX_IDENT? */
+ return -1;
requested_mode = (flag >> 6) | (flag >> 3) | flag;
granted_mode = ipcp->mode;
if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
kthread.o
+# mod-subdirs := vserver
+
+subdir-y += vserver
+obj-y += vserver/vserver.o
+
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o
static void __unhash_process(struct task_struct *p)
{
nr_threads--;
+ /* tasklist_lock is held, is this sufficient? */
+ if (p->vx_info) {
+ atomic_dec(&p->vx_info->cacct.nr_threads);
+ atomic_dec(&p->vx_info->limit.res[RLIMIT_NPROC]);
+ }
detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID);
if (thread_group_leader(p)) {
ptrace_unlink(current);
/* Reparent to init */
REMOVE_LINKS(current);
+ /* FIXME handle vchild_reaper/initpid */
current->parent = child_reaper;
current->real_parent = child_reaper;
SET_LINKS(current);
struct file * file = xchg(&files->fd[i], NULL);
if (file)
filp_close(file, files);
+ vx_openfd_dec(fd);
}
i++;
set >>= 1;
struct task_struct *p, *reaper = father;
struct list_head *_p, *_n;
+ /* FIXME handle vchild_reaper/initpid */
reaper = father->group_leader;
if (reaper == father)
reaper = child_reaper;
#include <linux/ptrace.h>
#include <linux/mount.h>
#include <linux/audit.h>
+#include <linux/vinline.h>
+#include <linux/ninline.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
static void free_task(struct task_struct *tsk)
{
free_thread_info(tsk->thread_info);
+ clr_vx_info(&tsk->vx_info);
+ clr_nx_info(&tsk->nx_info);
free_task_struct(tsk);
}
if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
+ set_vx_info(&mm->mm_vx_info, current->vx_info);
return mm;
}
free_mm(mm);
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
destroy_context(mm);
+ clr_vx_info(&mm->mm_vx_info);
free_mm(mm);
}
/* Copy the current MM stuff.. */
memcpy(mm, oldmm, sizeof(*mm));
+ mm->mm_vx_info = NULL;
if (!mm_init(mm))
goto fail_nomem;
{
int retval;
struct task_struct *p = NULL;
+ struct vx_info *vxi;
+ struct nx_info *nxi;
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
goto fork_out;
retval = -ENOMEM;
+
p = dup_task_struct(current);
if (!p)
goto fork_out;
+ vxi = get_vx_info(current->vx_info);
+ nxi = get_nx_info(current->nx_info);
+
+ /* check vserver memory */
+ if (p->mm && !(clone_flags & CLONE_VM)) {
+ if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+ vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+ else
+ goto bad_fork_free;
+ }
+ if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+ if (!vx_rsspages_avail(p->mm, p->mm->rss))
+ goto bad_fork_free;
+ }
+
retval = -EAGAIN;
+ if (vxi && (atomic_read(&vxi->limit.res[RLIMIT_NPROC])
+ >= vxi->limit.rlim[RLIMIT_NPROC]))
+ goto bad_fork_free;
+
if (atomic_read(&p->user->processes) >=
p->rlim[RLIMIT_NPROC].rlim_cur) {
if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
nr_threads++;
+ if (vxi) {
+ atomic_inc(&vxi->cacct.nr_threads);
+ atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
+ }
write_unlock_irq(&tasklist_lock);
retval = 0;
task_t *find_task_by_pid(int nr)
{
- struct pid *pid = find_pid(PIDTYPE_PID, nr);
+ struct pid *pid = find_pid(PIDTYPE_PID,
+ vx_rmap_tgid(current->vx_info, nr));
if (!pid)
return NULL;
unsigned long i, j, limit, count;
int do_clear = 0;
char c;
- int error = 0;
+ int error = -EPERM;
+
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return error;
error = security_syslog(type);
if (error)
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
+#include <linux/vserver/sched.h>
+#include <linux/vinline.h>
#ifdef CONFIG_NUMA
#define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
#endif
task_t *migration_thread;
struct list_head migration_queue;
+ struct list_head hold_queue;
+ int idle_tokens;
atomic_t nr_iowait;
};
bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
prio = p->static_prio - bonus;
+ if (__vx_task_flags(p, VXF_SCHED_PRIO, 0))
+ prio += effective_vavavoom(p, MAX_USER_PRIO);
+
if (prio < MAX_RT_PRIO)
prio = MAX_RT_PRIO;
if (prio > MAX_PRIO-1)
}
if (p == rq->idle) {
+ if (!--rq->idle_tokens && !list_empty(&rq->hold_queue))
+ set_need_resched();
+
if (atomic_read(&rq->nr_iowait) > 0)
cpustat->iowait += sys_ticks;
else
}
goto out_unlock;
}
- if (!--p->time_slice) {
+ if (vx_need_resched(p)) {
dequeue_task(p, rq->active);
set_tsk_need_resched(p);
p->prio = effective_prio(p);
struct list_head *queue;
unsigned long long now;
unsigned long run_time;
+#ifdef CONFIG_VSERVER_HARDCPU
+ struct vx_info *vxi;
+ int maxidle = -HZ;
+#endif
int idx;
/*
deactivate_task(prev, rq);
}
+#ifdef CONFIG_VSERVER_HARDCPU
+ if (!list_empty(&rq->hold_queue)) {
+ struct list_head *l, *n;
+ int ret;
+
+ vxi = NULL;
+ list_for_each_safe(l, n, &rq->hold_queue) {
+ next = list_entry(l, task_t, run_list);
+ if (vxi == next->vx_info)
+ continue;
+
+ vxi = next->vx_info;
+ ret = vx_tokens_recalc(vxi);
+ // tokens = vx_tokens_avail(next);
+
+ if (ret > 0) {
+ list_del(&next->run_list);
+ next->state &= ~TASK_ONHOLD;
+ recalc_task_prio(next, now);
+ __activate_task(next, rq);
+ // printk("... unhold %p\n", next);
+ break;
+ }
+ if ((ret < 0) && (maxidle < ret))
+ maxidle = ret;
+ }
+ }
+ rq->idle_tokens = -maxidle;
+
+pick_next:
+#endif
if (unlikely(!rq->nr_running)) {
#ifdef CONFIG_SMP
load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
queue = array->queue + idx;
next = list_entry(queue->next, task_t, run_list);
+#ifdef CONFIG_VSERVER_HARDCPU
+ vxi = next->vx_info;
+ if (vxi && __vx_flags(vxi->vx_flags,
+ VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
+ int ret = vx_tokens_recalc(vxi);
+
+ if (unlikely(ret <= 0)) {
+ if (ret && (rq->idle_tokens > -ret))
+ rq->idle_tokens = -ret;
+ deactivate_task(next, rq);
+ list_add_tail(&next->run_list, &rq->hold_queue);
+ next->state |= TASK_ONHOLD;
+ goto pick_next;
+ }
+ }
+#endif
+
if (!rt_task(next) && next->activated > 0) {
unsigned long long delta = now - next->timestamp;
spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->migration_queue);
+ INIT_LIST_HEAD(&rq->hold_queue);
atomic_set(&rq->nr_iowait, 0);
nr_running_init(rq);
unsigned long flags;
int ret;
+ if (!vx_check(vx_task_xid(p), VX_ADMIN|VX_WATCH|VX_IDENT))
+ return -ESRCH;
+
ret = check_kill_permission(sig, info, p);
if (!ret && sig && p->sighand) {
spin_lock_irqsave(&p->sighand->siglock, flags);
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
+#include <linux/kmod.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/init.h>
if (!who)
user = current->user;
else
- user = find_user(who);
+ user = find_user(vx_current_xid(), who);
if (!user)
goto out_unlock;
if (!who)
user = current->user;
else
- user = find_user(who);
+ user = find_user(vx_current_xid(), who);
if (!user)
goto out_unlock;
return retval;
}
+/*
+ * vshelper path is set via /proc/sys (sysctl "kernel.vshelper").
+ * Invoked by vserver sys_reboot() when the caller is inside a
+ * context, with the following arguments:
+ *
+ *  argv [0] = vshelper_path;
+ *  argv [1] = action: "restart", "halt", "poweroff", ...
+ *  argv [2] = context identifier
+ *  argv [3] = additional argument (restart2 only), else NULL
+ *
+ *  envp [*] = type-specific parameters (VS_UID, VS_PID, VS_CMD)
+ *
+ * Returns 0 on success, -EFAULT on a bad user pointer,
+ * -EPERM when the helper could not be executed.
+ */
+char vshelper_path[255] = "/sbin/vshelper";
+
+long vs_reboot(unsigned int cmd, void * arg)
+{
+	char id_buf[8], cmd_buf[32];
+	char uid_buf[32], pid_buf[32];
+	char buffer[256];
+
+	char *argv[] = {vshelper_path, NULL, id_buf, NULL, NULL};
+	char *envp[] = {"HOME=/", "TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			uid_buf, pid_buf, cmd_buf, NULL};
+
+	/* id_buf holds the decimal xid; 7 chars + NUL is sufficient
+	   for the xid range used by vserver */
+	snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+
+	snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+	snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
+	snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
+
+	switch (cmd) {
+	case LINUX_REBOOT_CMD_RESTART:
+		argv[1] = "restart";
+		break;
+
+	case LINUX_REBOOT_CMD_HALT:
+		argv[1] = "halt";
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+		argv[1] = "poweroff";
+		break;
+
+	case LINUX_REBOOT_CMD_SW_SUSPEND:
+		argv[1] = "swsusp";
+		break;
+
+	case LINUX_REBOOT_CMD_RESTART2:
+		if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
+			return -EFAULT;
+		/* strncpy_from_user() does not NUL-terminate when the
+		   user string fills the buffer; terminate explicitly so
+		   the helper never sees trailing stack garbage */
+		buffer[sizeof(buffer) - 1] = '\0';
+		argv[3] = buffer;
+		/* fallthrough */
+	default:
+		/* NOTE(review): unknown commands also end up here and are
+		   reported as "restart2" with argv[3] == NULL — confirm
+		   this is the intended catch-all behavior */
+		argv[1] = "restart2";
+		break;
+	}
+
+	/* maybe we should wait ? */
+	if (call_usermodehelper(*argv, argv, envp, 0)) {
+		printk( KERN_WARNING
+			"vs_reboot(): failed to exec (%s %s %s %s)\n",
+			vshelper_path, argv[1], argv[2], argv[3]);
+		return -EPERM;
+	}
+	return 0;
+}
/*
* Reboot system call: for obvious reasons only root may call it,
magic2 != LINUX_REBOOT_MAGIC2C))
return -EINVAL;
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return vs_reboot(cmd, arg);
+
lock_kernel();
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
{
struct user_struct *new_user;
- new_user = alloc_uid(new_ruid);
+ new_user = alloc_uid(vx_current_xid(), new_ruid);
if (!new_user)
return -EAGAIN;
int errno = 0;
down_read(&uts_sem);
- if (copy_to_user(name,&system_utsname,sizeof *name))
+ if (copy_to_user(name, vx_new_utsname(), sizeof *name))
errno = -EFAULT;
up_read(&uts_sem);
return errno;
int errno;
char tmp[__NEW_UTS_LEN];
- if (!capable(CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME))
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- memcpy(system_utsname.nodename, tmp, len);
- system_utsname.nodename[len] = 0;
+ char *ptr = vx_new_uts(nodename);
+
+ memcpy(ptr, tmp, len);
+ ptr[len] = 0;
errno = 0;
}
up_write(&uts_sem);
asmlinkage long sys_gethostname(char __user *name, int len)
{
int i, errno;
+ char *ptr;
if (len < 0)
return -EINVAL;
down_read(&uts_sem);
- i = 1 + strlen(system_utsname.nodename);
+ ptr = vx_new_uts(nodename);
+ i = 1 + strlen(ptr);
if (i > len)
i = len;
errno = 0;
- if (copy_to_user(name, system_utsname.nodename, i))
+ if (copy_to_user(name, ptr, i))
errno = -EFAULT;
up_read(&uts_sem);
return errno;
int errno;
char tmp[__NEW_UTS_LEN];
- if (!capable(CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SET_UTSNAME))
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
down_write(&uts_sem);
errno = -EFAULT;
if (!copy_from_user(tmp, name, len)) {
- memcpy(system_utsname.domainname, tmp, len);
- system_utsname.domainname[len] = 0;
+ char *ptr = vx_new_uts(domainname);
+
+ memcpy(ptr, tmp, len);
+ ptr[len] = 0;
errno = 0;
}
up_write(&uts_sem);
old_rlim = current->rlim + resource;
if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
(new_rlim.rlim_max > old_rlim->rlim_max)) &&
- !capable(CAP_SYS_RESOURCE))
+ !capable(CAP_SYS_RESOURCE) && vx_ccaps(VXC_SET_RLIMIT))
return -EPERM;
if (resource == RLIMIT_NOFILE) {
if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
#ifdef CONFIG_HOTPLUG
extern char hotplug_path[];
#endif
+extern char vshelper_path[];
#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;
#endif
.strategy = &sysctl_string,
},
#endif
+ {
+ .ctl_name = KERN_VSHELPER,
+ .procname = "vshelper",
+ .data = &vshelper_path,
+ .maxlen = 256,
+ .mode = 0644,
+ .proc_handler = &proc_dostring,
+ .strategy = &sysctl_string,
+ },
#ifdef CONFIG_CHR_DEV_SG
{
.ctl_name = KERN_SG_BIG_BUFF,
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/cpu.h>
+#include <linux/vserver/sched.h>
+#include <linux/vserver/cvirt.h>
#include <asm/uaccess.h>
#include <asm/div64.h>
*/
asmlinkage long sys_getpid(void)
{
- return current->tgid;
+ /* report the tgid as seen from inside the caller's vserver
+ context: vx_map_tgid() translates the global tgid into the
+ context-local view (e.g. the per-context fake init pid) */
+ return vx_map_tgid(current->vx_info, current->tgid);
}
/*
#endif
break;
}
- return pid;
+ return vx_map_tgid(current->vx_info, pid);
}
asmlinkage long sys_getuid(void)
tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
tp.tv_sec++;
}
+ if (vx_flags(VXF_VIRT_UPTIME, 0))
+ vx_vsi_uptime(&tp, NULL);
val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
val.procs = nr_threads;
} while (read_seqretry(&xtime_lock, seq));
+/* if (vx_flags(VXF_VIRT_CPU, 0))
+ vx_vsi_cpu(val);
+*/
si_meminfo(&val);
si_swapinfo(&val);
#define UIDHASH_BITS 8
#define UIDHASH_SZ (1 << UIDHASH_BITS)
#define UIDHASH_MASK (UIDHASH_SZ - 1)
-#define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
-#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid)))
+#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
+#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid)))
static kmem_cache_t *uid_cachep;
static struct list_head uidhash_table[UIDHASH_SZ];
list_del(&up->uidhash_list);
}
-static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent)
+static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent)
{
struct list_head *up;
user = list_entry(up, struct user_struct, uidhash_list);
- if(user->uid == uid) {
+ if(user->uid == uid && user->xid == xid) {
atomic_inc(&user->__count);
return user;
}
return NULL;
}
-struct user_struct *find_user(uid_t uid)
+struct user_struct *find_user(xid_t xid, uid_t uid)
{
- return uid_hash_find(uid, uidhashentry(uid));
+ /* uid accounting is now scoped per context: the same uid in two
+ different xids hashes to (and matches) distinct user_structs */
+ return uid_hash_find(xid, uid, uidhashentry(xid, uid));
}
void free_uid(struct user_struct *up)
}
}
-struct user_struct * alloc_uid(uid_t uid)
+struct user_struct * alloc_uid(xid_t xid, uid_t uid)
{
- struct list_head *hashent = uidhashentry(uid);
+ struct list_head *hashent = uidhashentry(xid, uid);
struct user_struct *up;
spin_lock(&uidhash_lock);
- up = uid_hash_find(uid, hashent);
+ up = uid_hash_find(xid, uid, hashent);
spin_unlock(&uidhash_lock);
if (!up) {
if (!new)
return NULL;
new->uid = uid;
+ new->xid = xid;
atomic_set(&new->__count, 1);
atomic_set(&new->processes, 0);
atomic_set(&new->files, 0);
* on adding the same user already..
*/
spin_lock(&uidhash_lock);
- up = uid_hash_find(uid, hashent);
+ up = uid_hash_find(xid, uid, hashent);
if (up) {
kmem_cache_free(uid_cachep, new);
} else {
/* Insert the root user immediately (init already runs as root) */
spin_lock(&uidhash_lock);
- uid_hash_insert(&root_user, uidhashentry(0));
+ uid_hash_insert(&root_user, uidhashentry(0,0));
spin_unlock(&uidhash_lock);
return 0;
set_page_dirty(page);
page_remove_rmap(page, ptep);
page_cache_release(page);
- mm->rss--;
+ // mm->rss--;
+ vx_rsspages_dec(mm);
}
}
} else {
pgd = pgd_offset(mm, addr);
spin_lock(&mm->page_table_lock);
+ if (!vx_rsspages_avail(mm, 1))
+ goto err_unlock;
+
pmd = pmd_alloc(mm, pgd, addr);
if (!pmd)
goto err_unlock;
zap_pte(mm, vma, addr, pte);
- mm->rss++;
+ // mm->rss++;
+ vx_rsspages_inc(mm);
flush_icache_page(vma, page);
set_pte(pte, mk_pte(page, prot));
pte_chain = page_add_rmap(page, pte, pte_chain);
struct page *page;
unsigned long pfn;
+ if (!vx_rsspages_avail(dst, 1)) {
+ spin_unlock(&src->page_table_lock);
+ goto nomem;
+ }
/* copy_one_pte */
if (pte_none(pte))
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
get_page(page);
- dst->rss++;
+ // dst->rss++;
+ vx_rsspages_inc(dst);
set_pte(dst_pte, pte);
pte_chain = page_add_rmap(page, dst_pte,
page_table = pte_offset_map(pmd, address);
if (pte_same(*page_table, pte)) {
if (PageReserved(old_page))
- ++mm->rss;
+ // ++mm->rss;
+ vx_rsspages_inc(mm);
page_remove_rmap(old_page, page_table);
break_cow(vma, new_page, address, page_table);
pte_chain = page_add_rmap(new_page, page_table, pte_chain);
inc_page_state(pgmajfault);
}
+ if (!vx_rsspages_avail(mm, 1)) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
mark_page_accessed(page);
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain) {
if (vm_swap_full())
remove_exclusive_swap_page(page);
- mm->rss++;
+ // mm->rss++;
+ vx_rsspages_inc(mm);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page))
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
struct pte_chain *pte_chain;
int ret;
+ if (!vx_rsspages_avail(mm, 1)) {
+ spin_unlock(&mm->page_table_lock);
+ return VM_FAULT_OOM;
+ }
+
pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
if (!pte_chain) {
pte_unmap(page_table);
ret = VM_FAULT_MINOR;
goto out;
}
- mm->rss++;
+ // mm->rss++;
+ vx_rsspages_inc(mm);
entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)),
vma);
return VM_FAULT_SIGBUS;
if (new_page == NOPAGE_OOM)
return VM_FAULT_OOM;
+ if (!vx_rsspages_avail(mm, 1))
+ return VM_FAULT_OOM;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
if (!PageReserved(new_page))
- ++mm->rss;
+ // ++mm->rss;
+ vx_rsspages_inc(mm);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
- unsigned long locked;
+ unsigned long locked, grow;
unsigned long lock_limit;
int error = -ENOMEM;
len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
start &= PAGE_MASK;
- locked = len >> PAGE_SHIFT;
- locked += current->mm->locked_vm;
+ grow = len >> PAGE_SHIFT;
+ if (!vx_vmlocked_avail(current->mm, grow))
+ goto out;
+ locked = current->mm->locked_vm + grow;
lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
lock_limit >>= PAGE_SHIFT;
/* check against resource limits */
if (locked <= lock_limit)
error = do_mlock(start, len, 1);
+out:
up_write(&current->mm->mmap_sem);
return error;
}
lock_limit >>= PAGE_SHIFT;
ret = -ENOMEM;
+ if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
+ goto out;
+ /* check vserver lock limits? */
if (current->mm->total_vm <= lock_limit)
ret = do_mlockall(flags);
out:
> current->rlim[RLIMIT_AS].rlim_cur)
return -ENOMEM;
+ /* check context space, maybe only Private writable mapping? */
+ if (!vx_vmpages_avail(mm, len >> PAGE_SHIFT))
+ return -ENOMEM;
+
if (accountable && (!(flags & MAP_NORESERVE) ||
sysctl_overcommit_memory > 1)) {
if (vm_flags & VM_SHARED) {
kmem_cache_free(vm_area_cachep, vma);
}
out:
- mm->total_vm += len >> PAGE_SHIFT;
+ // mm->total_vm += len >> PAGE_SHIFT;
+ vx_vmpages_add(mm, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
+ // mm->locked_vm += len >> PAGE_SHIFT;
+ vx_vmlocked_add(mm, len >> PAGE_SHIFT);
make_pages_present(addr, addr + len);
}
if (flags & MAP_POPULATE) {
grow = (address - vma->vm_end) >> PAGE_SHIFT;
/* Overcommit.. */
- if (security_vm_enough_memory(grow)) {
+ if (security_vm_enough_memory(grow) ||
+ !vx_vmpages_avail(vma->vm_mm, grow)) {
spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
vm_unacct_memory(grow);
return -ENOMEM;
}
+
vma->vm_end = address;
- vma->vm_mm->total_vm += grow;
+ // vma->vm_mm->total_vm += grow;
+ vx_vmpages_add(vma->vm_mm, grow);
if (vma->vm_flags & VM_LOCKED)
- vma->vm_mm->locked_vm += grow;
+ // vma->vm_mm->locked_vm += grow;
+ vx_vmlocked_add(vma->vm_mm, grow);
spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
grow = (vma->vm_start - address) >> PAGE_SHIFT;
/* Overcommit.. */
- if (security_vm_enough_memory(grow)) {
+ if (security_vm_enough_memory(grow) ||
+ !vx_vmpages_avail(vma->vm_mm, grow)) {
spin_unlock(&vma->vm_mm->page_table_lock);
return -ENOMEM;
}
vm_unacct_memory(grow);
return -ENOMEM;
}
+
vma->vm_start = address;
vma->vm_pgoff -= grow;
- vma->vm_mm->total_vm += grow;
+ // vma->vm_mm->total_vm += grow;
+ vx_vmpages_add(vma->vm_mm, grow);
if (vma->vm_flags & VM_LOCKED)
- vma->vm_mm->locked_vm += grow;
+ // vma->vm_mm->locked_vm += grow;
+ vx_vmlocked_add(vma->vm_mm, grow);
spin_unlock(&vma->vm_mm->page_table_lock);
return 0;
}
{
size_t len = area->vm_end - area->vm_start;
- area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+ // area->vm_mm->total_vm -= len >> PAGE_SHIFT;
+ vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT);
+
if (area->vm_flags & VM_LOCKED)
- area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+ // area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
+ vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT);
/*
* Is this a new hole at the lowest possible address?
*/
locked += len;
if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
return -EAGAIN;
+ /* vserver checks ? */
}
/*
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
- if (security_vm_enough_memory(len >> PAGE_SHIFT))
+ if (security_vm_enough_memory(len >> PAGE_SHIFT) ||
+ !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
return -ENOMEM;
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
- mm->total_vm += len >> PAGE_SHIFT;
+ // mm->total_vm += len >> PAGE_SHIFT;
+ vx_vmpages_add(mm, len >> PAGE_SHIFT);
if (flags & VM_LOCKED) {
- mm->locked_vm += len >> PAGE_SHIFT;
+ // mm->locked_vm += len >> PAGE_SHIFT;
+ vx_vmlocked_add(mm, len >> PAGE_SHIFT);
make_pages_present(addr, addr + len);
}
return addr;
vma = mm->mmap;
mm->mmap = mm->mmap_cache = NULL;
mm->mm_rb = RB_ROOT;
- mm->rss = 0;
- mm->total_vm = 0;
- mm->locked_vm = 0;
+ // mm->rss = 0;
+ vx_rsspages_sub(mm, mm->rss);
+ // mm->total_vm = 0;
+ vx_vmpages_sub(mm, mm->total_vm);
+ // mm->locked_vm = 0;
+ vx_vmlocked_sub(mm, mm->locked_vm);
spin_unlock(&mm->page_table_lock);
vma->vm_next->vm_flags |= VM_ACCOUNT;
}
- mm->total_vm += new_len >> PAGE_SHIFT;
+ // mm->total_vm += new_len >> PAGE_SHIFT;
+ vx_vmpages_add(mm, new_len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
- mm->locked_vm += new_len >> PAGE_SHIFT;
+ // mm->locked_vm += new_len >> PAGE_SHIFT;
+ vx_vmlocked_add(mm, new_len >> PAGE_SHIFT);
if (new_len > old_len)
make_pages_present(new_addr + old_len,
new_addr + new_len);
if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len)
> current->rlim[RLIMIT_AS].rlim_cur)
goto out;
+ /* check context space, maybe only Private writable mapping? */
+ if (!vx_vmpages_avail(current->mm, (new_len - old_len) >> PAGE_SHIFT))
+ goto out;
if (vma->vm_flags & VM_ACCOUNT) {
charged = (new_len - old_len) >> PAGE_SHIFT;
spin_lock(&vma->vm_mm->page_table_lock);
vma->vm_end = addr + new_len;
spin_unlock(&vma->vm_mm->page_table_lock);
- current->mm->total_vm += pages;
+ // current->mm->total_vm += pages;
+ vx_vmpages_add(current->mm, pages);
if (vma->vm_flags & VM_LOCKED) {
- current->mm->locked_vm += pages;
+ // current->mm->locked_vm += pages;
+ vx_vmlocked_add(current->mm, pages);
make_pages_present(addr + old_len,
addr + new_len);
}
* The memory size of the process is the basis for the badness.
*/
points = p->mm->total_vm;
+ /* add vserver badness ;) */
/*
* CPU time is in seconds and run time is in minutes. There is no
val->freehigh = 0;
#endif
val->mem_unit = PAGE_SIZE;
+ if (vx_flags(VXF_VIRT_MEM, 0))
+ vx_vsi_meminfo(val);
}
EXPORT_SYMBOL(si_meminfo);
if (pte_dirty(pte))
set_page_dirty(page);
- mm->rss--;
+ // mm->rss--;
+ vx_rsspages_dec(mm);
page_cache_release(page);
ret = SWAP_SUCCESS;
unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir,
swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp)
{
- vma->vm_mm->rss++;
+ // vma->vm_mm->rss++;
+ vx_rsspages_inc(vma->vm_mm);
get_page(page);
set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
*pte_chainp = page_add_rmap(page, dir, *pte_chainp);
val->freeswap = nr_swap_pages + nr_to_be_unused;
val->totalswap = total_swap_pages + nr_to_be_unused;
swap_list_unlock();
+ if (vx_flags(VXF_VIRT_MEM, 0))
+ vx_vsi_swapinfo(val);
}
/*
total = 0;
for (dev = dev_base; dev; dev = dev->next) {
+ if (!dev_in_nx_info(dev, current->nx_info))
+ continue;
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
+ struct nx_info *nxi = current->nx_info;
+
+ if (!dev_in_nx_info(dev, nxi))
+ return;
if (dev->get_stats) {
struct net_device_stats *stats = dev->get_stats(dev);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
+ if (!dev_in_nx_info(dev, current->nx_info))
+ continue;
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
break;
}
struct sk_buff *skb;
int size = NLMSG_GOODSIZE;
+ if (!dev_in_nx_info(dev, current->nx_info))
+ return;
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
break;
case SO_PASSCRED:
- sock->passcred = valbool;
+ if (valbool)
+ set_bit(SOCK_PASS_CRED, &sock->flags);
+ else
+ clear_bit(SOCK_PASS_CRED, &sock->flags);
break;
case SO_TIMESTAMP:
break;
case SO_PASSCRED:
- v.val = sock->passcred;
+ v.val = test_bit(SOCK_PASS_CRED, &sock->flags)?1:0;
break;
case SO_PEERCRED:
sock_lock_init(sk);
}
sk->sk_slab = slab;
+ sock_vx_init(sk);
+ sock_nx_init(sk);
if (security_sk_alloc(sk, family, priority)) {
kmem_cache_free(slab, sk);
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
security_sk_free(sk);
+ BUG_ON(sk->sk_vx_info);
+ BUG_ON(sk->sk_nx_info);
+/* clr_vx_info(&sk->sk_vx_info);
+ clr_nx_info(&sk->sk_nx_info); */
kmem_cache_free(sk->sk_slab, sk);
module_put(owner);
}
sk->sk_stamp.tv_sec = -1L;
sk->sk_stamp.tv_usec = -1L;
+ sk->sk_vx_info = NULL;
+ sk->sk_xid = 0;
+ sk->sk_nx_info = NULL;
+ sk->sk_nid = 0;
+
atomic_set(&sk->sk_refcnt, 1);
}
if (inet->opt)
kfree(inet->opt);
+
+ BUG_ON(sk->sk_nx_info);
+ BUG_ON(sk->sk_vx_info);
dst_release(sk->sk_dst_cache);
#ifdef INET_REFCNT_DEBUG
atomic_dec(&inet_sock_nr);
sk->sk_family = PF_INET;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+
+ set_vx_info(&sk->sk_vx_info, current->vx_info);
+ sk->sk_xid = vx_current_xid();
+ set_nx_info(&sk->sk_nx_info, current->nx_info);
+ sk->sk_nid = nx_current_nid();
inet->uc_ttl = -1;
inet->mc_loop = 1;
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err)
- inet_sock_release(sk);
+ if (err) {
+/* sk->sk_vx_info = NULL;
+ put_vx_info(current->vx_info);
+ sk->sk_nx_info = NULL;
+ put_nx_info(current->nx_info);
+*/ inet_sock_release(sk);
+ }
}
out:
return err;
!(current->flags & PF_EXITING))
timeout = sk->sk_lingertime;
sock->sk = NULL;
+ clr_vx_info(&sk->sk_vx_info);
+ clr_nx_info(&sk->sk_nx_info);
sk->sk_prot->close(sk, timeout);
}
return 0;
unsigned short snum;
int chk_addr_ret;
int err;
+ __u32 s_addr; /* Address used for validation */
+ __u32 s_addr1;
+ __u32 s_addr2 = 0xffffffffl; /* Optional address of the socket */
+ struct nx_info *nxi = sk->sk_nx_info;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
if (addr_len < sizeof(struct sockaddr_in))
goto out;
- chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
+ s_addr = s_addr1 = addr->sin_addr.s_addr;
+ nxdprintk("inet_bind(%p) %p,%p;%lx\n",
+ sk, nx_info, sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+ if (nxi) {
+ __u32 v4_bcast = nxi->v4_bcast;
+ __u32 ipv4root = nxi->ipv4[0];
+ int nbipv4 = nxi->nbipv4;
+ if (s_addr == 0) {
+ s_addr = ipv4root;
+ if (nbipv4 > 1)
+ s_addr1 = 0;
+ else {
+ s_addr1 = ipv4root;
+ }
+ s_addr2 = v4_bcast;
+ } else if (s_addr == 0x0100007f) {
+ s_addr = s_addr1 = ipv4root;
+ } else if (s_addr != v4_bcast) {
+ int i;
+ for (i=0; i<nbipv4; i++) {
+ if (s_addr == nxi->ipv4[i])
+ break;
+ }
+ if (i == nbipv4) {
+ return -EADDRNOTAVAIL;
+ }
+ }
+ }
+ chk_addr_ret = inet_addr_type(s_addr);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
err = -EADDRNOTAVAIL;
if (!sysctl_ip_nonlocal_bind &&
!inet->freebind &&
- addr->sin_addr.s_addr != INADDR_ANY &&
+ s_addr != INADDR_ANY &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
if (sk->sk_state != TCP_CLOSE || inet->num)
goto out_release_sock;
- inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
+ inet->rcv_saddr = inet->saddr = s_addr1;
+ inet->rcv_saddr2 = s_addr2;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->saddr = 0; /* Use device */
return rc;
}
+/*
+ * Return true (1) when the address of @ifa is NOT part of the
+ * ipv4root set assigned to the current process.
+ *
+ * A process without a network context (nx_info == NULL) is
+ * unrestricted, so every interface address is acceptable and
+ * 0 is returned.
+ */
+static inline int devinet_notiproot (struct in_ifaddr *ifa)
+{
+	struct nx_info *nxi = current->nx_info;
+	__u32 addr;
+	int i;
+
+	if (!nxi)
+		return 0;
+
+	addr = ifa->ifa_local;
+	for (i = 0; i < nxi->nbipv4; i++)
+		if (nxi->ipv4[i] == addr)
+			return 0;
+	return 1;
+}
+
int devinet_ioctl(unsigned int cmd, void *arg)
{
ret = -EADDRNOTAVAIL;
if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
goto done;
+ if (!ifa_in_nx_info(ifa, current->nx_info))
+ goto done;
switch(cmd) {
case SIOCGIFADDR: /* Get interface address */
goto out;
for (; ifa; ifa = ifa->ifa_next) {
+ if (!ifa_in_nx_info(ifa, current->nx_info))
+ continue;
if (!buf) {
done += sizeof(ifr);
continue;
read_lock(&in_dev->lock);
for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
ifa = ifa->ifa_next, ip_idx++) {
+ if (!ifa_in_nx_info(ifa, current->nx_info))
+ continue;
if (ip_idx < s_ip_idx)
continue;
if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
return flags;
}
+extern int dev_in_nx_info(struct net_device *, struct nx_info *);
+
/*
* This outputs /proc/net/route.
*
mask = FZ_MASK(iter->zone);
flags = fib_flag_trans(f->fn_type, f->fn_state & FN_S_ZOMBIE,
mask, fi);
- if (fi)
+ if (fi && dev_in_nx_info(fi->fib_dev, current->nx_info))
snprintf(bf, sizeof(bf),
"%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
fi->fib_dev ? fi->fib_dev->name : "*", prefix,
write_unlock_bh(&raw_v4_lock);
}
+
+/*
+ * Does @loc_addr match a raw socket bound to @rcv_saddr1 (with
+ * optional secondary address @rcv_saddr2)?
+ *
+ * A non-zero @loc_addr matches either bound address directly.
+ * An unbound socket (rcv_saddr1 == 0) accepts any address when no
+ * network context is attached, otherwise only addresses from the
+ * context's ipv4 list.
+ */
+static inline int raw_addr_in_list (
+	u32 rcv_saddr1,
+	u32 rcv_saddr2,
+	u32 loc_addr,
+	struct nx_info *nx_info)
+{
+	int i;
+
+	if (loc_addr != 0 &&
+	    (rcv_saddr1 == loc_addr || rcv_saddr2 == loc_addr))
+		return 1;
+	if (rcv_saddr1 != 0)
+		return 0;
+	/* unbound: accept anything, or only the context's addresses */
+	if (nx_info == NULL)
+		return 1;
+	for (i = 0; i < nx_info->nbipv4; i++)
+		if (nx_info->ipv4[i] == loc_addr)
+			return 1;
+	return 0;
+}
+
struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
unsigned long raddr, unsigned long laddr,
int dif)
if (inet->num == num &&
!(inet->daddr && inet->daddr != raddr) &&
- !(inet->rcv_saddr && inet->rcv_saddr != laddr) &&
+ raw_addr_in_list(inet->rcv_saddr, inet->rcv_saddr2,
+ laddr, sk->sk_nx_info) &&
!(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
goto found; /* gotcha */
}
struct hlist_node *node;
sk_for_each(sk, node, &raw_v4_htable[state->bucket])
- if (sk->sk_family == PF_INET)
+ if (sk->sk_family == PF_INET &&
+ vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))
goto found;
}
sk = NULL;
sk = sk_next(sk);
try_again:
;
- } while (sk && sk->sk_family != PF_INET);
+ } while (sk && (sk->sk_family != PF_INET ||
+ !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)));
if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
sk = sk_head(&raw_v4_htable[state->bucket]);
tcp_sk(sk)->bind_hash = tb;
}
+/*
+ Does @addr match socket @sk?
+
+ When a network context is attached, ONLY the context's ipv4 list
+ is consulted -- the socket's own bound address is intentionally
+ ignored in that case (the else-branch below binds to "if (nxi)").
+ Without a context, a wildcard bind or an exact match on the bound
+ address qualifies.
+*/
+static inline int tcp_in_list(struct sock *sk, u32 addr)
+{
+ struct nx_info *nxi = sk->sk_nx_info;
+
+ vxdprintk("tcp_in_list(%p) %p,%p;%lx\n",
+ sk, nxi, sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+
+ if (nxi) {
+ int n = nxi->nbipv4;
+ int i;
+
+ for (i=0; i<n; i++)
+ if (nxi->ipv4[i] == addr)
+ return 1;
+ }
+ else if (!tcp_v4_rcv_saddr(sk) || tcp_v4_rcv_saddr(sk) == addr)
+ return 1;
+ return 0;
+}
+
+/*
+ Check if the addresses in sk1 conflict with those in sk2.
+
+ Three cases, checked in order:
+ - sk1 bound to a specific address: conflict iff sk2 covers it;
+ - sk1 bound to any, but restricted by a network context: conflict
+ iff sk2 covers any address of sk1's context;
+ - sk1 bound to any with no context: always conflicts.
+
+ Also used by UDP port allocation (udp_v4_get_port).
+*/
+int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2)
+{
+ if (sk1 && sk2)
+ nxdprintk("inet_bind(%p,%p) %p,%p;%lx %p,%p;%lx\n",
+ sk1, sk2,
+ sk1->sk_nx_info, sk1->sk_socket,
+ (sk1->sk_socket?sk1->sk_socket->flags:0),
+ sk2->sk_nx_info, sk2->sk_socket,
+ (sk2->sk_socket?sk2->sk_socket->flags:0));
+
+ if (tcp_v4_rcv_saddr(sk1)) {
+ /* Bind to one address only */
+ return tcp_in_list (sk2, tcp_v4_rcv_saddr(sk1));
+ } else if (sk1->sk_nx_info) {
+ /* A restricted bind(any) */
+ struct nx_info *nxi = sk1->sk_nx_info;
+ int n = nxi->nbipv4;
+ int i;
+
+ for (i=0; i<n; i++)
+ if (tcp_in_list (sk2, nxi->ipv4[i]))
+ return 1;
+ } else /* A bind(any) do not allow other bind on the same port */
+ return 1;
+ return 0;
+}
+
static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
{
- const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
struct sock *sk2;
struct hlist_node *node;
int reuse = sk->sk_reuse;
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
if (!reuse || !sk2->sk_reuse ||
sk2->sk_state == TCP_LISTEN) {
- const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
- if (!sk2_rcv_saddr || !sk_rcv_saddr ||
- sk2_rcv_saddr == sk_rcv_saddr)
+ if (tcp_ipv4_addr_conflict(sk, sk2))
break;
}
}
wake_up(&tcp_lhash_wait);
}
+/*
+ * Does destination @daddr match a listener bound to @rcv_saddr?
+ *
+ * An exact match always qualifies.  A wildcard bind (rcv_saddr == 0)
+ * matches any address when no network context is attached, otherwise
+ * only the addresses listed in that context.
+ */
+static inline int tcp_addr_in_list(
+	u32 rcv_saddr,
+	u32 daddr,
+	struct nx_info *nx_info)
+{
+	int i;
+
+	if (rcv_saddr == daddr)
+		return 1;
+	if (rcv_saddr != 0)
+		return 0;
+	/* wildcard bind: unrestricted or limited to context addresses */
+	if (!nx_info)
+		return 1;
+	for (i = 0; i < nx_info->nbipv4; i++)
+		if (nx_info->ipv4[i] == daddr)
+			return 1;
+	return 0;
+}
+
+
+
/* Don't inline this cruft. Here are some nice properties to
* exploit here. The BSD API does not allow a listening TCP
* to specify the remote port nor the remote address for the
__u32 rcv_saddr = inet->rcv_saddr;
score = (sk->sk_family == PF_INET ? 1 : 0);
- if (rcv_saddr) {
- if (rcv_saddr != daddr)
- continue;
+ if (tcp_addr_in_list(rcv_saddr, daddr, sk->sk_nx_info))
score+=2;
- }
+ else
+ continue;
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
continue;
struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
if (inet->num == hnum && !sk->sk_node.next &&
- (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+ tcp_addr_in_list(inet->rcv_saddr, daddr, sk->sk_nx_info) &&
!sk->sk_bound_dev_if)
goto sherry_cache;
sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
req = req->dl_next;
while (1) {
while (req) {
+ if (!vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
if (req->class->family == st->family) {
cur = req;
goto out;
sk = sk_next(sk);
get_sk:
sk_for_each_from(sk, node) {
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
if (sk->sk_family == st->family) {
cur = sk;
goto out;
read_lock(&tcp_ehash[st->bucket].lock);
sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
- if (sk->sk_family != st->family) {
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
+ if (sk->sk_family != st->family)
continue;
- }
rc = sk;
goto out;
}
st->state = TCP_SEQ_STATE_TIME_WAIT;
tw_for_each(tw, node,
&tcp_ehash[st->bucket + tcp_ehash_size].chain) {
- if (tw->tw_family != st->family) {
+ if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))
+ continue;
+ if (tw->tw_family != st->family)
continue;
- }
rc = tw;
goto out;
}
tw = cur;
tw = tw_next(tw);
get_tw:
- while (tw && tw->tw_family != st->family) {
+ while (tw && tw->tw_family != st->family &&
+ !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) {
tw = tw_next(tw);
}
if (tw) {
sk = sk_next(sk);
sk_for_each_from(sk, node) {
+ if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+ continue;
if (sk->sk_family == st->family)
goto found;
}
tw->tw_ts_recent_stamp = tp->ts_recent_stamp;
tw_dead_node_init(tw);
+ tw->tw_xid = sk->sk_xid;
+ tw->tw_vx_info = NULL;
+ tw->tw_nid = sk->sk_nid;
+ tw->tw_nx_info = NULL;
+
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
newsk->sk_state = TCP_SYN_RECV;
/* SANITY */
+ sock_vx_init(newsk);
+ sock_nx_init(newsk);
sk_node_init(&newsk->sk_node);
tcp_sk(newsk)->bind_hash = NULL;
newsk->sk_err = 0;
newsk->sk_priority = 0;
atomic_set(&newsk->sk_refcnt, 2);
+
+ /* hmm, maybe from socket? */
+ set_vx_info(&newsk->sk_vx_info, current->vx_info);
+ set_nx_info(&newsk->sk_nx_info, current->nx_info);
#ifdef INET_REFCNT_DEBUG
atomic_inc(&inet_sock_nr);
#endif
/* Shared by v4/v6 udp. */
int udp_port_rover;
+int tcp_ipv4_addr_conflict(struct sock *sk1, struct sock *sk2);
+
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
struct hlist_node *node;
(!sk2->sk_bound_dev_if ||
!sk->sk_bound_dev_if ||
sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- (!inet2->rcv_saddr ||
- !inet->rcv_saddr ||
- inet2->rcv_saddr == inet->rcv_saddr) &&
+ tcp_ipv4_addr_conflict(sk2, sk) &&
(!sk2->sk_reuse || !sk->sk_reuse))
goto fail;
}
write_unlock_bh(&udp_hash_lock);
}
+/*
+ * udp_in_list - check whether @addr is one of the IPv4 addresses
+ * assigned to the network context @nx_info.
+ *
+ * Returns 1 on match, 0 otherwise.  The caller must guarantee
+ * @nx_info is non-NULL (the lookup path tests sk->sk_nx_info
+ * before calling in here).
+ */
+static inline int udp_in_list(struct nx_info *nx_info, u32 addr)
+{
+ int n = nx_info->nbipv4;
+ int i;
+
+ for (i=0; i<n; i++)
+ if (nx_info->ipv4[i] == addr)
+ return 1;
+ return 0;
+}
+
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
if (inet->rcv_saddr != daddr)
continue;
score+=2;
+ } else if (sk->sk_nx_info) {
+ if (udp_in_list(sk->sk_nx_info, daddr))
+ score+=2;
+ else
+ continue;
}
if (inet->daddr) {
if (inet->daddr != saddr)
if (inet->num != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
- (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
+ (inet->rcv_saddr && inet->rcv_saddr != loc_addr &&
+ inet->rcv_saddr2 && inet->rcv_saddr2 != loc_addr) ||
ipv6_only_sock(s) ||
(s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
continue;
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ struct nx_info *nxi = sk->sk_nx_info;
+
+ if (nxi) {
+ err = ip_find_src(nxi, &rt, &fl);
+ if (err)
+ goto out;
+ if (daddr == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+ daddr = fl.fl4_dst = nxi->ipv4[0];
+ }
err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
if (err)
goto out;
for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
struct hlist_node *node;
+
sk_for_each(sk, node, &udp_hash[state->bucket]) {
- if (sk->sk_family == state->family)
+ if (sk->sk_family == state->family &&
+ vx_check(sk->sk_xid, VX_WATCH|VX_IDENT))
goto found;
}
}
sk = sk_next(sk);
try_again:
;
- } while (sk && sk->sk_family != state->family);
+ } while (sk && (sk->sk_family != state->family ||
+ !vx_check(sk->sk_xid, VX_WATCH|VX_IDENT)));
if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
sk = sk_head(&udp_hash[state->bucket]);
ei->socket.ops = NULL;
ei->socket.sk = NULL;
ei->socket.file = NULL;
- ei->socket.passcred = 0;
+ ei->socket.flags = 0;
return &ei->vfs_inode;
}
struct msghdr *msg, size_t size)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
- int err;
+ int err, len;
si->sock = sock;
si->scm = NULL;
if (err)
return err;
- return sock->ops->sendmsg(iocb, sock, msg, size);
+ len = sock->ops->sendmsg(iocb, sock, msg, size);
+ /* charge or fail the context's socket accounting depending on
+ * whether the whole message was accepted by the protocol */
+ if (sock->sk) {
+ if (len == size)
+ vx_sock_send(sock->sk, size);
+ else
+ vx_sock_fail(sock->sk, size);
+ }
+ /* %p slots must get pointers: a bare int 0 in varargs is the
+ * wrong size on 64 bit and corrupts the argument walk — use
+ * NULL.  size is size_t, so print it with %zu, not %d. */
+ vxdprintk("__sock_sendmsg: %p[%p,%p,%p;%d]:%zu/%d\n",
+ sock, sock->sk,
+ (sock->sk)?sock->sk->sk_nx_info:NULL,
+ (sock->sk)?sock->sk->sk_vx_info:NULL,
+ (sock->sk)?sock->sk->sk_xid:0,
+ size, len);
+ return len;
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
- int err;
+ int err, len;
struct sock_iocb *si = kiocb_to_siocb(iocb);
si->sock = sock;
if (err)
return err;
- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ len = sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ /* account received bytes to the context on success only */
+ if ((len >= 0) && sock->sk)
+ vx_sock_recv(sock->sk, len);
+ /* NULL (not int 0) for the %p varargs slots — an int-sized 0
+ * misaligns the arg list on 64 bit; %zu for the size_t size */
+ vxdprintk("__sock_recvmsg: %p[%p,%p,%p;%d]:%zu/%d\n",
+ sock, sock->sk,
+ (sock->sk)?sock->sk->sk_nx_info:NULL,
+ (sock->sk)?sock->sk->sk_vx_info:NULL,
+ (sock->sk)?sock->sk->sk_xid:0,
+ size, len);
+ return len;
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (type < 0 || type >= SOCK_MAX)
return -EINVAL;
+ /* disable IPv6 inside vservers for now */
+ if (family == PF_INET6 && !vx_check(0, VX_ADMIN))
+ return -EAFNOSUPPORT;
+
/* Compatibility.
This uglymoron is moved from INET layer to here to avoid
if (retval < 0)
goto out;
+ set_bit(SOCK_USER_SOCKET, &sock->flags);
retval = sock_map_fd(sock);
if (retval < 0)
goto out_release;
err = sock_create(family, type, protocol, &sock1);
if (err < 0)
goto out;
+ set_bit(SOCK_USER_SOCKET, &sock1->flags);
err = sock_create(family, type, protocol, &sock2);
if (err < 0)
goto out_release_1;
+ set_bit(SOCK_USER_SOCKET, &sock2->flags);
err = sock1->ops->socketpair(sock1, sock2);
if (err < 0)
mntput(mnt);
}
+ clr_vx_info(&sk->sk_vx_info);
+ clr_nx_info(&sk->sk_nx_info);
sock_put(sk);
/* ---- Socket is dead now and most probably destroyed ---- */
sock_init_data(sock,sk);
sk_set_owner(sk, THIS_MODULE);
+ set_vx_info(&sk->sk_vx_info, current->vx_info);
+ set_nx_info(&sk->sk_nx_info, current->nx_info);
+ sk->sk_xid = vx_current_xid();
+
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
goto out;
alen = err;
- if (sock->passcred && !unix_sk(sk)->addr &&
+ if (test_bit(SOCK_PASS_CRED, &sock->flags) && !unix_sk(sk)->addr &&
(err = unix_autobind(sock)) != 0)
goto out;
goto out;
addr_len = err;
- if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASS_CRED, &sock->flags)
+ && !u->addr && (err = unix_autobind(sock)) != 0)
goto out;
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
goto out;
}
- if (sock->passcred && !u->addr && (err = unix_autobind(sock)) != 0)
+ if (test_bit(SOCK_PASS_CRED, &sock->flags)
+ && !u->addr && (err = unix_autobind(sock)) != 0)
goto out;
err = -EMSGSIZE;
/* Derived from fs/exec.c:compute_creds. */
kernel_cap_t new_permitted, working;
- new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
+ new_permitted = cap_intersect (bprm->cap_permitted, vx_current_bcaps());
working = cap_intersect (bprm->cap_inheritable,
current->cap_inheritable);
new_permitted = cap_combine (new_permitted, working);