From c5fb35bebffd73f6c75e2947f99fceff83f187b3 Mon Sep 17 00:00:00 2001 From: Mark Huang Date: Fri, 17 Sep 2004 02:53:31 +0000 Subject: [PATCH] Merge to VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff) --- arch/ia64/ia32/binfmt_elf32.c | 1 + arch/sparc64/kernel/binfmt_aout32.c | 1 + arch/um/kernel/sys_call_table.c | 149 +------------------------- arch/x86_64/ia32/ia32_aout.c | 1 + fs/attr.c | 28 +++++ fs/binfmt_elf.c | 1 + fs/binfmt_flat.c | 1 + fs/binfmt_som.c | 1 + fs/devpts/inode.c | 3 +- fs/exec.c | 1 + fs/ext2/ialloc.c | 5 + fs/ext2/inode.c | 12 +-- fs/ext3/balloc.c | 30 ++++-- fs/ext3/ialloc.c | 9 +- fs/ext3/inode.c | 16 +-- fs/ext3/super.c | 5 + fs/fcntl.c | 4 +- fs/inode.c | 9 +- fs/ioctl.c | 1 + fs/jfs/jfs_imap.c | 10 +- fs/namei.c | 12 ++- fs/namespace.c | 1 + fs/nfs/dir.c | 5 +- fs/nfs/file.c | 158 +--------------------------- fs/nfs/inode.c | 28 ++--- fs/nfs/nfs3xdr.c | 27 +++-- fs/nfs/proc.c | 3 - fs/nfsd/auth.c | 8 +- fs/nfsd/nfs3xdr.c | 13 ++- fs/nfsd/nfs4xdr.c | 6 +- fs/nfsd/nfsxdr.c | 13 ++- fs/nfsd/vfs.c | 5 +- fs/open.c | 11 +- fs/proc/array.c | 12 ++- fs/proc/generic.c | 3 +- fs/reiserfs/inode.c | 10 +- fs/reiserfs/ioctl.c | 8 +- fs/reiserfs/super.c | 2 +- fs/super.c | 9 ++ fs/sysfs/mount.c | 4 +- include/linux/binfmts.h | 1 - include/linux/devpts_fs.h | 2 + include/linux/fs.h | 2 +- include/linux/init_task.h | 2 +- include/linux/shmem_fs.h | 3 + include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/clnt.h | 3 +- include/linux/sysfs.h | 2 + include/linux/vs_context.h | 24 ++--- include/linux/vs_dlimit.h | 102 ++++++++++++------ include/linux/vs_network.h | 27 ++--- include/linux/vserver/context.h | 2 + include/linux/vserver/dlimit.h | 2 + include/linux/vserver/network.h | 2 + include/linux/vserver/sched.h | 4 +- include/net/route.h | 3 + kernel/exit.c | 5 +- kernel/fork.c | 18 ++-- kernel/signal.c | 2 - kernel/vserver/context.c | 62 +++++++---- kernel/vserver/dlimit.c | 30 ++++-- kernel/vserver/network.c | 39 ++++--- kernel/vserver/sysctl.c | 59 +++++++++-- mm/mremap.c | 1 + mm/rmap.c | 3 +- mm/shmem.c | 5 +- mm/swapfile.c | 1 + net/core/dev.c | 1 + net/ipv4/af_inet.c | 7 +- net/ipv4/icmp.c | 1 - net/ipv4/tcp_ipv4.c | 21 +++- net/ipv4/tcp_minisocks.c | 4 +- net/ipv4/udp.c | 1 - net/packet/af_packet.c | 3 - net/socket.c | 10 +- net/sunrpc/auth.c | 10 +- net/sunrpc/auth_unix.c | 28 +++-- net/unix/af_unix.c | 5 +- 78 files changed, 525 insertions(+), 599 deletions(-) diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index 85c5f1ba7..299c15315 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c index 2a858197c..ae92b7776 100644 --- a/arch/sparc64/kernel/binfmt_aout32.c +++ b/arch/sparc64/kernel/binfmt_aout32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c index 597d5683a..83beea7cc 100644 --- a/arch/um/kernel/sys_call_table.c +++ b/arch/um/kernel/sys_call_table.c @@ -107,154 +107,7 @@ syscall_handler_t *sys_call_table[] = { [ __NR_times ] (syscall_handler_t *) sys_times, [ __NR_prof ] (syscall_handler_t *) sys_ni_syscall, [ __NR_brk ] (syscall_handler_t *) sys_brk, - [ __NR_setgid ] (syscall_handler_t *) sys_setgid16, - [ __NR_getgid ] (syscall_handler_t *) sys_getgid16, - [ __NR_signal ] (syscall_handler_t *) sys_signal, - [ __NR_geteuid ] (syscall_handler_t *) sys_geteuid16, - [ __NR_getegid ] (syscall_handler_t *) sys_getegid16, - [ __NR_acct ] (syscall_handler_t *) sys_acct, - [ __NR_umount2 ] (syscall_handler_t *) sys_umount, - [ __NR_lock ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_ioctl ] (syscall_handler_t *) sys_ioctl, - [ __NR_fcntl ] (syscall_handler_t *) sys_fcntl, - [ __NR_mpx ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_setpgid ] (syscall_handler_t *) sys_setpgid, - [ __NR_ulimit ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_oldolduname ] (syscall_handler_t *) sys_olduname, - [ __NR_umask ] (syscall_handler_t *) sys_umask, - [ __NR_chroot ] (syscall_handler_t *) sys_chroot, - [ __NR_ustat ] (syscall_handler_t *) sys_ustat, - [ __NR_dup2 ] (syscall_handler_t *) sys_dup2, - [ __NR_getppid ] (syscall_handler_t *) sys_getppid, - [ __NR_getpgrp ] (syscall_handler_t *) sys_getpgrp, - [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, - [ __NR_sigaction ] (syscall_handler_t *) sys_sigaction, - [ __NR_sgetmask ] (syscall_handler_t *) sys_sgetmask, - [ __NR_ssetmask ] (syscall_handler_t *) sys_ssetmask, - [ __NR_setreuid ] (syscall_handler_t *) sys_setreuid16, - [ __NR_setregid ] (syscall_handler_t *) sys_setregid16, - [ __NR_sigsuspend ] (syscall_handler_t *) sys_sigsuspend, - [ __NR_sigpending ] (syscall_handler_t *) sys_sigpending, - [ __NR_sethostname ] (syscall_handler_t *) sys_sethostname, - [ __NR_setrlimit ] (syscall_handler_t *) sys_setrlimit, - [ __NR_getrlimit ] (syscall_handler_t *) sys_old_getrlimit, - [ __NR_getrusage ] (syscall_handler_t *) sys_getrusage, - [ __NR_gettimeofday ] (syscall_handler_t *) sys_gettimeofday, - [ __NR_settimeofday ] (syscall_handler_t *) sys_settimeofday, - [ __NR_getgroups ] (syscall_handler_t *) sys_getgroups16, - [ __NR_setgroups ] (syscall_handler_t *) sys_setgroups16, - [ __NR_symlink ] (syscall_handler_t *) sys_symlink, - [ __NR_oldlstat ] (syscall_handler_t *) sys_lstat, - [ __NR_readlink ] (syscall_handler_t *) sys_readlink, - [ __NR_uselib ] (syscall_handler_t *) sys_uselib, - [ __NR_swapon ] = (syscall_handler_t *) sys_swapon, - [ __NR_reboot ] (syscall_handler_t *) sys_reboot, - [ __NR_readdir ] = old_readdir, - [ __NR_munmap ] (syscall_handler_t *) sys_munmap, - [ __NR_truncate ] (syscall_handler_t *) sys_truncate, - [ __NR_ftruncate ] (syscall_handler_t *) sys_ftruncate, - [ __NR_fchmod ] (syscall_handler_t *) sys_fchmod, - [ __NR_fchown ] (syscall_handler_t *) sys_fchown16, - [ __NR_getpriority ] (syscall_handler_t *) sys_getpriority, - [ __NR_setpriority ] (syscall_handler_t *) sys_setpriority, - [ __NR_profil ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_statfs ] (syscall_handler_t *) sys_statfs, - [ __NR_fstatfs ] (syscall_handler_t *) sys_fstatfs, - [ __NR_ioperm ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_socketcall ] (syscall_handler_t *) sys_socketcall, - [ __NR_syslog ] (syscall_handler_t *) sys_syslog, - [ __NR_setitimer ] (syscall_handler_t *) sys_setitimer, - [ __NR_getitimer ] (syscall_handler_t *) sys_getitimer, - [ __NR_stat ] (syscall_handler_t *) sys_newstat, - [ __NR_lstat ] (syscall_handler_t *) sys_newlstat, - [ __NR_fstat ] (syscall_handler_t *) sys_newfstat, - [ __NR_olduname ] (syscall_handler_t *) sys_uname, - [ __NR_iopl ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_vhangup ] (syscall_handler_t *) sys_vhangup, - [ __NR_idle ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, - [ __NR_swapoff ] = (syscall_handler_t *) sys_swapoff, - [ __NR_sysinfo ] (syscall_handler_t *) sys_sysinfo, - [ __NR_ipc ] (syscall_handler_t *) sys_ipc, - [ __NR_fsync ] (syscall_handler_t *) sys_fsync, - [ __NR_sigreturn ] (syscall_handler_t *) sys_sigreturn, - [ __NR_clone ] (syscall_handler_t *) sys_clone, - [ __NR_setdomainname ] (syscall_handler_t *) sys_setdomainname, - [ __NR_uname ] (syscall_handler_t *) sys_newuname, - [ __NR_adjtimex ] (syscall_handler_t *) sys_adjtimex, - [ __NR_mprotect ] (syscall_handler_t *) sys_mprotect, - [ __NR_sigprocmask ] (syscall_handler_t *) sys_sigprocmask, - [ __NR_create_module ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_init_module ] (syscall_handler_t *) sys_init_module, - [ __NR_delete_module ] (syscall_handler_t *) sys_delete_module, - [ __NR_get_kernel_syms ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_quotactl ] (syscall_handler_t *) sys_quotactl, - [ __NR_getpgid ] (syscall_handler_t *) sys_getpgid, - [ __NR_fchdir ] (syscall_handler_t *) sys_fchdir, - [ __NR_bdflush ] (syscall_handler_t *) sys_bdflush, - [ __NR_sysfs ] (syscall_handler_t *) sys_sysfs, - [ __NR_personality ] (syscall_handler_t *) sys_personality, - [ __NR_afs_syscall ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_setfsuid ] (syscall_handler_t *) sys_setfsuid16, - [ __NR_setfsgid ] (syscall_handler_t *) sys_setfsgid16, - [ __NR__llseek ] (syscall_handler_t *) sys_llseek, - [ __NR_getdents ] (syscall_handler_t *) sys_getdents, - [ __NR__newselect ] = (syscall_handler_t *) sys_select, - [ __NR_flock ] (syscall_handler_t *) sys_flock, - [ __NR_msync ] (syscall_handler_t *) sys_msync, - [ __NR_readv ] (syscall_handler_t *) sys_readv, - [ __NR_writev ] (syscall_handler_t *) sys_writev, - [ __NR_getsid ] (syscall_handler_t *) sys_getsid, - [ __NR_fdatasync ] (syscall_handler_t *) sys_fdatasync, - [ __NR__sysctl ] = (syscall_handler_t *) sys_sysctl, - [ __NR_mlock ] (syscall_handler_t *) sys_mlock, - [ __NR_munlock ] (syscall_handler_t *) sys_munlock, - [ __NR_mlockall ] (syscall_handler_t *) sys_mlockall, - [ __NR_munlockall ] (syscall_handler_t *) sys_munlockall, - [ __NR_sched_setparam ] (syscall_handler_t *) sys_sched_setparam, - [ __NR_sched_getparam ] (syscall_handler_t *) sys_sched_getparam, - [ __NR_sched_setscheduler ] (syscall_handler_t *) sys_sched_setscheduler, - [ __NR_sched_getscheduler ] (syscall_handler_t *) sys_sched_getscheduler, - [ __NR_sched_yield ] = (syscall_handler_t *) yield, - [ __NR_sched_get_priority_max ] (syscall_handler_t *) sys_sched_get_priority_max, - [ __NR_sched_get_priority_min ] (syscall_handler_t *) sys_sched_get_priority_min, - [ __NR_sched_rr_get_interval ] (syscall_handler_t *) sys_sched_rr_get_interval, - [ __NR_nanosleep ] (syscall_handler_t *) sys_nanosleep, - [ __NR_mremap ] (syscall_handler_t *) sys_mremap, - [ __NR_setresuid ] (syscall_handler_t *) sys_setresuid16, - [ __NR_getresuid ] (syscall_handler_t *) sys_getresuid16, - [ __NR_vm86 ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_query_module ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_poll ] (syscall_handler_t *) sys_poll, - [ __NR_nfsservctl ] = (syscall_handler_t *) NFSSERVCTL, - [ __NR_setresgid ] (syscall_handler_t *) sys_setresgid16, - [ __NR_getresgid ] (syscall_handler_t *) sys_getresgid16, - [ __NR_prctl ] (syscall_handler_t *) sys_prctl, - [ __NR_rt_sigreturn ] (syscall_handler_t *) sys_rt_sigreturn, - [ __NR_rt_sigaction ] (syscall_handler_t *) sys_rt_sigaction, - [ __NR_rt_sigprocmask ] (syscall_handler_t *) sys_rt_sigprocmask, - [ __NR_rt_sigpending ] (syscall_handler_t *) sys_rt_sigpending, - [ __NR_rt_sigtimedwait ] (syscall_handler_t *) sys_rt_sigtimedwait, - [ __NR_rt_sigqueueinfo ] (syscall_handler_t *) sys_rt_sigqueueinfo, - [ __NR_rt_sigsuspend ] (syscall_handler_t *) sys_rt_sigsuspend, - [ __NR_pread64 ] (syscall_handler_t *) sys_pread64, - [ __NR_pwrite64 ] (syscall_handler_t *) sys_pwrite64, - [ __NR_chown ] (syscall_handler_t *) sys_chown16, - [ __NR_getcwd ] (syscall_handler_t *) sys_getcwd, - [ __NR_capget ] (syscall_handler_t *) sys_capget, - [ __NR_capset ] (syscall_handler_t *) sys_capset, - [ __NR_sigaltstack ] (syscall_handler_t *) sys_sigaltstack, - [ __NR_sendfile ] (syscall_handler_t *) sys_sendfile, - [ __NR_getpmsg ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_putpmsg ] (syscall_handler_t *) sys_ni_syscall, - [ __NR_vfork ] (syscall_handler_t *) sys_vfork, - [ __NR_ugetrlimit ] (syscall_handler_t *) sys_getrlimit, - [ __NR_mmap2 ] (syscall_handler_t *) sys_mmap2, - [ __NR_truncate64 ] (syscall_handler_t *) sys_truncate64, - [ __NR_ftruncate64 ] (syscall_handler_t *) sys_ftruncate64, - [ __NR_stat64 ] (syscall_handler_t *) sys_stat64, - [ __NR_lstat64 ] (syscall_handler_t *) sys_lstat64, - [ __NR_fstat64 ] (syscall_handler_t *) sys_fstat64, + [ __NR_setgid ] (syscall_handler_t *) sys_setgid1 [ __NR_getdents64 ] (syscall_handler_t *) sys_getdents64, [ __NR_fcntl64 ] (syscall_handler_t *) sys_fcntl64, [ 223 ] (syscall_handler_t *) sys_ni_syscall, diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 63525ce04..fe6bae25e 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/fs/attr.c b/fs/attr.c index d21530210..89e03f7bb 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include /* Taken over from the old code... */ @@ -55,6 +58,31 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) goto error; } + + /* Check for evil vserver activity */ + if (vx_check(0, VX_ADMIN)) + goto fine; + + if (IS_BARRIER(inode)) { + printk(KERN_WARNING + "VSW: xid=%d messing with the barrier.\n", + vx_current_xid()); + goto error; + } + switch (inode->i_sb->s_magic) { + case PROC_SUPER_MAGIC: + printk(KERN_WARNING + "VSW: xid=%d messing with the procfs.\n", + vx_current_xid()); + goto error; + case DEVPTS_SUPER_MAGIC: + if (vx_check(inode->i_xid, VX_IDENT)) + goto fine; + printk(KERN_WARNING + "VSW: xid=%d messing with the devpts.\n", + vx_current_xid()); + goto error; + } fine: retval = 0; error: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 82294d38d..fb98fceb5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 3971d361e..0bc28feab 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 496196b4e..4969da676 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 93b55afa1..a429f2817 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -21,8 +21,6 @@ #include #include "xattr.h" -#define DEVPTS_SUPER_MAGIC 0x1cd1 - static struct vfsmount *devpts_mnt; static struct dentry *devpts_root; @@ -98,6 +96,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_nlink = 2; + inode->i_xid = vx_current_xid(); devpts_root = s->s_root = d_alloc_root(inode); if (s->s_root) diff --git a/fs/exec.c b/fs/exec.c index a6763ce9a..bca37d6c0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 299ea3f92..1b1dce48f 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -469,6 +469,11 @@ struct inode *ext2_new_inode(struct inode *dir, int mode) if (!inode) return ERR_PTR(-ENOMEM); + if (sb->s_flags & MS_TAGXID) + inode->i_xid = current->xid; + else + inode->i_xid = 0; + if (DLIMIT_ALLOC_INODE(sb, inode->i_xid)) { err = -ENOSPC; goto fail_dlim; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index dfc0ca26b..1ef02bccb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1071,10 +1071,10 @@ void ext2_read_inode (struct inode * inode) uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } - inode->i_uid = INOXID_UID(uid, gid); - inode->i_gid = INOXID_GID(uid, gid); - if (inode->i_sb->s_flags & MS_TAGXID) - inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); @@ -1168,8 +1168,8 @@ static int ext2_update_inode(struct inode * inode, int do_sync) struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); - gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 556f9d909..3b3160d8e 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include /* @@ -471,19 +472,29 @@ fail: static int ext3_has_free_blocks(struct super_block *sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); - int free_blocks, root_blocks; + int free_blocks, root_blocks, cond; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): free=%u, root=%u", + sb, free_blocks, root_blocks); + DLIMIT_ADJUST_BLOCK(sb, vx_current_xid(), &free_blocks, &root_blocks); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + cond = (free_blocks < root_blocks + 1 && + !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; + (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))); + + vxdprintk(VXD_CBIT(dlim, 3), + "ext3_has_free_blocks(%p): %u<%u+1, %c, %u!=%u r=%d", + sb, free_blocks, root_blocks, + !capable(CAP_SYS_RESOURCE)?'1':'0', + sbi->s_resuid, current->fsuid, cond?0:1); + + return (cond ? 0 : 1); } /* @@ -706,7 +717,8 @@ allocated: io_error: *errp = -EIO; out: - DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); + if (!performed_allocation) + DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); out_dlimit: if (fatal) { *errp = fatal; @@ -715,10 +727,8 @@ out_dlimit: /* * Undo the block allocation */ - if (!performed_allocation) { - DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1); + if (!performed_allocation) DQUOT_FREE_BLOCK(inode, 1); - } brelse(bitmap_bh); return 0; } diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 0c21ea4c2..f30a4f41a 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -446,9 +446,15 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); + + if (sb->s_flags & MS_TAGXID) + inode->i_xid = current->xid; + else + inode->i_xid = 0; + if (DLIMIT_ALLOC_INODE(sb, inode->i_xid)) { err = -ENOSPC; - goto fail_dlim; + goto out; } ei = EXT3_I(inode); @@ -626,7 +632,6 @@ got: goto really_out; fail: DLIMIT_FREE_INODE(sb, inode->i_xid); -fail_dlim: ext3_std_error(sb, err); out: iput(inode); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 66fccef57..962aef215 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2516,10 +2516,10 @@ void ext3_read_inode(struct inode * inode) uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } - inode->i_uid = INOXID_UID(uid, gid); - inode->i_gid = INOXID_GID(uid, gid); - if (inode->i_sb->s_flags & MS_TAGXID) - inode->i_xid = INOXID_XID(uid, gid, le16_to_cpu(raw_inode->i_raw_xid)); + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, + le16_to_cpu(raw_inode->i_raw_xid)); inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); @@ -2628,8 +2628,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; - uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); - gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, @@ -2874,7 +2874,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (attr->ia_valid & ATTR_XID) + if ((attr->ia_valid & ATTR_XID) + && inode->i_sb + && (inode->i_sb->s_flags & MS_TAGXID)) inode->i_xid = attr->ia_xid; error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index e48747bd9..61943d319 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -724,6 +724,11 @@ static int parse_options (char * options, struct super_block *sb, break; #ifndef CONFIG_INOXID_NONE case Opt_tagxid: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "tagxid on remount\n"); + return 0; + } set_opt (sbi->s_mount_opt, TAG_XID); break; #endif diff --git a/fs/fcntl.c b/fs/fcntl.c index 16586d95e..a7966c9be 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -142,7 +142,7 @@ int dupfd(struct file *file, unsigned int start) FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); spin_unlock(&files->file_lock); - vx_openfd_inc(fd); + // vx_openfd_inc(fd); fd_install(fd, file); } else { spin_unlock(&files->file_lock); @@ -192,7 +192,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) FD_SET(newfd, files->open_fds); FD_CLR(newfd, files->close_on_exec); spin_unlock(&files->file_lock); - vx_openfd_inc(newfd); + // vx_openfd_inc(newfd); if (tofree) filp_close(tofree, files); diff --git a/fs/inode.c b/fs/inode.c index 207cc73ea..899805f06 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -139,11 +139,10 @@ static struct inode *alloc_inode(struct super_block *sb) struct address_space * const mapping = &inode->i_data; inode->i_sb = sb; - if (sb->s_flags & MS_TAGXID) - inode->i_xid = current->xid; - else - inode->i_xid = 0; /* maybe xid -1 would be better? */ // inode->i_dqh = dqhget(sb->s_dqh); + + /* important because of inode slab reuse */ + inode->i_xid = 0; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; atomic_set(&inode->i_count, 1); @@ -163,7 +162,6 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; - // inode->i_xid = 0; /* maybe not too wise ... */ inode->i_security = NULL; inode->dirtied_when = 0; if (security_inode_alloc(inode)) { @@ -595,7 +593,6 @@ struct inode *new_inode(struct super_block *sb) list_add(&inode->i_list, &inode_in_use); inode->i_ino = ++last_ino; inode->i_state = 0; - inode->i_xid = vx_current_xid(); spin_unlock(&inode_lock); } return inode; diff --git a/fs/ioctl.c b/fs/ioctl.c index cd9621af3..96a1b601e 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 31cfda180..6a6f147e8 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3116,9 +3116,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) uid = le32_to_cpu(dip->di_uid); gid = le32_to_cpu(dip->di_gid); - ip->i_uid = INOXID_UID(uid, gid); - ip->i_gid = INOXID_GID(uid, gid); - ip->i_xid = INOXID_XID(uid, gid, 0); + ip->i_uid = INOXID_UID(XID_TAG(ip), uid, gid); + ip->i_gid = INOXID_GID(XID_TAG(ip), uid, gid); + ip->i_xid = INOXID_XID(XID_TAG(ip), uid, gid, 0); ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); @@ -3181,8 +3181,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - uid = XIDINO_UID(ip->i_uid, ip->i_xid); - gid = XIDINO_GID(ip->i_gid, ip->i_xid); + uid = XIDINO_UID(XID_TAG(ip), ip->i_uid, ip->i_xid); + gid = XIDINO_GID(XID_TAG(ip), ip->i_gid, ip->i_xid); dip->di_uid = cpu_to_le32(uid); dip->di_gid = cpu_to_le32(gid); /* diff --git a/fs/namei.c b/fs/namei.c index 89a748b10..44262f3f6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -165,9 +165,6 @@ int vfs_permission(struct inode * inode, int mask) { umode_t mode = inode->i_mode; - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN|VX_WATCH)) - return -EACCES; - if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. @@ -213,12 +210,17 @@ int vfs_permission(struct inode * inode, int mask) return -EACCES; } -static inline int xid_permission(struct inode *inode) +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) { if (inode->i_xid == 0) return 0; if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) return 0; +/* + printk("VSW: xid=%d denied access to %p[#%d,%lu] »%*s«.\n", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + nd->dentry->d_name.len, nd->dentry->d_name.name); +*/ return -EACCES; } @@ -230,7 +232,7 @@ int permission(struct inode * inode,int mask, struct nameidata *nd) /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; - if ((retval = xid_permission(inode))) + if ((retval = xid_permission(inode, mask, nd))) return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); diff --git a/fs/namespace.c b/fs/namespace.c index c186ab44e..9b7d73b2b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -231,6 +231,7 @@ static int show_vfsmnt(struct seq_file *m, void *v) { MS_MANDLOCK, ",mand" }, { MS_NOATIME, ",noatime" }, { MS_NODIRATIME, ",nodiratime" }, + { MS_TAGXID, ",tagxid" }, { 0, NULL } }; static struct proc_fs_info mnt_info[] = { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b3a950c20..f061e70cd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1015,7 +1015,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, int error; int open_flags = 0; - dfprintk(VFS, "NFS: create(%s/%ld, %s)\n", dir->i_sb->s_id, + dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; @@ -1032,12 +1032,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, */ lock_kernel(); nfs_begin_data_update(dir); - dfprintk(VFS, "NFS: attr %d.%d #%d\n", attr.ia_uid, attr.ia_gid, attr.ia_xid); inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); nfs_end_data_update(dir); if (!IS_ERR(inode)) { - dfprintk(VFS, "NFS: inode=%p %d.%d #%d\n", inode, - inode->i_uid, inode->i_gid, inode->i_xid); d_instantiate(dentry, inode); nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6d9150de8..91bd05377 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -156,163 +156,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) } static ssize_t -nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, - read_actor_t actor, void *target) -{ - struct dentry *dentry = filp->f_dentry; - struct inode *inode = dentry->d_inode; - ssize_t res; - - dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); - - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!res) - res = generic_file_sendfile(filp, ppos, count, actor, target); - return res; -} - -static int -nfs_file_mmap(struct file * file, struct vm_area_struct * vma) -{ - struct dentry *dentry = file->f_dentry; - struct inode *inode = dentry->d_inode; - int status; - - dfprintk(VFS, "nfs: mmap(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); - - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!status) - status = generic_file_mmap(file, vma); - return status; -} - -/* - * Flush any dirty pages for this process, and check for write errors. - * The return status from this call provides a reliable indication of - * whether any write errors occurred for this process. - */ -static int -nfs_fsync(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int status; - - dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); - - lock_kernel(); - status = nfs_wb_all(inode); - if (!status) { - status = file->f_error; - file->f_error = 0; - } - unlock_kernel(); - return status; -} - -/* - * This does the "real" work of the write. The generic routine has - * allocated the page, locked it, done all the page alignment stuff - * calculations etc. Now we should just copy the data from user - * space and write it back to the real medium.. - * - * If the writer ends up delaying the write, the writer needs to - * increment the page use counts until he is done with the page. - */ -static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) -{ - return nfs_flush_incompatible(file, page); -} - -static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) -{ - long status; - - lock_kernel(); - status = nfs_updatepage(file, page, offset, to-offset); - unlock_kernel(); - return status; -} - -struct address_space_operations nfs_file_aops = { - .readpage = nfs_readpage, - .readpages = nfs_readpages, - .set_page_dirty = __set_page_dirty_nobuffers, - .writepage = nfs_writepage, - .writepages = nfs_writepages, - .prepare_write = nfs_prepare_write, - .commit_write = nfs_commit_write, -#ifdef CONFIG_NFS_DIRECTIO - .direct_IO = nfs_direct_IO, -#endif -}; - -/* - * Write to a file (through the page cache). - */ -static ssize_t -nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) -{ - struct dentry * dentry = iocb->ki_filp->f_dentry; - struct inode * inode = dentry->d_inode; - ssize_t result; - -#ifdef CONFIG_NFS_DIRECTIO - if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, buf, count, pos); -#endif - - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); - - result = -EBUSY; - if (IS_SWAPFILE(inode)) - goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; - - result = count; - if (!count) - goto out; - - result = generic_file_aio_write(iocb, buf, count, pos); -out: - return result; - -out_swapfile: - printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); - goto out; -} - -/* - * Lock a (portion of) a file - */ -int -nfs_lock(struct file *filp, int cmd, struct file_lock *fl) -{ - struct inode * inode = filp->f_mapping->host; - int status = 0; - int status2; - - dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", - inode->i_sb->s_id, inode->i_ino, - fl->fl_type, fl->fl_flags, - (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; - - /* No mandatory locks over NFS */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) - return -ENOLCK; - - if (NFS_PROTO(inode)->version != 4) { - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { +nfs_file_sendfile(struct file *f if (IS_GETLK(cmd)) status = LOCK_USE_CLNT; goto out_ok; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf31dc34e..7f25c31fd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -131,7 +131,6 @@ nfs_delete_inode(struct inode * inode) printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); } -// DLIMIT_FREE_INODE(inode->i_sb, inode->i_xid); clear_inode(inode); } @@ -391,6 +390,7 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) clnt->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; clnt->cl_droppriv = (server->flags & NFS_MOUNT_BROKEN_SUID) ? 1 : 0; + clnt->cl_tagxid = (server->flags & NFS_MOUNT_TAGXID) ? 1 : 0; clnt->cl_chatty = 1; return clnt; @@ -700,11 +700,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (inode->i_state & I_NEW) { struct nfs_inode *nfsi = NFS_I(inode); -/* if (DLIMIT_ALLOC_INODE(sb, inode->i_xid)) { - err = -ENOSPC; - goto fail_dlim; - } -*/ /* We set i_ino for the few things that still rely on it, * such as stat(2) */ inode->i_ino = hash; @@ -739,10 +734,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); inode->i_nlink = fattr->nlink; - inode->i_uid = INOXID_UID(fattr->uid, fattr->gid); - inode->i_gid = INOXID_GID(fattr->uid, fattr->gid); - if (inode->i_sb->s_flags & MS_TAGXID) - inode->i_xid = INOXID_XID(fattr->uid, fattr->gid, 0); + inode->i_uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); /* maybe fattr->xid someday */ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* @@ -1131,10 +1125,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) nfsi->flags |= NFS_INO_INVALID_ATTR; - uid = INOXID_UID(fattr->uid, fattr->gid); - gid = INOXID_GID(fattr->uid, fattr->gid); - if (inode->i_sb->s_flags & MS_TAGXID) - xid = INOXID_XID(fattr->uid, fattr->gid, 0); + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) @@ -1259,10 +1252,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - uid = INOXID_UID(fattr->uid, fattr->gid); - gid = INOXID_GID(fattr->uid, fattr->gid); - if (inode->i_sb->s_flags & MS_TAGXID) - xid = INOXID_XID(fattr->uid, fattr->gid, 0); + uid = INOXID_UID(XID_TAG(inode), fattr->uid, fattr->gid); + gid = INOXID_GID(XID_TAG(inode), fattr->uid, fattr->gid); + xid = INOXID_XID(XID_TAG(inode), fattr->uid, fattr->gid, 0); if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != uid || diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 30d7151d4..ebfc60759 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -178,7 +178,7 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr) } static inline u32 * -xdr_encode_sattr(u32 *p, struct iattr *attr) +xdr_encode_sattr(u32 *p, struct iattr *attr, int tagxid) { if (attr->ia_valid & ATTR_MODE) { *p++ = xdr_one; @@ -186,15 +186,17 @@ xdr_encode_sattr(u32 *p, struct iattr *attr) } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_UID || attr->ia_valid & ATTR_XID) { + if (attr->ia_valid & ATTR_UID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(XIDINO_UID(attr->ia_uid, attr->ia_xid)); + *p++ = htonl(XIDINO_UID(tagxid, attr->ia_uid, attr->ia_xid)); } else { *p++ = xdr_zero; } - if (attr->ia_valid & ATTR_GID || attr->ia_valid & ATTR_XID) { + if (attr->ia_valid & ATTR_GID || + (tagxid && (attr->ia_valid & ATTR_XID))) { *p++ = xdr_one; - *p++ = htonl(XIDINO_GID(attr->ia_gid, attr->ia_xid)); + *p++ = htonl(XIDINO_GID(tagxid, attr->ia_gid, attr->ia_xid)); } else { *p++ = xdr_zero; } @@ -279,7 +281,8 @@ static int nfs3_xdr_sattrargs(struct rpc_rqst *req, u32 *p, struct nfs3_sattrargs *args) { p = xdr_encode_fhandle(p, args->fh); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); *p++ = htonl(args->guard); if (args->guard) p = xdr_encode_time3(p, &args->guardtime); @@ -370,7 +373,8 @@ nfs3_xdr_createargs(struct rpc_rqst *req, u32 *p, struct nfs3_createargs *args) *p++ = args->verifier[0]; *p++ = args->verifier[1]; } else - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -384,7 +388,8 @@ nfs3_xdr_mkdirargs(struct rpc_rqst *req, u32 *p, struct nfs3_mkdirargs *args) { p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } @@ -397,7 +402,8 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args { p = xdr_encode_fhandle(p, args->fromfh); p = xdr_encode_array(p, args->fromname, args->fromlen); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); p = xdr_encode_array(p, args->topath, args->tolen); req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; @@ -412,7 +418,8 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, u32 *p, struct nfs3_mknodargs *args) p = xdr_encode_fhandle(p, args->fh); p = xdr_encode_array(p, args->name, args->len); *p++ = htonl(args->type); - p = xdr_encode_sattr(p, args->sattr); + p = xdr_encode_sattr(p, args->sattr, + req->rq_task->tk_client->cl_tagxid); if (args->type == NF3CHR || args->type == NF3BLK) { *p++ = htonl(MAJOR(args->rdev)); *p++ = htonl(MINOR(args->rdev)); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 171a34a87..8dc6a981c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -244,9 +244,6 @@ nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, int status; fattr.valid = 0; - memset(&fattr, 0, sizeof(struct nfs_fattr)); - - dprintk("NFS call create %s\n", name->name); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); dprintk("NFS reply create: %d\n", status); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 05822ee82..8d35f45d9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -43,20 +43,20 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) } if (cred->cr_uid != (uid_t) -1) - current->fsuid = INOXID_UID(cred->cr_uid, cred->cr_gid); + current->fsuid = INOXID_UID(1, cred->cr_uid, cred->cr_gid); else current->fsuid = exp->ex_anon_uid; if (cred->cr_gid != (gid_t) -1) - current->fsgid = INOXID_GID(cred->cr_uid, cred->cr_gid); + current->fsgid = INOXID_GID(1, cred->cr_uid, cred->cr_gid); else current->fsgid = exp->ex_anon_gid; - current->xid = INOXID_XID(cred->cr_uid, cred->cr_gid, 0); + current->xid = INOXID_XID(1, cred->cr_uid, cred->cr_gid, 0); if (!cred->cr_group_info) return -ENOMEM; ret = set_current_groups(cred->cr_group_info); - if ((cred->cr_uid)) { + if (INOXID_UID(1, cred->cr_uid, cred->cr_gid)) { cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; } else { cap_t(current->cap_effective) |= (CAP_NFSD_MASK & diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 0a94be47e..d102365fa 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -122,6 +122,8 @@ static inline u32 * decode_sattr3(u32 *p, struct iattr *iap) { u32 tmp; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -131,12 +133,15 @@ decode_sattr3(u32 *p, struct iattr *iap) } if (*p++) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = ntohl(*p++); + uid = ntohl(*p++); } if (*p++) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = ntohl(*p++); + gid = ntohl(*p++); } + iap->ia_uid = INOXID_UID(1, uid, gid); + iap->ia_gid = INOXID_GID(1, uid, gid); + iap->ia_xid = INOXID_XID(1, uid, gid, 0); if (*p++) { u64 newsize; @@ -178,9 +183,9 @@ encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); *p++ = htonl((u32) nfsd_ruid(rqstp, - XIDINO_UID(stat.uid, stat.xid))); + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); *p++ = htonl((u32) nfsd_rgid(rqstp, - XIDINO_GID(stat.gid, stat.xid))); + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5710f797c..e95b02a70 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1562,7 +1562,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } if (bmval1 & FATTR4_WORD1_OWNER) { status = nfsd4_encode_user(rqstp, - XIDINO_UID(stat.uid, stat.xid), &p, &buflen); + XIDINO_UID(XID_TAG(dentry->d_inode), + stat.uid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) @@ -1570,7 +1571,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { status = nfsd4_encode_group(rqstp, - XIDINO_GID(stat.gid, stat.xid), &p, &buflen); + XIDINO_GID(XID_TAG(dentry->d_inode), + stat.gid, stat.xid), &p, &buflen); if (status == nfserr_resource) goto out_resource; if (status) diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6bee45f48..77b18baab 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -97,6 +97,8 @@ static inline u32 * decode_sattr(u32 *p, struct iattr *iap) { u32 tmp, tmp1; + uid_t uid = 0; + gid_t gid = 0; iap->ia_valid = 0; @@ -110,12 +112,15 @@ decode_sattr(u32 *p, struct iattr *iap) } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_UID; - iap->ia_uid = tmp; + uid = tmp; } if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_GID; - iap->ia_gid = tmp; + gid = tmp; } + iap->ia_uid = INOXID_UID(1, uid, gid); + iap->ia_gid = INOXID_GID(1, uid, gid); + iap->ia_xid = INOXID_XID(1, uid, gid, 0); if ((tmp = ntohl(*p++)) != (u32)-1) { iap->ia_valid |= ATTR_SIZE; iap->ia_size = tmp; @@ -162,9 +167,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) *p++ = htonl((u32) stat.mode); *p++ = htonl((u32) stat.nlink); *p++ = htonl((u32) nfsd_ruid(rqstp, - XIDINO_UID(stat.uid, stat.xid))); + XIDINO_UID(XID_TAG(dentry->d_inode), stat.uid, stat.xid))); *p++ = htonl((u32) nfsd_rgid(rqstp, - XIDINO_GID(stat.gid, stat.xid))); + XIDINO_GID(XID_TAG(dentry->d_inode), stat.gid, stat.xid))); if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b2eb46603..d50269bc5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1516,7 +1516,6 @@ int nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) { int err = fh_verify(rqstp, fhp, 0, MAY_NOP); - if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat)) err = nfserr_io; return err; @@ -1534,7 +1533,7 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) if (acc == MAY_NOP) return 0; #if 0 - printk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", + dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", acc, (acc & MAY_READ)? " read" : "", (acc & MAY_WRITE)? " write" : "", @@ -1547,7 +1546,7 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) IS_IMMUTABLE(inode)? " immut" : "", IS_APPEND(inode)? " append" : "", IS_RDONLY(inode)? " ro" : ""); - printk(" owner %d/%d user %d/%d\n", + dprintk(" owner %d/%d user %d/%d\n", inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); #endif diff --git a/fs/open.c b/fs/open.c index f8e7015b8..f19fe19a7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -613,9 +613,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) dentry = file->f_dentry; inode = dentry->d_inode; - err = -EPERM; - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - goto out_putf; err = -EROFS; if (IS_RDONLY(inode)) goto out_putf; @@ -648,10 +645,6 @@ asmlinkage long sys_chmod(const char __user * filename, mode_t mode) goto out; inode = nd.dentry->d_inode; - error = -EPERM; - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - goto dput_and_out; - error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; @@ -894,7 +887,7 @@ repeat: FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); files->next_fd = fd + 1; - vx_openfd_inc(fd); + // vx_openfd_inc(fd); #if 1 /* Sanity check */ if (files->fd[fd] != NULL) { @@ -916,7 +909,7 @@ static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) __FD_CLR(fd, files->open_fds); if (fd < files->next_fd) files->next_fd = fd; - vx_openfd_dec(fd); + // vx_openfd_dec(fd); } void fastcall put_unused_fd(unsigned int fd) diff --git a/fs/proc/array.c b/fs/proc/array.c index 0f00cc93c..47e254345 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -158,9 +158,11 @@ static inline char * task_state(struct task_struct *p, char *buffer) { struct group_info *group_info; int g; - pid_t ppid; + pid_t pid, ppid, tgid; read_lock(&tasklist_lock); + tgid = vx_map_tgid(current->vx_info, p->tgid); + pid = vx_map_tgid(current->vx_info, p->pid); ppid = vx_map_tgid(current->vx_info, p->real_parent->pid); buffer += sprintf(buffer, "State:\t%s\n" @@ -173,8 +175,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), (p->sleep_avg/1024)*100/(1020000000/1024), - p->tgid, - p->pid, p->pid ? ppid : 0, + tgid, pid, p->pid ? ppid : 0, p->pid && p->ptrace ? p->parent->pid : 0, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -351,7 +352,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer) sigset_t sigign, sigcatch; char state; int res; - pid_t ppid, pgid = -1, sid = -1; + pid_t pid, ppid, pgid = -1, sid = -1; int num_threads = 0; struct mm_struct *mm; unsigned long long start_time; @@ -367,6 +368,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer) if (bias_jiffies > task->start_time) bias_jiffies = task->start_time; } + pid = vx_map_tgid(task->vx_info, task->pid); mm = task->mm; if(mm) @@ -419,7 +421,7 @@ int proc_pid_stat(struct task_struct *task, char * buffer) res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", - task->pid, + pid, task->comm, state, ppid, diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 8ee2ff9ac..e7ec42fe2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -17,9 +17,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -388,6 +388,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); + inode->i_xid = vx_current_xid(); break; } } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index cba41f10e..bf513011e 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1114,9 +1114,9 @@ static void init_inode (struct inode * inode, struct path * path) REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); } - inode->i_uid = INOXID_UID(uid, gid); - inode->i_gid = INOXID_GID(uid, gid); - inode->i_xid = INOXID_XID(uid, gid, 0); + inode->i_uid = INOXID_UID(XID_TAG(inode), uid, gid); + inode->i_gid = INOXID_GID(XID_TAG(inode), uid, gid); + inode->i_xid = INOXID_XID(XID_TAG(inode), uid, gid, 0); pathrelse (path); if (S_ISREG (inode->i_mode)) { @@ -1141,8 +1141,8 @@ static void init_inode (struct inode * inode, struct path * path) static void inode2sd (void * sd, struct inode * inode, loff_t size) { struct stat_data * sd_v2 = (struct stat_data *)sd; - uid_t uid = XIDINO_UID(inode->i_uid, inode->i_xid); - gid_t gid = XIDINO_GID(inode->i_gid, inode->i_xid); + uid_t uid = XIDINO_UID(XID_TAG(inode), inode->i_uid, inode->i_xid); + gid_t gid = XIDINO_GID(XID_TAG(inode), inode->i_gid, inode->i_xid); __u16 flags; set_sd_v2_uid(sd_v2, uid ); diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 21d7c8310..6885230ec 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -49,9 +49,11 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return -EFAULT; oldflags = REISERFS_I(inode) -> i_attrs; - if ( (oldflags & REISERFS_IMMUTABLE_FL) || ( ( (flags ^ oldflags) & - (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) && - !capable( CAP_LINUX_IMMUTABLE ) ) ) + if ( ( (oldflags & REISERFS_IMMUTABLE_FL) || + ( (flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | + REISERFS_APPEND_FL) ) ) && + !capable( CAP_LINUX_IMMUTABLE ) ) return -EPERM; if( ( flags & REISERFS_NOTAIL_FL ) && diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f90884d9c..71eaa0651 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -703,6 +703,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st {"noattrs", .clrmask = 1< #include /* for the emergency remount stuff */ #include +#include +#include #include @@ -786,6 +788,13 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) sb = type->get_sb(type, flags, name, data); if (IS_ERR(sb)) goto out_free_secdata; + + error = -EPERM; + if (!capable(CAP_SYS_ADMIN) && !sb->s_bdev && + (sb->s_magic != PROC_SUPER_MAGIC) && + (sb->s_magic != DEVPTS_SUPER_MAGIC)) + goto out_sb; + error = security_sb_kern_mount(sb, secdata); if (error) goto out_sb; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index c3e5dbe17..823813ce0 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -11,8 +11,6 @@ #include "sysfs.h" -/* Random magic number */ -#define SYSFS_MAGIC 0x62656572 struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; @@ -29,7 +27,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = SYSFS_MAGIC; + sb->s_magic = SYSFS_SUPER_MAGIC; sb->s_op = &sysfs_ops; sysfs_sb = sb; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 88354f8e1..d3f220b57 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -2,7 +2,6 @@ #define _LINUX_BINFMTS_H #include -#include struct pt_regs; diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index b672ddc00..5f8269936 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -30,5 +30,7 @@ static inline void devpts_pty_kill(int number) { } #endif +#define DEVPTS_SUPER_MAGIC 0x1cd1 + #endif /* _LINUX_DEVPTS_FS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 73960d287..f2b93269d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -263,7 +263,7 @@ typedef void (dio_iodone_t)(struct inode *inode, loff_t offset, #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 -#define ATTR_XID 8192 +#define ATTR_XID 8192 /* * This is the Inode Attributes structure, used for notify_change(). It diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f6bcb2124..9bcf2db6d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,8 +113,8 @@ extern struct group_info init_groups; .switch_lock = SPIN_LOCK_UNLOCKED, \ .journal_info = NULL, \ .xid = 0, \ - .nid = 0, \ .vx_info = NULL, \ + .nid = 0, \ .nx_info = NULL, \ } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f8efcf10a..14a618109 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -8,6 +8,9 @@ #define SHMEM_NR_DIRECT 16 +#define TMPFS_SUPER_MAGIC 0x01021994 + + struct shmem_inode_info { spinlock_t lock; unsigned long next_index; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index a196e9b76..4eea2acb0 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -28,6 +28,7 @@ struct auth_cred { uid_t uid; gid_t gid; + xid_t xid; struct group_info *group_info; }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 917ec29d7..62f97051c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -53,7 +53,8 @@ struct rpc_clnt { cl_autobind : 1,/* use getport() */ cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose after use */ - cl_dead : 1;/* abandoned */ + cl_dead : 1,/* abandoned */ + cl_tagxid : 1;/* do xid tagging */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ struct rpc_portmap * cl_pmap; /* port mapping */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index f94c7ac77..23f783872 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -9,6 +9,8 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#define SYSFS_SUPER_MAGIC 0x62656572 + struct kobject; struct module; diff --git a/include/linux/vs_context.h b/include/linux/vs_context.h index 47b063b79..541935bcb 100644 --- a/include/linux/vs_context.h +++ b/include/linux/vs_context.h @@ -2,21 +2,12 @@ #define _VX_VS_CONTEXT_H -// #define VX_DEBUG - #include #include #include #include "vserver/context.h" - -#undef vxdprintk -#if defined(VX_DEBUG) -#define vxdprintk(x...) printk("vxd: " x) -#else -#define vxdprintk(x...) -#endif - +#include "vserver/debug.h" extern int proc_pid_vx_info(struct task_struct *, char *); @@ -29,7 +20,7 @@ static inline struct vx_info *__get_vx_info(struct vx_info *vxi, { if (!vxi) return NULL; - vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])", vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, _file, _line); atomic_inc(&vxi->vx_usecnt); @@ -46,7 +37,7 @@ static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _li { if (!vxi) return; - vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])", vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, _file, _line); if (atomic_dec_and_test(&vxi->vx_usecnt)) @@ -61,7 +52,7 @@ static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, BUG_ON(*vxp); if (!vxi) return; - vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(xid, 3), "set_vx_info(%p[#%d.%d.%d])", vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, vxi?atomic_read(&vxi->vx_refcnt):0, @@ -79,7 +70,7 @@ static inline void __clr_vx_info(struct vx_info **vxp, if (!vxo) return; - vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(xid, 3), "clr_vx_info(%p[#%d.%d.%d])", vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_usecnt):0, vxo?atomic_read(&vxo->vx_refcnt):0, @@ -100,6 +91,8 @@ static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, struct vx_info *vxi; task_lock(p); + vxlprintk(VXD_CBIT(xid, 5), "task_get_vx_info(%p)", + p, _file, _line); vxi = __get_vx_info(p->vx_info, _file, _line); task_unlock(p); return vxi; @@ -120,9 +113,6 @@ static __inline__ void __vx_verify_info( } -#undef vxdprintk -#define vxdprintk(x...) - #else #warning duplicate inclusion #endif diff --git a/include/linux/vs_dlimit.h b/include/linux/vs_dlimit.h index f1404614e..53400ab98 100644 --- a/include/linux/vs_dlimit.h +++ b/include/linux/vs_dlimit.h @@ -1,21 +1,13 @@ #ifndef _VX_VS_DLIMIT_H #define _VX_VS_DLIMIT_H - -// #define VX_DEBUG - #include #include #include #include "vserver/context.h" #include "vserver/dlimit.h" - -#if defined(VX_DEBUG) -#define vxdprintk(x...) printk("vxd: " x) -#else -#define vxdprintk(x...) -#endif +#include "vserver/debug.h" #define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) @@ -25,7 +17,7 @@ static inline struct dl_info *__get_dl_info(struct dl_info *dli, { if (!dli) return NULL; - vxdprintk("get_dl_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(dlim, 4), "get_dl_info(%p[#%d.%d])", dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, _file, _line); atomic_inc(&dli->dl_usecnt); @@ -38,11 +30,12 @@ static inline struct dl_info *__get_dl_info(struct dl_info *dli, #define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) -static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _line) +static inline void __put_dl_info(struct dl_info *dli, + const char *_file, int _line) { if (!dli) return; - vxdprintk("put_dl_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(dlim, 4), "put_dl_info(%p[#%d.%d])", dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, _file, _line); if (atomic_dec_and_test(&dli->dl_usecnt)) @@ -50,8 +43,6 @@ static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _li } -extern int vx_debug_dlimit; - #define __dlimit_char(d) ((d)?'*':' ') static inline int __dl_alloc_space(struct super_block *sb, @@ -73,14 +64,14 @@ static inline int __dl_alloc_space(struct super_block *sb, spin_unlock(&dli->dl_lock); put_dl_info(dli); out: - if (vx_debug_dlimit) - printk("ALLOC (%p,#%d)%c %lld bytes (%d)@ %s:%d\n", - sb, xid, __dlimit_char(dli), nr, ret, file, line); + vxlprintk(VXD_CBIT(dlim, 1), + "ALLOC (%p,#%d)%c %lld bytes (%d)", + sb, xid, __dlimit_char(dli), nr, ret, file, line); return ret; } static inline void __dl_free_space(struct super_block *sb, - xid_t xid, dlsize_t nr, const char *file, int line) + xid_t xid, dlsize_t nr, const char *_file, int _line) { struct dl_info *dli = NULL; @@ -91,17 +82,20 @@ static inline void __dl_free_space(struct super_block *sb, goto out; spin_lock(&dli->dl_lock); - dli->dl_space_used -= nr; + if (dli->dl_space_used > nr) + dli->dl_space_used -= nr; + else + dli->dl_space_used = 0; spin_unlock(&dli->dl_lock); put_dl_info(dli); out: - if (vx_debug_dlimit) - printk("FREE (%p,#%d)%c %lld bytes @ %s:%d\n", - sb, xid, __dlimit_char(dli), nr, file, line); + vxlprintk(VXD_CBIT(dlim, 1), + "FREE (%p,#%d)%c %lld bytes", + sb, xid, __dlimit_char(dli), nr, _file, _line); } static inline int __dl_alloc_inode(struct super_block *sb, - xid_t xid, const char *file, int line) + xid_t xid, const char *_file, int _line) { struct dl_info *dli; int ret = 0; @@ -114,17 +108,24 @@ static inline int __dl_alloc_inode(struct super_block *sb, ret = (dli->dl_inodes_used >= dli->dl_inodes_total); if (!ret) dli->dl_inodes_used++; +#if 0 + else + printk("VSW: DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d\n", + sb, xid, + dli->dl_inodes_used, dli->dl_inodes_total, + file, line); +#endif spin_unlock(&dli->dl_lock); put_dl_info(dli); out: - if (vx_debug_dlimit) - printk("ALLOC (%p,#%d)%c inode (%d)@ %s:%d\n", - sb, xid, __dlimit_char(dli), ret, file, line); + vxlprintk(VXD_CBIT(dlim, 0), + "ALLOC (%p,#%d)%c inode (%d)", + sb, xid, __dlimit_char(dli), ret, _file, _line); return ret; } static inline void __dl_free_inode(struct super_block *sb, - xid_t xid, const char *file, int line) + xid_t xid, const char *_file, int _line) { struct dl_info *dli; @@ -133,15 +134,51 @@ static inline void __dl_free_inode(struct super_block *sb, goto out; spin_lock(&dli->dl_lock); - dli->dl_inodes_used--; + if (dli->dl_inodes_used > 1) + dli->dl_inodes_used--; + else + dli->dl_inodes_used = 0; spin_unlock(&dli->dl_lock); put_dl_info(dli); out: - if (vx_debug_dlimit) - printk("FREE (%p,#%d)%c inode @ %s:%d\n", - sb, xid, __dlimit_char(dli), file, line); + vxlprintk(VXD_CBIT(dlim, 0), + "FREE (%p,#%d)%c inode", + sb, xid, __dlimit_char(dli), _file, _line); } +static inline void __dl_adjust_block(struct super_block *sb, xid_t xid, + unsigned int *free_blocks, unsigned int *root_blocks, + const char *_file, int _line) +{ + struct dl_info *dli; + uint64_t broot, bfree; + + dli = locate_dl_info(sb, xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + broot = (dli->dl_space_total - + (dli->dl_space_total >> 10) * dli->dl_nrlmult) + >> sb->s_blocksize_bits; + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + spin_unlock(&dli->dl_lock); + + vxlprintk(VXD_CBIT(dlim, 2), + "ADJUST: %lld,%lld on %d,%d [mult=%d]", + bfree, broot, *free_blocks, *root_blocks, + dli->dl_nrlmult, _file, _line); + if (free_blocks) { + if (*free_blocks > bfree) + *free_blocks = bfree; + } + if (root_blocks) { + if (*root_blocks > broot) + *root_blocks = broot; + } + put_dl_info(dli); +} #define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \ @@ -161,7 +198,8 @@ out: __dl_free_inode(sb, xid, __FILE__, __LINE__ ) -#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) \ + __dl_adjust_block(sb, xid, fb, rb, __FILE__, __LINE__ ) #else diff --git a/include/linux/vs_network.h b/include/linux/vs_network.h index d1b925630..915ad174f 100644 --- a/include/linux/vs_network.h +++ b/include/linux/vs_network.h @@ -1,20 +1,12 @@ #ifndef _NX_VS_NETWORK_H #define _NX_VS_NETWORK_H - -// #define NX_DEBUG - #include #include #include #include "vserver/network.h" - -#if defined(NX_DEBUG) -#define nxdprintk(x...) printk("nxd: " x) -#else -#define nxdprintk(x...) -#endif +#include "vserver/debug.h" extern int proc_pid_nx_info(struct task_struct *, char *); @@ -27,7 +19,7 @@ static inline struct nx_info *__get_nx_info(struct nx_info *nxi, { if (!nxi) return NULL; - nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(nid, 2), "get_nx_info(%p[#%d.%d])", nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, _file, _line); atomic_inc(&nxi->nx_usecnt); @@ -35,8 +27,8 @@ static inline struct nx_info *__get_nx_info(struct nx_info *nxi, } -#define free_nx_info(nxi) \ - call_rcu(&nxi->nx_rcu, rcu_free_nx_info); +#define free_nx_info(i) \ + call_rcu(&i->nx_rcu, rcu_free_nx_info); #define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) @@ -44,7 +36,7 @@ static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _li { if (!nxi) return; - nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(nid, 2), "put_nx_info(%p[#%d.%d])", nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, _file, _line); if (atomic_dec_and_test(&nxi->nx_usecnt)) @@ -60,7 +52,7 @@ static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, BUG_ON(*nxp); if (!nxi) return; - nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(nid, 3), "set_nx_info(%p[#%d.%d.%d])", nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, nxi?atomic_read(&nxi->nx_refcnt):0, @@ -78,7 +70,7 @@ static inline void __clr_nx_info(struct nx_info **nxp, if (!nxo) return; - nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxlprintk(VXD_CBIT(nid, 3), "clr_nx_info(%p[#%d.%d.%d])", nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_usecnt):0, nxo?atomic_read(&nxo->nx_refcnt):0, @@ -100,6 +92,8 @@ static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, task_lock(p); nxi = __get_nx_info(p->nx_info, _file, _line); + vxlprintk(VXD_CBIT(nid, 5), "task_get_nx_info(%p)", + p, _file, _line); task_unlock(p); return nxi; } @@ -126,9 +120,6 @@ static __inline__ void __nx_verify_info( #define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) -#undef nxdprintk -#define nxdprintk(x...) - #define __nx_flags(v,m,f) (((v) & (m)) ^ (f)) diff --git a/include/linux/vserver/context.h b/include/linux/vserver/context.h index 36765de24..434bfbaa2 100644 --- a/include/linux/vserver/context.h +++ b/include/linux/vserver/context.h @@ -62,6 +62,8 @@ struct vx_info { #define VX_ATR_MASK 0x0F00 +struct rcu_head; + extern void rcu_free_vx_info(struct rcu_head *); extern void unhash_vx_info(struct vx_info *); diff --git a/include/linux/vserver/dlimit.h b/include/linux/vserver/dlimit.h index 502a12763..c0cfafcd9 100644 --- a/include/linux/vserver/dlimit.h +++ b/include/linux/vserver/dlimit.h @@ -57,6 +57,8 @@ struct dl_info { unsigned int dl_nrlmult; /* non root limit mult */ }; +struct rcu_head; + extern void rcu_free_dl_info(struct rcu_head *); extern void unhash_dl_info(struct dl_info *); diff --git a/include/linux/vserver/network.h b/include/linux/vserver/network.h index 77ad49bb5..a89265d3d 100644 --- a/include/linux/vserver/network.h +++ b/include/linux/vserver/network.h @@ -41,6 +41,8 @@ struct nx_info { }; +struct rcu_head; + extern void rcu_free_nx_info(struct rcu_head *); extern void unhash_nx_info(struct nx_info *); diff --git a/include/linux/vserver/sched.h b/include/linux/vserver/sched.h index d5d1a4105..fd6bc2af3 100644 --- a/include/linux/vserver/sched.h +++ b/include/linux/vserver/sched.h @@ -1,10 +1,12 @@ +/* _VX_SCHED_H defined below */ + #if defined(__KERNEL__) && defined(_VX_INFO_DEF_) #include #include +#include #include #include -#include /* context sub struct */ diff --git a/include/net/route.h b/include/net/route.h index a5e9c575e..20bfb3824 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -33,6 +33,9 @@ #include #include #include +#include +#include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. diff --git a/kernel/exit.c b/kernel/exit.c index ddc4154d6..5bc8fff46 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -43,8 +43,7 @@ static void __unhash_process(struct task_struct *p) /* tasklist_lock is held, is this sufficient? */ if (p->vx_info) { atomic_dec(&p->vx_info->cacct.nr_threads); - vx_nproc_dec(p->vx_info); - // atomic_dec(&p->vx_info->limit.res[RLIMIT_NPROC]); + atomic_dec(&p->vx_info->limit.rcur[RLIMIT_NPROC]); } detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_TGID); @@ -392,7 +391,7 @@ static inline void close_files(struct files_struct * files) filp_close(file, files); cond_resched(); } - vx_openfd_dec(fd); + // vx_openfd_dec(fd); } i++; set >>= 1; diff --git a/kernel/fork.c b/kernel/fork.c index 8e335d117..df85a9daa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -83,7 +84,6 @@ static kmem_cache_t *task_struct_cachep; static void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); - vxdprintk("freeing up task %p\n", tsk); clr_vx_info(&tsk->vx_info); clr_nx_info(&tsk->nx_info); free_task_struct(tsk); @@ -911,7 +911,6 @@ struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; retval = -ENOMEM; - p = dup_task_struct(current); if (!p) goto fork_out; @@ -931,18 +930,18 @@ struct task_struct *copy_process(unsigned long clone_flags, } if (p->mm && vx_flags(VXF_FORK_RSS, 0)) { if (!vx_rsspages_avail(p->mm, p->mm->rss)) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } retval = -EAGAIN; if (!vx_nproc_avail(1)) - goto bad_fork_free; + goto bad_fork_cleanup_vm; if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) - goto bad_fork_free; + goto bad_fork_cleanup_vm; } atomic_inc(&p->user->__count); @@ -1130,12 +1129,12 @@ struct task_struct *copy_process(unsigned long clone_flags, link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); nr_threads++; - vxi = current->vx_info; + /* p is copy of current */ + vxi = p->vx_info; if (vxi) { atomic_inc(&vxi->cacct.nr_threads); - // atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); + atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); } - vx_nproc_inc(); write_unlock_irq(&tasklist_lock); retval = 0; @@ -1179,6 +1178,9 @@ bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); +bad_fork_cleanup_vm: + if (p->mm && !(clone_flags & CLONE_VM)) + vx_pages_sub(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm); bad_fork_free: free_task(p); goto fork_out; diff --git a/kernel/signal.c b/kernel/signal.c index 5c6b9b08a..b3574b096 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -273,7 +273,6 @@ static struct sigqueue *__sigqueue_alloc(void) INIT_LIST_HEAD(&q->list); q->flags = 0; q->lock = NULL; -#warning MEF PLANETLAB: q->user = get_uid(current->user); is something new in Fedora Core. q->user = get_uid(current->user); atomic_inc(&q->user->sigpending); } @@ -728,7 +727,6 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, if (q) { q->flags = 0; -#warning MEF PLANETLAB: q->user = get_uid(t->user); is something new in Fedora Core. q->user = get_uid(t->user); atomic_inc(&q->user->sigpending); list_add_tail(&q->list, &signals->list); diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c index 28f9c32eb..a1860ef93 100644 --- a/kernel/vserver/context.c +++ b/kernel/vserver/context.c @@ -43,7 +43,7 @@ static struct vx_info *__alloc_vx_info(xid_t xid) { struct vx_info *new = NULL; - vxdprintk("alloc_vx_info(%d)\n", xid); + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); @@ -67,7 +67,8 @@ static struct vx_info *__alloc_vx_info(xid_t xid) new->vx_bcaps = CAP_INIT_EFF_SET; new->vx_ccaps = 0; - vxdprintk("alloc_vx_info(%d) = %p\n", xid, new); + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); return new; } @@ -77,7 +78,8 @@ static struct vx_info *__alloc_vx_info(xid_t xid) static void __dealloc_vx_info(struct vx_info *vxi) { - vxdprintk("dealloc_vx_info(%p)\n", vxi); + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); vxi->vx_hlist.next = LIST_POISON1; vxi->vx_id = -1; @@ -124,7 +126,8 @@ static inline void __hash_vx_info(struct vx_info *vxi) { struct hlist_head *head; - vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); get_vx_info(vxi); head = &vx_info_hash[__hashval(vxi->vx_id)]; hlist_add_head_rcu(&vxi->vx_hlist, head); @@ -137,7 +140,8 @@ static inline void __hash_vx_info(struct vx_info *vxi) static inline void __unhash_vx_info(struct vx_info *vxi) { - vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); hlist_del_rcu(&vxi->vx_hlist); put_vx_info(vxi); } @@ -178,8 +182,11 @@ static inline xid_t __vx_dynamic_id(void) do { if (++seq > MAX_S_CONTEXT) seq = MIN_D_CONTEXT; - if (!__lookup_vx_info(seq)) + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); return seq; + } } while (barrier != seq); return 0; } @@ -193,7 +200,7 @@ static struct vx_info * __loc_vx_info(int id, int *err) { struct vx_info *new, *vxi = NULL; - vxdprintk("loc_vx_info(%d)\n", id); + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); if (!(new = __alloc_vx_info(id))) { *err = -ENOMEM; @@ -215,11 +222,13 @@ static struct vx_info * __loc_vx_info(int id, int *err) else if ((vxi = __lookup_vx_info(id))) { /* context in setup is not available */ if (vxi->vx_flags & VXF_STATE_SETUP) { - vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); vxi = NULL; *err = -EBUSY; } else { - vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); get_vx_info(vxi); *err = 0; } @@ -227,7 +236,8 @@ static struct vx_info * __loc_vx_info(int id, int *err) } /* new context requested */ - vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); __hash_vx_info(get_vx_info(new)); vxi = new, new = NULL; *err = 1; @@ -250,7 +260,7 @@ void rcu_free_vx_info(struct rcu_head *head) struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu); int usecnt, refcnt; - BUG_ON(!vxi); + BUG_ON(!vxi || !head); usecnt = atomic_read(&vxi->vx_usecnt); BUG_ON(usecnt < 0); @@ -258,6 +268,8 @@ void rcu_free_vx_info(struct rcu_head *head) refcnt = atomic_read(&vxi->vx_refcnt); BUG_ON(refcnt < 0); + vxdprintk(VXD_CBIT(xid, 3), + "rcu_free_vx_info(%p): uc=%d", vxi, usecnt); if (!usecnt) __dealloc_vx_info(vxi); else @@ -390,25 +402,23 @@ void vx_mask_bcaps(struct task_struct *p) static inline int vx_nofiles_task(struct task_struct *tsk) { struct files_struct *files = tsk->files; - const unsigned long *obptr, *cbptr; + const unsigned long *obptr; int count, total; spin_lock(&files->file_lock); obptr = files->open_fds->fds_bits; - cbptr = files->close_on_exec->fds_bits; count = files->max_fds / (sizeof(unsigned long) * 8); for (total = 0; count > 0; count--) { if (*obptr) total += hweight_long(*obptr); obptr++; - /* if (*cbptr) - total += hweight_long(*cbptr); - cbptr++; */ } spin_unlock(&files->file_lock); return total; } +#if 0 + static inline int vx_openfd_task(struct task_struct *tsk) { struct files_struct *files = tsk->files; @@ -427,6 +437,8 @@ static inline int vx_openfd_task(struct task_struct *tsk) return total; } +#endif + /* * migrate task to new context * gets vxi, puts old_vxi on change @@ -444,26 +456,32 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) if (old_vxi == vxi) goto out; - vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, vxi->vx_id, atomic_read(&vxi->vx_usecnt)); if (!(ret = vx_migrate_user(p, vxi))) { - int openfd, nofiles; + int nofiles; task_lock(p); - openfd = vx_openfd_task(p); + // openfd = vx_openfd_task(p); nofiles = vx_nofiles_task(p); if (old_vxi) { atomic_dec(&old_vxi->cacct.nr_threads); atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]); - atomic_sub(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]); - atomic_sub(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); + atomic_sub(nofiles, &old_vxi->limit.rcur[RLIMIT_NOFILE]); + // atomic_sub(openfd, &old_vxi->limit.rcur[RLIMIT_OPENFD]); } atomic_inc(&vxi->cacct.nr_threads); atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]); - atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); + // atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + /* should be handled in set_vx_info !! */ if (old_vxi) clr_vx_info(&p->vx_info); diff --git a/kernel/vserver/dlimit.c b/kernel/vserver/dlimit.c index c7cbe7dc7..d9478ddd8 100644 --- a/kernel/vserver/dlimit.c +++ b/kernel/vserver/dlimit.c @@ -31,7 +31,8 @@ static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) { struct dl_info *new = NULL; - vxdprintk("alloc_dl_info(%p,%d)\n", sb, xid); + vxdprintk(VXD_CBIT(dlim, 5), + "alloc_dl_info(%p,%d)*", sb, xid); /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); @@ -49,7 +50,8 @@ static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) /* rest of init goes here */ - vxdprintk("alloc_dl_info(%p,%d) = %p\n", sb, xid, new); + vxdprintk(VXD_CBIT(dlim, 4), + "alloc_dl_info(%p,%d) = %p", sb, xid, new); return new; } @@ -59,7 +61,8 @@ static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) static void __dealloc_dl_info(struct dl_info *dli) { - vxdprintk("dealloc_dl_info(%p)\n", dli); + vxdprintk(VXD_CBIT(dlim, 4), + "dealloc_dl_info(%p)", dli); dli->dl_hlist.next = LIST_POISON1; dli->dl_xid = -1; @@ -83,7 +86,7 @@ static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; static inline unsigned int __hashval(struct super_block *sb, xid_t xid) { - return ((xid ^ (unsigned int)sb) % DL_HASH_SIZE); + return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE); } @@ -97,7 +100,8 @@ static inline void __hash_dl_info(struct dl_info *dli) { struct hlist_head *head; - vxdprintk("__hash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + vxdprintk(VXD_CBIT(dlim, 6), + "__hash_dl_info: %p[#%d]", dli, dli->dl_xid); get_dl_info(dli); head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; hlist_add_head_rcu(&dli->dl_hlist, head); @@ -110,7 +114,8 @@ static inline void __hash_dl_info(struct dl_info *dli) static inline void __unhash_dl_info(struct dl_info *dli) { - vxdprintk("__unhash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + vxdprintk(VXD_CBIT(dlim, 6), + "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid); hlist_del_rcu(&dli->dl_hlist); put_dl_info(dli); } @@ -149,6 +154,8 @@ struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) rcu_read_lock(); dli = get_dl_info(__lookup_dl_info(sb, xid)); + vxdprintk(VXD_CBIT(dlim, 7), + "locate_dl_info(%p,#%d) = %p", sb, xid, dli); rcu_read_unlock(); return dli; } @@ -158,7 +165,7 @@ void rcu_free_dl_info(struct rcu_head *head) struct dl_info *dli = container_of(head, struct dl_info, dl_rcu); int usecnt, refcnt; - BUG_ON(!dli); + BUG_ON(!dli || !head); usecnt = atomic_read(&dli->dl_usecnt); BUG_ON(usecnt < 0); @@ -166,6 +173,8 @@ void rcu_free_dl_info(struct rcu_head *head) refcnt = atomic_read(&dli->dl_refcnt); BUG_ON(refcnt < 0); + vxdprintk(VXD_CBIT(dlim, 3), + "rcu_free_dl_info(%p)", dli); if (!usecnt) __dealloc_dl_info(dli); else @@ -437,3 +446,10 @@ no_blim: return; } +#include + +EXPORT_SYMBOL_GPL(locate_dl_info); +EXPORT_SYMBOL_GPL(rcu_free_dl_info); +// EXPORT_SYMBOL_GPL(dl_info_hash_lock); +// EXPORT_SYMBOL_GPL(unhash_dl_info); + diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c index 2a7d51ed1..a62d1c435 100644 --- a/kernel/vserver/network.c +++ b/kernel/vserver/network.c @@ -31,7 +31,7 @@ static struct nx_info *__alloc_nx_info(nid_t nid) { struct nx_info *new = NULL; - nxdprintk("alloc_nx_info()\n"); + vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid); /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); @@ -47,7 +47,8 @@ static struct nx_info *__alloc_nx_info(nid_t nid) /* rest of init goes here */ - nxdprintk("alloc_nx_info() = %p\n", new); + vxdprintk(VXD_CBIT(nid, 0), + "alloc_nx_info() = %p", new); return new; } @@ -57,7 +58,8 @@ static struct nx_info *__alloc_nx_info(nid_t nid) static void __dealloc_nx_info(struct nx_info *nxi) { - nxdprintk("dealloc_nx_info(%p)\n", nxi); + vxdprintk(VXD_CBIT(nid, 0), + "dealloc_nx_info(%p)", nxi); nxi->nx_hlist.next = LIST_POISON1; nxi->nx_id = -1; @@ -94,7 +96,8 @@ static inline void __hash_nx_info(struct nx_info *nxi) { struct hlist_head *head; - nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id); + vxdprintk(VXD_CBIT(nid, 4), + "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id); get_nx_info(nxi); head = &nx_info_hash[__hashval(nxi->nx_id)]; hlist_add_head_rcu(&nxi->nx_hlist, head); @@ -107,7 +110,8 @@ static inline void __hash_nx_info(struct nx_info *nxi) static inline void __unhash_nx_info(struct nx_info *nxi) { - nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id); + vxdprintk(VXD_CBIT(nid, 4), + "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id); hlist_del_rcu(&nxi->nx_hlist); put_nx_info(nxi); } @@ -148,8 +152,11 @@ static inline nid_t __nx_dynamic_id(void) do { if (++seq > MAX_N_CONTEXT) seq = MIN_D_CONTEXT; - if (!__lookup_nx_info(seq)) + if (!__lookup_nx_info(seq)) { + vxdprintk(VXD_CBIT(nid, 4), + "__nx_dynamic_id: [#%d]", seq); return seq; + } } while (barrier != seq); return 0; } @@ -163,7 +170,7 @@ static struct nx_info * __loc_nx_info(int id, int *err) { struct nx_info *new, *nxi = NULL; - nxdprintk("loc_nx_info(%d)\n", id); + vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id); if (!(new = __alloc_nx_info(id))) { *err = -ENOMEM; @@ -185,11 +192,13 @@ static struct nx_info * __loc_nx_info(int id, int *err) else if ((nxi = __lookup_nx_info(id))) { /* context in setup is not available */ if (nxi->nx_flags & VXF_STATE_SETUP) { - nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (not available)", id, nxi); nxi = NULL; *err = -EBUSY; } else { - nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (found)", id, nxi); get_nx_info(nxi); *err = 0; } @@ -197,7 +206,8 @@ static struct nx_info * __loc_nx_info(int id, int *err) } /* new context requested */ - nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new); + vxdprintk(VXD_CBIT(nid, 0), + "loc_nx_info(%d) = %p (new)", id, new); __hash_nx_info(get_nx_info(new)); nxi = new, new = NULL; *err = 1; @@ -221,12 +231,16 @@ void rcu_free_nx_info(struct rcu_head *head) struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu); int usecnt, refcnt; + BUG_ON(!nxi || !head); + usecnt = atomic_read(&nxi->nx_usecnt); BUG_ON(usecnt < 0); refcnt = atomic_read(&nxi->nx_refcnt); BUG_ON(refcnt < 0); + vxdprintk(VXD_CBIT(nid, 3), + "rcu_free_nx_info(%p): uc=%d", nxi, usecnt); if (!usecnt) __dealloc_nx_info(nxi); else @@ -287,7 +301,7 @@ struct nx_info *create_nx_info(void) struct nx_info *new; int err; - nxdprintk("create_nx_info()\n"); + vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void"); if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err))) return NULL; return new; @@ -342,7 +356,8 @@ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) if (!p || !nxi) BUG(); - nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n", + vxdprintk(VXD_CBIT(nid, 5), + "nx_migrate_task(%p,%p[#%d.%d.%d])", p, nxi, nxi->nx_id, atomic_read(&nxi->nx_usecnt), atomic_read(&nxi->nx_refcnt)); diff --git a/kernel/vserver/sysctl.c b/kernel/vserver/sysctl.c index 32fde9a93..298c62f18 100644 --- a/kernel/vserver/sysctl.c +++ b/kernel/vserver/sysctl.c @@ -26,14 +26,22 @@ enum { CTL_DEBUG_SWITCH = 1, + CTL_DEBUG_XID, + CTL_DEBUG_NID, + CTL_DEBUG_NET, CTL_DEBUG_LIMIT, - CTL_DEBUG_DLIMIT, + CTL_DEBUG_DLIM, + CTL_DEBUG_CVIRT, }; unsigned int vx_debug_switch = 0; +unsigned int vx_debug_xid = 0; +unsigned int vx_debug_nid = 0; +unsigned int vx_debug_net = 0; unsigned int vx_debug_limit = 0; -unsigned int vx_debug_dlimit = 0; +unsigned int vx_debug_dlim = 0; +unsigned int vx_debug_cvirt = 0; static struct ctl_table_header *vserver_table_header; @@ -62,13 +70,13 @@ void vserver_unregister_sysctl(void) static int proc_dodebug(ctl_table *table, int write, - struct file *file, void *buffer, size_t *lenp) + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], *p, c; unsigned int value; size_t left, len; - if ((file->f_pos && !write) || !*lenp) { + if ((*ppos && !write) || !*lenp) { *lenp = 0; return 0; } @@ -114,7 +122,7 @@ static int proc_dodebug(ctl_table *table, int write, done: *lenp -= left; - file->f_pos += *lenp; + *ppos += *lenp; return 0; } @@ -129,6 +137,30 @@ static ctl_table debug_table[] = { .mode = 0644, .proc_handler = &proc_dodebug }, + { + .ctl_name = CTL_DEBUG_XID, + .procname = "debug_xid", + .data = &vx_debug_xid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NID, + .procname = "debug_nid", + .data = &vx_debug_nid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_NET, + .procname = "debug_net", + .data = &vx_debug_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, { .ctl_name = CTL_DEBUG_LIMIT, .procname = "debug_limit", @@ -138,9 +170,17 @@ static ctl_table debug_table[] = { .proc_handler = &proc_dodebug }, { - .ctl_name = CTL_DEBUG_DLIMIT, - .procname = "debug_dlimit", - .data = &vx_debug_dlimit, + .ctl_name = CTL_DEBUG_DLIM, + .procname = "debug_dlim", + .data = &vx_debug_dlim, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dodebug + }, + { + .ctl_name = CTL_DEBUG_CVIRT, + .procname = "debug_cvirt", + .data = &vx_debug_cvirt, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dodebug @@ -158,3 +198,6 @@ static ctl_table vserver_table[] = { { .ctl_name = 0 } }; + +EXPORT_SYMBOL_GPL(vx_debug_dlim); + diff --git a/mm/mremap.c b/mm/mremap.c index cb6429ec0..c1cf3c09d 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 6d77b7671..d226b69ae 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -619,7 +619,8 @@ static int try_to_unmap_cluster(unsigned long cursor, page_remove_rmap(page); page_cache_release(page); - mm->rss--; + // mm->rss--; + vx_rsspages_dec(mm); (*mapcount)--; } diff --git a/mm/shmem.c b/mm/shmem.c index c3b4cc5d5..74596a4c6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -46,7 +46,6 @@ #include /* This magic number is used in glibc for posix shared memory */ -#define TMPFS_MAGIC 0x01021994 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) @@ -1508,7 +1507,7 @@ static int shmem_statfs(struct super_block *sb, struct kstatfs *buf) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - buf->f_type = TMPFS_MAGIC; + buf->f_type = TMPFS_SUPER_MAGIC; buf->f_bsize = PAGE_CACHE_SIZE; spin_lock(&sbinfo->stat_lock); buf->f_blocks = sbinfo->max_blocks; @@ -1838,7 +1837,7 @@ static int shmem_fill_super(struct super_block *sb, sb->s_maxbytes = SHMEM_MAX_BYTES; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; + sb->s_magic = TMPFS_SUPER_MAGIC; sb->s_op = &shmem_ops; inode = shmem_get_inode(sb, S_IFDIR | mode, 0); if (!inode) diff --git a/mm/swapfile.c b/mm/swapfile.c index b886b94bf..89bc19ef3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -30,6 +30,7 @@ #include #include #include +#include spinlock_t swaplock = SPIN_LOCK_UNLOCKED; unsigned int nr_swapfiles; diff --git a/net/core/dev.c b/net/core/dev.c index 90ac1a1fc..60af16c41 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -113,6 +113,7 @@ #include #endif /* CONFIG_NET_RADIO */ #include +#include /* This define, if set, will randomly drop a packet when congestion * is more than moderate. It helps fairness in the multi-interface diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2c0ac07bd..eb044e25a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -112,9 +112,7 @@ #ifdef CONFIG_IP_MROUTE #include #endif -#include -#include -#include +#include DEFINE_SNMP_STAT(struct linux_mib, net_statistics); @@ -162,6 +160,7 @@ void inet_sock_destruct(struct sock *sk) if (inet->opt) kfree(inet->opt); + vx_sock_dec(sk); clr_vx_info(&sk->sk_vx_info); sk->sk_xid = -1; clr_nx_info(&sk->sk_nx_info); @@ -345,6 +344,7 @@ override: set_vx_info(&sk->sk_vx_info, current->vx_info); sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); set_nx_info(&sk->sk_nx_info, current->nx_info); sk->sk_nid = nx_current_nid(); @@ -410,6 +410,7 @@ int inet_release(struct socket *sock) !(current->flags & PF_EXITING)) timeout = sk->sk_lingertime; sock->sk = NULL; + vx_sock_dec(sk); clr_vx_info(&sk->sk_vx_info); sk->sk_xid = -1; clr_nx_info(&sk->sk_nx_info); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1481f4afb..e4a4a0994 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -526,7 +526,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) .saddr = saddr, .tos = RT_TOS(tos) } }, .proto = IPPROTO_ICMP }; - if (ip_route_output_key(&rt, &fl)) goto out_unlock; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index aa1f9719d..7bbe1cb55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -74,8 +74,7 @@ #include #include #include - -#include +#include extern int sysctl_ip_dynaddr; int sysctl_tcp_tw_reuse; @@ -2211,6 +2210,9 @@ static void *listening_get_next(struct seq_file *seq, void *cur) req = req->dl_next; while (1) { while (req) { + vxdprintk(VXD_CBIT(net, 6), + "sk,req: %p [#%d] (from %d)", + req->sk, req->sk->sk_xid, current->xid); if (!vx_check(req->sk->sk_xid, VX_IDENT|VX_WATCH)) continue; if (req->class->family == st->family) { @@ -2231,6 +2233,8 @@ get_req: sk = sk_next(sk); get_sk: sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) continue; if (sk->sk_family == st->family) { @@ -2280,6 +2284,9 @@ static void *established_get_first(struct seq_file *seq) read_lock(&tcp_ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) continue; if (sk->sk_family != st->family) @@ -2290,6 +2297,9 @@ static void *established_get_first(struct seq_file *seq) st->state = TCP_SEQ_STATE_TIME_WAIT; tw_for_each(tw, node, &tcp_ehash[st->bucket + tcp_ehash_size].chain) { + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_xid, current->xid); if (!vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) continue; if (tw->tw_family != st->family) @@ -2317,8 +2327,8 @@ static void *established_get_next(struct seq_file *seq, void *cur) tw = cur; tw = tw_next(tw); get_tw: - while (tw && tw->tw_family != st->family && - !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH)) { + while (tw && (tw->tw_family != st->family || + !vx_check(tw->tw_xid, VX_IDENT|VX_WATCH))) { tw = tw_next(tw); } if (tw) { @@ -2338,6 +2348,9 @@ get_tw: sk = sk_next(sk); sk_for_each_from(sk, node) { + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_xid, current->xid); if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH)) continue; if (sk->sk_family == st->family) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 9f15b82be..51ac81b75 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -25,9 +25,8 @@ #include #include #include +#include #include -#include -#include #include #include #include @@ -818,6 +817,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, set_vx_info(&newsk->sk_vx_info, sk->sk_vx_info); newsk->sk_xid = sk->sk_xid; + vx_sock_inc(newsk); set_nx_info(&newsk->sk_nx_info, sk->sk_nx_info); newsk->sk_nid = sk->sk_nid; #ifdef INET_REFCNT_DEBUG diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b8c77b180..23f8f511d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -107,7 +107,6 @@ #include #include #include -#include /* * Snmp MIB for the UDP layer diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 429cb4b96..5edc92cf8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -70,9 +70,6 @@ #include #include #include -#include -#include -#include #ifdef CONFIG_INET #include diff --git a/net/socket.c b/net/socket.c index 22be11e4c..e28ec6ae2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -563,12 +563,13 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, else vx_sock_fail(sock->sk, size); } - vxdprintk("__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d\n", + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d", sock, sock->sk, (sock->sk)?sock->sk->sk_nx_info:0, (sock->sk)?sock->sk->sk_vx_info:0, (sock->sk)?sock->sk->sk_xid:0, - size, len); + (unsigned int)size, len); return len; } @@ -623,12 +624,13 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, len = sock->ops->recvmsg(iocb, sock, msg, size, flags); if ((len >= 0) && sock->sk) vx_sock_recv(sock->sk, len); - vxdprintk("__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d\n", + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d", sock, sock->sk, (sock->sk)?sock->sk->sk_nx_info:0, (sock->sk)?sock->sk->sk_vx_info:0, (sock->sk)?sock->sk->sk_xid:0, - size, len); + (unsigned int)size, len); return len; } diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 694301a7e..2fd2975c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -260,8 +260,9 @@ rpcauth_lookupcred(struct rpc_auth *auth, int taskflags) struct rpc_cred *ret; get_group_info(current->group_info); - acred.uid = XIDINO_UID(current->fsuid, current->xid); - acred.gid = XIDINO_GID(current->fsgid, current->xid); + acred.uid = current->fsuid; + acred.gid = current->fsgid; + acred.xid = current->xid; acred.group_info = current->group_info; dprintk("RPC: looking up %s cred\n", @@ -279,8 +280,9 @@ rpcauth_bindcred(struct rpc_task *task) struct rpc_cred *ret; get_group_info(current->group_info); - acred.uid = XIDINO_UID(current->fsuid, current->xid); - acred.gid = XIDINO_GID(current->fsgid, current->xid); + acred.uid = current->fsuid; + acred.gid = current->fsgid; + acred.xid = current->xid; acred.group_info = current->group_info; dprintk("RPC: %4d looking up %s cred\n", diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 33741fc59..294875e44 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -20,8 +20,10 @@ struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; + xid_t uc_xid; uid_t uc_puid; /* process uid */ gid_t uc_pgid; /* process gid */ + xid_t uc_pxid; /* process xid */ gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -81,6 +83,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) if (flags & RPC_TASK_ROOTCREDS) { cred->uc_uid = cred->uc_puid = 0; cred->uc_gid = cred->uc_pgid = 0; + cred->uc_xid = cred->uc_pxid = current->xid; cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -89,10 +92,10 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; -// cred->uc_puid = XIDINO_UID(current->uid, current->xid); -// cred->uc_pgid = XIDINO_GID(current->gid, current->xid); + cred->uc_xid = acred->xid; cred->uc_puid = current->uid; cred->uc_pgid = current->gid; + cred->uc_pxid = current->xid; for (i = 0; i < groups; i++) cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -125,8 +128,10 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags) if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid - || cred->uc_puid != XIDINO_UID(current->uid, current->xid) - || cred->uc_pgid != XIDINO_GID(current->gid, current->xid)) + || cred->uc_xid != acred->xid + || cred->uc_puid != current->uid + || cred->uc_pgid != current->gid + || cred->uc_pxid != current->xid) return 0; groups = acred->group_info->ngroups; @@ -152,7 +157,7 @@ unx_marshal(struct rpc_task *task, u32 *p, int ruid) struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; u32 *base, *hold; - int i; + int i, tagxid; *p++ = htonl(RPC_AUTH_UNIX); base = p++; @@ -162,14 +167,19 @@ unx_marshal(struct rpc_task *task, u32 *p, int ruid) * Copy the UTS nodename captured when the client was created. */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); + tagxid = task->tk_client->cl_tagxid; /* Note: we don't use real uid if it involves raising privilege */ if (ruid && cred->uc_puid != 0 && cred->uc_pgid != 0) { - *p++ = htonl((u32) cred->uc_puid); - *p++ = htonl((u32) cred->uc_pgid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_puid, cred->uc_pxid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_pgid, cred->uc_pxid)); } else { - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); + *p++ = htonl((u32) XIDINO_UID(tagxid, + cred->uc_uid, cred->uc_xid)); + *p++ = htonl((u32) XIDINO_GID(tagxid, + cred->uc_gid, cred->uc_xid)); } hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 71b54acdf..08432ceda 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -120,6 +120,7 @@ #include #include #include +#include int sysctl_unix_max_dgram_qlen = 10; @@ -407,6 +408,7 @@ static int unix_release_sock (struct sock *sk, int embrion) mntput(mnt); } + vx_sock_dec(sk); clr_vx_info(&sk->sk_vx_info); clr_nx_info(&sk->sk_nx_info); sock_put(sk); @@ -564,8 +566,9 @@ static struct sock * unix_create1(struct socket *sock) sk_set_owner(sk, THIS_MODULE); set_vx_info(&sk->sk_vx_info, current->vx_info); - set_nx_info(&sk->sk_nx_info, current->nx_info); sk->sk_xid = vx_current_xid(); + vx_sock_inc(sk); + set_nx_info(&sk->sk_nx_info, current->nx_info); sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; -- 2.43.0