endchoice
-config X86_HZ
- int "Clock Tick Rate"
- default 1000 if !(M386 || M486 || M586 || M586TSC || M586MMX)
- default 100 if (M386 || M486 || M586 || M586TSC || M586MMX)
- help
- Select the kernel clock tick rate in interrupts per second.
- Slower processors should choose 100; everything else 1000.
-
config X86_GENERIC
bool "Generic x86 support"
help
depends on !SMP && X86_UP_IOAPIC
default y
+config KERNEL_HZ
+ int "Timer Frequency (100-20000)"
+ range 100 20000
+ default "1000"
+ help
+ This allows you to specify the frequency at which the
+ kernel timer interrupt will occur.
+
config X86_TSC
bool
depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
#include <linux/irq.h>
#include <linux/module.h>
+#include <linux/vserver/debug.h>
#include "mach_traps.h"
};
static int die_counter;
+ vxh_throw_oops();
if (die.lock_owner != smp_processor_id()) {
console_verbose();
spin_lock_irq(&die.lock);
bust_spinlocks(0);
die.lock_owner = -1;
spin_unlock_irq(&die.lock);
+ vxh_dump_history();
if (in_interrupt())
panic("Fatal exception in interrupt");
#include <linux/vs_memory.h>
#include <linux/vs_cvirt.h>
#include <linux/bitops.h>
+#include <linux/vs_memory.h>
+#include <linux/vs_cvirt.h>
#include <asm/errno.h>
#include <asm/intrinsics.h>
vma->vm_end += PAGE_SIZE;
// vma->vm_mm->total_vm += grow;
vx_vmpages_add(vma->vm_mm, grow);
- if (vma->vm_flags & VM_LOCKED) {
+ if (vma->vm_flags & VM_LOCKED)
// vma->vm_mm->locked_vm += grow;
vx_vmlocked_add(vma->vm_mm, grow);
- }
__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
return 0;
}
#include <linux/shm.h>
#include <linux/vs_cvirt.h>
#include <linux/compiler.h>
+#include <linux/vs_cvirt.h>
#include <asm/branch.h>
#include <asm/cachectl.h>
pte_t entry;
// mm->rss += (HPAGE_SIZE / PAGE_SIZE);
- vx_rsspages_sub(mm, HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access) {
entry =
pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
put_page(page);
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_pending();
}
unsigned long i;
pte_t entry;
- mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-
+ // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
if (write_access)
entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
vma->vm_page_prot)));
pte_val(entry) += PAGE_SIZE;
dst_pte++;
}
- dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
+ vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
addr += HPAGE_SIZE;
}
return 0;
pte++;
}
}
- mm->rss -= (end - start) >> PAGE_SHIFT;
+ // mm->rss -= (end - start) >> PAGE_SHIFT;
+ vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
flush_tlb_range(vma, start, end);
}
#include "linux/vs_cvirt.h"
#include "linux/proc_fs.h"
#include "linux/ptrace.h"
+#include "linux/vs_cvirt.h"
+
#include "asm/unistd.h"
#include "asm/mman.h"
#include "asm/segment.h"
#include <linux/fcntl.h>
#include <linux/quotaops.h>
#include <linux/security.h>
-#include <linux/vs_base.h>
#include <linux/proc_fs.h>
#include <linux/devpts_fs.h>
+#include <linux/vserver/debug.h>
/* Taken over from the old code... */
goto fine;
if (IS_BARRIER(inode)) {
- printk(KERN_WARNING
- "VSW: xid=%d messing with the barrier.\n",
+ vxwprintk(1, "xid=%d messing with the barrier.",
vx_current_xid());
goto error;
}
switch (inode->i_sb->s_magic) {
case PROC_SUPER_MAGIC:
- printk(KERN_WARNING
- "VSW: xid=%d messing with the procfs.\n",
+ vxwprintk(1, "xid=%d messing with the procfs.",
vx_current_xid());
goto error;
case DEVPTS_SUPER_MAGIC:
if (vx_check(inode->i_xid, VX_IDENT))
goto fine;
- printk(KERN_WARNING
- "VSW: xid=%d messing with the devpts.\n",
+ vxwprintk(1, "xid=%d messing with the devpts.",
vx_current_xid());
goto error;
}
NULL
};
+static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	int ret = -EACCES;	/* default to deny when the xid check below fails */
+
+	if (vx_check(inode->i_xid, VX_IDENT))	/* only the context owning this pty may proceed */
+		ret = generic_permission(inode, mask, NULL);	/* then fall back to standard mode-bit checks */
+	return ret;
+}
+
+struct inode_operations devpts_file_inode_operations = {
+#ifdef CONFIG_DEVPTS_FS_XATTR
+	.setxattr = generic_setxattr,
+	.getxattr = generic_getxattr,
+	.listxattr = generic_listxattr,
+	.removexattr = generic_removexattr,
+#endif
+	.permission = devpts_permission,	/* outside the ifdef: the xid check must apply even without xattr support */
+};
+
static struct vfsmount *devpts_mnt;
static struct dentry *devpts_root;
return lookup_one_len(s, root, sprintf(s, "%d", num));
}
-#ifdef CONFIG_DEVPTS_FS_XATTR
-static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
- int ret = -EACCES;
-
- if (vx_check(inode->i_xid, VX_IDENT))
- ret = generic_permission(inode, mask, NULL);
- return ret;
-}
-#endif
-
-struct inode_operations devpts_file_inode_operations = {
-#ifdef CONFIG_DEVPTS_FS_XATTR
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .listxattr = generic_listxattr,
- .removexattr = generic_removexattr,
- .permission = devpts_permission,
-#endif
-};
int devpts_pty_new(struct tty_struct *tty)
{
#include <linux/syscalls.h>
#include <linux/rmap.h>
#include <linux/ckrm.h>
-#include <linux/vs_memory.h>
#include <linux/ckrm_mem.h>
+#include <linux/vs_memory.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/buffer_head.h>
-#include <linux/vs_base.h>
#include <linux/vs_dlimit.h>
/*
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/random.h>
-#include <linux/vs_base.h>
+
#include <linux/vs_dlimit.h>
#include "ext2.h"
return ERR_PTR(-ENOMEM);
if (sb->s_flags & MS_TAGXID)
- inode->i_xid = current->xid;
+ inode->i_xid = vx_current_xid();
else
inode->i_xid = 0;
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
#endif
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
*
* This test looks nicer. Thanks to Pauline Middelink
*/
- if (((oldflags & EXT2_IMMUTABLE_FL) ||
- ((flags ^ oldflags) &
- (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL)))
- && !capable(CAP_LINUX_IMMUTABLE)) {
- return -EPERM;
+ if ((oldflags & EXT2_IMMUTABLE_FL) ||
+ ((flags ^ oldflags) & (EXT2_APPEND_FL |
+ EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) {
+ if (!capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
}
flags = flags & EXT2_FL_USER_MODIFIABLE;
*/
#include <linux/pagemap.h>
+#include <linux/vserver/xid.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
inode = iget(dir->i_sb, ino);
if (!inode)
return ERR_PTR(-EACCES);
+ vx_propagate_xid(nd, inode);
}
if (inode)
return d_splice_alias(inode, dentry);
#include <linux/ext3_jbd.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
-#include <linux/vs_base.h>
#include <linux/vs_dlimit.h>
/*
#include <linux/random.h>
#include <linux/vs_dlimit.h>
#include <linux/bitops.h>
+#include <linux/vs_dlimit.h>
#include <asm/byteorder.h>
return ERR_PTR(-ENOMEM);
if (sb->s_flags & MS_TAGXID)
- inode->i_xid = current->xid;
+ inode->i_xid = vx_current_xid();
else
inode->i_xid = 0;
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
#endif
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
*
* This test looks nicer. Thanks to Pauline Middelink
*/
- if (((oldflags & EXT3_IMMUTABLE_FL) ||
- ((flags ^ oldflags) &
- (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL)))
- && !capable(CAP_LINUX_IMMUTABLE)) {
- return -EPERM;
+ if ((oldflags & EXT3_IMMUTABLE_FL) ||
+ ((flags ^ oldflags) & (EXT3_APPEND_FL |
+ EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
+ if (!capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
}
/*
remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
return ret;
}
-#endif
-#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
- case EXT3_IOC_SETXID: {
- handle_t *handle;
- struct ext3_iloc iloc;
- int xid;
- int err;
-
- /* fixme: if stealth, return -ENOTTY */
- if (!capable(CAP_CONTEXT))
- return -EPERM;
- if (IS_RDONLY(inode))
- return -EROFS;
- if (!(inode->i_sb->s_flags & MS_TAGXID))
- return -ENOSYS;
- if (get_user(xid, (int *) arg))
- return -EFAULT;
-
- handle = ext3_journal_start(inode, 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- err = ext3_reserve_inode_write(handle, inode, &iloc);
- if (err)
- return err;
-
- inode->i_xid = (xid & 0xFFFF);
- inode->i_ctime = CURRENT_TIME;
-
- err = ext3_mark_iloc_dirty(handle, inode, &iloc);
- ext3_journal_stop(handle);
- return err;
- }
#endif
case EXT3_IOC_GETRSVSZ:
if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode)) {
return err;
}
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+ case EXT3_IOC_SETXID: {
+ handle_t *handle;
+ struct ext3_iloc iloc;
+ int xid;
+ int err;
+
+ /* fixme: if stealth, return -ENOTTY */
+ if (!capable(CAP_CONTEXT))
+ return -EPERM;
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (!(inode->i_sb->s_flags & MS_TAGXID))
+ return -ENOSYS;
+ if (get_user(xid, (int *) arg))
+ return -EFAULT;
+
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ err = ext3_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ return err;
+
+ inode->i_xid = (xid & 0xFFFF);
+ inode->i_ctime = CURRENT_TIME;
+
+ err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ ext3_journal_stop(handle);
+ return err;
+ }
+#endif
+
default:
return -ENOTTY;
}
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
+#include <linux/vserver/xid.h>
#include "xattr.h"
#include "acl.h"
if (!inode)
return ERR_PTR(-EACCES);
+ vx_propagate_xid(nd, inode);
}
if (inode)
return d_splice_alias(inode, dentry);
/* f->f_version: 0 */
INIT_LIST_HEAD(&f->f_list);
// set_vx_info(&f->f_vx_info, current->vx_info);
- f->f_xid = current->xid;
+ f->f_xid = vx_current_xid();
vx_files_inc(f);
return f;
}
inode->i_sb = sb;
// inode->i_dqh = dqhget(sb->s_dqh);
- /* important because of inode slab reuse */
+ /* essential because of inode slab reuse */
inode->i_xid = 0;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
break;
#endif
- case FIOC_SETIATTR:
- case FIOC_GETIATTR:
- /*
- * Verify that this filp is a file object,
- * not (say) a socket.
- */
- error = -ENOTTY;
- if (S_ISREG(filp->f_dentry->d_inode->i_mode) ||
- S_ISDIR(filp->f_dentry->d_inode->i_mode))
- error = vc_iattr_ioctl(filp->f_dentry,
- cmd, arg);
- break;
-
default:
error = -ENOTTY;
if (S_ISREG(filp->f_dentry->d_inode->i_mode))
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/vserver/xid.h>
-#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
-#include <linux/vs_base.h>
+#include <linux/proc_fs.h>
+#include <linux/vserver/inode.h>
+#include <linux/vserver/debug.h>
#include <asm/namei.h>
#include <asm/uaccess.h>
return -EACCES;
}
+static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) {	/* chroot barrier: only the admin (xid 0) context may cross */
+		vxwprintk(1, "xid=%d did hit the barrier.",
+			vx_current_xid());
+		return -EACCES;
+	}
+	if (inode->i_xid == 0)	/* untagged (host) inodes are visible to every context */
+		return 0;
+	if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT))	/* owner context, admin or watch context passes */
+		return 0;
+
+	vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] »%s«.",
+		vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+		vxd_path(nd->dentry, nd->mnt));	/* log the denial with the resolved path for debugging */
+	return -EACCES;
+}
+
int permission(struct inode * inode,int mask, struct nameidata *nd)
{
int retval;
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
return -EROFS;
+ if ((retval = xid_permission(inode, mask, nd)))
+ return retval;
+
if (inode->i_op && inode->i_op->permission)
retval = inode->i_op->permission(inode, submask, nd);
else
{
struct vfsmount *mnt = nd->mnt;
struct dentry *dentry = __d_lookup(nd->dentry, name);
+ struct inode *inode;
if (!dentry)
goto need_lookup;
if (dentry->d_op && dentry->d_op->d_revalidate)
goto need_revalidate;
+ inode = dentry->d_inode;
+ if (!inode)
+ goto done;
+ if (!vx_check(inode->i_xid, VX_WATCH|VX_HOSTID|VX_IDENT))
+ goto hidden;
+ if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
+ struct proc_dir_entry *de = PDE(inode);
+
+ if (de && !vx_hide_check(0, de->vx_flags))
+ goto hidden;
+ }
done:
path->mnt = mnt;
path->dentry = dentry;
return 0;
+hidden:
+ vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] »%s«.",
+ vx_current_xid(), inode, inode->i_xid, inode->i_ino,
+ vxd_path(dentry, mnt));
+ dput(dentry);
+ return -ENOENT;
need_lookup:
if (atomic)
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
-#include <linux/vs_base.h>
#include <linux/vserver/namespace.h>
+#include <linux/vserver/xid.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
mnt->mnt_namespace = old->mnt_namespace;
+ mnt->mnt_xid = old->mnt_xid;
/* stick the duplicate mount on the same expiry list
* as the original if that was on one */
unsigned long s_flags = mnt->mnt_sb->s_flags;
int mnt_flags = mnt->mnt_flags;
+ if (vx_flags(VXF_HIDE_MOUNT, 0))
+ return 0;
+ if (!vx_check_vfsmount(current->vx_info, mnt))
+ return 0;
+
if (vx_flags(VXF_HIDE_MOUNT, 0))
return 0;
if (!vx_check_vfsmount(current->vx_info, mnt))
seq_puts(m, p->unset_str);
}
}
+ if (mnt->mnt_flags & MNT_XID)
+ seq_printf(m, ",xid=%d", mnt->mnt_xid);
if (mnt->mnt_sb->s_op->show_options)
err = mnt->mnt_sb->s_op->show_options(m, mnt);
seq_puts(m, " 0 0\n");
EXPORT_SYMBOL(may_umount);
-static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill)
+static inline void __umount_list(struct list_head *kill)
{
+ struct vfsmount *mnt;
+
while (!list_empty(kill)) {
mnt = list_entry(kill->next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
list_del(&p->mnt_list);
list_add(&p->mnt_list, &kill);
}
- __umount_tree(mnt, &kill);
+ __umount_list(&kill);
}
void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
list_del(&p->mnt_list);
list_add(&p->mnt_list, &kill);
}
- __umount_tree(mnt, &kill);
+ __umount_list(&kill);
}
static int do_umount(struct vfsmount *mnt, int flags)
/*
* do loopback mount.
*/
-static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
+static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, unsigned long flags, int mnt_flags)
{
struct nameidata old_nd;
struct vfsmount *mnt = NULL;
list_del_init(&mnt->mnt_fslink);
spin_unlock(&vfsmount_lock);
+ if (flags & MS_XID) {
+ mnt->mnt_xid = xid;
+ mnt->mnt_flags |= MNT_XID;
+ }
err = graft_tree(mnt, nd);
if (err) {
spin_lock(&vfsmount_lock);
*/
static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
- void *data)
+ void *data, xid_t xid)
{
int err;
struct super_block * sb = nd->mnt->mnt_sb;
mnt_flags |= MNT_NODEV;
down_write(&sb->s_umount);
err = do_remount_sb(sb, flags, data, 0);
- if (!err)
+ if (!err) {
nd->mnt->mnt_flags=mnt_flags;
+ if (flags & MS_XID)
+ nd->mnt->mnt_xid = xid;
+ }
up_write(&sb->s_umount);
if (!err)
security_sb_post_remount(nd->mnt, flags, data);
struct nameidata nd;
int retval = 0;
int mnt_flags = 0;
+ xid_t xid = 0;
/* Discard magic */
if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
+ retval = vx_parse_xid(data_page, &xid, 1);
+ if (retval) {
+ mnt_flags |= MNT_XID;
+ /* bind and re-mounts get xid flag */
+ if (flags & (MS_BIND|MS_REMOUNT))
+ flags |= MS_XID;
+ }
+
/* Separate the per-mountpoint flags */
if (flags & MS_RDONLY)
mnt_flags |= MNT_RDONLY;
if (flags & MS_REMOUNT)
retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
- data_page);
+ data_page, xid);
else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags, mnt_flags);
+ retval = do_loopback(&nd, dev_name, xid, flags, mnt_flags);
+
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
else
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
+#include <linux/vserver/xid.h>
#include "delegation.h"
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
if (!inode)
goto out_unlock;
+ vx_propagate_xid(nd, inode);
no_entry:
error = 0;
d_add(dentry, inode);
out:
return inode;
-/*
+/* FIXME
fail_dlim:
make_bad_inode(inode);
iput(inode);
#include <linux/vs_dlimit.h>
#include <linux/vserver/xid.h>
#include <linux/syscalls.h>
+#include <linux/vs_limit.h>
+#include <linux/vs_dlimit.h>
+#include <linux/vserver/xid.h>
#include <asm/unistd.h>
#include <linux/highmem.h>
#include <linux/file.h>
#include <linux/times.h>
-#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_network.h>
#include <linux/vs_cvirt.h>
TASK_INTERRUPTIBLE |
TASK_UNINTERRUPTIBLE |
TASK_STOPPED |
- TASK_TRACED |
- TASK_ONHOLD)) |
+ TASK_TRACED |
+ TASK_ONHOLD)) |
(tsk->exit_state & (EXIT_ZOMBIE |
EXIT_DEAD));
const char **p = &task_state_array[0];
{
struct group_info *group_info;
int g;
- pid_t pid, ppid, tppid, tgid;
+ pid_t pid, ptgid, tppid, tgid;
read_lock(&tasklist_lock);
tgid = vx_map_tgid(p->tgid);
pid = vx_map_pid(p->pid);
- ppid = vx_map_pid(p->real_parent->pid);
+ ptgid = vx_map_pid(p->group_leader->real_parent->tgid);
tppid = vx_map_pid(p->parent->pid);
buffer += sprintf(buffer,
"State:\t%s\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p),
(p->sleep_avg/1024)*100/(1020000000/1024),
- tgid, pid, (pid > 1) ? ppid : 0,
- p->pid && p->ptrace ? tppid : 0,
+ tgid, pid, (pid > 1) ? ptgid : 0,
+ pid_alive(p) && p->ptrace ? tppid : 0,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
stime += task->signal->stime;
}
}
- if (task_vx_flags(task, VXF_VIRT_UPTIME, 0)) {
- bias_uptime = task->vx_info->cvirt.bias_uptime.tv_sec * NSEC_PER_SEC
- + task->vx_info->cvirt.bias_uptime.tv_nsec;
- }
+ pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0);
+ ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info,
+ task->group_leader->real_parent->tgid);
+ pgid = vx_info_map_pid(task->vx_info, pgid);
+
read_unlock(&tasklist_lock);
if (!whole || num_threads<2) {
/* convert timespec -> nsec*/
start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+ task->start_time.tv_nsec;
+
/* convert nsec -> ticks */
start_time = nsec_to_clock_t(start_time - bias_uptime);
+ /* fixup start time for virt uptime */
+ if (vx_flags(VXF_VIRT_UPTIME, 0)) {
+ unsigned long long bias =
+ current->vx_info->cvirt.bias_clock;
+
+ if (start_time > bias)
+ start_time -= bias;
+ else
+ start_time = 0;
+ }
+
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
static struct inode_operations proc_tgid_attr_inode_operations;
#endif
+extern int proc_pid_vx_info(struct task_struct *, char *);
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
/* SMP-safe */
static struct dentry *proc_pident_lookup(struct inode *dir,
struct dentry *dentry,
int buflen)
{
char tmp[30];
- sprintf(tmp, "%d", vx_map_pid(current->tgid));
+ sprintf(tmp, "%d", vx_map_tgid(current->tgid));
return vfs_readlink(dentry,buffer,buflen,tmp);
}
static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char tmp[30];
- sprintf(tmp, "%d", vx_map_pid(current->tgid));
+ sprintf(tmp, "%d", vx_map_tgid(current->tgid));
return vfs_follow_link(nd,tmp);
}
#include <linux/vs_base.h>
#include <linux/vserver/inode.h>
#include <linux/bitops.h>
+#include <linux/vserver/inode.h>
#include <asm/uaccess.h>
static ssize_t proc_file_read(struct file *file, char __user *buf,
error = -EINVAL;
inode = proc_get_inode(dir->i_sb, ino, de);
- inode->i_xid = vx_current_xid();
+ /* generic proc entries belong to the host */
+ inode->i_xid = 0;
break;
}
}
#include <asm/tlb.h>
#include <asm/div64.h>
+#include <linux/vs_cvirt.h>
+
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
/*
#include <linux/reiserfs_xattr.h>
#include <linux/smp_lock.h>
#include <linux/quotaops.h>
+#include <linux/vserver/xid.h>
#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
reiserfs_write_unlock(dir->i_sb);
return ERR_PTR(-EACCES);
}
+ vx_propagate_xid(nd, inode);
/* Propogate the priv_object flag so we know we're in the priv tree */
if (is_reiserfs_priv_object (dir))
#include <linux/devpts_fs.h>
#include <linux/proc_fs.h>
#include <linux/kobject.h>
+#include <linux/devpts_fs.h>
+#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include "sysfs.h"
-/* Random magic number */
-#define SYSFS_MAGIC 0x62656572
struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb = NULL;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
- sb->s_magic = SYSFS_MAGIC;
+ sb->s_magic = SYSFS_SUPER_MAGIC;
sb->s_op = &sysfs_ops;
sysfs_sb = sb;
#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
#define LINUX_XFLAG_BARRIER 0x00004000 /* chroot() barrier */
-#define LINUX_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */
+#define LINUX_XFLAG_IUNLINK 0x00008000 /* immutable unlink */
STATIC unsigned int
xfs_merge_ioc_xflags(
flags |= LINUX_XFLAG_IMMUTABLE;
if (di_flags & XFS_DIFLAG_IUNLINK)
flags |= LINUX_XFLAG_IUNLINK;
+ if (di_flags & XFS_DIFLAG_BARRIER)
+ flags |= LINUX_XFLAG_BARRIER;
if (di_flags & XFS_DIFLAG_APPEND)
flags |= LINUX_XFLAG_APPEND;
if (di_flags & XFS_DIFLAG_SYNC)
#define XFS_DIFLAG_PROJINHERIT_BIT 9 /* create with parents projid */
#define XFS_DIFLAG_NOSYMLINKS_BIT 10 /* disallow symlink creation */
#define XFS_DIFLAG_BARRIER_BIT 12 /* chroot() barrier */
-#define XFS_DIFLAG_IUNLINK_BIT 13 /* inode has iunlink */
+#define XFS_DIFLAG_IUNLINK_BIT 13 /* immutable unlink */
#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)
#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)
#define XFS_DIFLAG_BARRIER (1 << XFS_DIFLAG_BARRIER_BIT)
#define XFS_DIFLAG_IUNLINK (1 << XFS_DIFLAG_IUNLINK_BIT)
+
#define XFS_DIFLAG_ANY \
(XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
#define XFS_XFLAG_BARRIER 0x00004000 /* chroot() barrier */
-#define XFS_XFLAG_IUNLINK 0x00008000 /* Immutable unlink */
+#define XFS_XFLAG_IUNLINK 0x00008000 /* immutable unlink */
#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/*
#include <linux/config.h>
#ifdef __KERNEL__
-# define HZ (CONFIG_X86_HZ)
+
+#if defined(CONFIG_X86_HZ) && defined(CONFIG_KERNEL_HZ)
+#error CONFIG_X86_HZ and CONFIG_KERNEL_HZ are both defined; fix the Kconfig to select only one
+#endif
+
+#ifdef CONFIG_X86_HZ
+# define HZ CONFIG_X86_HZ
+#else
+# ifdef CONFIG_KERNEL_HZ
+# define HZ CONFIG_KERNEL_HZ
+# else
+# define HZ 1000 /* Internal kernel timer frequency */
+# endif
+#endif
+
# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
#endif
#define __NR_get_mempolicy (__NR_Linux + 261)
#define __NR_set_mempolicy (__NR_Linux + 262)
#define __NR_vserver (__NR_Linux + 273)
-#define __NR_Linux_syscalls 274
+
#define HPUX_GATEWAY_ADDR 0xC0000004
#define LINUX_GATEWAY_ADDR 0x100
if (rss < freed)
freed = rss;
- mm->rss = rss - freed;
+ // mm->rss = rss - freed;
+ vx_rsspages_sub(mm, freed);
tlb_flush_mmu(mp);
#endif
-#define DEVPTS_SUPER_MAGIC 0x1cd1
-
+#define DEVPTS_SUPER_MAGIC 0x1cd1
#endif /* _LINUX_DEVPTS_FS_H */
#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */
#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
#define EXT2_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
-#define EXT2_MOUNT_TAG_XID (1<<16) /* Enable Context Tags */
+#define EXT2_MOUNT_TAG_XID (1<<24) /* Enable Context Tags */
#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
#define set_opt(o, opt) o |= EXT2_MOUNT_##opt
#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
#endif
+#ifdef CONFIG_VSERVER_LEGACY
+#define EXT3_IOC_SETXID FIOC_SETXIDJ
+#endif
/*
* Inode dynamic state flags
#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
-#define EXT3_MOUNT_TAG_XID 0x40000 /* Enable Context Tags */
+#define EXT3_MOUNT_TAG_XID (1<<24) /* Enable Context Tags */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
#define MS_ONE_SECOND (1<<17) /* fs has 1 sec a/m/ctime resolution */
#define MS_TAGXID (1<<24) /* tag inodes with context information */
+#define MS_XID (1<<25) /* use specific xid for this mount */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
#define S_DIRSYNC 64 /* Directory modifications are synchronous */
#define S_NOCMTIME 128 /* Do not update file c/mtime */
#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
-#define S_BARRIER 512 /* Barrier for chroot() */
-#define S_IUNLINK 1024 /* Immutable unlink */
+#define S_BARRIER 1024 /* Barrier for chroot() */
+#define S_IUNLINK 2048 /* Immutable unlink */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
#define MNT_RDONLY 8
#define MNT_NOATIME 16
#define MNT_NODIRATIME 32
+#define MNT_XID 256
struct vfsmount
{
struct list_head mnt_list;
struct list_head mnt_fslink; /* link in fs-specific expiry list */
struct namespace *mnt_namespace; /* containing namespace */
+ xid_t mnt_xid; /* xid tagging used for vfsmount */
};
#define MNT_IS_RDONLY(m) ((m) && ((m)->mnt_flags & MNT_RDONLY))
REISERFS_BARRIER_NONE,
REISERFS_BARRIER_FLUSH,
REISERFS_TAGXID,
+
/* Actions on error */
REISERFS_ERROR_PANIC,
REISERFS_ERROR_RO,
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
+#include <linux/vs_base.h>
struct exec_domain;
extern int exec_shield;
#ifdef CONFIG_SECURITY
/* code is in security.c */
extern int capable(int cap);
+extern int vx_capable(int cap, int ccap);
#else
static inline int capable(int cap)
{
+ if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
+ return 0;
if (cap_raised(current->cap_effective, cap)) {
current->flags |= PF_SUPERPRIV;
return 1;
}
return 0;
}
+
+static inline int vx_capable(int cap, int ccap)
+{
+	if (cap_raised(current->cap_effective, cap) &&
+		vx_ccaps(ccap)) {	/* require BOTH the Linux capability and the vserver context cap */
+		current->flags |= PF_SUPERPRIV;	/* record that the task exercised privilege */
+		return 1;
+	}
+	return 0;
+}
#endif
KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
KERN_HZ_TIMER=65, /* int: hz timer on or off */
KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
- KERN_SETUID_DUMPABLE=67, /* int: behaviour of dumps for setuid core */
- KERN_VSHELPER=68, /* string: path to vshelper policy agent */
+ KERN_VSHELPER=67, /* string: path to vshelper policy agent */
+ KERN_SETUID_DUMPABLE=68, /* int: behaviour of dumps for setuid core */
KERN_DUMP=69, /* dir: dump parameters */
};
#include <asm/atomic.h>
+#define SYSFS_SUPER_MAGIC 0x62656572
+
struct kobject;
struct module;
#ifndef _VX_VS_BASE_H
#define _VX_VS_BASE_H
-#include "vserver/context.h"
-
-// #define VX_DEBUG
-
-#if defined(VX_DEBUG)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
+#include "vserver/context.h"
#define vx_task_xid(t) ((t)->xid)
* check current context for ADMIN/WATCH and
* optionally agains supplied argument
*/
-static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
{
if (mode & VX_ARG_MASK) {
if ((mode & VX_IDENT) &&
return 1;
}
return (((mode & VX_ADMIN) && (cid == 0)) ||
- ((mode & VX_WATCH) && (cid == 1)));
+ ((mode & VX_WATCH) && (cid == 1)) ||
+ ((mode & VX_HOSTID) && (id == 0)));
}
-#define __vx_flags(v,m,f) (((v) & (m)) ^ (f))
+#define __vx_state(v) ((v) ? ((v)->vx_state) : 0)
+
+#define vx_info_state(v,m) (__vx_state(v) & (m))
+
+
+/* generic flag merging */
+
+#define vx_check_flags(v,m,f) (((v) & (m)) ^ (f))
+
+#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m)))
+
+#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m)))
+
+#define vx_check_bit(v,n) ((v) & (1LL << (n)))
+
-#define __vx_task_flags(t,m,f) \
- (((t) && ((t)->vx_info)) ? \
- __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)
+/* context flags */
-#define vx_current_flags() \
- ((current->vx_info) ? current->vx_info->vx_flags : 0)
+#define __vx_flags(v) ((v) ? (v)->vx_flags : 0)
-#define vx_flags(m,f) __vx_flags(vx_current_flags(),(m),(f))
+#define vx_current_flags() __vx_flags(current->vx_info)
+#define vx_info_flags(v,m,f) \
+ vx_check_flags(__vx_flags(v),(m),(f))
-#define vx_current_ccaps() \
- ((current->vx_info) ? current->vx_info->vx_ccaps : 0)
+#define task_vx_flags(t,m,f) \
+ ((t) && vx_info_flags((t)->vx_info, (m), (f)))
+
+#define vx_flags(m,f) vx_info_flags(current->vx_info,(m),(f))
+
+
+/* context caps */
+
+#define __vx_ccaps(v) ((v) ? (v)->vx_ccaps : 0)
+
+#define vx_current_ccaps() __vx_ccaps(current->vx_info)
+
+#define vx_info_ccaps(v,c) (__vx_ccaps(v) & (c))
+
+#define vx_ccaps(c) vx_info_ccaps(current->vx_info,(c))
+
+
+#define __vx_mcaps(v) ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
+
+#define vx_info_mcaps(v,c) (__vx_mcaps(v) & (c))
+
+#define vx_mcaps(c) vx_info_mcaps(current->vx_info,(c))
-#define vx_ccaps(c) (vx_current_ccaps() & (c))
#define vx_current_bcaps() \
(((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
current->vx_info->vx_bcaps : cap_bset)
-/* generic flag merging */
-
-#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m)))
+#define vx_current_initpid(n) \
+ (current->vx_info && \
+ (current->vx_info->vx_initpid == (n)))
-#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m)))
+#else
+#warning vs_base.h included more than once; check the _VX_VS_BASE_H include guard
#endif
#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
#include "vserver/debug.h"
-extern int proc_pid_vx_info(struct task_struct *, char *);
-
-
#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
{
if (!vxi)
return NULL;
+
vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
_file, _line);
+ vxh_get_vx_info(vxi);
+
atomic_inc(&vxi->vx_usecnt);
return vxi;
}
-
-extern void free_vx_info(struct vx_info *);
-
#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
{
if (!vxi)
return;
+
vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
_file, _line);
+ vxh_put_vx_info(vxi);
+
if (atomic_dec_and_test(&vxi->vx_usecnt))
free_vx_info(vxi);
}
vxi?atomic_read(&vxi->vx_usecnt):0,
vxi?atomic_read(&vxi->vx_refcnt):0,
_file, _line);
+ vxh_set_vx_info(vxi, vxp);
atomic_inc(&vxi->vx_refcnt);
vxo = xchg(vxp, __get_vx_info(vxi, _file, _line));
vxo?atomic_read(&vxo->vx_usecnt):0,
vxo?atomic_read(&vxo->vx_refcnt):0,
_file, _line);
+ vxh_clr_vx_info(vxo, vxp);
if (atomic_dec_and_test(&vxo->vx_refcnt))
unhash_vx_info(vxo);
}
-#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__)
+#define task_get_vx_info(p) __task_get_vx_info(p,__FILE__,__LINE__)
static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
const char *_file, int _line)
#define _VX_VS_CVIRT_H
-// #define VX_DEBUG
-
#include "vserver/cvirt.h"
-#include "vs_base.h"
-
-#if defined(VX_DEBUG)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
+#include "vserver/debug.h"
/* utsname virtualization */
/* pid faking stuff */
-#define vx_map_tgid(v,p) \
- __vx_map_tgid((v), (p), __FILE__, __LINE__)
+#define vx_info_map_pid(v,p) \
+ __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__)
+#define vx_info_map_tgid(v,p) vx_info_map_pid(v,p)
+#define vx_map_pid(p) vx_info_map_pid(current->vx_info, p)
+#define vx_map_tgid(p) vx_map_pid(p)
-static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
- char *file, int line)
+static inline int __vx_info_map_pid(struct vx_info *vxi, int pid,
+ const char *func, const char *file, int line)
{
- if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
- vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
- vxi, vxi->vx_flags, pid,
- (pid == vxi->vx_initpid)?1:pid,
- file, line);
+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
+ vxfprintk(VXD_CBIT(cvirt, 2),
+ "vx_map_tgid: %p/%llx: %d -> %d",
+ vxi, (long long)vxi->vx_flags, pid,
+ (pid && pid == vxi->vx_initpid)?1:pid,
+ func, file, line);
+ if (pid == 0)
+ return 0;
if (pid == vxi->vx_initpid)
return 1;
}
return pid;
}
-#define vx_rmap_tgid(v,p) \
- __vx_rmap_tgid((v), (p), __FILE__, __LINE__)
+#define vx_info_rmap_pid(v,p) \
+ __vx_info_rmap_pid((v), (p), __FUNC__, __FILE__, __LINE__)
+#define vx_rmap_pid(p) vx_info_rmap_pid(current->vx_info, p)
+#define vx_rmap_tgid(p) vx_rmap_pid(p)
-static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
- char *file, int line)
+static inline int __vx_info_rmap_pid(struct vx_info *vxi, int pid,
+ const char *func, const char *file, int line)
{
- if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
- vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
- vxi, vxi->vx_flags, pid,
+ if (vx_info_flags(vxi, VXF_INFO_INIT, 0)) {
+ vxfprintk(VXD_CBIT(cvirt, 2),
+ "vx_rmap_tgid: %p/%llx: %d -> %d",
+ vxi, (long long)vxi->vx_flags, pid,
(pid == 1)?vxi->vx_initpid:pid,
- file, line);
+ func, file, line);
if ((pid == 1) && vxi->vx_initpid)
return vxi->vx_initpid;
+ if (pid == vxi->vx_initpid)
+ return ~0U;
}
return pid;
}
-#undef vxdprintk
-#define vxdprintk(x...)
+
+static inline void vx_activate_task(struct task_struct *p)
+{
+ struct vx_info *vxi;
+
+ if ((vxi = p->vx_info)) {
+ vx_update_load(vxi);
+ atomic_inc(&vxi->cvirt.nr_running);
+ }
+}
+
+static inline void vx_deactivate_task(struct task_struct *p)
+{
+ struct vx_info *vxi;
+
+ if ((vxi = p->vx_info)) {
+ vx_update_load(vxi);
+ atomic_dec(&vxi->cvirt.nr_running);
+ }
+}
+
+static inline void vx_uninterruptible_inc(struct task_struct *p)
+{
+ struct vx_info *vxi;
+
+ if ((vxi = p->vx_info))
+ atomic_inc(&vxi->cvirt.nr_uninterruptible);
+}
+
+static inline void vx_uninterruptible_dec(struct task_struct *p)
+{
+ struct vx_info *vxi;
+
+ if ((vxi = p->vx_info))
+ atomic_dec(&vxi->cvirt.nr_uninterruptible);
+}
+
#else
#warning duplicate inclusion
#ifndef _VX_VS_DLIMIT_H
#define _VX_VS_DLIMIT_H
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-#include "vserver/context.h"
#include "vserver/dlimit.h"
#include "vserver/debug.h"
dli->dl_inodes_used++;
#if 0
else
- printk("VSW: DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d\n",
+ vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d",
sb, xid,
dli->dl_inodes_used, dli->dl_inodes_total,
file, line);
#define _VX_VS_LIMIT_H
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
#include "vserver/limit.h"
+#include "vserver/debug.h"
/* file limits */
-#define VX_DEBUG_ACC_FILE 0
-#define VX_DEBUG_ACC_OPENFD 0
-
-#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
-
-
-#define vx_acc_cres(v,d,r) \
- __vx_acc_cres((v), (r), (d), __FILE__, __LINE__)
static inline void __vx_acc_cres(struct vx_info *vxi,
- int res, int dir, char *file, int line)
+ int res, int dir, void *_data, char *_file, int _line)
{
- if (vxi) {
- if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
- (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
- printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
- (vxi?vxi->vx_id:-1), res,
- (vxi?atomic_read(&vxi->limit.rcur[res]):0),
- (dir>0)?"++":"--", file, line);
- if (dir > 0)
- atomic_inc(&vxi->limit.rcur[res]);
- else
- atomic_dec(&vxi->limit.rcur[res]);
- }
+ if (VXD_RLIMIT(res, RLIMIT_NOFILE) ||
+ VXD_RLIMIT(res, RLIMIT_NPROC) ||
+ VXD_RLIMIT(res, VLIMIT_NSOCK))
+ vxlprintk(1, "vx_acc_cres[%5d,%s,%2d]: %5d%s (%p)",
+ (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+ (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+ (dir>0)?"++":"--", _data, _file, _line);
+ if (vxi) {
+ if (dir > 0)
+ atomic_inc(&vxi->limit.rcur[res]);
+ else
+ atomic_dec(&vxi->limit.rcur[res]);
+ }
}
-#define vx_nproc_inc(p) vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC)
-#define vx_nproc_dec(p) vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC)
+#define vx_acc_cres(v,d,p,r) \
+ __vx_acc_cres((v), (r), (d), (p), __FILE__, __LINE__)
+
+#define vx_acc_cres_cond(x,d,p,r) \
+ __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0,\
+ (r), (d), (p), __FILE__, __LINE__)
+
+#define vx_nproc_inc(p) \
+ vx_acc_cres((p)->vx_info, 1, (p), RLIMIT_NPROC)
-#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
-#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+#define vx_nproc_dec(p) \
+ vx_acc_cres((p)->vx_info,-1, (p), RLIMIT_NPROC)
-#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
-#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+#define vx_files_inc(f) \
+ vx_acc_cres_cond((f)->f_xid, 1, (f), RLIMIT_NOFILE)
-/*
-#define vx_openfd_inc(f) do { \
- vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD); \
- printk("vx_openfd_inc: %d[#%d] in %s:%d\n", \
- f, current->xid, __FILE__, __LINE__); \
- } while (0)
+#define vx_files_dec(f) \
+ vx_acc_cres_cond((f)->f_xid,-1, (f), RLIMIT_NOFILE)
-#define vx_openfd_dec(f) do { \
- vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD); \
- printk("vx_openfd_dec: %d[#%d] in %s:%d\n", \
- f, current->xid, __FILE__, __LINE__); \
- } while (0)
-*/
#define vx_cres_avail(v,n,r) \
- __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
+ __vx_cres_avail((v), (r), (n), __FILE__, __LINE__)
static inline int __vx_cres_avail(struct vx_info *vxi,
- int res, int num, char *file, int line)
+ int res, int num, char *_file, int _line)
{
unsigned long value;
- if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
- (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
- printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
- (vxi?vxi->vx_id:-1), res,
+ if (VXD_RLIMIT(res, RLIMIT_NOFILE) ||
+ VXD_RLIMIT(res, RLIMIT_NPROC) ||
+ VXD_RLIMIT(res, VLIMIT_NSOCK))
+ vxlprintk(1, "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d",
+ (vxi?vxi->vx_id:-1), vlimit_name[res], res,
(vxi?vxi->limit.rlim[res]:1),
- (vxi?atomic_read(&vxi->limit.rcur[res]):0),
- num, file, line);
- if (!vxi)
- return 1;
- value = atomic_read(&vxi->limit.rcur[res]);
+ (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+ num, _file, _line);
+ if (!vxi)
+ return 1;
+ value = atomic_read(&vxi->limit.rcur[res]);
if (value > vxi->limit.rmax[res])
vxi->limit.rmax[res] = value;
- if (vxi->limit.rlim[res] == RLIM_INFINITY)
- return 1;
- if (value + num <= vxi->limit.rlim[res])
- return 1;
+ if (vxi->limit.rlim[res] == RLIM_INFINITY)
+ return 1;
+ if (value + num <= vxi->limit.rlim[res])
+ return 1;
atomic_inc(&vxi->limit.lhit[res]);
- return 0;
+ return 0;
}
#define vx_nproc_avail(n) \
#define vx_files_avail(n) \
vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
-#define vx_openfd_avail(n) \
- vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
-
/* socket limits */
-#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
-#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+#define vx_sock_inc(s) \
+ vx_acc_cres((s)->sk_vx_info, 1, (s), VLIMIT_NSOCK)
+#define vx_sock_dec(s) \
+ vx_acc_cres((s)->sk_vx_info,-1, (s), VLIMIT_NSOCK)
#define vx_sock_avail(n) \
- vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
-
+ vx_cres_avail(current->vx_info, (n), VLIMIT_NSOCK)
#else
#warning duplicate inclusion
#define _VX_VS_MEMORY_H
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
#include "vserver/limit.h"
+#include "vserver/debug.h"
-#define VX_DEBUG_ACC_RSS 0
-#define VX_DEBUG_ACC_VM 0
-#define VX_DEBUG_ACC_VML 0
-
-#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
-#define vxdprintk(x...) printk("vxd: " x)
-#else
-#define vxdprintk(x...)
-#endif
-
#define vx_acc_page(m, d, v, r) \
__vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__)
static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
- int res, int dir, char *file, int line)
+ int res, int dir, char *file, int line)
{
- if (v) {
- if (dir > 0)
- ++(*v);
- else
- --(*v);
- }
- if (vxi) {
- if (dir > 0)
- atomic_inc(&vxi->limit.rcur[res]);
- else
- atomic_dec(&vxi->limit.rcur[res]);
- }
+ if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+ VXD_RLIMIT(res, RLIMIT_AS) ||
+ VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+ vxlprintk(1, "vx_acc_page[%5d,%s,%2d]: %5d%s",
+ (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+ (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+ (dir?"++":"--"), file, line);
+ if (v) {
+ if (dir > 0)
+ ++(*v);
+ else
+ --(*v);
+ }
+ if (vxi) {
+ if (dir > 0)
+ atomic_inc(&vxi->limit.rcur[res]);
+ else
+ atomic_dec(&vxi->limit.rcur[res]);
+ }
}
__vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__)
static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
- int res, int pages, char *file, int line)
+ int res, int pages, char *_file, int _line)
{
- if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
- (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
- (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
- vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n",
- (vxi?vxi->vx_id:-1), res,
- (vxi?atomic_read(&vxi->limit.res[res]):0),
- pages, file, line);
- if (pages == 0)
- return;
- if (v)
- *v += pages;
- if (vxi)
- atomic_add(pages, &vxi->limit.rcur[res]);
+ if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+ VXD_RLIMIT(res, RLIMIT_AS) ||
+ VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+ vxlprintk(1, "vx_acc_pages[%5d,%s,%2d]: %5d += %5d",
+ (vxi?vxi->vx_id:-1), vlimit_name[res], res,
+ (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+ pages, _file, _line);
+ if (pages == 0)
+ return;
+ if (v)
+ *v += pages;
+ if (vxi)
+ atomic_add(pages, &vxi->limit.rcur[res]);
}
-#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS)
-#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
-#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS)
+#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS)
+#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS)
-#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS)
-#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
-#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS)
+#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS)
+#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
+#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS)
-#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
-#define vx_pages_sub(s,r,p) __vx_pages_add(s, r, -(p))
+#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
+#define vx_pages_sub(s,r,p) vx_pages_add(s, r, -(p))
-#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1)
-#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1)
-#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p)
-#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p))
+#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1)
+#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1)
+#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p)
+#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p))
-#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1)
-#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1)
-#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p)
-#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p))
+#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1)
+#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1)
+#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p)
+#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p))
-#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1)
-#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1)
-#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p)
-#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p))
+#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1)
+#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1)
+#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p)
+#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p))
#define vx_pages_avail(m, p, r) \
- __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
+ __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)
static inline int __vx_pages_avail(struct vx_info *vxi,
- int res, int pages, char *file, int line)
+ int res, int pages, char *_file, int _line)
{
unsigned long value;
- if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
- (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
- (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
- printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
- (vxi?vxi->vx_id:-1), res,
+ if (VXD_RLIMIT(res, RLIMIT_RSS) ||
+ VXD_RLIMIT(res, RLIMIT_AS) ||
+ VXD_RLIMIT(res, RLIMIT_MEMLOCK))
+ vxlprintk(1, "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d",
+ (vxi?vxi->vx_id:-1), vlimit_name[res], res,
(vxi?vxi->limit.rlim[res]:1),
- (vxi?atomic_read(&vxi->limit.rcur[res]):0),
- pages, file, line);
- if (!vxi)
- return 1;
- value = atomic_read(&vxi->limit.rcur[res]);
+ (vxi?atomic_read(&vxi->limit.rcur[res]):0),
+ pages, _file, _line);
+ if (!vxi)
+ return 1;
+ value = atomic_read(&vxi->limit.rcur[res]);
if (value > vxi->limit.rmax[res])
vxi->limit.rmax[res] = value;
- if (vxi->limit.rlim[res] == RLIM_INFINITY)
- return 1;
- if (value + pages <= vxi->limit.rlim[res])
- return 1;
+ if (vxi->limit.rlim[res] == RLIM_INFINITY)
+ return 1;
+ if (value + pages <= vxi->limit.rlim[res])
+ return 1;
atomic_inc(&vxi->limit.lhit[res]);
- return 0;
+ return 0;
}
-#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS)
-#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
-#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
+#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
#else
#warning duplicate inclusion
#ifndef _NX_VS_NETWORK_H
#define _NX_VS_NETWORK_H
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
#include "vserver/network.h"
#include "vserver/debug.h"
-extern int proc_pid_nx_info(struct task_struct *, char *);
-
-
#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
return nxi;
}
-
-#define free_nx_info(i) \
- call_rcu(&i->nx_rcu, rcu_free_nx_info);
-
#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
--- /dev/null
+#ifndef _VX_VS_SCHED_H
+#define _VX_VS_SCHED_H
+
+/*
+ * Inline helpers for the vserver token-bucket CPU scheduler and for
+ * accounting processes that a context has put "on hold".
+ */
+
+#include "vserver/sched.h"
+
+
+#define VAVAVOOM_RATIO 50
+
+/* bounds for the context priority bias -- range matches nice values
+ * (presumably intentional; confirm against the scheduler users) */
+#define MAX_PRIO_BIAS 20
+#define MIN_PRIO_BIAS -20
+
+
+/* number of scheduler tokens currently available to this context */
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+	return atomic_read(&vxi->sched.tokens);
+}
+
+/* consume one scheduler token from the context's bucket */
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+	atomic_dec(&vxi->sched.tokens);
+}
+
+/*
+ * Decrement p's time slice and decide whether it must be rescheduled.
+ * With CONFIG_VSERVER_HARDCPU a token is consumed per tick while any
+ * are available; returns 1 (force resched) once fewer than 2 tokens
+ * remained, otherwise only when the time slice is exhausted.
+ */
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef CONFIG_VSERVER_HARDCPU
+	struct vx_info *vxi = p->vx_info;
+#endif
+	int slice = --p->time_slice;
+
+#ifdef CONFIG_VSERVER_HARDCPU
+	if (vxi) {
+		int tokens;
+
+		if ((tokens = vx_tokens_avail(vxi)) > 0)
+			vx_consume_token(vxi);
+		/* for tokens > 0, one token was consumed */
+		if (tokens < 2)
+			return 1;
+	}
+#endif
+	return (slice == 0);
+}
+
+
+/* put a process on hold: bump nr_onhold and, when the count leaves
+ * zero, stamp the current jiffies so the hold period can be accounted */
+static inline void vx_onhold_inc(struct vx_info *vxi)
+{
+	int onhold = atomic_read(&vxi->cvirt.nr_onhold);
+
+	atomic_inc(&vxi->cvirt.nr_onhold);
+	if (!onhold)
+		vxi->cvirt.onhold_last = jiffies;
+}
+
+/* charge the jiffies elapsed since onhold_last to this CPU's
+ * hold_ticks counter and restart the measurement window */
+static inline void __vx_onhold_update(struct vx_info *vxi)
+{
+	int cpu = smp_processor_id();
+	uint32_t now = jiffies;
+	uint32_t delta = now - vxi->cvirt.onhold_last;
+
+	vxi->cvirt.onhold_last = now;
+	vxi->sched.cpu[cpu].hold_ticks += delta;
+}
+
+/* release a process from hold; when the last one leaves, fold the
+ * elapsed hold time into the per-cpu accounting */
+static inline void vx_onhold_dec(struct vx_info *vxi)
+{
+	if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
+		__vx_onhold_update(vxi);
+}
+
+#else
+#warning duplicate inclusion
+#endif
-#ifndef _VX_VS_LIMIT_H
-#define _VX_VS_LIMIT_H
+#ifndef _VX_VS_SOCKET_H
+#define _VX_VS_SOCKET_H
-// #define VX_DEBUG
-
-#include <linux/kernel.h>
-#include <linux/rcupdate.h>
-#include <linux/sched.h>
-
-#include "vserver/context.h"
-#include "vserver/network.h"
+#include "vserver/debug.h"
/* socket accounting */
static inline void __vx_acc_sock(struct vx_info *vxi,
int family, int pos, int size, char *file, int line)
{
- if (vxi) {
+ if (vxi) {
int type = vx_sock_type(family);
atomic_inc(&vxi->cacct.sock[type][pos].count);
atomic_add(size, &vxi->cacct.sock[type][pos].total);
- }
+ }
}
#define vx_sock_recv(sk,s) \
vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))
-#define sock_vx_init(s) do { \
+#define sock_vx_init(s) do { \
(s)->sk_xid = 0; \
(s)->sk_vx_info = NULL; \
} while (0)
-#define sock_nx_init(s) do { \
+#define sock_nx_init(s) do { \
(s)->sk_nid = 0; \
(s)->sk_nx_info = NULL; \
} while (0)
+++ /dev/null
-#ifndef _LINUX_VSERVER_H
-#define _LINUX_VSERVER_H
-
-#include <linux/vserver/context.h>
-#include <linux/vserver/network.h>
-#include <linux/vinline.h>
-#include <linux/ninline.h>
-
-#endif
#include <linux/types.h>
+
#define MAX_S_CONTEXT 65535 /* Arbitrary limit */
#define MIN_D_CONTEXT 49152 /* dynamic contexts start here */
#define VX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */
+/* context flags */
+
+#define VXF_INFO_LOCK 0x00000001
+#define VXF_INFO_SCHED 0x00000002
+#define VXF_INFO_NPROC 0x00000004
+#define VXF_INFO_PRIVATE 0x00000008
+
+#define VXF_INFO_INIT 0x00000010
+#define VXF_INFO_HIDE 0x00000020
+#define VXF_INFO_ULIMIT 0x00000040
+#define VXF_INFO_NSPACE 0x00000080
+
+#define VXF_SCHED_HARD 0x00000100
+#define VXF_SCHED_PRIO 0x00000200
+#define VXF_SCHED_PAUSE 0x00000400
+
+#define VXF_VIRT_MEM 0x00010000
+#define VXF_VIRT_UPTIME 0x00020000
+#define VXF_VIRT_CPU 0x00040000
+#define VXF_VIRT_LOAD 0x00080000
+
+#define VXF_HIDE_MOUNT 0x01000000
+#define VXF_HIDE_NETIF 0x02000000
+
+#define VXF_STATE_SETUP (1ULL<<32)
+#define VXF_STATE_INIT (1ULL<<33)
+
+#define VXF_FORK_RSS (1ULL<<48)
+#define VXF_PROLIFIC (1ULL<<49)
+
+#define VXF_IGNEG_NICE (1ULL<<52)
+
+#define VXF_ONE_TIME (0x0003ULL<<32)
+
+
+/* context caps */
+
+#define VXC_CAP_MASK 0x00000000
+
+#define VXC_SET_UTSNAME 0x00000001
+#define VXC_SET_RLIMIT 0x00000002
+
+#define VXC_RAW_ICMP 0x00000100
+
+#define VXC_SECURE_MOUNT 0x00010000
+#define VXC_SECURE_REMOUNT 0x00020000
+
+
+/* vshelper sync commands */
+
+#define VS_CONTEXT_CREATED 1
+#define VS_CONTEXT_DESTROY 2
+
+
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
-#define _VX_INFO_DEF_
-#include "cvirt.h"
-#include "limit.h"
-#include "sched.h"
-#undef _VX_INFO_DEF_
+#include "limit_def.h"
+#include "sched_def.h"
+#include "cvirt_def.h"
struct vx_info {
struct hlist_node vx_hlist; /* linked list of contexts */
char vx_name[65]; /* vserver name */
};
+
/* status flags */
#define VXS_HASHED 0x0001
#define VX_ADMIN 0x0001
#define VX_WATCH 0x0002
-#define VX_DUMMY 0x0008
+#define VX_HIDE 0x0004
+#define VX_HOSTID 0x0008
#define VX_IDENT 0x0010
#define VX_EQUIV 0x0020
struct rcu_head;
-// extern void rcu_free_vx_info(struct rcu_head *);
extern void unhash_vx_info(struct vx_info *);
+extern void free_vx_info(struct vx_info *);
+
extern struct vx_info *locate_vx_info(int);
extern struct vx_info *locate_or_create_vx_info(int);
extern int vx_migrate_task(struct task_struct *, struct vx_info *);
-#endif /* __KERNEL__ */
-
-#include "switch.h"
-
-/* vinfo commands */
-
-#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
-#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_xid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
-#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
-
-struct vcmd_vx_info_v0 {
- uint32_t xid;
- uint32_t initpid;
- /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_vx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_ctx_create VC_CMD(VPROC, 1, 0)
-#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0)
-
-#ifdef __KERNEL__
-extern int vc_ctx_create(uint32_t, void __user *);
-extern int vc_ctx_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
-#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
-
-struct vcmd_ctx_flags_v0 {
- uint64_t flagword;
- uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_cflags(uint32_t, void __user *);
-extern int vc_set_cflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VXF_INFO_LOCK 0x00000001
-#define VXF_INFO_SCHED 0x00000002
-#define VXF_INFO_NPROC 0x00000004
-#define VXF_INFO_PRIVATE 0x00000008
-
-#define VXF_INFO_INIT 0x00000010
-#define VXF_INFO_HIDE 0x00000020
-#define VXF_INFO_ULIMIT 0x00000040
-#define VXF_INFO_NSPACE 0x00000080
-
-#define VXF_SCHED_HARD 0x00000100
-#define VXF_SCHED_PRIO 0x00000200
-#define VXF_SCHED_PAUSE 0x00000400
-
-#define VXF_VIRT_MEM 0x00010000
-#define VXF_VIRT_UPTIME 0x00020000
-#define VXF_VIRT_CPU 0x00040000
-#define VXF_VIRT_LOAD 0x00080000
-
-#define VXF_HIDE_MOUNT 0x01000000
-#define VXF_HIDE_NETIF 0x02000000
-
-#define VXF_STATE_SETUP (1ULL<<32)
-#define VXF_STATE_INIT (1ULL<<33)
-
-#define VXF_FORK_RSS (1ULL<<48)
-#define VXF_PROLIFIC (1ULL<<49)
-
-#define VXF_IGNEG_NICE (1ULL<<52)
+// extern int proc_pid_vx_info(struct task_struct *, char *);
-#define VXF_ONE_TIME (0x0003ULL<<32)
-
-#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0)
-#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0)
-
-struct vcmd_ctx_caps_v0 {
- uint64_t bcaps;
- uint64_t ccaps;
- uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ccaps(uint32_t, void __user *);
-extern int vc_set_ccaps(uint32_t, void __user *);
+extern long vs_context_state(unsigned int);
#endif /* __KERNEL__ */
-
-#define VXC_SET_UTSNAME 0x00000001
-#define VXC_SET_RLIMIT 0x00000002
-
-#define VXC_RAW_ICMP 0x00000100
-
-#define VXC_SECURE_MOUNT 0x00010000
-#define VXC_SECURE_REMOUNT 0x00020000
-
-
+#else /* _VX_CONTEXT_H */
+#warning duplicate inclusion
#endif /* _VX_CONTEXT_H */
--- /dev/null
+#ifndef _VX_CONTEXT_CMD_H
+#define _VX_CONTEXT_CMD_H
+
+/*
+ * Command numbers (VC_CMD encodings) and argument structures for
+ * vserver context management; shared between userspace and kernel,
+ * with the in-kernel handlers declared under __KERNEL__.
+ */
+
+/* vinfo commands */
+
+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
+
+struct vcmd_vx_info_v0 {
+	uint32_t xid;
+	uint32_t initpid;
+	/* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context commands */
+
+#define VCMD_ctx_create VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* flag commands */
+
+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
+
+/* flag word plus a mask -- presumably the mask selects which bits
+ * to read/modify; confirm against the vc_*_cflags handlers */
+struct vcmd_ctx_flags_v0 {
+	uint64_t flagword;
+	uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context caps commands */
+
+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0)
+
+/* system (bcaps) and context (ccaps) capability sets with a mask */
+struct vcmd_ctx_caps_v0 {
+	uint64_t bcaps;
+	uint64_t ccaps;
+	uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_CONTEXT_CMD_H */
-#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <linux/utsname.h>
-#include <linux/rwsem.h>
-#include <linux/jiffies.h>
-#include <linux/time.h>
-#include <asm/atomic.h>
-
-/* context sub struct */
-
-struct _vx_cvirt {
- int max_threads;
-
- unsigned int bias_cswtch;
- struct timespec bias_idle;
- struct timespec bias_tp;
- uint64_t bias_jiffies;
-
- struct new_utsname utsname;
-};
-
-struct sock_acc {
- atomic_t count;
- atomic_t total;
-};
-
-struct _vx_cacct {
- atomic_t nr_threads;
- int nr_running;
-
- unsigned long total_forks;
-
- struct sock_acc sock[5][3];
-};
-
-
-static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
-{
- return atomic_read(&cacct->sock[type][pos].count);
-}
-
-
-static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
-{
- return atomic_read(&cacct->sock[type][pos].total);
-}
-
-
-extern uint64_t vx_idle_jiffies(void);
-
-static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
-{
- uint64_t idle_jiffies = vx_idle_jiffies();
-
- // new->virt.bias_cswtch = kstat.context_swtch;
- cvirt->bias_jiffies = get_jiffies_64();
-
- jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
- do_posix_clock_monotonic_gettime(&cvirt->bias_tp);
-
- down_read(&uts_sem);
- cvirt->utsname = system_utsname;
- up_read(&uts_sem);
-}
-
-static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
-{
- return;
-}
-
-static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
-{
- int i,j;
-
- atomic_set(&cacct->nr_threads, 1);
- for (i=0; i<5; i++) {
- for (j=0; j<3; j++) {
- atomic_set(&cacct->sock[i][j].count, 0);
- atomic_set(&cacct->sock[i][j].total, 0);
- }
- }
-}
-
-static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
-{
- return;
-}
-
-static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
-{
- int length = 0;
- return length;
-}
-
-static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
-{
- int i,j, length = 0;
- static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
-
- for (i=0; i<5; i++) {
- length += sprintf(buffer + length,
- "%s:", type[i]);
- for (j=0; j<3; j++) {
- length += sprintf(buffer + length,
- "\t%12lu/%-12lu"
- ,vx_sock_count(cacct, i, j)
- ,vx_sock_total(cacct, i, j)
- );
- }
- buffer[length++] = '\n';
- }
- return length;
-}
-
-#else /* _VX_INFO_DEF_ */
#ifndef _VX_CVIRT_H
#define _VX_CVIRT_H
-#include "switch.h"
+#ifdef __KERNEL__
-/* cvirt vserver commands */
+struct timespec;
+void vx_vsi_uptime(struct timespec *, struct timespec *);
-#ifdef __KERNEL__
-struct timespec;
+struct vx_info;
-void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle);
+void vx_update_load(struct vx_info *);
#endif /* __KERNEL__ */
-
+#else /* _VX_CVIRT_H */
+#warning duplicate inclusion
#endif /* _VX_CVIRT_H */
-#endif
--- /dev/null
+#ifndef _VX_CVIRT_CMD_H
+#define _VX_CVIRT_CMD_H
+
+/* cvirt vserver commands */
+
+/* placeholder header: no cvirt command numbers defined yet */
+
+#endif /* _VX_CVIRT_CMD_H */
--- /dev/null
+#ifndef _VX_CVIRT_DEF_H
+#define _VX_CVIRT_DEF_H
+
+/*
+ * Per-context structure definitions for virtualized system state
+ * (thread counts, load averages, utsname, cpu usage) and socket
+ * accounting.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/utsname.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <asm/atomic.h>
+
+
+/* per-cpu time counters broken down by category */
+struct _vx_usage_stat {
+	uint64_t user;
+	uint64_t nice;
+	uint64_t system;
+	uint64_t softirq;
+	uint64_t irq;
+	uint64_t idle;
+	uint64_t iowait;
+};
+
+/* context sub struct */
+
+struct _vx_cvirt {
+	int max_threads;		/* maximum allowed threads */
+	atomic_t nr_threads;		/* number of current threads */
+	atomic_t nr_running;		/* number of running threads */
+	atomic_t nr_uninterruptible;	/* number of uninterruptible threads */
+
+	atomic_t nr_onhold;		/* processes on hold */
+	uint32_t onhold_last;		/* jiffies when put on hold */
+
+	struct timespec bias_idle;
+	struct timespec bias_uptime;	/* context creation point */
+	uint64_t bias_clock;		/* offset in clock_t */
+
+	struct new_utsname utsname;
+
+	spinlock_t load_lock;		/* lock for the load averages */
+	atomic_t load_updates;	/* nr of load updates done so far */
+	uint32_t load_last;		/* last time load was calculated */
+	uint32_t load[3];		/* load averages 1,5,15 */
+
+	struct _vx_usage_stat cpustat[NR_CPUS];
+};
+
+/* per-socket-type counters: number of operations and accumulated
+ * size (see __vx_acc_sock, which bumps count and adds to total) */
+struct _vx_sock_acc {
+	atomic_t count;
+	atomic_t total;
+};
+
+/* context sub struct */
+
+struct _vx_cacct {
+	unsigned long total_forks;
+
+	struct _vx_sock_acc sock[5][3];
+};
+
+#endif /* _VX_CVIRT_DEF_H */
#define _VX_DEBUG_H
+#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m)))
+#define VXD_CMIN(n,m) (vx_debug_ ## n > (m))
+#define VXD_MASK(n,m) (vx_debug_ ## n & (m))
+
+#define VXD_QPOS(v,p) (((uint32_t)(v) >> ((p)*8)) & 0xFF)
+#define VXD_QUAD(v) VXD_QPOS(v,0), VXD_QPOS(v,1), \
+ VXD_QPOS(v,2), VXD_QPOS(v,3)
+
+#define __FUNC__ __func__
+
+
+#ifdef CONFIG_VSERVER_DEBUG
+
extern unsigned int vx_debug_switch;
extern unsigned int vx_debug_xid;
extern unsigned int vx_debug_nid;
extern unsigned int vx_debug_cvirt;
-#define VXD_CBIT(n,m) (vx_debug_ ## n & (1 << (m)))
-#define VXD_CMIN(n,m) (vx_debug_ ## n > (m))
-#define VXD_MASK(n,m) (vx_debug_ ## n & (m))
-
-// #define VXD_HERE __FILE__, __LINE__
-
-
-#ifdef CONFIG_VSERVER_DEBUG
-
-#define VX_LOGLEVEL "vxD: "
+#define VX_LOGLEVEL "vxD: "
+#define VX_WARNLEVEL KERN_WARNING "vxW: "
#define vxdprintk(c,f,x...) \
do { \
if (c) \
- printk(VX_LOGLEVEL f "\n", x); \
- } while (0)
+ printk(VX_LOGLEVEL f "\n" , ##x); \
+ } while (0)
#define vxlprintk(c,f,x...) \
do { \
if (c) \
printk(VX_LOGLEVEL f " @%s:%d\n", x); \
- } while (0)
+ } while (0)
+
+#define vxfprintk(c,f,x...) \
+ do { \
+ if (c) \
+ printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
+ } while (0)
+
+
+#define vxwprintk(c,f,x...) \
+ do { \
+ if (c) \
+ printk(VX_WARNLEVEL f "\n" , ##x); \
+ } while (0)
+
+
+#define vxd_path(d,m) \
+ ({ static char _buffer[PATH_MAX]; \
+ d_path((d), (m), _buffer, sizeof(_buffer)); })
+
+#else /* CONFIG_VSERVER_DEBUG */
+
+#define vx_debug_switch 0
+#define vx_debug_xid 0
+#define vx_debug_nid 0
+#define vx_debug_net 0
+#define vx_debug_limit 0
+#define vx_debug_dlim 0
+#define vx_debug_cvirt 0
+
+#define vxdprintk(x...) do { } while (0)
+#define vxlprintk(x...) do { } while (0)
+#define vxfprintk(x...) do { } while (0)
+#define vxwprintk(x...) do { } while (0)
+
+#define vxd_path "<none>"
+
+#endif /* CONFIG_VSERVER_DEBUG */
+
+
+/* history stuff */
+
+#ifdef CONFIG_VSERVER_HISTORY
+
+
+/* nonzero while history recording is enabled; cleared by
+ * vxh_throw_oops() to stop acquisition after an oops */
+extern unsigned volatile int vxh_active;
+
+/* snapshot of a vx_info and its counters at the time of the event */
+struct _vxhe_vxi {
+	struct vx_info *ptr;
+	unsigned xid;
+	unsigned usecnt;
+	unsigned refcnt;
+};
+
+/* payload for set/clr events: the pointer slot being assigned */
+struct _vxhe_set_clr {
+	void *data;
+};
+
+/* payload for locate/lookup events: the id argument looked up */
+struct _vxhe_loc_lookup {
+	unsigned arg;
+};
+
+/* history event types */
+enum {
+	VXH_UNUSED=0,
+	VXH_THROW_OOPS=1,
+
+	VXH_GET_VX_INFO,
+	VXH_PUT_VX_INFO,
+	VXH_SET_VX_INFO,
+	VXH_CLR_VX_INFO,
+	VXH_ALLOC_VX_INFO,
+	VXH_DEALLOC_VX_INFO,
+	VXH_HASH_VX_INFO,
+	VXH_UNHASH_VX_INFO,
+	VXH_LOC_VX_INFO,
+	VXH_LOOKUP_VX_INFO,
+};
+
+/* one history record: code location (from VXH_HERE), sequence
+ * number, event type, vx_info snapshot and type-specific payload */
+struct _vx_hist_entry {
+	void *loc;
+	unsigned short seq;
+	unsigned short type;
+	struct _vxhe_vxi vxi;
+	union {
+		struct _vxhe_set_clr sc;
+		struct _vxhe_loc_lookup ll;
+	};
+};
+
+struct _vx_hist_entry *vxh_advance(void *loc);
+
+#define VXH_HERE() \
+ ({ __label__ here; \
+ here:; \
+ &&here; })
+
+
+
+/* record the vx_info identity and its current use/ref counts into a
+ * history entry; vxi may be NULL, in which case only ptr is stored */
+static inline void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
+{
+	entry->vxi.ptr = vxi;
+	if (vxi) {
+		entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
+		entry->vxi.refcnt = atomic_read(&vxi->vx_refcnt);
+		entry->vxi.xid = vxi->vx_id;
+	}
+}
+
+/* log an oops event and disable any further history recording so the
+ * trace leading up to the oops is preserved */
+static inline void vxh_throw_oops(void)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	entry->type = VXH_THROW_OOPS;
+
+	/* prevent further acquisition */
+	vxh_active = 0;
+}
+
+/*
+ * Event recorders: each grabs the next history slot at the current
+ * code location, snapshots the vx_info state, and tags the entry with
+ * its event type (plus the assigned pointer or looked-up id where
+ * applicable).
+ */
+static inline void vxh_get_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_GET_VX_INFO;
+}
+
+static inline void vxh_put_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_PUT_VX_INFO;
+}
+
+static inline void vxh_set_vx_info(struct vx_info *vxi, void *data)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->sc.data = data;
+	entry->type = VXH_SET_VX_INFO;
+}
+
+static inline void vxh_clr_vx_info(struct vx_info *vxi, void *data)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->sc.data = data;
+	entry->type = VXH_CLR_VX_INFO;
+}
+
+static inline void vxh_alloc_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_ALLOC_VX_INFO;
+}
+
+static inline void vxh_dealloc_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_DEALLOC_VX_INFO;
+}
+
+static inline void vxh_hash_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_HASH_VX_INFO;
+}
+
+static inline void vxh_unhash_vx_info(struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->type = VXH_UNHASH_VX_INFO;
+}
+
+static inline void vxh_loc_vx_info(unsigned arg, struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->ll.arg = arg;
+	entry->type = VXH_LOC_VX_INFO;
+}
+
+static inline void vxh_lookup_vx_info(unsigned arg, struct vx_info *vxi)
+{
+	struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
+
+	__vxh_copy_vxi(entry, vxi);
+	entry->ll.arg = arg;
+	entry->type = VXH_LOOKUP_VX_INFO;
+}
+
+extern void vxh_dump_history(void);
+
+#else /* CONFIG_VSERVER_HISTORY */
+
+#define vxh_throw_oops() do { } while (0)
+
+#define vxh_get_vx_info(v) do { } while (0)
+#define vxh_put_vx_info(v) do { } while (0)
+
+#define vxh_set_vx_info(v,d) do { } while (0)
+#define vxh_clr_vx_info(v,d) do { } while (0)
+
+#define vxh_alloc_vx_info(v) do { } while (0)
+#define vxh_dealloc_vx_info(v) do { } while (0)
-#else
+#define vxh_hash_vx_info(v) do { } while (0)
+#define vxh_unhash_vx_info(v) do { } while (0)
-#define vxdprintk(x...) do { } while (0)
-#define vxlprintk(x...) do { } while (0)
+#define vxh_loc_vx_info(a,v) do { } while (0)
+#define vxh_lookup_vx_info(a,v) do { } while (0)
-#endif
+#define vxh_dump_history() do { } while (0)
+#endif /* CONFIG_VSERVER_HISTORY */
#endif /* _VX_DEBUG_H */
--- /dev/null
+#ifndef _VX_DEBUG_CMD_H
+#define _VX_DEBUG_CMD_H
+
+
+/* debug commands */
+
+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
+
+#ifdef __KERNEL__
+
+extern int vc_dump_history(uint32_t);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_DEBUG_CMD_H */
typedef uint64_t dlsize_t;
-
#endif /* __KERNEL__ */
-
+#else /* _VX_DLIMIT_H */
+#warning duplicate inclusion
#endif /* _VX_DLIMIT_H */
extern int vc_get_iattr(uint32_t, void __user *);
extern int vc_set_iattr(uint32_t, void __user *);
-extern int vc_iattr_ioctl(struct dentry *de,
- unsigned int cmd,
- unsigned long arg);
-
#endif /* __KERNEL__ */
/* inode ioctls */
#define FIOC_GETXFLG _IOR('x', 5, long)
#define FIOC_SETXFLG _IOW('x', 6, long)
-#define FIOC_GETIATTR _IOR('x', 7, long)
-#define FIOC_SETIATTR _IOR('x', 8, long)
-
+#else /* _VX_INODE_H */
+#warning duplicate inclusion
#endif /* _VX_INODE_H */
#define _VX_LEGACY_H
#include "switch.h"
-#include "network.h"
/* compatibiliy vserver commands */
/* compatibiliy vserver arguments */
-struct vcmd_new_s_context_v1 {
+struct vcmd_new_s_context_v1 {
uint32_t remove_cap;
uint32_t flags;
};
-struct vcmd_set_ipv4root_v3 {
+struct vcmd_set_ipv4root_v3 {
/* number of pairs in id */
uint32_t broadcast;
struct {
/* of the context */
#define VX_INFO_NAMESPACE 128 /* save private namespace */
-
+
#define NB_S_CONTEXT 16
#define NB_IPV4ROOT 16
-#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <asm/atomic.h>
-#include <asm/resource.h>
-
-/* context sub struct */
-
-#define RLIMIT_OPENFD 12
-
-#define NUM_RLIMITS 16
-
-#define VLIMIT_SOCK 16
-
-
-struct _vx_limit {
- atomic_t ticks;
-
- unsigned long rlim[NUM_RLIMITS]; /* Per context limit */
- atomic_t res[NUM_RLIMITS]; /* Current value */
-};
-
-static inline void vx_info_init_limit(struct _vx_limit *limit)
-{
- int lim;
-
- for (lim=0; lim<NUM_RLIMITS; lim++) {
- limit->rlim[lim] = RLIM_INFINITY;
- atomic_set(&limit->res[lim], 0);
- }
-}
-
-extern unsigned int vx_debug_limit;
-
-static inline void vx_info_exit_limit(struct _vx_limit *limit)
-{
- int lim, value;
-
- for (lim=0; lim<NUM_RLIMITS; lim++) {
- value = atomic_read(&limit->res[lim]);
- if (value && vx_debug_limit)
- printk("!!! limit: %p[%d] = %d on exit.\n",
- limit, lim, value);
- }
-}
-
-
-static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
-{
- return sprintf(buffer,
- "PROC:\t%8d/%ld\n"
- "VM:\t%8d/%ld\n"
- "VML:\t%8d/%ld\n"
- "RSS:\t%8d/%ld\n"
- "FILES:\t%8d/%ld\n"
- "OFD:\t%8d/%ld\n"
- ,atomic_read(&limit->res[RLIMIT_NPROC])
- ,limit->rlim[RLIMIT_NPROC]
- ,atomic_read(&limit->res[RLIMIT_AS])
- ,limit->rlim[RLIMIT_AS]
- ,atomic_read(&limit->res[RLIMIT_MEMLOCK])
- ,limit->rlim[RLIMIT_MEMLOCK]
- ,atomic_read(&limit->res[RLIMIT_RSS])
- ,limit->rlim[RLIMIT_RSS]
- ,atomic_read(&limit->res[RLIMIT_NOFILE])
- ,limit->rlim[RLIMIT_NOFILE]
- ,atomic_read(&limit->res[RLIMIT_OPENFD])
- ,limit->rlim[RLIMIT_OPENFD]
- );
-}
-
-#else /* _VX_INFO_DEF_ */
#ifndef _VX_LIMIT_H
#define _VX_LIMIT_H
-#include "switch.h"
-
-/* rlimit vserver commands */
-
-#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
-#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
-#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
-
-struct vcmd_ctx_rlimit_v0 {
- uint32_t id;
- uint64_t minimum;
- uint64_t softlimit;
- uint64_t maximum;
-};
-
-struct vcmd_ctx_rlimit_mask_v0 {
- uint32_t minimum;
- uint32_t softlimit;
- uint32_t maximum;
-};
-
-#define CRLIM_UNSET (0ULL)
-#define CRLIM_INFINITY (~0ULL)
-#define CRLIM_KEEP (~1ULL)
-
#ifdef __KERNEL__
-#include <linux/compiler.h>
-
-extern int vc_get_rlimit(uint32_t, void __user *);
-extern int vc_set_rlimit(uint32_t, void __user *);
-extern int vc_get_rlimit_mask(uint32_t, void __user *);
-
struct sysinfo;
void vx_vsi_meminfo(struct sysinfo *);
void vx_vsi_swapinfo(struct sysinfo *);
+#define VXD_RLIMIT(r,l) (VXD_CBIT(limit, (l)) && ((r) == (l)))
-#endif /* __KERNEL__ */
+#define NUM_LIMITS 20
+#define VLIMIT_NSOCK 16
+
+extern const char *vlimit_name[NUM_LIMITS];
+
+#endif /* __KERNEL__ */
#endif /* _VX_LIMIT_H */
-#endif
--- /dev/null
+#ifndef _VX_LIMIT_CMD_H
+#define _VX_LIMIT_CMD_H
+
+/* rlimit vserver commands */
+
+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
+
+struct vcmd_ctx_rlimit_v0 {
+ uint32_t id;
+ uint64_t minimum;
+ uint64_t softlimit;
+ uint64_t maximum;
+};
+
+struct vcmd_ctx_rlimit_mask_v0 {
+ uint32_t minimum;
+ uint32_t softlimit;
+ uint32_t maximum;
+};
+
+#define CRLIM_UNSET (0ULL)
+#define CRLIM_INFINITY (~0ULL)
+#define CRLIM_KEEP (~1ULL)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_LIMIT_CMD_H */
--- /dev/null
+#ifndef _VX_LIMIT_DEF_H
+#define _VX_LIMIT_DEF_H
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+#include "limit.h"
+
+/* context sub struct */
+
+struct _vx_limit {
+ atomic_t ticks;
+
+ unsigned long rlim[NUM_LIMITS]; /* Context limit */
+ unsigned long rmax[NUM_LIMITS]; /* Context maximum */
+ atomic_t rcur[NUM_LIMITS]; /* Current value */
+ atomic_t lhit[NUM_LIMITS]; /* Limit hits */
+};
+
+
+#endif /* _VX_LIMIT_DEF_H */
#include <linux/types.h>
-
+
/* virtual host info names */
#define VCMD_vx_set_vhi_name VC_CMD(VHOST, 1, 0)
#define VCMD_vx_get_vhi_name VC_CMD(VHOST, 2, 0)
-struct vcmd_vx_vhi_name_v0 {
+struct vcmd_vx_vhi_name_v0 {
uint32_t field;
char name[65];
};
struct vx_info;
struct namespace;
struct fs_struct;
+struct vfsmount;
+
+extern int vx_check_vfsmount(struct vx_info *, struct vfsmount *);
extern int vx_set_namespace(struct vx_info *, struct namespace *, struct fs_struct *);
extern int vc_set_namespace(uint32_t, void __user *);
#endif /* __KERNEL__ */
+#else /* _VX_NAMESPACE_H */
+#warning duplicate inclusion
#endif /* _VX_NAMESPACE_H */
#ifndef _VX_NETWORK_H
#define _VX_NETWORK_H
+#include <linux/types.h>
+
+
#define MAX_N_CONTEXT 65535 /* Arbitrary limit */
#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */
#define NB_IPV4ROOT 16
+
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/spinlock.h>
-#include <linux/utsname.h>
#include <linux/rcupdate.h>
-#include <asm/resource.h>
#include <asm/atomic.h>
struct rcu_head;
-extern void rcu_free_nx_info(struct rcu_head *);
extern void unhash_nx_info(struct nx_info *);
+extern void free_nx_info(struct nx_info *);
+
extern struct nx_info *locate_nx_info(int);
extern struct nx_info *locate_or_create_nx_info(int);
int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *);
-
#endif /* __KERNEL__ */
-
-#include "switch.h"
-
-/* vinfo commands */
-
-#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_nid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
-
-struct vcmd_nx_info_v0 {
- uint32_t nid;
- /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_nx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_net_create VC_CMD(VNET, 1, 0)
-#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
-
-#define VCMD_net_add VC_CMD(NETALT, 1, 0)
-#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
-
-struct vcmd_net_nx_v0 {
- uint16_t type;
- uint16_t count;
- uint32_t ip[4];
- uint32_t mask[4];
- /* more to come */
-};
-
-// IPN_TYPE_IPV4
-
-
-#ifdef __KERNEL__
-extern int vc_net_create(uint32_t, void __user *);
-extern int vc_net_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
-#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
-
-struct vcmd_net_flags_v0 {
- uint64_t flagword;
- uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_nflags(uint32_t, void __user *);
-extern int vc_set_nflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define IPF_STATE_SETUP (1ULL<<32)
-
-
-#define IPF_ONE_TIME (0x0001ULL<<32)
-
-#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
-#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
-
-struct vcmd_net_caps_v0 {
- uint64_t ncaps;
- uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ncaps(uint32_t, void __user *);
-extern int vc_set_ncaps(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define IPC_WOSSNAME 0x00000001
-
-
+#else /* _VX_NETWORK_H */
+#warning duplicate inclusion
#endif /* _VX_NETWORK_H */
--- /dev/null
+#ifndef _VX_NETWORK_CMD_H
+#define _VX_NETWORK_CMD_H
+
+
+/* vinfo commands */
+
+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
+
+struct vcmd_nx_info_v0 {
+ uint32_t nid;
+ /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
+
+struct vcmd_net_nx_v0 {
+ uint16_t type;
+ uint16_t count;
+ uint32_t ip[4];
+ uint32_t mask[4];
+ /* more to come */
+};
+
+// IPN_TYPE_IPV4
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
+
+struct vcmd_net_flags_v0 {
+ uint64_t flagword;
+ uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP (1ULL<<32)
+
+
+#define IPF_ONE_TIME (0x0001ULL<<32)
+
+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
+
+struct vcmd_net_caps_v0 {
+ uint64_t ncaps;
+ uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_NETWORK_CMD_H */
-/* _VX_SCHED_H defined below */
-
-#if defined(__KERNEL__) && defined(_VX_INFO_DEF_)
-
-#include <linux/spinlock.h>
-#include <linux/jiffies.h>
-#include <linux/cpumask.h>
-#include <asm/atomic.h>
-#include <asm/param.h>
-
-struct _vx_ticks {
- uint64_t user_ticks; /* token tick events */
- uint64_t sys_ticks; /* token tick events */
- uint64_t hold_ticks; /* token ticks paused */
- uint64_t unused[5]; /* cacheline ? */
-};
-
-/* context sub struct */
-
-struct _vx_sched {
- atomic_t tokens; /* number of CPU tokens */
- spinlock_t tokens_lock; /* lock for token bucket */
-
- int fill_rate; /* Fill rate: add X tokens... */
- int interval; /* Divisor: per Y jiffies */
- int tokens_min; /* Limit: minimum for unhold */
- int tokens_max; /* Limit: no more than N tokens */
- uint32_t jiffies; /* last time accounted */
-
- int priority_bias; /* bias offset for priority */
- cpumask_t cpus_allowed; /* cpu mask for context */
-
- struct _vx_ticks cpu[NR_CPUS];
-};
-
-static inline void vx_info_init_sched(struct _vx_sched *sched)
-{
- int i;
-
- /* scheduling; hard code starting values as constants */
- sched->fill_rate = 1;
- sched->interval = 4;
- sched->tokens_min = HZ >> 4;
- sched->tokens_max = HZ >> 1;
- sched->jiffies = jiffies;
- sched->tokens_lock = SPIN_LOCK_UNLOCKED;
-
- atomic_set(&sched->tokens, HZ >> 2);
- sched->cpus_allowed = CPU_MASK_ALL;
- sched->priority_bias = 0;
-
- for_each_cpu(i) {
- sched->cpu[i].user_ticks = 0;
- sched->cpu[i].sys_ticks = 0;
- sched->cpu[i].hold_ticks = 0;
- }
-}
-
-static inline void vx_info_exit_sched(struct _vx_sched *sched)
-{
- return;
-}
-
-static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
-{
- int length = 0;
- int i;
-
- length += sprintf(buffer,
- "Token:\t\t%8d\n"
- "FillRate:\t%8d\n"
- "Interval:\t%8d\n"
- "TokensMin:\t%8d\n"
- "TokensMax:\t%8d\n"
- "PrioBias:\t%8d\n"
- ,atomic_read(&sched->tokens)
- ,sched->fill_rate
- ,sched->interval
- ,sched->tokens_min
- ,sched->tokens_max
- ,sched->priority_bias
- );
-
- for_each_online_cpu(i) {
- length += sprintf(buffer + length,
- "cpu %d: %lld %lld %lld\n"
- ,i
- ,(long long)sched->cpu[i].user_ticks
- ,(long long)sched->cpu[i].sys_ticks
- ,(long long)sched->cpu[i].hold_ticks
- );
- }
-
- return length;
-}
-
-
-#else /* _VX_INFO_DEF_ */
#ifndef _VX_SCHED_H
#define _VX_SCHED_H
-#include "switch.h"
-
-/* sched vserver commands */
-
-#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2)
-#define VCMD_set_sched VC_CMD(SCHED, 1, 3)
-
-struct vcmd_set_sched_v2 {
- int32_t fill_rate;
- int32_t interval;
- int32_t tokens;
- int32_t tokens_min;
- int32_t tokens_max;
- uint64_t cpu_mask;
-};
-
-struct vcmd_set_sched_v3 {
- uint32_t set_mask;
- int32_t fill_rate;
- int32_t interval;
- int32_t tokens;
- int32_t tokens_min;
- int32_t tokens_max;
- int32_t priority_bias;
-};
-
-
-#define VXSM_FILL_RATE 0x0001
-#define VXSM_INTERVAL 0x0002
-#define VXSM_TOKENS 0x0010
-#define VXSM_TOKENS_MIN 0x0020
-#define VXSM_TOKENS_MAX 0x0040
-#define VXSM_PRIO_BIAS 0x0100
-
-#define SCHED_KEEP (-2)
-
#ifdef __KERNEL__
-extern int vc_set_sched_v1(uint32_t, void __user *);
-extern int vc_set_sched_v2(uint32_t, void __user *);
-extern int vc_set_sched(uint32_t, void __user *);
+struct timespec;
+void vx_vsi_uptime(struct timespec *, struct timespec *);
-#define VAVAVOOM_RATIO 50
-#define MAX_PRIO_BIAS 20
-#define MIN_PRIO_BIAS -20
+struct vx_info;
-#include "context.h"
+void vx_update_load(struct vx_info *);
-/* scheduling stuff */
+struct task_struct;
int effective_vavavoom(struct task_struct *, int);
int vx_tokens_recalc(struct vx_info *);
-/* new stuff ;) */
-
-static inline int vx_tokens_avail(struct vx_info *vxi)
-{
- return atomic_read(&vxi->sched.tokens);
-}
-
-static inline void vx_consume_token(struct vx_info *vxi)
-{
- atomic_dec(&vxi->sched.tokens);
-}
-
-static inline int vx_need_resched(struct task_struct *p)
-{
-#ifdef CONFIG_VSERVER_HARDCPU
- struct vx_info *vxi = p->vx_info;
-#endif
- int slice = --p->time_slice;
-
-#ifdef CONFIG_VSERVER_HARDCPU
- if (vxi) {
- int tokens;
-
- if ((tokens = vx_tokens_avail(vxi)) > 0)
- vx_consume_token(vxi);
- /* for tokens > 0, one token was consumed */
- if (tokens < 2)
- return 1;
- }
-#endif
- return (slice == 0);
-}
-
-
-static inline void vx_onhold_inc(struct vx_info *vxi)
-{
- int onhold = atomic_read(&vxi->cvirt.nr_onhold);
-
- atomic_inc(&vxi->cvirt.nr_onhold);
- if (!onhold)
- vxi->cvirt.onhold_last = jiffies;
-}
-
-static inline void __vx_onhold_update(struct vx_info *vxi)
-{
- int cpu = smp_processor_id();
- uint32_t now = jiffies;
- uint32_t delta = now - vxi->cvirt.onhold_last;
-
- vxi->cvirt.onhold_last = now;
- vxi->sched.cpu[cpu].hold_ticks += delta;
-}
-
-static inline void vx_onhold_dec(struct vx_info *vxi)
-{
- if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
- __vx_onhold_update(vxi);
-}
-
#endif /* __KERNEL__ */
-
+#else /* _VX_SCHED_H */
+#warning duplicate inclusion
#endif /* _VX_SCHED_H */
-#endif
--- /dev/null
+#ifndef _VX_SCHED_CMD_H
+#define _VX_SCHED_CMD_H
+
+/* sched vserver commands */
+
+#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2)
+#define VCMD_set_sched VC_CMD(SCHED, 1, 3)
+
+struct vcmd_set_sched_v2 {
+ int32_t fill_rate;
+ int32_t interval;
+ int32_t tokens;
+ int32_t tokens_min;
+ int32_t tokens_max;
+ uint64_t cpu_mask;
+};
+
+struct vcmd_set_sched_v3 {
+ uint32_t set_mask;
+ int32_t fill_rate;
+ int32_t interval;
+ int32_t tokens;
+ int32_t tokens_min;
+ int32_t tokens_max;
+ int32_t priority_bias;
+};
+
+
+#define VXSM_FILL_RATE 0x0001
+#define VXSM_INTERVAL 0x0002
+#define VXSM_TOKENS 0x0010
+#define VXSM_TOKENS_MIN 0x0020
+#define VXSM_TOKENS_MAX 0x0040
+#define VXSM_PRIO_BIAS 0x0100
+
+#define SCHED_KEEP (-2)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched_v2(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_SCHED_CMD_H */
--- /dev/null
+#ifndef _VX_SCHED_DEF_H
+#define _VX_SCHED_DEF_H
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/cpumask.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+
+
+struct _vx_ticks {
+ uint64_t user_ticks; /* token tick events */
+ uint64_t sys_ticks; /* token tick events */
+ uint64_t hold_ticks; /* token ticks paused */
+ uint64_t unused[5]; /* cacheline ? */
+};
+
+/* context sub struct */
+
+struct _vx_sched {
+ atomic_t tokens; /* number of CPU tokens */
+ spinlock_t tokens_lock; /* lock for token bucket */
+
+ int fill_rate; /* Fill rate: add X tokens... */
+ int interval; /* Divisor: per Y jiffies */
+ int tokens_min; /* Limit: minimum for unhold */
+ int tokens_max; /* Limit: no more than N tokens */
+ uint32_t jiffies; /* last time accounted */
+
+ int priority_bias; /* bias offset for priority */
+ cpumask_t cpus_allowed; /* cpu mask for context */
+
+ struct _vx_ticks cpu[NR_CPUS];
+};
+
+#endif /* _VX_SCHED_DEF_H */
/*
- Syscall Matrix V2.6
+ Syscall Matrix V2.8
- |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
- |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
- |INFO |SETUP | |MOVE | | | | | |
+ |VERSION|CREATE |MODIFY |MIGRATE|CONTROL|EXPERIM| |SPECIAL|SPECIAL|
+ |STATS |DESTROY|ALTER |CHANGE |LIMIT |TEST | | | |
+ |INFO |SETUP | |MOVE | | | | | |
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
SYSTEM |VERSION|VSETUP |VHOST | | | | |DEVICES| |
HOST | 00| 01| 02| 03| 04| 05| | 06| 07|
PROCESS| 08| 09| 10| 11| 12| 13| | 14| 15|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
MEMORY | | | | | | | |SWAP | |
- | 16| 17| 18| 19| 20| 21| | 22| 23|
+ | 16| 17| 18| 19| 20| 21| | 22| 23|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
NETWORK| |VNET |NETALT |NETMIG |NETCTL | | |SERIAL | |
- | 24| 25| 26| 27| 28| 29| | 30| 31|
+ | 24| 25| 26| 27| 28| 29| | 30| 31|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
- DISK | | | | | | | |INODE | |
+ DISK | | | | |DLIMIT | | |INODE | |
VFS | 32| 33| 34| 35| 36| 37| | 38| 39|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
OTHER | | | | | | | |VINFO | |
- | 40| 41| 42| 43| 44| 45| | 46| 47|
+ | 40| 41| 42| 43| 44| 45| | 46| 47|
=======+=======+=======+=======+=======+=======+=======+ +=======+=======+
- SPECIAL| | | | |FLAGS | | | | |
- | 48| 49| 50| 51| 52| 53| | 54| 55|
+ SPECIAL|EVENT | | | |FLAGS | | | | |
+ | 48| 49| 50| 51| 52| 53| | 54| 55|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
- SPECIAL| | | | |RLIMIT |SYSCALL| | |COMPAT |
- | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
+ SPECIAL|DEBUG | | | |RLIMIT |SYSCALL| | |COMPAT |
+ | 56| 57| 58| 59| 60|TEST 61| | 62| 63|
-------+-------+-------+-------+-------+-------+-------+ +-------+-------+
*/
#define VC_CAT_VSETUP 1
#define VC_CAT_VHOST 2
-
+
#define VC_CAT_VPROC 9
#define VC_CAT_PROCALT 10
#define VC_CAT_PROCMIG 11
#define VC_CAT_NETMIG 27
#define VC_CAT_NETCTRL 28
+#define VC_CAT_DLIMIT 36
#define VC_CAT_INODE 38
#define VC_CAT_VINFO 46
+#define VC_CAT_EVENT 48
#define VC_CAT_FLAGS 52
+#define VC_CAT_DEBUG 56
#define VC_CAT_RLIMIT 60
#define VC_CAT_SYSTEST 61
#define VC_CAT_COMPAT 63
-
+
/* interface version */
-#define VCI_VERSION 0x00010016
+#define VCI_VERSION 0x00010025
/* query version */
#include <linux/errno.h>
-#define ENOTSUP -EOPNOTSUPP
#else /* __KERNEL__ */
#define __user
-#ifndef _LINUX_XID_H_
-#define _LINUX_XID_H_
+#ifndef _VX_XID_H
+#define _VX_XID_H
+
+
+#define XID_TAG(in) (!(in) || \
+ (((struct inode *)in)->i_sb && \
+ (((struct inode *)in)->i_sb->s_flags & MS_TAGXID)))
+
#ifdef CONFIG_INOXID_NONE
#define MAX_UID 0xFFFFFFFF
#define MAX_GID 0xFFFFFFFF
-#define INOXID_XID(uid, gid, xid) (0)
+#define INOXID_XID(tag, uid, gid, xid) (0)
-#define XIDINO_UID(uid, xid) (uid)
-#define XIDINO_GID(gid, xid) (gid)
+#define XIDINO_UID(tag, uid, xid) (uid)
+#define XIDINO_GID(tag, gid, xid) (gid)
#endif
#define MAX_UID 0xFFFFFFFF
#define MAX_GID 0x0000FFFF
-#define INOXID_XID(uid, gid, xid) (((gid) >> 16) & 0xFFFF)
-
-#define XIDINO_UID(uid, xid) (uid)
-#define XIDINO_GID(gid, xid) (((gid) & 0xFFFF) | ((xid) << 16))
+#define INOXID_XID(tag, uid, gid, xid) \
+ ((tag) ? (((gid) >> 16) & 0xFFFF) : 0)
+#define XIDINO_UID(tag, uid, xid) (uid)
+#define XIDINO_GID(tag, gid, xid) \
+ ((tag) ? (((gid) & 0xFFFF) | ((xid) << 16)) : (gid))
#endif
-#ifdef CONFIG_INOXID_GID24
+#ifdef CONFIG_INOXID_UGID24
#define MAX_UID 0x00FFFFFF
#define MAX_GID 0x00FFFFFF
-#define INOXID_XID(uid, gid, xid) ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF))
+#define INOXID_XID(tag, uid, gid, xid) \
+ ((tag) ? ((((uid) >> 16) & 0xFF00) | (((gid) >> 24) & 0xFF)) : 0)
+
+#define XIDINO_UID(tag, uid, xid) \
+ ((tag) ? (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16)) : (uid))
+#define XIDINO_GID(tag, gid, xid) \
+ ((tag) ? (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24)) : (gid))
+
+#endif
+
+
+#ifdef CONFIG_INOXID_UID16
+
+#define MAX_UID 0x0000FFFF
+#define MAX_GID 0xFFFFFFFF
-#define XIDINO_UID(uid, xid) (((uid) & 0xFFFFFF) | (((xid) & 0xFF00) << 16))
-#define XIDINO_GID(gid, xid) (((gid) & 0xFFFFFF) | (((xid) & 0x00FF) << 24))
+/* xid lives in the upper 16 bits of the uid; gid is left untouched */
+#define INOXID_XID(tag, uid, gid, xid) \
+ ((tag) ? (((uid) >> 16) & 0xFFFF) : 0)
+
+#define XIDINO_UID(tag, uid, xid) \
+ ((tag) ? (((uid) & 0xFFFF) | ((xid) << 16)) : (uid))
+#define XIDINO_GID(tag, gid, xid) (gid)
#endif
-#ifdef CONFIG_INOXID_GID32
+#ifdef CONFIG_INOXID_INTERN
#define MAX_UID 0xFFFFFFFF
#define MAX_GID 0xFFFFFFFF
-#define INOXID_XID(uid, gid, xid) (xid)
+#define INOXID_XID(tag, uid, gid, xid) \
+ ((tag) ? (xid) : 0)
-#define XIDINO_UID(uid, xid) (uid)
-#define XIDINO_GID(gid, xid) (gid)
+#define XIDINO_UID(tag, uid, xid) (uid)
+#define XIDINO_GID(tag, gid, xid) (gid)
#endif
#define MAX_UID 0xFFFFFFFF
#define MAX_GID 0xFFFFFFFF
-#define INOXID_XID(uid, gid, xid) (0)
+#define INOXID_XID(tag, uid, gid, xid) (0)
-#define XIDINO_UID(uid, xid) (uid)
-#define XIDINO_GID(gid, xid) (gid)
+#define XIDINO_UID(tag, uid, xid) (uid)
+#define XIDINO_GID(tag, gid, xid) (gid)
#endif
-#define INOXID_UID(uid, gid) ((uid) & MAX_UID)
-#define INOXID_GID(uid, gid) ((gid) & MAX_GID)
+#define INOXID_UID(tag, uid, gid) \
+ ((tag) ? ((uid) & MAX_UID) : (uid))
+#define INOXID_GID(tag, uid, gid) \
+ ((tag) ? ((gid) & MAX_GID) : (gid))
+
static inline uid_t vx_map_uid(uid_t uid)
{
}
-#ifdef CONFIG_VSERVER_LEGACY
+#ifdef CONFIG_VSERVER_LEGACY
#define FIOC_GETXID _IOR('x', 1, long)
#define FIOC_SETXID _IOW('x', 2, long)
#define FIOC_SETXIDJ _IOW('x', 3, long)
#endif
-#endif /* _LINUX_XID_H_ */
+int vx_parse_xid(char *string, xid_t *xid, int remove);
+void vx_propagate_xid(struct nameidata *nd, struct inode *inode);
+
+#endif /* _VX_XID_H */
#include <linux/route.h>
#include <linux/ip.h>
#include <linux/cache.h>
-#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_network.h>
return ip_tos2prio[IPTOS_TOS(tos)>>1];
}
+#define IPI_LOOPBACK 0x0100007f
+
+static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
+{
+ int err;
+ int i, n = nxi->nbipv4;
+ u32 ipv4root = nxi->ipv4[0];
+
+ if (ipv4root == 0)
+ return 0;
+
+ if (fl->fl4_src == 0) {
+ if (n > 1) {
+ u32 foundsrc;
+
+ err = __ip_route_output_key(rp, fl);
+ if (err) {
+ fl->fl4_src = ipv4root;
+ err = __ip_route_output_key(rp, fl);
+ }
+ if (err)
+ return err;
+
+ foundsrc = (*rp)->rt_src;
+ ip_rt_put(*rp);
+
+ for (i=0; i<n; i++){
+ u32 mask = nxi->mask[i];
+ u32 ipv4 = nxi->ipv4[i];
+ u32 net4 = ipv4 & mask;
+
+ if (foundsrc == ipv4) {
+ fl->fl4_src = ipv4;
+ break;
+ }
+ if (!fl->fl4_src && (foundsrc & mask) == net4)
+ fl->fl4_src = ipv4;
+ }
+ }
+ if (fl->fl4_src == 0)
+ fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
+ ? IPI_LOOPBACK : ipv4root;
+ } else {
+ for (i=0; i<n; i++) {
+ if (nxi->ipv4[i] == fl->fl4_src)
+ break;
+ }
+ if (i == n)
+ return -EPERM;
+ }
+ return 0;
+}
+
static inline int ip_route_connect(struct rtable **rp, u32 dst,
u32 src, u32 tos, int oif, u8 protocol,
u16 sport, u16 dport, struct sock *sk)
.dport = dport } } };
int err;
- if (!dst || !src) {
+ struct nx_info *nx_info = current->nx_info;
+
+ if (sk)
+ nx_info = sk->sk_nx_info;
+ vxdprintk(VXD_CBIT(net, 4),
+ "ip_route_connect(%p) %p,%p;%lx",
+ sk, nx_info, sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+
+ if (nx_info) {
+ err = ip_find_src(nx_info, rp, &fl);
+ if (err)
+ return err;
+ if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
+ fl.fl4_dst = nx_info->ipv4[0];
+ }
+ if (!fl.fl4_dst || !fl.fl4_src) {
err = __ip_route_output_key(rp, &fl);
if (err)
return err;
msq->q_perm.mode = (msgflg & S_IRWXUGO);
msq->q_perm.key = key;
- msq->q_perm.xid = current->xid;
+ msq->q_perm.xid = vx_current_xid();
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(msq);
sma->sem_perm.mode = (semflg & S_IRWXUGO);
sma->sem_perm.key = key;
- sma->sem_perm.xid = current->xid;
+ sma->sem_perm.xid = vx_current_xid();
sma->sem_perm.security = NULL;
retval = security_sem_alloc(sma);
return -ENOMEM;
shp->shm_perm.key = key;
- shp->shm_perm.xid = current->xid;
+ shp->shm_perm.xid = vx_current_xid();
shp->shm_flags = (shmflg & S_IRWXUGO);
shp->mlock_user = NULL;
subdir-y += vserver
obj-y += vserver/vserver.o
+subdir-y += vserver
+obj-y += vserver/vserver.o
+
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
#include <linux/security.h>
#include <linux/vs_cvirt.h>
#include <linux/syscalls.h>
+#include <linux/vs_cvirt.h>
+
#include <asm/uaccess.h>
unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
#include <linux/vs_limit.h>
#include <linux/ckrm_mem.h>
#include <linux/syscalls.h>
+#include <linux/vs_limit.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
struct file * file = xchg(&files->fd[i], NULL);
if (file)
filp_close(file, files);
+ // vx_openfd_dec(i);
}
i++;
set >>= 1;
struct task_struct *p, *reaper = father;
struct list_head *_p, *_n;
+ /* FIXME handle vchild_reaper/initpid */
do {
reaper = next_thread(reaper);
if (reaper == father) {
#include <linux/audit.h>
#include <linux/profile.h>
#include <linux/rmap.h>
-#include <linux/vs_network.h>
-#include <linux/vs_limit.h>
-#include <linux/vs_memory.h>
#include <linux/ckrm.h>
#include <linux/ckrm_tsk.h>
#include <linux/ckrm_mem_inline.h>
+#include <linux/vs_network.h>
+#include <linux/vs_limit.h>
+#include <linux/vs_memory.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
destroy_context(mm);
- clr_vx_info(&mm->mm_vx_info);
#ifdef CONFIG_CKRM_RES_MEM
/* class can be null and mm's tasklist can be empty here */
if (mm->memclass) {
mm->memclass = NULL;
}
#endif
+ clr_vx_info(&mm->mm_vx_info);
free_mm(mm);
}
goto bad_fork_cleanup_vm;
}
+ p->vx_info = NULL;
+ set_vx_info(&p->vx_info, current->vx_info);
+ p->nx_info = NULL;
+ set_nx_info(&p->nx_info, current->nx_info);
+
+ /* check vserver memory */
+ if (p->mm && !(clone_flags & CLONE_VM)) {
+ if (vx_vmpages_avail(p->mm, p->mm->total_vm))
+ vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
+ else
+ goto bad_fork_free;
+ }
+ if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
+ if (!vx_rsspages_avail(p->mm, p->mm->rss))
+ goto bad_fork_cleanup_vm;
+ }
+
retval = -EAGAIN;
if (!vx_nproc_avail(1))
goto bad_fork_cleanup_vm;
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <linux/vs_cvirt.h>
#ifndef div_long_long_rem
#include <asm/div64.h>
return r;
}
+static volatile int printk_cpu = -1;
+
asmlinkage int vprintk(const char *fmt, va_list args)
{
unsigned long flags;
static char printk_buf[1024];
static int log_level_unknown = 1;
- if (unlikely(oops_in_progress))
+ if (unlikely(oops_in_progress && printk_cpu == smp_processor_id()))
zap_locks();
/* This stops the holder of console_sem just where we want him */
spin_lock_irqsave(&logbuf_lock, flags);
+ printk_cpu = smp_processor_id();
/* Emit the output into the temporary buffer */
printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
#include <asm/tlb.h>
#include <asm/unistd.h>
+#include <linux/vs_context.h>
+#include <linux/vs_cvirt.h>
+#include <linux/vs_sched.h>
#ifdef CONFIG_NUMA
#define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
task_t *migration_thread;
struct list_head migration_queue;
#endif
+#ifdef CONFIG_VSERVER_HARDCPU
+ struct list_head hold_queue;
+ int idle_tokens;
+#endif
#ifdef CONFIG_VSERVER_HARDCPU
struct list_head hold_queue;
bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
prio = p->static_prio - bonus;
-
#ifdef CONFIG_VSERVER_HARDCPU
if (task_vx_flags(p, VXF_SCHED_PRIO, 0))
prio += effective_vavavoom(p, MAX_USER_PRIO);
#endif
-
if (prio < MAX_RT_PRIO)
prio = MAX_RT_PRIO;
if (prio > MAX_PRIO-1)
p->array = NULL;
}
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static inline
+void deactivate_task(struct task_struct *p, runqueue_t *rq)
{
- __deactivate_task(p, rq);
vx_deactivate_task(p);
+ __deactivate_task(p, rq);
}
/*
* to be considered on this CPU.)
*/
activate_task(p, rq, cpu == this_cpu);
+ /* this is to get the accounting behind the load update */
+ if (old_state == TASK_UNINTERRUPTIBLE)
+ vx_uninterruptible_dec(p);
if (!sync || cpu != this_cpu) {
if (TASK_PREEMPTS_CURR(p, rq))
resched_task(rq->curr);
if (rcu_pending(cpu))
rcu_check_callbacks(cpu, user_ticks);
-
if (vxi) {
vxi->sched.cpu[cpu].user_ticks += user_ticks;
vxi->sched.cpu[cpu].sys_ticks += sys_ticks;
if (wake_priority_sleeper(rq))
goto out;
+
ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
#ifdef CONFIG_VSERVER_HARDCPU_IDLE
}
goto out_unlock;
}
+#warning MEF: vx_need_resched incorpates standard kernel code, which it should not.
if (vx_need_resched(p)) {
#ifdef CONFIG_CKRM_CPU_SCHEDULE
/* Hubertus ... we can abstract this out */
prio_array_t *array;
unsigned long long now;
unsigned long run_time;
- int cpu;
#ifdef CONFIG_VSERVER_HARDCPU
struct vx_info *vxi;
int maxidle = -HZ;
#endif
+ int cpu;
/*
* If crash dump is in progress, this other cpu's
if (unlikely(dump_oncpu))
goto dump_scheduling_disabled;
-
/*
* Test if we are atomic. Since do_exit() needs to call into
* schedule() atomically, we ignore that path for now.
unlikely(signal_pending(prev))))
prev->state = TASK_RUNNING;
else {
- if (prev->state == TASK_UNINTERRUPTIBLE)
+ if (prev->state == TASK_UNINTERRUPTIBLE) {
rq->nr_uninterruptible++;
+ vx_uninterruptible_inc(prev);
+ }
deactivate_task(prev, rq);
}
}
*/
next = rq_get_next_task(rq);
+#ifdef CONFIG_VSERVER_HARDCPU
+ vxi = next->vx_info;
+ if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
+ int ret = vx_tokens_recalc(vxi);
+
+ if (unlikely(ret <= 0)) {
+ if (ret && (rq->idle_tokens > -ret))
+ rq->idle_tokens = -ret;
+ __deactivate_task(next, rq);
+ recalc_task_prio(next, now);
+ // a new one on hold
+ vx_onhold_inc(vxi);
+ next->state |= TASK_ONHOLD;
+ list_add_tail(&next->run_list, &rq->hold_queue);
+ //printk("··· %8lu hold %p [%d]\n", jiffies, next, next->prio);
+ goto pick_next;
+ }
+ }
+#endif
+
#ifdef CONFIG_VSERVER_HARDCPU
vxi = next->vx_info;
if (vx_info_flags(vxi, VXF_SCHED_PAUSE|VXF_SCHED_HARD, 0)) {
if (sig < 0 || sig > _NSIG)
return error;
-
user = (!info ||
(info != SEND_SIG_PRIV &&
info != SEND_SIG_FORCED &&
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/ckrm.h>
-#include <linux/vs_base.h>
-#include <linux/vs_cvirt.h>
#include <linux/tty.h>
-
+#include <linux/vs_cvirt.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
This enables the legacy API used in vs1.xx, which allows
to use older tools (for migration purposes).
-config PROC_SECURE
+config VSERVER_PROC_SECURE
bool "Enable Proc Security"
depends on PROC_FS
default y
help
Activate the Hard CPU Limits
+config VSERVER_HARDCPU_IDLE
+ bool "Limit the IDLE task"
+ depends on VSERVER_HARDCPU
+ default n
+ help
+ Limit the idle slices, so that the next context
+ will be scheduled as soon as possible.
+ This might improve interactivity/latency but
+ increases scheduling overhead.
+
choice
prompt "Persistent Inode Context Tagging"
- default INOXID_GID24
+ default INOXID_UGID24
help
This adds persistent context information to filesystems
mounted with the tagxid option. Tagging is a requirement
help
no context information is store for inodes
+config INOXID_UID16
+ bool "UID16/GID32"
+ help
+ reduces UID to 16 bit, but leaves GID at 32 bit.
+
config INOXID_GID16
bool "UID32/GID16"
help
reduces GID to 16 bit, but leaves UID at 32 bit.
-config INOXID_GID24
+config INOXID_UGID24
bool "UID24/GID24"
help
uses the upper 8bit from UID and GID for XID tagging
which leaves 24bit for UID/GID each, which should be
more than sufficient for normal use.
-config INOXID_GID32
+config INOXID_INTERN
bool "UID32/GID32"
help
this uses otherwise reserved inode fields in the on
disk representation, which limits the use to a few
filesystems (currently ext2 and ext3)
-config INOXID_MAGIC
+config INOXID_RUNTIME
bool "Runtime"
depends on EXPERIMENTAL
help
endchoice
+config VSERVER_DEBUG
+ bool "Compile Debugging Code"
+ default n
+ help
+ Set this to yes if you want to be able to activate
+ debugging output at runtime. It adds a probably small
+ overhead to all vserver related functions and
+ increases the kernel size by about 20k.
+
+config VSERVER_HISTORY
+ bool "Compile History Tracing"
+ depends on VSERVER_DEBUG
+ default n
+ help
+ Set this to yes if you want to record the history of
+ linux-vserver activities, so they can be replayed on
+ a kernel panic (oops)
+
+config VSERVER_HISTORY_SIZE
+ int "Per CPU History Size (32-65536)"
+ depends on VSERVER_HISTORY
+ range 32 65536
+ default 64
+ help
+ This allows you to specify the number of entries in
+ the per CPU history buffer.
+
endmenu
obj-y += vserver.o
vserver-y := switch.o context.o namespace.o sched.o network.o inode.o \
- limit.o cvirt.o signal.o proc.o sysctl.o init.o
+ limit.o cvirt.o signal.o proc.o helper.o init.o dlimit.o
+vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o
vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
+vserver-$(CONFIG_VSERVER_HISTORY) += history.o
*
* Virtual Server: Context Support
*
- * Copyright (C) 2003-2004 Herbert Pötzl
+ * Copyright (C) 2003-2005 Herbert Pötzl
*
* V0.01 context helper
* V0.02 vx_ctx_kill syscall command
* V0.06 task_xid and info commands
* V0.07 context flags and caps
* V0.08 switch to RCU based hash
+ * V0.09 revert to non RCU for now
+ * V0.10 and back to working RCU hash
*
*/
#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vserver/legacy.h>
-#include <linux/vs_base.h>
-#include <linux/vs_context.h>
-#include <linux/kernel_stat.h>
+#include <linux/types.h>
#include <linux/namespace.h>
-#include <linux/rcupdate.h>
-#define CKRM_VSERVER_INTEGRATION
-#ifdef CKRM_VSERVER_INTEGRATION
-#include <linux/ckrm.h>
-#endif //CKRM_VSERVER_INTEGRATION
+#include <linux/sched.h>
+#include <linux/vserver/network.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/debug.h>
+#include <linux/vs_context.h>
+#include <linux/vserver/context_cmd.h>
+#include <linux/ckrm.h> /* needed for ckrm_cb_xid() */
#include <asm/errno.h>
+#include "cvirt_init.h"
+#include "limit_init.h"
+#include "sched_init.h"
+
/* __alloc_vx_info()
vxdprintk(VXD_CBIT(xid, 0),
"alloc_vx_info(%d) = %p", xid, new);
+ vxh_alloc_vx_info(new);
return new;
}
{
vxdprintk(VXD_CBIT(xid, 0),
"dealloc_vx_info(%p)", vxi);
+ vxh_dealloc_vx_info(vxi);
vxi->vx_hlist.next = LIST_POISON1;
vxi->vx_id = -1;
return usecnt;
}
-#if 0
-
-static void __rcu_free_vx_info(struct rcu_head *head)
+static void __rcu_put_vx_info(struct rcu_head *head)
{
struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
- BUG_ON(!head);
vxdprintk(VXD_CBIT(xid, 3),
- "rcu_free_vx_info(%p): uc=%d", vxi,
- atomic_read(&vxi->vx_usecnt));
-
- __free_vx_info(vxi);
+ "__rcu_put_vx_info(%p[#%d]): %d,%d",
+ vxi, vxi->vx_id,
+ atomic_read(&vxi->vx_usecnt),
+ atomic_read(&vxi->vx_refcnt));
+ put_vx_info(vxi);
}
-#endif
-
-void free_vx_info(struct vx_info *vxi)
+void __shutdown_vx_info(struct vx_info *vxi)
{
struct namespace *namespace;
struct fs_struct *fs;
- /* context shutdown is mandatory */
- // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
+ might_sleep();
namespace = xchg(&vxi->vx_namespace, NULL);
- fs = xchg(&vxi->vx_fs, NULL);
-
if (namespace)
put_namespace(namespace);
+
+ fs = xchg(&vxi->vx_fs, NULL);
if (fs)
put_fs_struct(fs);
+}
+
+/* exported stuff */
+
+void free_vx_info(struct vx_info *vxi)
+{
+ /* context shutdown is mandatory */
+ // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
+
+ BUG_ON(vxi->vx_state & VXS_HASHED);
+
+ BUG_ON(vxi->vx_namespace);
+ BUG_ON(vxi->vx_fs);
BUG_ON(__free_vx_info(vxi));
- // call_rcu(&i->vx_rcu, __rcu_free_vx_info);
}
vxdprintk(VXD_CBIT(xid, 4),
"__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+ vxh_hash_vx_info(vxi);
+
get_vx_info(vxi);
vxi->vx_state |= VXS_HASHED;
head = &vx_info_hash[__hashval(vxi->vx_id)];
{
vxdprintk(VXD_CBIT(xid, 4),
"__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+ vxh_unhash_vx_info(vxi);
+
vxi->vx_state &= ~VXS_HASHED;
hlist_del_rcu(&vxi->vx_hlist);
- put_vx_info(vxi);
+
+ call_rcu(&vxi->vx_rcu, __rcu_put_vx_info);
}
{
struct hlist_head *head = &vx_info_hash[__hashval(xid)];
struct hlist_node *pos;
+ struct vx_info *vxi;
hlist_for_each_rcu(pos, head) {
- struct vx_info *vxi =
- hlist_entry(pos, struct vx_info, vx_hlist);
+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
if ((vxi->vx_id == xid) &&
vx_info_state(vxi, VXS_HASHED))
- return vxi;
+ goto found;
}
- return NULL;
+ vxi = NULL;
+found:
+ vxdprintk(VXD_CBIT(xid, 0),
+ "__lookup_vx_info(#%u): %p[#%u]",
+ xid, vxi, vxi?vxi->vx_id:0);
+ vxh_lookup_vx_info(xid, vxi);
+ return vxi;
}
/* __vx_dynamic_id()
* find unused dynamic xid
+ * requires the rcu_read_lock()
* requires the hash_lock to be held */
static inline xid_t __vx_dynamic_id(void)
return NULL;
}
+ /* FIXME is this required at all ? */
+ rcu_read_lock();
+ /* required to make dynamic xids unique */
spin_lock(&vx_info_hash_lock);
/* dynamic context requested */
out_unlock:
spin_unlock(&vx_info_hash_lock);
+ rcu_read_unlock();
+ vxh_loc_vx_info(id, vxi);
if (new)
__dealloc_vx_info(new);
return vxi;
void unhash_vx_info(struct vx_info *vxi)
{
+ __shutdown_vx_info(vxi);
spin_lock(&vx_info_hash_lock);
__unhash_vx_info(vxi);
spin_unlock(&vx_info_hash_lock);
out:
-#ifdef CKRM_VSERVER_INTEGRATION
- do {
- ckrm_cb_xid(p);
- } while (0);
-#endif //CKRM_VSERVER_INTEGRATION
-
+ ckrm_cb_xid(p);
put_vx_info(old_vxi);
return ret;
read_unlock(&tasklist_lock);
}
else
- xid = current->xid;
+ xid = vx_current_xid();
return xid;
}
#include <linux/module.h>
-// EXPORT_SYMBOL_GPL(rcu_free_vx_info);
EXPORT_SYMBOL_GPL(free_vx_info);
-EXPORT_SYMBOL_GPL(vx_info_hash_lock);
EXPORT_SYMBOL_GPL(unhash_vx_info);
*/
#include <linux/config.h>
-#include <linux/vserver/cvirt.h>
-#include <linux/vserver/context.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/vs_context.h>
+#include <linux/vs_cvirt.h>
#include <linux/vserver/switch.h>
-#include <linux/vinline.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
struct vx_info *vxi = current->vx_info;
set_normalized_timespec(uptime,
- uptime->tv_sec - vxi->cvirt.bias_tp.tv_sec,
- uptime->tv_nsec - vxi->cvirt.bias_tp.tv_nsec);
+ uptime->tv_sec - vxi->cvirt.bias_uptime.tv_sec,
+ uptime->tv_nsec - vxi->cvirt.bias_uptime.tv_nsec);
if (!idle)
return;
set_normalized_timespec(idle,
return;
}
-uint64_t vx_idle_jiffies()
+uint64_t vx_idle_jiffies(void)
{
return init_task.utime + init_task.stime;
}
+
+
+static inline uint32_t __update_loadavg(uint32_t load,
+ int wsize, int delta, int n)
+{
+ unsigned long long calc, prev;
+
+ /* just set it to n */
+ if (unlikely(delta >= wsize))
+ return (n << FSHIFT);
+
+ calc = delta * n;
+ calc <<= FSHIFT;
+ prev = (wsize - delta);
+ prev *= load;
+ calc += prev;
+ do_div(calc, wsize);
+ return calc;
+}
+
+
+void vx_update_load(struct vx_info *vxi)
+{
+ uint32_t now, last, delta;
+ unsigned int nr_running, nr_uninterruptible;
+ unsigned int total;
+
+ spin_lock(&vxi->cvirt.load_lock);
+
+ now = jiffies;
+ last = vxi->cvirt.load_last;
+ delta = now - last;
+
+ if (delta < 5*HZ)
+ goto out;
+
+ nr_running = atomic_read(&vxi->cvirt.nr_running);
+ nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
+ total = nr_running + nr_uninterruptible;
+
+ vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
+ 60*HZ, delta, total);
+ vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
+ 5*60*HZ, delta, total);
+ vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
+ 15*60*HZ, delta, total);
+
+ vxi->cvirt.load_last = now;
+out:
+ atomic_inc(&vxi->cvirt.load_updates);
+ spin_unlock(&vxi->cvirt.load_lock);
+}
+
+
+
--- /dev/null
+
+extern uint64_t vx_idle_jiffies(void);
+
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+ uint64_t idle_jiffies = vx_idle_jiffies();
+ uint64_t nsuptime;
+
+ do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
+ nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
+ * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
+ cvirt->bias_clock = nsec_to_clock_t(nsuptime);
+
+ jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+ atomic_set(&cvirt->nr_threads, 0);
+ atomic_set(&cvirt->nr_running, 0);
+ atomic_set(&cvirt->nr_uninterruptible, 0);
+ atomic_set(&cvirt->nr_onhold, 0);
+
+ down_read(&uts_sem);
+ cvirt->utsname = system_utsname;
+ up_read(&uts_sem);
+
+ spin_lock_init(&cvirt->load_lock);
+ cvirt->load_last = jiffies;
+ atomic_set(&cvirt->load_updates, 0);
+ cvirt->load[0] = 0;
+ cvirt->load[1] = 0;
+ cvirt->load[2] = 0;
+}
+
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+ int value;
+
+ vxwprintk((value = atomic_read(&cvirt->nr_threads)),
+ "!!! cvirt: %p[nr_threads] = %d on exit.",
+ cvirt, value);
+ vxwprintk((value = atomic_read(&cvirt->nr_running)),
+ "!!! cvirt: %p[nr_running] = %d on exit.",
+ cvirt, value);
+ vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
+ "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
+ cvirt, value);
+#endif
+ return;
+}
+
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+ int i,j;
+
+ for (i=0; i<5; i++) {
+ for (j=0; j<3; j++) {
+ atomic_set(&cacct->sock[i][j].count, 0);
+ atomic_set(&cacct->sock[i][j].total, 0);
+ }
+ }
+}
+
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+ return;
+}
+
--- /dev/null
+#ifndef _VX_CVIRT_PROC_H
+#define _VX_CVIRT_PROC_H
+
+#include <linux/sched.h>
+
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+ int length = 0;
+ int a, b, c;
+
+ length += sprintf(buffer + length,
+ "BiasUptime:\t%lu.%02lu\n",
+ (unsigned long)cvirt->bias_uptime.tv_sec,
+ (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
+ length += sprintf(buffer + length,
+ "SysName:\t%.*s\n"
+ "NodeName:\t%.*s\n"
+ "Release:\t%.*s\n"
+ "Version:\t%.*s\n"
+ "Machine:\t%.*s\n"
+ "DomainName:\t%.*s\n"
+ ,__NEW_UTS_LEN, cvirt->utsname.sysname
+ ,__NEW_UTS_LEN, cvirt->utsname.nodename
+ ,__NEW_UTS_LEN, cvirt->utsname.release
+ ,__NEW_UTS_LEN, cvirt->utsname.version
+ ,__NEW_UTS_LEN, cvirt->utsname.machine
+ ,__NEW_UTS_LEN, cvirt->utsname.domainname
+ );
+
+ a = cvirt->load[0] + (FIXED_1/200);
+ b = cvirt->load[1] + (FIXED_1/200);
+ c = cvirt->load[2] + (FIXED_1/200);
+ length += sprintf(buffer + length,
+ "nr_threads:\t%d\n"
+ "nr_running:\t%d\n"
+ "nr_unintr:\t%d\n"
+ "nr_onhold:\t%d\n"
+ "load_updates:\t%d\n"
+ "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
+ ,atomic_read(&cvirt->nr_threads)
+ ,atomic_read(&cvirt->nr_running)
+ ,atomic_read(&cvirt->nr_uninterruptible)
+ ,atomic_read(&cvirt->nr_onhold)
+ ,atomic_read(&cvirt->load_updates)
+ ,LOAD_INT(a), LOAD_FRAC(a)
+ ,LOAD_INT(b), LOAD_FRAC(b)
+ ,LOAD_INT(c), LOAD_FRAC(c)
+ );
+ return length;
+}
+
+
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+ return atomic_read(&cacct->sock[type][pos].count);
+}
+
+
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+ return atomic_read(&cacct->sock[type][pos].total);
+}
+
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+ int i,j, length = 0;
+ static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+
+ for (i=0; i<5; i++) {
+ length += sprintf(buffer + length,
+ "%s:", type[i]);
+ for (j=0; j<3; j++) {
+ length += sprintf(buffer + length,
+ "\t%12lu/%-12lu"
+ ,vx_sock_count(cacct, i, j)
+ ,vx_sock_total(cacct, i, j)
+ );
+ }
+ buffer[length++] = '\n';
+ }
+ length += sprintf(buffer + length,
+ "forks:\t%lu\n", cacct->total_forks);
+ return length;
+}
+
+#endif /* _VX_CVIRT_PROC_H */
#include <linux/namei.h>
#include <linux/statfs.h>
#include <linux/vserver/switch.h>
-#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_dlimit.h>
__u64 blimit, bfree, bavail;
__u32 ifree;
- dli = locate_dl_info(sb, current->xid);
+ dli = locate_dl_info(sb, vx_current_xid());
if (!dli)
return;
#include <linux/errno.h>
#include <linux/reboot.h>
#include <linux/kmod.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
+#include <linux/sched.h>
#include <linux/vs_context.h>
#include <asm/uaccess.h>
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
argv[1] = "restart";
- break;
+ break;
case LINUX_REBOOT_CMD_HALT:
argv[1] = "halt";
- break;
+ break;
case LINUX_REBOOT_CMD_POWER_OFF:
argv[1] = "poweroff";
- break;
+ break;
case LINUX_REBOOT_CMD_SW_SUSPEND:
argv[1] = "swsusp";
- break;
+ break;
case LINUX_REBOOT_CMD_RESTART2:
if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
argv[3] = buffer;
default:
argv[1] = "restart2";
- break;
+ break;
}
/* maybe we should wait ? */
return 0;
}
+long vs_context_state(unsigned int cmd)
+{
+ char id_buf[8], cmd_buf[32];
+
+ char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0};
+ char *envp[] = {"HOME=/", "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
+
+ snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+ snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+
+ switch (cmd) {
+ case VS_CONTEXT_CREATED:
+ argv[1] = "startup";
+ break;
+ case VS_CONTEXT_DESTROY:
+ argv[1] = "shutdown";
+ break;
+ default:
+ return 0;
+ }
+
+ if (call_usermodehelper(*argv, argv, envp, 1)) {
+ printk( KERN_WARNING
+ "vs_context_state(): failed to exec (%s %s %s %s)\n",
+ vshelper_path, argv[1], argv[2], argv[3]);
+ return 0;
+ }
+ return 0;
+}
+
#include <linux/config.h>
#include <linux/errno.h>
-#include <linux/vserver.h>
-// #include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
{
int ret = 0;
+#ifdef CONFIG_VSERVER_DEBUG
vserver_register_sysctl();
+#endif
return ret;
}
static void __exit exit_vserver(void)
{
+#ifdef CONFIG_VSERVER_DEBUG
vserver_unregister_sysctl();
+#endif
return;
}
*/
#include <linux/config.h>
-#include <linux/vs_base.h>
+#include <linux/sched.h>
#include <linux/vs_context.h>
-#include <linux/fs.h>
#include <linux/proc_fs.h>
+#include <linux/devpts_fs.h>
#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
#include <linux/vserver/inode.h>
+#include <linux/vserver/xid.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask)
{
+ struct proc_dir_entry *entry;
+
if (!in || !in->i_sb)
return -ESRCH;
*mask |= IATTR_XID;
}
- if (in->i_sb->s_magic == PROC_SUPER_MAGIC) {
- struct proc_dir_entry *entry = PROC_I(in)->pde;
+ switch (in->i_sb->s_magic) {
+ case PROC_SUPER_MAGIC:
+ entry = PROC_I(in)->pde;
// check for specific inodes ?
if (entry)
*flags |= (entry->vx_flags & IATTR_FLAGS);
else
*flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
+ break;
+
+ case DEVPTS_SUPER_MAGIC:
+ *xid = in->i_xid;
+ *mask |= IATTR_XID;
+ break;
+
+ default:
+ break;
}
return 0;
}
int vc_get_iattr(uint32_t id, void __user *data)
{
struct nameidata nd;
- struct vcmd_ctx_iattr_v1 vc_data;
+ struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 };
int ret;
if (!vx_check(0, VX_ADMIN))
static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask)
{
struct inode *in = de->d_inode;
- int error = 0, is_proc = 0;
+ int error = 0, is_proc = 0, has_xid = 0;
if (!in || !in->i_sb)
return -ESRCH;
is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
if ((*mask & IATTR_FLAGS) && !is_proc)
return -EINVAL;
- if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID))
+
+ has_xid = (in->i_sb->s_flags & MS_TAGXID) ||
+ (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
+ if ((*mask & IATTR_XID) && !has_xid)
return -EINVAL;
down(&in->i_sem);
return ret;
}
-int vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg)
-{
- void __user *data = (void __user *)arg;
- struct vcmd_ctx_iattr_v1 vc_data;
- int ret;
-
- /*
- * I don't think we need any dget/dput pairs in here as long as
- * this function is always called from sys_ioctl i.e., de is
- * a field of a struct file that is guaranteed not to be freed.
- */
- if (cmd == FIOC_SETIATTR) {
- if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
- if (copy_from_user (&vc_data, data, sizeof(vc_data)))
- return -EFAULT;
- ret = __vc_set_iattr(de,
- &vc_data.xid, &vc_data.flags, &vc_data.mask);
- }
- else {
- if (!vx_check(0, VX_ADMIN))
- return -ENOSYS;
- ret = __vc_get_iattr(de->d_inode,
- &vc_data.xid, &vc_data.flags, &vc_data.mask);
- }
-
- if (!ret && copy_to_user (data, &vc_data, sizeof(vc_data)))
- ret = -EFAULT;
- return ret;
-}
-
#ifdef CONFIG_VSERVER_LEGACY
-#include <linux/proc_fs.h>
#define PROC_DYNAMIC_FIRST 0xF0000000UL
}
#endif
+
+int vx_parse_xid(char *string, xid_t *xid, int remove)
+{
+ static match_table_t tokens = {
+ {1, "xid=%u"},
+ {0, NULL}
+ };
+ substring_t args[MAX_OPT_ARGS];
+ int token, option = 0;
+
+ if (!string)
+ return 0;
+
+ token = match_token(string, tokens, args);
+ if (token && xid && !match_int(args, &option))
+ *xid = option;
+
+ vxdprintk(VXD_CBIT(xid, 7),
+ "vx_parse_xid(»%s«): %d:#%d",
+ string, token, option);
+
+ if (token && remove) {
+ char *p = strstr(string, "xid=");
+ char *q = p;
+
+ if (p) {
+ while (*q != '\0' && *q != ',')
+ q++;
+ while (*q)
+ *p++ = *q++;
+ while (*p)
+ *p++ = '\0';
+ }
+ }
+ return token;
+}
+
+void vx_propagate_xid(struct nameidata *nd, struct inode *inode)
+{
+ xid_t new_xid = 0;
+ struct vfsmount *mnt;
+ int propagate;
+
+ if (!nd)
+ return;
+ mnt = nd->mnt;
+ if (!mnt)
+ return;
+
+ propagate = (mnt->mnt_flags & MNT_XID);
+ if (propagate)
+ new_xid = mnt->mnt_xid;
+
+ vxdprintk(VXD_CBIT(xid, 7),
+ "vx_propagate_xid(%p[#%lu.%d]): %d,%d",
+ inode, inode->i_ino, inode->i_xid,
+ new_xid, (propagate)?1:0);
+
+ if (propagate)
+ inode->i_xid = new_xid;
+}
+
+#include <linux/module.h>
+
+EXPORT_SYMBOL_GPL(vx_propagate_xid);
+
#include <linux/config.h>
#include <linux/sched.h>
-#include <linux/namespace.h>
-#include <linux/vserver/legacy.h>
-#include <linux/vserver/namespace.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
#include <linux/vs_context.h>
#include <linux/vs_network.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/namespace.h>
+#include <linux/namespace.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
return ret;
}
- if (!vx_check(0, VX_ADMIN) ||
- !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_PRIVATE, 0))
+ if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN)
+ /* might make sense in the future, or not ... */
+ || vx_flags(VX_INFO_LOCK, 0))
return -EPERM;
/* ugly hack for Spectator */
if (!new_vxi)
return -EINVAL;
+
+ ret = -EPERM;
+ if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) &&
+ vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0))
+ goto out_put;
+
new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT);
ret = vx_migrate_task(current, new_vxi);
current->signal->rlim[RLIMIT_NPROC].rlim_max;
ret = new_vxi->vx_id;
}
+out_put:
put_vx_info(new_vxi);
return ret;
}
*/
#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
#include <linux/vserver/limit.h>
-#include <linux/vserver/context.h>
#include <linux/vserver/switch.h>
-#include <linux/vinline.h>
+#include <linux/vserver/limit_cmd.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
+const char *vlimit_name[NUM_LIMITS] = {
+ [RLIMIT_CPU] = "CPU",
+ [RLIMIT_RSS] = "RSS",
+ [RLIMIT_NPROC] = "NPROC",
+ [RLIMIT_NOFILE] = "NOFILE",
+ [RLIMIT_MEMLOCK] = "VML",
+ [RLIMIT_AS] = "VM",
+ [RLIMIT_LOCKS] = "LOCKS",
+ [RLIMIT_MSGQUEUE] = "MSGQ",
+ [VLIMIT_NSOCK] = "NSOCK",
+};
+
+EXPORT_SYMBOL_GPL(vlimit_name);
+
+
static int is_valid_rlimit(int id)
{
int valid = 0;
limit = vxi->limit.rlim[id];
if (limit == RLIM_INFINITY)
return CRLIM_INFINITY;
- return limit;
+ return limit;
}
int vc_get_rlimit(uint32_t id, void __user *data)
return -EFAULT;
if (!is_valid_rlimit(vc_data.id))
return -ENOTSUPP;
-
- vxi = find_vx_info(id);
+
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
if (!is_valid_rlimit(vc_data.id))
return -ENOTSUPP;
- vxi = find_vx_info(id);
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
if (vc_data.maximum != CRLIM_KEEP)
vxi->limit.rlim[vc_data.id] = vc_data.maximum;
- printk("setting [%d] = %d\n", vc_data.id, (int)vc_data.maximum);
put_vx_info(vxi);
return 0;
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
return -EPERM;
if (copy_to_user(data, &mask, sizeof(mask)))
- return -EFAULT;
+ return -EFAULT;
return 0;
}
v = vxi->limit.rlim[RLIMIT_RSS];
if (v != RLIM_INFINITY)
val->totalram = min(val->totalram, v);
- v = atomic_read(&vxi->limit.res[RLIMIT_RSS]);
+ v = atomic_read(&vxi->limit.rcur[RLIMIT_RSS]);
val->freeram = (v < val->totalram) ? val->totalram - v : 0;
val->bufferram = 0;
- val->totalhigh = 0;
- val->freehigh = 0;
+ val->totalhigh = 0;
+ val->freehigh = 0;
return;
}
void vx_vsi_swapinfo(struct sysinfo *val)
{
struct vx_info *vxi = current->vx_info;
- unsigned long w,v;
+ unsigned long v, w;
v = vxi->limit.rlim[RLIMIT_RSS];
w = vxi->limit.rlim[RLIMIT_AS];
if (w != RLIM_INFINITY)
val->totalswap = min(val->totalswap, w -
((v != RLIM_INFINITY) ? v : 0));
- w = atomic_read(&vxi->limit.res[RLIMIT_AS]);
+ w = atomic_read(&vxi->limit.rcur[RLIMIT_AS]);
val->freeswap = (w < val->totalswap) ? val->totalswap - w : 0;
return;
}
--- /dev/null
+
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+ int lim;
+
+ for (lim=0; lim<NUM_LIMITS; lim++) {
+ limit->rlim[lim] = RLIM_INFINITY;
+ limit->rmax[lim] = 0;
+ atomic_set(&limit->rcur[lim], 0);
+ atomic_set(&limit->lhit[lim], 0);
+ }
+}
+
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+ unsigned long value;
+ unsigned int lim;
+
+ for (lim=0; lim<NUM_LIMITS; lim++) {
+ value = atomic_read(&limit->rcur[lim]);
+ vxwprintk(value,
+ "!!! limit: %p[%s,%d] = %ld on exit.",
+ limit, vlimit_name[lim], lim, value);
+ }
+#endif
+}
+
--- /dev/null
+#ifndef _VX_LIMIT_PROC_H
+#define _VX_LIMIT_PROC_H
+
+
+static inline void vx_limit_fixup(struct _vx_limit *limit)
+{
+ unsigned long value;
+ unsigned int lim;
+
+ for (lim=0; lim<NUM_LIMITS; lim++) {
+ value = atomic_read(&limit->rcur[lim]);
+ if (value > limit->rmax[lim])
+ limit->rmax[lim] = value;
+ if (limit->rmax[lim] > limit->rlim[lim])
+ limit->rmax[lim] = limit->rlim[lim];
+ }
+}
+
+#define VX_LIMIT_FMT ":\t%10d\t%10ld\t%10ld\t%6d\n"
+
+#define VX_LIMIT_ARG(r) \
+ ,atomic_read(&limit->rcur[r]) \
+ ,limit->rmax[r] \
+ ,limit->rlim[r] \
+ ,atomic_read(&limit->lhit[r])
+
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+ vx_limit_fixup(limit);
+ return sprintf(buffer,
+ "PROC" VX_LIMIT_FMT
+ "VM" VX_LIMIT_FMT
+ "VML" VX_LIMIT_FMT
+ "RSS" VX_LIMIT_FMT
+ "FILES" VX_LIMIT_FMT
+ "SOCK" VX_LIMIT_FMT
+ VX_LIMIT_ARG(RLIMIT_NPROC)
+ VX_LIMIT_ARG(RLIMIT_AS)
+ VX_LIMIT_ARG(RLIMIT_MEMLOCK)
+ VX_LIMIT_ARG(RLIMIT_RSS)
+ VX_LIMIT_ARG(RLIMIT_NOFILE)
+ VX_LIMIT_ARG(VLIMIT_NSOCK)
+ );
+}
+
+#endif /* _VX_LIMIT_PROC_H */
+
+
#include <linux/config.h>
#include <linux/utsname.h>
+#include <linux/sched.h>
+#include <linux/vs_context.h>
#include <linux/vserver/namespace.h>
-#include <linux/vinline.h>
-#include <linux/namespace.h>
#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
+int vx_check_vfsmount(struct vx_info *vxi, struct vfsmount *mnt)
+{
+ struct vfsmount *root_mnt, *altroot_mnt;
+ struct dentry *root, *altroot, *point;
+ int r1, r2, s1, s2, ret = 0;
+
+ if (!vxi || !mnt)
+ return 1;
+
+ spin_lock(&dcache_lock);
+ altroot_mnt = current->fs->rootmnt;
+ altroot = current->fs->root;
+ point = altroot;
+
+ if (vxi->vx_fs) {
+ root_mnt = vxi->vx_fs->rootmnt;
+ root = vxi->vx_fs->root;
+ } else {
+ root_mnt = altroot_mnt;
+ root = altroot;
+ }
+ /* printk("··· %p:%p/%p:%p ",
+ root_mnt, root, altroot_mnt, altroot); */
+
+ while ((mnt != mnt->mnt_parent) &&
+ (mnt != root_mnt) && (mnt != altroot_mnt)) {
+ point = mnt->mnt_mountpoint;
+ mnt = mnt->mnt_parent;
+ }
+
+ r1 = (mnt == root_mnt);
+ s1 = is_subdir(point, root);
+ r2 = (mnt == altroot_mnt);
+ s2 = is_subdir(point, altroot);
+
+ ret = (((mnt == root_mnt) && is_subdir(point, root)) ||
+ ((mnt == altroot_mnt) && is_subdir(point, altroot)));
+ /* printk("··· for %p:%p -> %d:%d/%d:%d = %d\n",
+ mnt, point, r1, s1, r2, s2, ret); */
+ spin_unlock(&dcache_lock);
+
+ return (r2 && s2);
+}
+
+
/* virtual host info names */
static char * vx_vhi_name(struct vx_info *vxi, int id)
return -EPERM;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
-
- vxi = find_vx_info(id);
+
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
-
+
name = vx_vhi_name(vxi, vc_data.field);
if (name)
memcpy(name, vc_data.name, 65);
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- vxi = find_vx_info(id);
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
name = vx_vhi_name(vxi, vc_data.field);
if (!name)
goto out_put;
-
+
memcpy(vc_data.name, name, 65);
if (copy_to_user (data, &vc_data, sizeof(vc_data)))
return -EFAULT;
if (!vx_check(0, VX_ADMIN))
return -ENOSYS;
- vxi = find_vx_info(id);
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
old_ns = current->namespace;
old_fs = current->fs;
get_namespace(vxi->vx_namespace);
- current->namespace = vxi->vx_namespace;
+ current->namespace = vxi->vx_namespace;
current->fs = fs;
task_unlock(current);
int vc_cleanup_namespace(uint32_t id, void *data)
{
down_write(¤t->namespace->sem);
- // spin_lock(&dcache_lock);
spin_lock(&vfsmount_lock);
umount_unused(current->namespace->root, current->fs);
spin_unlock(&vfsmount_lock);
- // spin_unlock(&dcache_lock);
up_write(¤t->namespace->sem);
return 0;
}
#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
+#include <linux/vserver/network_cmd.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
kfree(nxi);
}
+static inline int __free_nx_info(struct nx_info *nxi)
+{
+ int usecnt, refcnt;
+
+ BUG_ON(!nxi);
+
+ usecnt = atomic_read(&nxi->nx_usecnt);
+ BUG_ON(usecnt < 0);
+
+ refcnt = atomic_read(&nxi->nx_refcnt);
+ BUG_ON(refcnt < 0);
+
+ if (!usecnt)
+ __dealloc_nx_info(nxi);
+ return usecnt;
+}
+
+static void __rcu_put_nx_info(struct rcu_head *head)
+{
+ struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
+
+ vxdprintk(VXD_CBIT(nid, 3),
+ "__rcu_put_nx_info(%p[#%d]): %d,%d",
+ nxi, nxi->nx_id,
+ atomic_read(&nxi->nx_usecnt),
+ atomic_read(&nxi->nx_refcnt));
+ put_nx_info(nxi);
+}
+
/* hash table for nx_info hash */
vxdprintk(VXD_CBIT(nid, 4),
"__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
hlist_del_rcu(&nxi->nx_hlist);
- put_nx_info(nxi);
+ call_rcu(&nxi->nx_rcu, __rcu_put_nx_info);
}
/* __nx_dynamic_id()
* find unused dynamic nid
+ * requires the rcu_read_lock()
* requires the hash_lock to be held */
static inline nid_t __nx_dynamic_id(void)
return NULL;
}
+ /* FIXME is this required at all ? */
+ rcu_read_lock();
+ /* required to make dynamic nids unique */
spin_lock(&nx_info_hash_lock);
/* dynamic context requested */
out_unlock:
spin_unlock(&nx_info_hash_lock);
+ rcu_read_unlock();
if (new)
__dealloc_nx_info(new);
return nxi;
/* exported stuff */
-
-
-
-void rcu_free_nx_info(struct rcu_head *head)
+void free_nx_info(struct nx_info *nxi)
{
- struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
- int usecnt, refcnt;
-
- BUG_ON(!nxi || !head);
-
- usecnt = atomic_read(&nxi->nx_usecnt);
- BUG_ON(usecnt < 0);
-
- refcnt = atomic_read(&nxi->nx_refcnt);
- BUG_ON(refcnt < 0);
-
- vxdprintk(VXD_CBIT(nid, 3),
- "rcu_free_nx_info(%p): uc=%d", nxi, usecnt);
- if (!usecnt)
- __dealloc_nx_info(nxi);
- else
- printk("!!! rcu didn't free\n");
+ BUG_ON(__free_nx_info(nxi));
}
void unhash_nx_info(struct nx_info *nxi)
#include <linux/module.h>
-EXPORT_SYMBOL_GPL(rcu_free_nx_info);
-EXPORT_SYMBOL_GPL(nx_info_hash_lock);
+EXPORT_SYMBOL_GPL(free_nx_info);
EXPORT_SYMBOL_GPL(unhash_nx_info);
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
-#include <linux/vserver.h>
+#include <linux/sched.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
+#include <linux/vs_cvirt.h>
+
+#include <linux/vserver/switch.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
+#include "cvirt_proc.h"
+#include "limit_proc.h"
+#include "sched_proc.h"
static struct proc_dir_entry *proc_virtual;
PROC_NID_STATUS,
};
-#define PROC_VID_MASK 0x60
+#define PROC_VID_MASK 0x60
/* first the actual feeds */
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
length = sprintf(buffer,
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
length = sprintf(buffer,
- "RefC:\t%d\n"
+ "UseCnt:\t%d\n"
+ "RefCnt:\t%d\n"
"Flags:\t%016llx\n"
"BCaps:\t%016llx\n"
"CCaps:\t%016llx\n"
- "Ticks:\t%d\n"
- ,atomic_read(&vxi->vx_refcount)
- ,vxi->vx_flags
- ,vxi->vx_bcaps
- ,vxi->vx_ccaps
+ "Ticks:\t%d\n"
+ ,atomic_read(&vxi->vx_usecnt)
+ ,atomic_read(&vxi->vx_refcnt)
+ ,(unsigned long long)vxi->vx_flags
+ ,(unsigned long long)vxi->vx_bcaps
+ ,(unsigned long long)vxi->vx_ccaps
,atomic_read(&vxi->limit.ticks)
);
put_vx_info(vxi);
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
length = vx_info_proc_limit(&vxi->limit, buffer);
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
length = vx_info_proc_sched(&vxi->sched, buffer);
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
+ vx_update_load(vxi);
length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
put_vx_info(vxi);
return length;
struct vx_info *vxi;
int length;
- vxi = find_vx_info(vid);
+ vxi = locate_vx_info(vid);
if (!vxi)
return 0;
length = vx_info_proc_cacct(&vxi->cacct, buffer);
);
}
-#define atoquad(a) \
+#define atoquad(a) \
(((a)>>0) & 0xff), (((a)>>8) & 0xff), \
(((a)>>16) & 0xff), (((a)>>24) & 0xff)
struct nx_info *nxi;
int length, i;
- nxi = find_nx_info(vid);
+ nxi = locate_nx_info(vid);
if (!nxi)
return 0;
length = sprintf(buffer,
struct nx_info *nxi;
int length;
- nxi = find_nx_info(vid);
+ nxi = locate_nx_info(vid);
if (!nxi)
return 0;
length = sprintf(buffer,
- "RefC:\t%d\n"
- ,atomic_read(&nxi->nx_refcount)
+ "UseCnt:\t%d\n"
+ "RefCnt:\t%d\n"
+ ,atomic_read(&nxi->nx_usecnt)
+ ,atomic_read(&nxi->nx_refcnt)
);
put_nx_info(nxi);
return length;
/* here the inode helpers */
+#define fake_ino(id,nr) (((nr) & 0xFFFF) | \
+ (((id) & 0xFFFF) << 16))
-#define fake_ino(id,ino) (((id)<<16)|(ino))
-
-#define inode_vid(i) ((i)->i_ino >> 16)
-#define inode_type(i) ((i)->i_ino & 0xFFFF)
+#define inode_vid(i) (((i)->i_ino >> 16) & 0xFFFF)
+#define inode_type(i) ((i)->i_ino & 0xFFFF)
#define MAX_MULBY10 ((~0U-9)/10)
static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
{
struct inode * inode = dentry->d_inode;
- int vid, valid=0;
+ int vid, hashed=0;
vid = inode_vid(inode);
switch (inode_type(inode) & PROC_VID_MASK) {
case PROC_XID_INO:
- valid = vx_info_id_valid(vid);
+ hashed = vx_info_is_hashed(vid);
break;
case PROC_NID_INO:
- valid = nx_info_id_valid(vid);
+ hashed = nx_info_is_hashed(vid);
break;
- }
- if (valid)
+ }
+ if (hashed)
return 1;
d_drop(dentry);
return 0;
/*
static int proc_vid_delete_dentry(struct dentry * dentry)
{
- return 1;
+ return 1;
}
*/
};
static struct dentry_operations proc_vid_dentry_operations = {
- d_revalidate: proc_vid_revalidate,
+ d_revalidate: proc_vid_revalidate,
// d_delete: proc_vid_delete_dentry,
};
switch (inode_type(dir)) {
case PROC_XID_INO:
- p = vx_base_stuff;
+ p = vx_base_stuff;
break;
case PROC_NID_INO:
- p = vn_base_stuff;
+ p = vn_base_stuff;
break;
default:
goto out;
case PROC_NID_STATUS:
PROC_I(inode)->op.proc_vid_read = proc_nid_status;
break;
-
+
default:
printk("procfs: impossible type (%d)",p->type);
iput(inode);
inode->i_fop = &proc_vid_info_file_operations;
inode->i_nlink = 1;
inode->i_flags|=S_IMMUTABLE;
-
+
dentry->d_op = &proc_vid_dentry_operations;
d_add(dentry, inode);
error = 0;
int i, size;
struct inode *inode = filp->f_dentry->d_inode;
struct vid_entry *p;
-
+
i = filp->f_pos;
switch (i) {
case 0:
switch (inode_type(inode)) {
case PROC_XID_INO:
size = sizeof(vx_base_stuff);
- p = vx_base_stuff + i;
+ p = vx_base_stuff + i;
break;
case PROC_NID_INO:
size = sizeof(vn_base_stuff);
- p = vn_base_stuff + i;
+ p = vn_base_stuff + i;
break;
default:
return 1;
xid = atovid(name, len);
if (xid < 0)
goto out;
- vxi = find_vx_info(xid);
+ vxi = locate_vx_info(xid);
if (!vxi)
goto out;
dentry->d_op = &proc_vid_dentry_operations;
d_add(dentry, inode);
ret = 0;
-
+
out_release:
put_vx_info(vxi);
out:
nid = atovid(name, len);
if (nid < 0)
goto out;
- nxi = find_nx_info(nid);
+ nxi = locate_nx_info(nid);
if (!nxi)
goto out;
dentry->d_op = &proc_vid_dentry_operations;
d_add(dentry, inode);
ret = 0;
-
+
out_release:
put_nx_info(nxi);
out:
#define PROC_NUMBUF 10
#define PROC_MAXVIDS 32
-
-static int get_xid_list(int index, unsigned int *xids)
-{
- struct vx_info *p;
- int nr_xids = 0;
-
- index--;
- spin_lock(&vxlist_lock);
- list_for_each_entry(p, &vx_infos, vx_list) {
- int xid = p->vx_id;
-
- if (--index >= 0)
- continue;
- xids[nr_xids] = xid;
- if (++nr_xids >= PROC_MAXVIDS)
- break;
- }
- spin_unlock(&vxlist_lock);
- return nr_xids;
-}
-
int proc_virtual_readdir(struct file * filp,
void * dirent, filldir_t filldir)
{
filp->f_pos++;
/* fall through */
case 3:
- if (current->xid > 1) {
+ if (vx_current_xid() > 1) {
ino = fake_ino(1, PROC_XID_INO);
if (filldir(dirent, "current", 7,
filp->f_pos, ino, DT_LNK) < 0)
filp->f_pos++;
}
- nr_xids = get_xid_list(nr, xid_array);
-
+ nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS);
for (i = 0; i < nr_xids; i++) {
int xid = xid_array[i];
ino_t ino = fake_ino(xid, PROC_XID_INO);
- unsigned long j = PROC_NUMBUF;
+ unsigned int j = PROC_NUMBUF;
do buf[--j] = '0' + (xid % 10); while (xid/=10);
};
-
-static int get_nid_list(int index, unsigned int *nids)
-{
- struct nx_info *p;
- int nr_nids = 0;
-
- index--;
- spin_lock(&nxlist_lock);
- list_for_each_entry(p, &nx_infos, nx_list) {
- int nid = p->nx_id;
-
- if (--index >= 0)
- continue;
- nids[nr_nids] = nid;
- if (++nr_nids >= PROC_MAXVIDS)
- break;
- }
- spin_unlock(&nxlist_lock);
- return nr_nids;
-}
-
int proc_vnet_readdir(struct file * filp,
void * dirent, filldir_t filldir)
{
filp->f_pos++;
/* fall through */
case 3:
- if (current->xid > 1) {
+ if (vx_current_xid() > 1) {
ino = fake_ino(1, PROC_NID_INO);
if (filldir(dirent, "current", 7,
filp->f_pos, ino, DT_LNK) < 0)
filp->f_pos++;
}
- nr_nids = get_nid_list(nr, nid_array);
-
+ nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS);
for (i = 0; i < nr_nids; i++) {
int nid = nid_array[i];
ino_t ino = fake_ino(nid, PROC_NID_INO);
}
proc_virtual = ent;
- ent = proc_mkdir("vnet", 0);
+ ent = proc_mkdir("virtnet", 0);
if (ent) {
ent->proc_fops = &proc_vnet_dir_operations;
ent->proc_iops = &proc_vnet_dir_inode_operations;
char *task_vx_info(struct task_struct *p, char *buffer)
{
- return buffer + sprintf(buffer,
- "XID:\t%d\n"
- ,p->xid);
+ struct vx_info *vxi;
+
+ /* always report the XID; detailed capability/flag lines follow
+ * only when the context does not request VXF_INFO_HIDE */
+ buffer += sprintf (buffer,"XID:\t%d\n", vx_task_xid(p));
+ vxi = task_get_vx_info(p);
+ if (vxi && !vx_flags(VXF_INFO_HIDE, 0)) {
+ buffer += sprintf (buffer,"BCaps:\t%016llx\n"
+ ,(unsigned long long)vxi->vx_bcaps);
+ buffer += sprintf (buffer,"CCaps:\t%016llx\n"
+ ,(unsigned long long)vxi->vx_ccaps);
+ buffer += sprintf (buffer,"CFlags:\t%016llx\n"
+ ,(unsigned long long)vxi->vx_flags);
+ buffer += sprintf (buffer,"CIPid:\t%d\n"
+ ,vxi->vx_initpid);
+ }
+ /* NOTE(review): vxi may be NULL here -- assumes put_vx_info()
+ * is NULL-safe; confirm against its definition */
+ put_vx_info(vxi);
+ return buffer;
}
int proc_pid_vx_info(struct task_struct *p, char *buffer)
char *task_nx_info(struct task_struct *p, char *buffer)
{
- return buffer + sprintf(buffer,
- "NID:\t%d\n"
- ,p->nid);
+ struct nx_info *nxi;
+
+ /* always report the NID; per-context IPv4 root addresses are
+ * shown only when VXF_INFO_HIDE is not set for the caller */
+ buffer += sprintf (buffer,"NID:\t%d\n", nx_task_nid(p));
+ nxi = task_get_nx_info(p);
+ if (nxi && !vx_flags(VXF_INFO_HIDE, 0)) {
+ int i;
+
+ for (i=0; i<nxi->nbipv4; i++){
+ buffer += sprintf (buffer,
+ "V4Root[%d]:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i
+ ,NIPQUAD(nxi->ipv4[i])
+ ,NIPQUAD(nxi->mask[i]));
+ }
+ buffer += sprintf (buffer,
+ "V4Root[bcast]:\t%d.%d.%d.%d\n"
+ ,NIPQUAD(nxi->v4_bcast));
+ }
+ /* NOTE(review): nxi may be NULL -- assumes put_nx_info() is
+ * NULL-safe; confirm */
+ put_nx_info(nxi);
+ return buffer;
}
int proc_pid_nx_info(struct task_struct *p, char *buffer)
#include <linux/config.h>
#include <linux/sched.h>
-#include <linux/vinline.h>
-#include <linux/vserver/context.h>
-#include <linux/vserver/sched.h>
+// #include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_sched.h>
+#include <linux/vserver/sched_cmd.h>
#include <asm/errno.h>
#include <asm/uaccess.h>
{
long delta, tokens = 0;
- if (__vx_flags(vxi->vx_flags, VXF_SCHED_PAUSE, 0))
+ if (vx_info_flags(vxi, VXF_SCHED_PAUSE, 0))
/* we are paused */
return 0;
atomic_add(tokens, &vxi->sched.tokens);
vxi->sched.jiffies += delta;
tokens = atomic_read(&vxi->sched.tokens);
-
+
if (tokens > vxi->sched.tokens_max) {
tokens = vxi->sched.tokens_max;
atomic_set(&vxi->sched.tokens, tokens);
spin_unlock(&vxi->sched.tokens_lock);
} else {
/* no new tokens */
- if ((tokens = vx_tokens_avail(vxi)) < vxi->sched.tokens_min) {
+ tokens = vx_tokens_avail(vxi);
+ if (tokens <= 0)
+ vxi->vx_state |= VXS_ONHOLD;
+ if (tokens < vxi->sched.tokens_min) {
/* enough tokens will be available in */
if (vxi->sched.tokens_min == 0)
return delta - vxi->sched.interval;
vxi->sched.tokens_min / vxi->sched.fill_rate;
}
}
+
/* we have some tokens left */
+ if (vx_info_state(vxi, VXS_ONHOLD) &&
+ (tokens >= vxi->sched.tokens_min))
+ vxi->vx_state &= ~VXS_ONHOLD;
+ if (vx_info_state(vxi, VXS_ONHOLD))
+ tokens -= vxi->sched.tokens_min;
+
return tokens;
}
}
-int vc_set_sched(uint32_t xid, void __user *data)
+int vc_set_sched_v2(uint32_t xid, void __user *data)
{
struct vcmd_set_sched_v2 vc_data;
struct vx_info *vxi;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
-
- vxi = find_vx_info(xid);
+
+ vxi = locate_vx_info(xid);
if (!vxi)
return -EINVAL;
return 0;
}
+
+/* vserver syscall handler (scheduler command, v3): update the
+ * token-bucket scheduler of context 'xid' from the userspace
+ * vcmd_set_sched_v3 structure.  Only the fields selected in
+ * vc_data.set_mask are applied; the result is then clamped to
+ * consistent ranges under the tokens_lock.
+ * Returns 0 on success, -EFAULT on bad user pointer, -EINVAL when
+ * no context with that xid exists. */
+int vc_set_sched(uint32_t xid, void __user *data)
+{
+ struct vcmd_set_sched_v3 vc_data;
+ struct vx_info *vxi;
+ unsigned int set_mask;
+
+ if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
+
+ vxi = locate_vx_info(xid);
+ if (!vxi)
+ return -EINVAL;
+
+ set_mask = vc_data.set_mask;
+
+ spin_lock(&vxi->sched.tokens_lock);
+
+ /* apply only the fields the caller selected */
+ if (set_mask & VXSM_FILL_RATE)
+ vxi->sched.fill_rate = vc_data.fill_rate;
+ if (set_mask & VXSM_INTERVAL)
+ vxi->sched.interval = vc_data.interval;
+ if (set_mask & VXSM_TOKENS)
+ atomic_set(&vxi->sched.tokens, vc_data.tokens);
+ if (set_mask & VXSM_TOKENS_MIN)
+ vxi->sched.tokens_min = vc_data.tokens_min;
+ if (set_mask & VXSM_TOKENS_MAX)
+ vxi->sched.tokens_max = vc_data.tokens_max;
+ if (set_mask & VXSM_PRIO_BIAS)
+ vxi->sched.priority_bias = vc_data.priority_bias;
+
+ /* Sanity check the resultant values */
+ if (vxi->sched.fill_rate <= 0)
+ vxi->sched.fill_rate = 1;
+ if (vxi->sched.interval <= 0)
+ vxi->sched.interval = HZ;
+ if (vxi->sched.tokens_max == 0)
+ vxi->sched.tokens_max = 1;
+ if (atomic_read(&vxi->sched.tokens) > vxi->sched.tokens_max)
+ atomic_set(&vxi->sched.tokens, vxi->sched.tokens_max);
+ if (vxi->sched.tokens_min > vxi->sched.tokens_max)
+ vxi->sched.tokens_min = vxi->sched.tokens_max;
+ if (vxi->sched.priority_bias > MAX_PRIO_BIAS)
+ vxi->sched.priority_bias = MAX_PRIO_BIAS;
+ if (vxi->sched.priority_bias < MIN_PRIO_BIAS)
+ vxi->sched.priority_bias = MIN_PRIO_BIAS;
+
+ spin_unlock(&vxi->sched.tokens_lock);
+ put_vx_info(vxi);
+ return 0;
+}
+
--- /dev/null
+
+/* Initialize a context's token-bucket scheduler state with the
+ * hard-coded defaults and zero the per-cpu tick accounting. */
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+ int i;
+
+ /* scheduling; hard code starting values as constants */
+ sched->fill_rate = 1;
+ sched->interval = 4;
+ sched->tokens_min = HZ >> 4;
+ sched->tokens_max = HZ >> 1;
+ sched->jiffies = jiffies;
+ sched->tokens_lock = SPIN_LOCK_UNLOCKED;
+
+ /* start with a quarter second worth of tokens */
+ atomic_set(&sched->tokens, HZ >> 2);
+ sched->cpus_allowed = CPU_MASK_ALL;
+ sched->priority_bias = 0;
+
+ for_each_cpu(i) {
+ sched->cpu[i].user_ticks = 0;
+ sched->cpu[i].sys_ticks = 0;
+ sched->cpu[i].hold_ticks = 0;
+ }
+}
+
+/* Teardown counterpart of vx_info_init_sched(); nothing to release. */
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+ return;
+}
+
--- /dev/null
+#ifndef _VX_SCHED_PROC_H
+#define _VX_SCHED_PROC_H
+
+
+/* Format the scheduler state of a context into 'buffer' for procfs:
+ * the global token-bucket parameters followed by one line of
+ * user/sys/hold tick counters per online cpu.
+ * Returns the number of bytes written.
+ * NOTE(review): assumes the caller provides a page-sized buffer (the
+ * usual proc read contract) -- no bounds checking is done here. */
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+ int length = 0;
+ int i;
+
+ length += sprintf(buffer,
+ "Token:\t\t%8d\n"
+ "FillRate:\t%8d\n"
+ "Interval:\t%8d\n"
+ "TokensMin:\t%8d\n"
+ "TokensMax:\t%8d\n"
+ "PrioBias:\t%8d\n"
+ ,atomic_read(&sched->tokens)
+ ,sched->fill_rate
+ ,sched->interval
+ ,sched->tokens_min
+ ,sched->tokens_max
+ ,sched->priority_bias
+ );
+
+ for_each_online_cpu(i) {
+ length += sprintf(buffer + length,
+ "cpu %d: %lld %lld %lld\n"
+ ,i
+ ,(long long)sched->cpu[i].user_ticks
+ ,(long long)sched->cpu[i].sys_ticks
+ ,(long long)sched->cpu[i].hold_ticks
+ );
+ }
+
+ return length;
+}
+
+#endif /* _VX_SCHED_PROC_H */
#include <asm/errno.h>
#include <asm/uaccess.h>
-#include <linux/vinline.h>
+#include <linux/vs_context.h>
#include <linux/vserver/signal.h>
return -ENOSYS;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
-
+
info.si_signo = vc_data.sig;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = current->pid;
info.si_uid = current->uid;
- vxi = find_vx_info(id);
+ vxi = locate_vx_info(id);
if (!vxi)
return -ESRCH;
retval = err;
}
break;
-
+
default:
- p = find_task_by_pid(vc_data.pid);
+ p = find_task_by_real_pid(vc_data.pid);
if (p) {
if (!thread_group_leader(p)) {
struct task_struct *tg;
-
- tg = find_task_by_pid(p->tgid);
+
+ tg = find_task_by_real_pid(p->tgid);
if (tg)
p = tg;
}
}
+/* Sleep on the context's vx_exit waitqueue until the context reaches
+ * the VXS_DEFUNCT state or a signal becomes pending.
+ * Returns 0 when the context is defunct, -ERESTARTSYS on signal.
+ * NOTE(review): the task state is set to TASK_INTERRUPTIBLE only once
+ * before the loop; after schedule() returns the task is RUNNING, so
+ * the 'goto wait' path can spin through schedule() on spurious
+ * wakeups until the condition holds -- confirm whether
+ * set_current_state(TASK_INTERRUPTIBLE) should be re-issued each
+ * iteration (the canonical kernel sleep-loop pattern). */
+static int __wait_exit(struct vx_info *vxi)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ int ret = 0;
+
+ add_wait_queue(&vxi->vx_exit, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+wait:
+ if (vx_info_state(vxi, VXS_DEFUNCT))
+ goto out;
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
+ goto out;
+ }
+ schedule();
+ goto wait;
+
+out:
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&vxi->vx_exit, &wait);
+ return ret;
+}
+
+
+
+/* vserver syscall handler: block the caller until context 'id' has
+ * exited (see __wait_exit above).  The 'data' argument is currently
+ * unused (v0 takes no payload).
+ * Returns 0 on context exit, -ESRCH if the context does not exist,
+ * -ERESTARTSYS when interrupted by a signal. */
+int vc_wait_exit(uint32_t id, void __user *data)
+{
+// struct vcmd_wait_exit_v0 vc_data;
+ struct vx_info *vxi;
+ int ret;
+
+ vxi = locate_vx_info(id);
+ if (!vxi)
+ return -ESRCH;
+
+ ret = __wait_exit(vxi);
+ put_vx_info(vxi);
+ return ret;
+}
+
*
* Virtual Server: Syscall Switch
*
- * Copyright (C) 2003-2004 Herbert Pötzl
+ * Copyright (C) 2003-2005 Herbert Pötzl
*
* V0.01 syscall switch
* V0.02 added signal to context
* V0.03 added rlimit functions
* V0.04 added iattr, task/xid functions
+ * V0.05 added debug/history stuff
*
*/
#include <linux/config.h>
#include <linux/linkage.h>
+#include <linux/sched.h>
#include <asm/errno.h>
+#include <linux/vserver/network.h>
#include <linux/vserver/switch.h>
-#include <linux/vinline.h>
+#include <linux/vserver/debug.h>
static inline int
return VCI_VERSION;
}
+#include <linux/vserver/context_cmd.h>
+#include <linux/vserver/cvirt_cmd.h>
+#include <linux/vserver/limit_cmd.h>
+#include <linux/vserver/network_cmd.h>
+#include <linux/vserver/sched_cmd.h>
+#include <linux/vserver/debug_cmd.h>
#include <linux/vserver/legacy.h>
-#include <linux/vserver/context.h>
-#include <linux/vserver/network.h>
#include <linux/vserver/namespace.h>
-#include <linux/vserver/sched.h>
-#include <linux/vserver/limit.h>
#include <linux/vserver/inode.h>
#include <linux/vserver/signal.h>
-
-
-extern unsigned int vx_debug_switch;
+#include <linux/vserver/dlimit.h>
extern asmlinkage long
sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
{
+ vxdprintk(VXD_CBIT(switch, 0),
+ "vc: VCMD_%02d_%d[%d], %d",
+ VC_CATEGORY(cmd), VC_COMMAND(cmd),
+ VC_VERSION(cmd), id);
- if (vx_debug_switch)
- printk( "vc: VCMD_%02d_%d[%d], %d\n",
- VC_CATEGORY(cmd), VC_COMMAND(cmd),
- VC_VERSION(cmd), id);
+#ifdef CONFIG_VSERVER_LEGACY
+ if (!capable(CAP_CONTEXT) &&
+ /* dirty hack for capremove */
+ !(cmd==VCMD_new_s_context && id==-2))
+ return -EPERM;
+#else
+ if (!capable(CAP_CONTEXT))
+ return -EPERM;
+#endif
switch (cmd) {
case VCMD_get_version:
return vc_get_version(id);
-#ifdef CONFIG_VSERVER_LEGACY
+ case VCMD_dump_history:
+#ifdef CONFIG_VSERVER_HISTORY
+ return vc_dump_history(id);
+#else
+ return -ENOSYS;
+#endif
+
+#ifdef CONFIG_VSERVER_LEGACY
case VCMD_new_s_context:
return vc_new_s_context(id, data);
case VCMD_set_ipv4root:
return vc_set_rlimit(id, data);
case VCMD_get_rlimit_mask:
return vc_get_rlimit_mask(id, data);
-
+
case VCMD_vx_get_vhi_name:
return vc_get_vhi_name(id, data);
case VCMD_vx_set_vhi_name:
case VCMD_get_ncaps:
return vc_get_ncaps(id, data);
+ case VCMD_set_sched_v2:
+ return vc_set_sched_v2(id, data);
+ /* this is version 3 */
case VCMD_set_sched:
return vc_set_sched(id, data);
+
+ case VCMD_add_dlimit:
+ return vc_add_dlimit(id, data);
+ case VCMD_rem_dlimit:
+ return vc_rem_dlimit(id, data);
+ case VCMD_set_dlimit:
+ return vc_set_dlimit(id, data);
+ case VCMD_get_dlimit:
+ return vc_get_dlimit(id, data);
}
/* below here only with VX_ADMIN */
case VCMD_ctx_kill:
return vc_ctx_kill(id, data);
-#ifdef CONFIG_VSERVER_LEGACY
+ case VCMD_wait_exit:
+ return vc_wait_exit(id, data);
+
case VCMD_create_context:
+#ifdef CONFIG_VSERVER_LEGACY
return vc_ctx_create(id, data);
+#else
+ return -ENOSYS;
#endif
case VCMD_get_iattr:
return vc_enter_namespace(id, data);
case VCMD_ctx_create:
-#ifdef CONFIG_VSERVER_LEGACY
+#ifdef CONFIG_VSERVER_LEGACY
if (id == 1) {
current->xid = 1;
return 1;
#include <linux/config.h>
#include <linux/errno.h>
-#include <linux/vserver.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/ctype.h>
{
if (!vserver_table_header) {
vserver_table_header = register_sysctl_table(vserver_table, 1);
-#ifdef CONFIG_PROC_FS
-// if (vserver_table[0].de)
-// vserver_table[0].de->owner = THIS_MODULE;
-#endif
}
}
#include <linux/module.h>
#include <linux/vs_memory.h>
#include <linux/syscalls.h>
+#include <linux/vs_memory.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
*/
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
- if (!PageReserved(new_page))
- //++mm->rss;
- vx_rsspages_inc(mm);
+ if (!PageReserved(new_page))
+ // ++mm->rss;
+ vx_rsspages_inc(mm);
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
#include <linux/mm.h>
#include <linux/vs_memory.h>
#include <linux/syscalls.h>
+#include <linux/vs_memory.h>
static int mlock_fixup(struct vm_area_struct * vma,
vma->vm_end = address;
// vma->vm_mm->total_vm += grow;
vx_vmpages_add(vma->vm_mm, grow);
- if (vma->vm_flags & VM_LOCKED) {
+ if (vma->vm_flags & VM_LOCKED)
// vma->vm_mm->locked_vm += grow;
vx_vmlocked_add(vma->vm_mm, grow);
- }
__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
anon_vma_unlock(vma);
return 0;
vma->vm_pgoff -= grow;
// vma->vm_mm->total_vm += grow;
vx_vmpages_add(vma->vm_mm, grow);
- if (vma->vm_flags & VM_LOCKED) {
+ if (vma->vm_flags & VM_LOCKED)
// vma->vm_mm->locked_vm += grow;
vx_vmlocked_add(vma->vm_mm, grow);
- }
__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
anon_vma_unlock(vma);
return 0;
// area->vm_mm->total_vm -= len >> PAGE_SHIFT;
vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT);
-
- if (area->vm_flags & VM_LOCKED) {
+
+ if (area->vm_flags & VM_LOCKED)
// area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT);
- }
vm_stat_unaccount(area);
area->vm_mm->unmap_area(area);
remove_vm_struct(area);
vx_vmpages_sub(mm, mm->total_vm);
// mm->locked_vm = 0;
vx_vmlocked_sub(mm, mm->locked_vm);
- arch_flush_exec_range(mm);
spin_unlock(&mm->page_table_lock);
#include <linux/security.h>
#include <linux/vs_memory.h>
#include <linux/syscalls.h>
+#include <linux/vs_memory.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
tblock->next = current->mm->context.tblock.next;
current->mm->context.tblock.next = tblock;
- current->mm->total_vm += len >> PAGE_SHIFT;
+ // current->mm->total_vm += len >> PAGE_SHIFT;
+ vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
#ifdef DEBUG
printk("do_mmap:\n");
realalloc -= kobjsize(tblock);
askedalloc -= sizeof(struct mm_tblock_struct);
kfree(tblock);
- mm->total_vm -= len >> PAGE_SHIFT;
+ // mm->total_vm -= len >> PAGE_SHIFT;
+ vx_vmpages_sub(mm, len >> PAGE_SHIFT);
#ifdef DEBUG
show_process_blocks();
void exit_mmap(struct mm_struct * mm)
{
struct mm_tblock_struct *tmp;
- mm->total_vm = 0;
+ // mm->total_vm = 0;
+ vx_vmpages_sub(mm, mm->total_vm);
if (!mm)
return;
* The memory size of the process is the basis for the badness.
*/
points = p->mm->total_vm;
- /* add vserver badness ;) */
+ /* FIXME add vserver badness ;) */
/*
* CPU time is in tens of seconds and run time is in thousands
#include <linux/vs_limit.h>
#include <linux/ckrm_mem_inline.h>
#include <linux/nodemask.h>
+#include <linux/vs_limit.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
-#include <linux/vs_base.h>
#include <linux/vs_memory.h>
spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
#include <linux/ckrm_mem.h>
#ifndef AT_LIMIT_SUPPORT
-#warning "ckrm_at_limit disabled due to problems with memory hog tests -- seting ckrm_shrink_list_empty to true"
+#warning "ckrm_at_limit disabled due to problems with memory hog tests -- setting ckrm_shrink_list_empty to true"
#undef ckrm_shrink_list_empty
#define ckrm_shrink_list_empty() (1)
#endif
#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
#include <net/iw_handler.h>
#endif /* CONFIG_NET_RADIO */
+#include <linux/vs_network.h>
#include <asm/current.h>
#include <linux/vs_network.h>
total = 0;
for (dev = dev_base; dev; dev = dev->next) {
+ if (vx_flags(VXF_HIDE_NETIF, 0) &&
+ !dev_in_nx_info(dev, current->nx_info))
+ continue;
for (i = 0; i < NPROTO; i++) {
if (gifconf_list[i]) {
int done;
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
+ struct nx_info *nxi = current->nx_info;
+
+ if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
+ return;
if (dev->get_stats) {
struct net_device_stats *stats = dev->get_stats(dev);
for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
if (idx < s_idx)
continue;
+ if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
+ !dev_in_nx_info(dev, skb->sk->sk_nx_info))
+ continue;
if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
break;
}
sizeof(struct rtnl_link_ifmap) +
sizeof(struct rtnl_link_stats) + 128);
+ if (vx_flags(VXF_HIDE_NETIF, 0) &&
+ !dev_in_nx_info(dev, current->nx_info))
+ return;
skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return;
#include <net/sock.h>
#include <linux/netfilter.h>
-#include <linux/vs_base.h>
#include <linux/vs_socket.h>
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
get_group_info(current->group_info);
acred.uid = current->fsuid;
acred.gid = current->fsgid;
- acred.xid = current->xid;
+ acred.xid = vx_current_xid();
acred.group_info = current->group_info;
dprintk("RPC: looking up %s cred\n",
get_group_info(current->group_info);
acred.uid = current->fsuid;
acred.gid = current->fsgid;
- acred.xid = current->xid;
+ acred.xid = vx_current_xid();
acred.group_info = current->group_info;
dprintk("RPC: %4d looking up %s cred\n",
if (flags & RPC_TASK_ROOTCREDS) {
cred->uc_uid = cred->uc_puid = 0;
cred->uc_gid = cred->uc_pgid = 0;
- cred->uc_xid = cred->uc_pxid = current->xid;
+ cred->uc_xid = cred->uc_pxid = vx_current_xid();
cred->uc_gids[0] = NOGROUP;
} else {
int groups = acred->group_info->ngroups;
cred->uc_xid = acred->xid;
cred->uc_puid = current->uid;
cred->uc_pgid = current->gid;
- cred->uc_pxid = current->xid;
+ cred->uc_pxid = vx_current_xid();
for (i = 0; i < groups; i++)
cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
if (i < NFS_NGROUPS)
|| cred->uc_xid != acred->xid
|| cred->uc_puid != current->uid
|| cred->uc_pgid != current->gid
- || cred->uc_pxid != current->xid)
+ || cred->uc_pxid != vx_current_xid())
return 0;
groups = acred->group_info->ngroups;
*/
int capable(int cap)
{
+ if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
+ return 0;
if (security_ops->capable(current, cap)) {
/* capability denied */
return 0;
return 1;
}
+/* Context-aware capable(): grants capability 'cap' only when the
+ * security module allows it AND the current vserver context holds
+ * the context capability 'ccap'.  Sets PF_SUPERPRIV on success,
+ * mirroring the behaviour of plain capable(). */
+int vx_capable(int cap, int ccap)
+{
+ if (security_ops->capable(current, cap)) {
+ /* capability denied */
+ return 0;
+ }
+ if (!vx_ccaps(ccap))
+ return 0;
+
+ /* capability granted */
+ current->flags |= PF_SUPERPRIV;
+ return 1;
+}
+
EXPORT_SYMBOL_GPL(register_security);
EXPORT_SYMBOL_GPL(unregister_security);
EXPORT_SYMBOL_GPL(mod_reg_security);
EXPORT_SYMBOL_GPL(mod_unreg_security);
EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(vx_capable);
EXPORT_SYMBOL(security_ops);