--- /dev/null
+/*
+ * linux/drivers/block/vroot.c
+ *
+ * written by Herbert Pötzl, 9/11/2002
+ * ported to 2.6.10 by Herbert Pötzl, 30/12/2004
+ *
+ * based on the loop.c code by Theodore Ts'o.
+ *
+ * Copyright (C) 2002-2005 by Herbert Pötzl.
+ * Redistribution of this file is permitted under the
+ * GNU General Public License.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/file.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/devfs_fs_kernel.h>
+
+#include <linux/vroot.h>
+
+
+static int max_vroot = 8;
+
+static struct vroot_device *vroot_dev;
+static struct gendisk **disks;
+
+
+/*
+ * Bind vroot device @vr to the block device referenced by fd @arg
+ * (VROOT_SET_DEV ioctl).  Caller holds vr->vr_ctl_mutex.
+ *
+ * Returns 0 on success, -EBUSY if @vr is already bound, -EBADF if
+ * @arg is not a valid fd, -EINVAL if the fd is not a block device.
+ *
+ * NOTE(review): only a reference on the backing bdev's inode is taken
+ * (__iget); the device itself is not opened here — confirm that is
+ * sufficient for all later uses of vr_device.
+ */
+static int vroot_set_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev,
+	unsigned int arg)
+{
+	struct block_device *real_bdev;
+	struct file *file;
+	struct inode *inode;
+	int error;
+
+	error = -EBUSY;
+	if (vr->vr_state != Vr_unbound)
+		goto out;
+
+	error = -EBADF;
+	file = fget(arg);
+	if (!file)
+		goto out;
+
+	error = -EINVAL;
+	inode = file->f_dentry->d_inode;
+
+
+	if (S_ISBLK(inode->i_mode)) {
+		real_bdev = inode->i_bdev;
+		vr->vr_device = real_bdev;
+		/* pin the backing inode so it outlives the fd we drop below */
+		__iget(real_bdev->bd_inode);
+	} else
+		goto out_fput;
+
+	printk(KERN_INFO "vroot[%d]_set_dev: dev=%p[%d,%d]\n",
+		vr->vr_number, real_bdev,
+		imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode));
+
+	vr->vr_state = Vr_bound;
+	error = 0;
+
+	/* the fd was only needed to look up the bdev; always release it */
+ out_fput:
+	fput(file);
+ out:
+	return error;
+}
+
+/*
+ * Unbind vroot device @vr from its backing block device
+ * (VROOT_CLR_DEV ioctl).  Caller holds vr->vr_ctl_mutex.
+ *
+ * Returns 0 on success, -ENXIO if not bound, -EBUSY while any opener
+ * other than the ioctl caller itself still holds the device.
+ */
+static int vroot_clr_dev(
+	struct vroot_device *vr,
+	struct file *vr_file,
+	struct block_device *bdev)
+{
+	struct block_device *real_bdev;
+
+	if (vr->vr_state != Vr_bound)
+		return -ENXIO;
+	if (vr->vr_refcnt > 1)	/* we needed one fd for the ioctl */
+		return -EBUSY;
+
+	real_bdev = vr->vr_device;
+
+	printk(KERN_INFO "vroot[%d]_clr_dev: dev=%p[%d,%d]\n",
+		vr->vr_number, real_bdev,
+		imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode));
+
+	/* drop the inode reference taken via __iget() in vroot_set_dev() */
+	bdput(real_bdev);
+	vr->vr_state = Vr_unbound;
+	vr->vr_device = NULL;
+	return 0;
+}
+
+
+/*
+ * ioctl entry point for /dev/vrootN: dispatch the two control
+ * commands, serialized by the per-device control mutex.
+ */
+static int vr_ioctl(struct inode * inode, struct file * file,
+	unsigned int cmd, unsigned long arg)
+{
+	struct vroot_device *vroot = inode->i_bdev->bd_disk->private_data;
+	int ret;
+
+	down(&vroot->vr_ctl_mutex);
+	if (cmd == VROOT_SET_DEV)
+		ret = vroot_set_dev(vroot, file, inode->i_bdev, arg);
+	else if (cmd == VROOT_CLR_DEV)
+		ret = vroot_clr_dev(vroot, file, inode->i_bdev);
+	else
+		ret = -EINVAL;
+	up(&vroot->vr_ctl_mutex);
+	return ret;
+}
+
+/*
+ * Block device open: account one more opener.  The open count is
+ * protected by the control mutex and checked in vroot_clr_dev().
+ */
+static int vr_open(struct inode *inode, struct file *file)
+{
+	struct vroot_device *dev = inode->i_bdev->bd_disk->private_data;
+
+	down(&dev->vr_ctl_mutex);
+	dev->vr_refcnt++;
+	up(&dev->vr_ctl_mutex);
+	return 0;
+}
+
+/*
+ * Block device release: drop one opener from the count taken in
+ * vr_open(), under the same control mutex.
+ */
+static int vr_release(struct inode *inode, struct file *file)
+{
+	struct vroot_device *dev = inode->i_bdev->bd_disk->private_data;
+
+	down(&dev->vr_ctl_mutex);
+	dev->vr_refcnt--;
+	up(&dev->vr_ctl_mutex);
+	return 0;
+}
+
+/* block device operations for the /dev/vrootN control nodes */
+static struct block_device_operations vr_fops = {
+	.owner =	THIS_MODULE,
+	.open =		vr_open,
+	.release =	vr_release,
+	.ioctl =	vr_ioctl,
+};
+
+/*
+ * Resolve a vroot block device to the real block device it is bound
+ * to.  Takes a reference on the real bdev's inode (__iget); the
+ * caller is responsible for dropping it (bdput).
+ *
+ * Returns the bound block device, or ERR_PTR(-ENXIO) if the vroot
+ * device is not currently bound.
+ */
+struct block_device *vroot_get_real_bdev(struct block_device *bdev)
+{
+	struct inode *inode = bdev->bd_inode;
+	struct vroot_device *vr;
+	struct block_device *real_bdev;
+	int minor = iminor(inode);
+
+	vr = &vroot_dev[minor];
+
+	/* check the binding state BEFORE touching vr_device: on an
+	 * unbound device vr_device is NULL (see vroot_clr_dev) and the
+	 * debug printk below would dereference a NULL pointer */
+	if (vr->vr_state != Vr_bound)
+		return ERR_PTR(-ENXIO);
+
+	real_bdev = vr->vr_device;
+
+	printk(KERN_INFO "vroot[%d]_get_real_bdev: dev=%p[%p,%d,%d]\n",
+		vr->vr_number, real_bdev, real_bdev->bd_inode,
+		imajor(real_bdev->bd_inode), iminor(real_bdev->bd_inode));
+
+	__iget(real_bdev->bd_inode);
+	return real_bdev;
+}
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+module_param(max_vroot, int, 0);
+
+MODULE_PARM_DESC(max_vroot, "Maximum number of vroot devices (1-256)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(VROOT_MAJOR);
+
+MODULE_AUTHOR ("Herbert Pötzl");
+MODULE_DESCRIPTION ("Virtual Root Device Mapper");
+
+
+/*
+ * Module init: register the vroot major, allocate the device table
+ * and one gendisk per device, then publish all disks.
+ *
+ * Returns 0 on success, -EIO if the major cannot be registered,
+ * -ENOMEM on any allocation failure (with everything already
+ * allocated unwound via the out_mem* labels, in reverse order).
+ */
+int __init vroot_init(void)
+{
+	int i;
+
+	/* clamp the module parameter to a sane range */
+	if (max_vroot < 1 || max_vroot > 256) {
+		max_vroot = MAX_VROOT_DEFAULT;
+		printk(KERN_WARNING "vroot: invalid max_vroot "
+			"(must be between 1 and 256), "
+			"using default (%d)\n", max_vroot);
+	}
+
+	if (register_blkdev(VROOT_MAJOR, "vroot"))
+		return -EIO;
+
+	vroot_dev = kmalloc(max_vroot * sizeof(struct vroot_device), GFP_KERNEL);
+	if (!vroot_dev)
+		goto out_mem1;
+	memset(vroot_dev, 0, max_vroot * sizeof(struct vroot_device));
+
+	disks = kmalloc(max_vroot * sizeof(struct gendisk *), GFP_KERNEL);
+	if (!disks)
+		goto out_mem2;
+
+	for (i = 0; i < max_vroot; i++) {
+		disks[i] = alloc_disk(1);
+		if (!disks[i])
+			goto out_mem3;
+	}
+
+	devfs_mk_dir("vroot");
+
+	/* wire up each device and its disk before any are published */
+	for (i = 0; i < max_vroot; i++) {
+		struct vroot_device *vr = &vroot_dev[i];
+		struct gendisk *disk = disks[i];
+
+		memset(vr, 0, sizeof(*vr));
+		init_MUTEX(&vr->vr_ctl_mutex);
+		vr->vr_number = i;
+		disk->major = VROOT_MAJOR;
+		disk->first_minor = i;
+		disk->fops = &vr_fops;
+		sprintf(disk->disk_name, "vroot%d", i);
+		sprintf(disk->devfs_name, "vroot/%d", i);
+		disk->private_data = vr;
+	}
+
+	/* publish only after all devices are fully initialized */
+	for (i = 0; i < max_vroot; i++)
+		add_disk(disks[i]);
+	printk(KERN_INFO "vroot: loaded (max %d devices)\n", max_vroot);
+	return 0;
+
+out_mem3:
+	/* i points past the last successful alloc_disk(); free the rest */
+	while (i--)
+		put_disk(disks[i]);
+	kfree(disks);
+out_mem2:
+	kfree(vroot_dev);
+out_mem1:
+	unregister_blkdev(VROOT_MAJOR, "vroot");
+	printk(KERN_ERR "vroot: ran out of memory\n");
+	return -ENOMEM;
+}
+
+/*
+ * Module exit: tear down in reverse order of vroot_init() —
+ * unpublish and drop each disk, remove the devfs directory,
+ * unregister the major, then free the tables.
+ */
+void vroot_exit(void)
+{
+	int i;
+
+	for (i = 0; i < max_vroot; i++) {
+		/* del_gendisk must precede the final put_disk */
+		del_gendisk(disks[i]);
+		put_disk(disks[i]);
+	}
+	devfs_remove("vroot");
+	if (unregister_blkdev(VROOT_MAJOR, "vroot"))
+		printk(KERN_WARNING "vroot: cannot unregister blkdev\n");
+
+	kfree(disks);
+	kfree(vroot_dev);
+}
+
+module_init(vroot_init);
+module_exit(vroot_exit);
+
--- /dev/null
+
+/*
+ * include/linux/vroot.h
+ *
+ * written by Herbert Pötzl, 9/11/2002
+ * ported to 2.6 by Herbert Pötzl, 30/12/2004
+ *
+ * Copyright (C) 2002-2005 by Herbert Pötzl.
+ * Redistribution of this file is permitted under the
+ * GNU General Public License.
+ */
+
+#ifndef _LINUX_VROOT_H
+#define _LINUX_VROOT_H
+
+
+#ifdef __KERNEL__
+
+/* Possible states of device */
+/* Possible states of device */
+enum {
+	Vr_unbound,	/* no backing device attached */
+	Vr_bound,	/* vr_device points at a real block device */
+};
+
+struct vroot_device {
+	int		vr_number;	/* minor / index into vroot_dev[] */
+	int		vr_refcnt;	/* open count, under vr_ctl_mutex */
+
+	struct semaphore	vr_ctl_mutex;	/* serializes open/release/ioctl */
+	struct block_device    *vr_device;	/* backing bdev, NULL when unbound */
+	int			vr_state;	/* Vr_unbound or Vr_bound */
+};
+
+#endif /* __KERNEL__ */
+
+#define MAX_VROOT_DEFAULT	8
+
+/*
+ * IOCTL commands --- we will commandeer 0x56 ('V')
+ */
+
+#define VROOT_SET_DEV		0x5600	/* bind to bdev given by fd arg */
+#define VROOT_CLR_DEV		0x5601	/* drop the current binding */
+
+#endif /* _LINUX_VROOT_H */
--- /dev/null
+#ifndef _VX_VS_SCHED_H
+#define _VX_VS_SCHED_H
+
+
+#include "vserver/sched.h"
+
+
+#define VAVAVOOM_RATIO 50
+
+#define MAX_PRIO_BIAS 20
+#define MIN_PRIO_BIAS -20
+
+
+/* Return the number of CPU tokens currently in the context's bucket. */
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+	int tokens = atomic_read(&vxi->sched.tokens);
+
+	return tokens;
+}
+
+/* Take one CPU token out of the context's bucket (may go negative). */
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+	atomic_dec(&vxi->sched.tokens);
+}
+
+/*
+ * Scheduler-tick helper: decide whether task @p must be rescheduled.
+ * Unconditionally consumes one timeslice tick; with
+ * CONFIG_VSERVER_HARDCPU it also consumes one context token (when one
+ * is available) and forces a resched as soon as fewer than two tokens
+ * remained before consumption.
+ */
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef	CONFIG_VSERVER_HARDCPU
+	struct vx_info *vxi = p->vx_info;
+#endif
+	/* the timeslice is charged even when the token bucket decides */
+	int slice = --p->time_slice;
+
+#ifdef	CONFIG_VSERVER_HARDCPU
+	if (vxi) {
+		int tokens;
+
+		if ((tokens = vx_tokens_avail(vxi)) > 0)
+			vx_consume_token(vxi);
+		/* for tokens > 0, one token was consumed */
+		if (tokens < 2)
+			return 1;
+	}
+#endif
+	return (slice == 0);
+}
+
+
+/*
+ * Put one more process of this context on hold; the first one to go
+ * on hold timestamps the start of the hold period.
+ *
+ * NOTE(review): the read and the increment are two separate atomic
+ * operations — assumes callers are serialized (e.g. under runqueue
+ * locks); confirm at the call sites.
+ */
+static inline void vx_onhold_inc(struct vx_info *vxi)
+{
+	int onhold = atomic_read(&vxi->cvirt.nr_onhold);
+
+	atomic_inc(&vxi->cvirt.nr_onhold);
+	if (!onhold)
+		vxi->cvirt.onhold_last = jiffies;
+}
+
+/*
+ * Account the jiffies spent on hold since onhold_last to the current
+ * CPU's hold_ticks, and restart the hold interval at "now".
+ * Unsigned subtraction handles jiffies wraparound.
+ */
+static inline void __vx_onhold_update(struct vx_info *vxi)
+{
+	int cpu = smp_processor_id();
+	uint32_t now = jiffies;
+	uint32_t delta = now - vxi->cvirt.onhold_last;
+
+	vxi->cvirt.onhold_last = now;
+	vxi->sched.cpu[cpu].hold_ticks += delta;
+}
+
+/*
+ * Take one process off hold; when the last one leaves, account the
+ * elapsed hold time to the per-cpu counters.
+ */
+static inline void vx_onhold_dec(struct vx_info *vxi)
+{
+	if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
+		__vx_onhold_update(vxi);
+}
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_CONTEXT_CMD_H
+#define _VX_CONTEXT_CMD_H
+
+
+/* vinfo commands */
+
+#define VCMD_task_xid VC_CMD(VINFO, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info VC_CMD(VINFO, 5, 0)
+
+struct vcmd_vx_info_v0 {
+ uint32_t xid;
+ uint32_t initpid;
+ /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context commands */
+
+#define VCMD_ctx_create VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* flag commands */
+
+#define VCMD_get_cflags VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags VC_CMD(FLAGS, 2, 0)
+
+struct vcmd_ctx_flags_v0 {
+ uint64_t flagword;
+ uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+
+/* context caps commands */
+
+#define VCMD_get_ccaps VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps VC_CMD(FLAGS, 4, 0)
+
+struct vcmd_ctx_caps_v0 {
+ uint64_t bcaps;
+ uint64_t ccaps;
+ uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_CONTEXT_CMD_H */
--- /dev/null
+#ifndef _VX_CVIRT_CMD_H
+#define _VX_CVIRT_CMD_H
+
+/* cvirt vserver commands */
+
+
+#endif /* _VX_CVIRT_CMD_H */
--- /dev/null
+#ifndef _VX_CVIRT_DEF_H
+#define _VX_CVIRT_DEF_H
+
+#include <linux/jiffies.h>
+#include <linux/utsname.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <asm/atomic.h>
+
+
+/* per-cpu usage counters, mirroring kernel_stat categories */
+struct _vx_usage_stat {
+	uint64_t user;
+	uint64_t nice;
+	uint64_t system;
+	uint64_t softirq;
+	uint64_t irq;
+	uint64_t idle;
+	uint64_t iowait;
+};
+
+/* context sub struct */
+
+struct _vx_cvirt {
+	int max_threads;		/* maximum allowed threads */
+	atomic_t nr_threads;		/* number of current threads */
+	atomic_t nr_running;		/* number of running threads */
+	atomic_t nr_uninterruptible;	/* number of uninterruptible threads */
+
+	atomic_t nr_onhold;		/* processes on hold */
+	uint32_t onhold_last;		/* jiffies when put on hold */
+
+	struct timespec bias_idle;
+	struct timespec bias_uptime;	/* context creation point */
+	uint64_t bias_clock;		/* offset in clock_t */
+
+	struct new_utsname utsname;
+
+	spinlock_t load_lock;		/* lock for the load averages */
+	atomic_t load_updates;		/* nr of load updates done so far */
+	uint32_t load_last;		/* last time load was calculated */
+	uint32_t load[3];		/* load averages 1,5,15 */
+
+	struct _vx_usage_stat cpustat[NR_CPUS];
+};
+
+/* socket accounting pair: open count and byte/usage total */
+struct _vx_sock_acc {
+	atomic_t count;
+	atomic_t total;
+};
+
+/* context sub struct */
+
+struct _vx_cacct {
+	unsigned long total_forks;
+
+	/* indexed [family][direction], see vx_info_proc_cacct() */
+	struct _vx_sock_acc sock[5][3];
+};
+
+#endif /* _VX_CVIRT_DEF_H */
--- /dev/null
+#ifndef _VX_DEBUG_CMD_H
+#define _VX_DEBUG_CMD_H
+
+
+/* debug commands */
+
+#define VCMD_dump_history VC_CMD(DEBUG, 1, 0)
+
+#ifdef __KERNEL__
+
+extern int vc_dump_history(uint32_t);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_DEBUG_CMD_H */
--- /dev/null
+#ifndef _VX_INODE_CMD_H
+#define _VX_INODE_CMD_H
+
+/* inode vserver commands */
+
+#define VCMD_get_iattr_v0 VC_CMD(INODE, 1, 0)
+#define VCMD_set_iattr_v0 VC_CMD(INODE, 2, 0)
+
+#define VCMD_get_iattr VC_CMD(INODE, 1, 1)
+#define VCMD_set_iattr VC_CMD(INODE, 2, 1)
+
+struct vcmd_ctx_iattr_v0 {
+ /* device handle in id */
+ uint64_t ino;
+ uint32_t xid;
+ uint32_t flags;
+ uint32_t mask;
+};
+
+struct vcmd_ctx_iattr_v1 {
+ const char __user *name;
+ uint32_t xid;
+ uint32_t flags;
+ uint32_t mask;
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_iattr_v0(uint32_t, void __user *);
+extern int vc_set_iattr_v0(uint32_t, void __user *);
+
+extern int vc_get_iattr(uint32_t, void __user *);
+extern int vc_set_iattr(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_INODE_CMD_H */
--- /dev/null
+#ifndef _VX_LIMIT_CMD_H
+#define _VX_LIMIT_CMD_H
+
+/* rlimit vserver commands */
+
+#define VCMD_get_rlimit VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask VC_CMD(RLIMIT, 3, 0)
+
+struct vcmd_ctx_rlimit_v0 {
+ uint32_t id;
+ uint64_t minimum;
+ uint64_t softlimit;
+ uint64_t maximum;
+};
+
+struct vcmd_ctx_rlimit_mask_v0 {
+ uint32_t minimum;
+ uint32_t softlimit;
+ uint32_t maximum;
+};
+
+#define CRLIM_UNSET (0ULL)
+#define CRLIM_INFINITY (~0ULL)
+#define CRLIM_KEEP (~1ULL)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_LIMIT_CMD_H */
--- /dev/null
+#ifndef _VX_LIMIT_DEF_H
+#define _VX_LIMIT_DEF_H
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+#include "limit.h"
+
+/* context sub struct */
+
+struct _vx_limit {
+	atomic_t ticks;
+
+	unsigned long rlim[NUM_LIMITS];	/* Context limit */
+	unsigned long rmax[NUM_LIMITS];	/* Context maximum */
+	atomic_t rcur[NUM_LIMITS];	/* Current value */
+	atomic_t lhit[NUM_LIMITS];	/* Limit hits */
+};
+
+
+#endif /* _VX_LIMIT_DEF_H */
--- /dev/null
+#ifndef _VX_NETWORK_CMD_H
+#define _VX_NETWORK_CMD_H
+
+
+/* vinfo commands */
+
+#define VCMD_task_nid VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info VC_CMD(VINFO, 6, 0)
+
+struct vcmd_nx_info_v0 {
+ uint32_t nid;
+ /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove VC_CMD(NETALT, 2, 0)
+
+struct vcmd_net_nx_v0 {
+ uint16_t type;
+ uint16_t count;
+ uint32_t ip[4];
+ uint32_t mask[4];
+ /* more to come */
+};
+
+// IPN_TYPE_IPV4
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags VC_CMD(FLAGS, 6, 0)
+
+struct vcmd_net_flags_v0 {
+ uint64_t flagword;
+ uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP (1ULL<<32)
+
+
+#define IPF_ONE_TIME (0x0001ULL<<32)
+
+#define VCMD_get_ncaps VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps VC_CMD(FLAGS, 8, 0)
+
+struct vcmd_net_caps_v0 {
+ uint64_t ncaps;
+ uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif	/* _VX_NETWORK_CMD_H */
--- /dev/null
+#ifndef _VX_SCHED_CMD_H
+#define _VX_SCHED_CMD_H
+
+/* sched vserver commands */
+
+#define VCMD_set_sched_v2 VC_CMD(SCHED, 1, 2)
+#define VCMD_set_sched VC_CMD(SCHED, 1, 3)
+
+struct vcmd_set_sched_v2 {
+ int32_t fill_rate;
+ int32_t interval;
+ int32_t tokens;
+ int32_t tokens_min;
+ int32_t tokens_max;
+ uint64_t cpu_mask;
+};
+
+struct vcmd_set_sched_v3 {
+ uint32_t set_mask;
+ int32_t fill_rate;
+ int32_t interval;
+ int32_t tokens;
+ int32_t tokens_min;
+ int32_t tokens_max;
+ int32_t priority_bias;
+};
+
+
+#define VXSM_FILL_RATE 0x0001
+#define VXSM_INTERVAL 0x0002
+#define VXSM_TOKENS 0x0010
+#define VXSM_TOKENS_MIN 0x0020
+#define VXSM_TOKENS_MAX 0x0040
+#define VXSM_PRIO_BIAS 0x0100
+
+#define SCHED_KEEP (-2)
+
+#ifdef __KERNEL__
+
+#include <linux/compiler.h>
+
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched_v2(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+#endif /* _VX_SCHED_CMD_H */
--- /dev/null
+#ifndef _VX_SCHED_DEF_H
+#define _VX_SCHED_DEF_H
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/cpumask.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+
+
+/* per-cpu tick accounting for the token bucket scheduler */
+struct _vx_ticks {
+	uint64_t user_ticks;	/* token tick events */
+	uint64_t sys_ticks;	/* token tick events */
+	uint64_t hold_ticks;	/* token ticks paused */
+	uint64_t unused[5];	/* cacheline ? */
+};
+
+/* context sub struct */
+
+struct _vx_sched {
+	atomic_t tokens;		/* number of CPU tokens */
+	spinlock_t tokens_lock;		/* lock for token bucket */
+
+	int fill_rate;			/* Fill rate: add X tokens... */
+	int interval;			/* Divisor: per Y jiffies */
+	int tokens_min;			/* Limit: minimum for unhold */
+	int tokens_max;			/* Limit: no more than N tokens */
+	uint32_t jiffies;		/* last time accounted */
+
+	int priority_bias;		/* bias offset for priority */
+	cpumask_t cpus_allowed;		/* cpu mask for context */
+
+	struct _vx_ticks cpu[NR_CPUS];
+};
+
+#endif /* _VX_SCHED_DEF_H */
--- /dev/null
+
+extern uint64_t vx_idle_jiffies(void);
+
+/*
+ * Initialize the cvirt sub-struct of a fresh context: record uptime,
+ * clock and idle biases at creation time, zero all thread counters,
+ * snapshot the host utsname and reset the load averages.
+ */
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+	uint64_t idle_jiffies = vx_idle_jiffies();
+	uint64_t nsuptime;
+
+	/* uptime/clock bias: the context's clocks start at its creation */
+	do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
+	nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
+		* NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
+	cvirt->bias_clock = nsec_to_clock_t(nsuptime);
+
+	jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+	atomic_set(&cvirt->nr_threads, 0);
+	atomic_set(&cvirt->nr_running, 0);
+	atomic_set(&cvirt->nr_uninterruptible, 0);
+	atomic_set(&cvirt->nr_onhold, 0);
+
+	/* copy the host utsname under the uts semaphore */
+	down_read(&uts_sem);
+	cvirt->utsname = system_utsname;
+	up_read(&uts_sem);
+
+	spin_lock_init(&cvirt->load_lock);
+	cvirt->load_last = jiffies;
+	atomic_set(&cvirt->load_updates, 0);
+	cvirt->load[0] = 0;
+	cvirt->load[1] = 0;
+	cvirt->load[2] = 0;
+}
+
+/*
+ * Tear down the cvirt sub-struct.  Nothing to free; with
+ * CONFIG_VSERVER_DEBUG it warns about thread counters that are
+ * unexpectedly non-zero at context exit.
+ */
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+#ifdef	CONFIG_VSERVER_DEBUG
+	int value;
+
+	vxwprintk((value = atomic_read(&cvirt->nr_threads)),
+		"!!! cvirt: %p[nr_threads] = %d on exit.",
+		cvirt, value);
+	vxwprintk((value = atomic_read(&cvirt->nr_running)),
+		"!!! cvirt: %p[nr_running] = %d on exit.",
+		cvirt, value);
+	vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
+		"!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
+		cvirt, value);
+#endif
+	return;
+}
+
+/*
+ * Zero every per-family/per-direction socket accounting counter of a
+ * fresh context (total_forks is left to the caller's allocation).
+ */
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+	int type, pos;
+
+	for (type = 0; type < 5; type++)
+		for (pos = 0; pos < 3; pos++) {
+			atomic_set(&cacct->sock[type][pos].count, 0);
+			atomic_set(&cacct->sock[type][pos].total, 0);
+		}
+}
+
+/* Socket accounting holds no resources; nothing to release. */
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+}
+
--- /dev/null
+#ifndef _VX_CVIRT_PROC_H
+#define _VX_CVIRT_PROC_H
+
+#include <linux/sched.h>
+
+
+/* split a fixed-point load value into integer and two-digit fraction */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+/*
+ * Render the cvirt state of a context into @buffer for its /proc
+ * entry; returns the number of bytes written.  The caller provides a
+ * sufficiently large page-sized buffer.
+ */
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+	int length = 0;
+	int a, b, c;
+
+	length += sprintf(buffer + length,
+		"BiasUptime:\t%lu.%02lu\n",
+			(unsigned long)cvirt->bias_uptime.tv_sec,
+			(cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
+	length += sprintf(buffer + length,
+		"SysName:\t%.*s\n"
+		"NodeName:\t%.*s\n"
+		"Release:\t%.*s\n"
+		"Version:\t%.*s\n"
+		"Machine:\t%.*s\n"
+		"DomainName:\t%.*s\n"
+		,__NEW_UTS_LEN, cvirt->utsname.sysname
+		,__NEW_UTS_LEN, cvirt->utsname.nodename
+		,__NEW_UTS_LEN, cvirt->utsname.release
+		,__NEW_UTS_LEN, cvirt->utsname.version
+		,__NEW_UTS_LEN, cvirt->utsname.machine
+		,__NEW_UTS_LEN, cvirt->utsname.domainname
+		);
+
+	/* add FIXED_1/200 to round the two-digit fraction, not truncate */
+	a = cvirt->load[0] + (FIXED_1/200);
+	b = cvirt->load[1] + (FIXED_1/200);
+	c = cvirt->load[2] + (FIXED_1/200);
+	length += sprintf(buffer + length,
+		"nr_threads:\t%d\n"
+		"nr_running:\t%d\n"
+		"nr_unintr:\t%d\n"
+		"nr_onhold:\t%d\n"
+		"load_updates:\t%d\n"
+		"loadavg:\t%d.%02d %d.%02d %d.%02d\n"
+		,atomic_read(&cvirt->nr_threads)
+		,atomic_read(&cvirt->nr_running)
+		,atomic_read(&cvirt->nr_uninterruptible)
+		,atomic_read(&cvirt->nr_onhold)
+		,atomic_read(&cvirt->load_updates)
+		,LOAD_INT(a), LOAD_FRAC(a)
+		,LOAD_INT(b), LOAD_FRAC(b)
+		,LOAD_INT(c), LOAD_FRAC(c)
+		);
+	return length;
+}
+
+
+/* Current number of accounted sockets for family @type at slot @pos. */
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+	long count = atomic_read(&cacct->sock[type][pos].count);
+
+	return count;
+}
+
+
+/* Accumulated total for family @type at slot @pos. */
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+	long total = atomic_read(&cacct->sock[type][pos].total);
+
+	return total;
+}
+
+/*
+ * Render socket accounting (per family, three slots each) and the
+ * fork count into @buffer for /proc; returns bytes written.
+ */
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+	int i,j, length = 0;
+	/* row labels matching the first index of cacct->sock[][] */
+	static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+
+	for (i=0; i<5; i++) {
+		length += sprintf(buffer + length,
+			"%s:", type[i]);
+		for (j=0; j<3; j++) {
+			length += sprintf(buffer + length,
+				"\t%12lu/%-12lu"
+				,vx_sock_count(cacct, i, j)
+				,vx_sock_total(cacct, i, j)
+				);
+		}
+		buffer[length++] = '\n';
+	}
+	length += sprintf(buffer + length,
+		"forks:\t%lu\n", cacct->total_forks);
+	return length;
+}
+
+#endif /* _VX_CVIRT_PROC_H */
--- /dev/null
+/*
+ * kernel/vserver/history.c
+ *
+ * Virtual Context History Backtrace
+ *
+ * Copyright (C) 2004-2005 Herbert Pötzl
+ *
+ * V0.01 basic structure
+ * V0.02 hash/unhash and trace
+ * V0.03 preemption fixes
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/unistd.h>
+
+#include <linux/vserver/debug.h>
+
+
+#ifdef	CONFIG_VSERVER_HISTORY
+#define VXH_SIZE	CONFIG_VSERVER_HISTORY_SIZE
+#else
+#define VXH_SIZE	64
+#endif
+
+/* per-cpu ring buffer of trace entries; the extra slot at index
+ * VXH_SIZE serves as a scratch entry while tracing is disabled */
+struct _vx_history {
+	unsigned int counter;
+
+	struct _vx_hist_entry entry[VXH_SIZE+1];
+};
+
+
+DEFINE_PER_CPU(struct _vx_history, vx_history_buffer);
+
+/* global trace enable flag; cleared while dumping */
+unsigned volatile int vxh_active = 1;
+
+/* global sequence number shared by all cpus' entries */
+static atomic_t sequence = ATOMIC_INIT(0);
+
+
+/* vxh_advance()
+
+ * requires disabled preemption */
+
+/*
+ * Claim the next history entry on this cpu's ring buffer, stamp it
+ * with a global sequence number and the caller's location @loc, and
+ * return it for the caller to fill in.  While tracing is disabled
+ * (vxh_active == 0) the scratch slot entry[VXH_SIZE] is returned
+ * instead, so callers never need to check the flag themselves.
+ *
+ * requires disabled preemption */
+
+struct _vx_hist_entry *vxh_advance(void *loc)
+{
+	unsigned int cpu = smp_processor_id();
+	struct _vx_history *hist = &per_cpu(vx_history_buffer, cpu);
+	struct _vx_hist_entry *entry;
+	unsigned int index;
+
+	index = vxh_active ? (hist->counter++ % VXH_SIZE) : VXH_SIZE;
+	entry = &hist->entry[index];
+
+	entry->seq = atomic_inc_return(&sequence);
+	entry->loc = loc;
+	return entry;
+}
+
+
+#define VXH_LOC_FMTS "(#%04x,*%d):%p"
+
+#define VXH_LOC_ARGS(e) (e)->seq, cpu, (e)->loc
+
+
+#define VXH_VXI_FMTS "%p[#%d,%d.%d]"
+
+#define VXH_VXI_ARGS(e) (e)->vxi.ptr, \
+ (e)->vxi.ptr?(e)->vxi.xid:0, \
+ (e)->vxi.ptr?(e)->vxi.usecnt:0, \
+ (e)->vxi.ptr?(e)->vxi.tasks:0
+
+/*
+ * Print a single history entry @e recorded on cpu @cpu in a
+ * per-event-type format.  Entry types not listed in the switch are
+ * silently skipped.
+ */
+void	vxh_dump_entry(struct _vx_hist_entry *e, unsigned cpu)
+{
+	switch (e->type) {
+	case VXH_THROW_OOPS:
+		printk( VXH_LOC_FMTS " oops \n", VXH_LOC_ARGS(e));
+		break;
+
+	case VXH_GET_VX_INFO:
+	case VXH_PUT_VX_INFO:
+		printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_GET_VX_INFO)?"get":"put",
+			VXH_VXI_ARGS(e));
+		break;
+
+	case VXH_INIT_VX_INFO:
+	case VXH_SET_VX_INFO:
+	case VXH_CLR_VX_INFO:
+		printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_INIT_VX_INFO)?"init":
+			((e->type==VXH_SET_VX_INFO)?"set":"clr"),
+			VXH_VXI_ARGS(e), e->sc.data);
+		break;
+
+	case VXH_CLAIM_VX_INFO:
+	case VXH_RELEASE_VX_INFO:
+		printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS " @%p\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_CLAIM_VX_INFO)?"claim":"release",
+			VXH_VXI_ARGS(e), e->sc.data);
+		break;
+
+	case VXH_ALLOC_VX_INFO:
+	case VXH_DEALLOC_VX_INFO:
+		printk( VXH_LOC_FMTS " %s_vx_info " VXH_VXI_FMTS "\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_ALLOC_VX_INFO)?"alloc":"dealloc",
+			VXH_VXI_ARGS(e));
+		break;
+
+	case VXH_HASH_VX_INFO:
+	case VXH_UNHASH_VX_INFO:
+		printk( VXH_LOC_FMTS " __%s_vx_info " VXH_VXI_FMTS "\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_HASH_VX_INFO)?"hash":"unhash",
+			VXH_VXI_ARGS(e));
+		break;
+
+	case VXH_LOC_VX_INFO:
+	case VXH_LOOKUP_VX_INFO:
+	case VXH_CREATE_VX_INFO:
+		printk( VXH_LOC_FMTS " __%s_vx_info [#%d] -> " VXH_VXI_FMTS "\n",
+			VXH_LOC_ARGS(e),
+			(e->type==VXH_CREATE_VX_INFO)?"create":
+			((e->type==VXH_LOC_VX_INFO)?"loc":"lookup"),
+			e->ll.arg, VXH_VXI_ARGS(e));
+		break;
+	}
+}
+
+/*
+ * Dump the most recent VXH_SIZE entries of every cpu's ring buffer,
+ * interleaved by age (i == 0 is the newest on each cpu).  The index
+ * math relies on unsigned wraparound of hist->counter.
+ */
+static void __vxh_dump_history(void)
+{
+	unsigned int i,j;
+
+	printk("History:\tSEQ: %8x\tNR_CPUS: %d\n",
+		atomic_read(&sequence), NR_CPUS);
+
+	for (i=0; i < VXH_SIZE; i++) {
+		for (j=0; j < NR_CPUS; j++) {
+			struct _vx_history *hist =
+				&per_cpu(vx_history_buffer, j);
+			unsigned int index = (hist->counter-i) % VXH_SIZE;
+			struct _vx_hist_entry *entry = &hist->entry[index];
+
+			vxh_dump_entry(entry, j);
+		}
+	}
+}
+
+/*
+ * Dump the trace history from an oops/panic path: disable further
+ * tracing, stop the other cpus (irqs must be briefly enabled for the
+ * cross-cpu IPIs to be delivered), then print the buffers.
+ * Note: tracing is intentionally not re-enabled here.
+ */
+void	vxh_dump_history(void)
+{
+	vxh_active = 0;
+#ifdef CONFIG_SMP
+	local_irq_enable();
+	smp_send_stop();
+	local_irq_disable();
+#endif
+	__vxh_dump_history();
+}
+
+
+/* vserver syscall commands below here */
+
+
+/*
+ * Syscall command handler: dump the history with tracing paused,
+ * then re-enable it.  @id is currently unused.  Always returns 0.
+ */
+int vc_dump_history(uint32_t id)
+{
+	vxh_active = 0;
+	__vxh_dump_history();
+	vxh_active = 1;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(vxh_advance);
+
--- /dev/null
+
+/*
+ * linux/kernel/vserver/legacynet.c
+ *
+ * Virtual Server: Legacy Network Functions
+ *
+ * Copyright (C) 2001-2003 Jacques Gelinas
+ * Copyright (C) 2003-2005 Herbert Pötzl
+ *
+ * V0.01 broken out from legacy.c
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
+#include <linux/vserver/legacy.h>
+#include <linux/vserver/namespace.h>
+#include <linux/namespace.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+
+extern struct nx_info *create_nx_info(void);
+
+/* set ipv4 root (syscall) */
+
+/*
+ * set ipv4 root (syscall)
+ *
+ * Replace the caller's network context with a fresh nx_info holding
+ * @nbip IPv4 address/mask pairs copied from user space @data.
+ * Allowed when the caller has no addresses assigned yet, holds
+ * CAP_NET_ADMIN, or selects a subset of its currently installed
+ * addresses with an unchanged broadcast address.
+ *
+ * Returns 0 on success, -EINVAL/-EFAULT on bad input, -EPERM when
+ * the request is not permitted.
+ */
+int vc_set_ipv4root(uint32_t nbip, void __user *data)
+{
+	int i, err = -EPERM;
+	struct vcmd_set_ipv4root_v3 vc_data;
+	struct nx_info *new_nxi, *nxi = current->nx_info;
+
+	/* nbip is unsigned, so only the upper bound can be exceeded */
+	if (nbip > NB_IPV4ROOT)
+		return -EINVAL;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	if (!nxi || nxi->ipv4[0] == 0 || capable(CAP_NET_ADMIN))
+		// We are allowed to change everything
+		err = 0;
+	else if (nxi) {
+		int found = 0;
+
+		// We are allowed to select a subset of the currently
+		// installed IP numbers. No new one allowed
+		// We can't change the broadcast address though
+		for (i=0; i<nbip; i++) {
+			int j;
+			__u32 nxip = vc_data.nx_mask_pair[i].ip;
+			for (j=0; j<nxi->nbipv4; j++) {
+				if (nxip == nxi->ipv4[j]) {
+					found++;
+					break;
+				}
+			}
+		}
+		if ((found == nbip) &&
+			(vc_data.broadcast == nxi->v4_bcast))
+			err = 0;
+	}
+	if (err)
+		return err;
+
+	new_nxi = create_nx_info();
+	if (!new_nxi)
+		return -EINVAL;
+
+	new_nxi->nbipv4 = nbip;
+	for (i=0; i<nbip; i++) {
+		new_nxi->ipv4[i] = vc_data.nx_mask_pair[i].ip;
+		new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
+	}
+	new_nxi->v4_bcast = vc_data.broadcast;
+	// current->nx_info = new_nxi;
+	if (nxi) {
+		printk("!!! switching nx_info %p->%p\n", nxi, new_nxi);
+		clr_nx_info(&current->nx_info);
+	}
+	/* nx_migrate_task takes its own reference; drop ours */
+	nx_migrate_task(current, new_nxi);
+	// set_nx_info(&current->nx_info, new_nxi);
+	// current->nid = new_nxi->nx_id;
+	put_nx_info(new_nxi);
+	return 0;
+}
+
+
--- /dev/null
+
+/*
+ * Start every context limit of a fresh context at "unlimited" with
+ * zeroed current value, observed maximum and hit counter.
+ */
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+	int i;
+
+	for (i = 0; i < NUM_LIMITS; i++) {
+		limit->rlim[i] = RLIM_INFINITY;
+		limit->rmax[i] = 0;
+		atomic_set(&limit->rcur[i], 0);
+		atomic_set(&limit->lhit[i], 0);
+	}
+}
+
+/*
+ * Tear down the limit sub-struct.  Nothing to free; with
+ * CONFIG_VSERVER_DEBUG it warns about resource counters that are
+ * unexpectedly non-zero at context exit.
+ */
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+#ifdef	CONFIG_VSERVER_DEBUG
+	unsigned long value;
+	unsigned int lim;
+
+	for (lim=0; lim<NUM_LIMITS; lim++) {
+		value = atomic_read(&limit->rcur[lim]);
+		vxwprintk(value,
+			"!!! limit: %p[%s,%d] = %ld on exit.",
+			limit, vlimit_name[lim], lim, value);
+	}
+#endif
+}
+
--- /dev/null
+#ifndef _VX_LIMIT_PROC_H
+#define _VX_LIMIT_PROC_H
+
+
+/*
+ * Refresh the observed maxima from the current values, then clamp
+ * each maximum to its configured limit before reporting.
+ */
+static inline void vx_limit_fixup(struct _vx_limit *limit)
+{
+	unsigned int i;
+
+	for (i = 0; i < NUM_LIMITS; i++) {
+		unsigned long cur = atomic_read(&limit->rcur[i]);
+
+		if (cur > limit->rmax[i])
+			limit->rmax[i] = cur;
+		if (limit->rmax[i] > limit->rlim[i])
+			limit->rmax[i] = limit->rlim[i];
+	}
+}
+
+/* one /proc row: current, observed max, limit, hit count */
+#define VX_LIMIT_FMT	":\t%10d\t%10ld\t%10ld\t%6d\n"
+
+#define VX_LIMIT_ARG(r)				\
+		,atomic_read(&limit->rcur[r])	\
+		,limit->rmax[r]			\
+		,limit->rlim[r]			\
+		,atomic_read(&limit->lhit[r])
+
+/*
+ * Render the tracked resource limits of a context into @buffer for
+ * /proc; returns bytes written.
+ */
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+	vx_limit_fixup(limit);
+	return sprintf(buffer,
+		"PROC"	VX_LIMIT_FMT
+		"VM"	VX_LIMIT_FMT
+		"VML"	VX_LIMIT_FMT
+		"RSS"	VX_LIMIT_FMT
+		"FILES" VX_LIMIT_FMT
+		"SOCK"	VX_LIMIT_FMT
+		VX_LIMIT_ARG(RLIMIT_NPROC)
+		VX_LIMIT_ARG(RLIMIT_AS)
+		VX_LIMIT_ARG(RLIMIT_MEMLOCK)
+		VX_LIMIT_ARG(RLIMIT_RSS)
+		VX_LIMIT_ARG(RLIMIT_NOFILE)
+		VX_LIMIT_ARG(VLIMIT_NSOCK)
+		);
+}
+
+#endif /* _VX_LIMIT_PROC_H */
+
+
--- /dev/null
+
+/*
+ * Initialize the token bucket scheduler state of a fresh context with
+ * hard-coded defaults and zeroed per-cpu tick counters.
+ *
+ * NOTE(review): direct SPIN_LOCK_UNLOCKED assignment and for_each_cpu()
+ * over all ids are pre-2.6.18 idioms — fine for this kernel vintage.
+ */
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+	int i;
+
+	/* scheduling; hard code starting values as constants */
+	sched->fill_rate	= 1;
+	sched->interval		= 4;
+	sched->tokens_min	= HZ >> 4;
+	sched->tokens_max	= HZ >> 1;
+	sched->jiffies		= jiffies;
+	sched->tokens_lock	= SPIN_LOCK_UNLOCKED;
+
+	atomic_set(&sched->tokens, HZ >> 2);
+	sched->cpus_allowed	= CPU_MASK_ALL;
+	sched->priority_bias	= 0;
+
+	for_each_cpu(i) {
+		sched->cpu[i].user_ticks	= 0;
+		sched->cpu[i].sys_ticks		= 0;
+		sched->cpu[i].hold_ticks	= 0;
+	}
+}
+
+/* The scheduler sub-struct holds no resources; nothing to release. */
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+}
+
--- /dev/null
+#ifndef _VX_SCHED_PROC_H
+#define _VX_SCHED_PROC_H
+
+
+/*
+ * Render the token bucket parameters and per-cpu tick counters of a
+ * context into @buffer for /proc; returns bytes written.
+ */
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+	int length = 0;
+	int i;
+
+	length += sprintf(buffer,
+		"Token:\t\t%8d\n"
+		"FillRate:\t%8d\n"
+		"Interval:\t%8d\n"
+		"TokensMin:\t%8d\n"
+		"TokensMax:\t%8d\n"
+		"PrioBias:\t%8d\n"
+		,atomic_read(&sched->tokens)
+		,sched->fill_rate
+		,sched->interval
+		,sched->tokens_min
+		,sched->tokens_max
+		,sched->priority_bias
+		);
+
+	for_each_online_cpu(i) {
+		length += sprintf(buffer + length,
+			"cpu %d: %lld %lld %lld\n"
+			,i
+			,(long long)sched->cpu[i].user_ticks
+			,(long long)sched->cpu[i].sys_ticks
+			,(long long)sched->cpu[i].hold_ticks
+			);
+	}
+
+	return length;
+}
+
+#endif /* _VX_SCHED_PROC_H */
--- /dev/null
+
+/* bit positions in the kernel-config word reported to userspace;
+ * bits 24..26 carry the inode tagging method as a 3-bit value */
+enum {
+	VCI_KCBIT_LEGACY = 1,
+	VCI_KCBIT_LEGACYNET,
+	VCI_KCBIT_NGNET,
+
+	VCI_KCBIT_PROC_SECURE,
+	VCI_KCBIT_HARDCPU,
+	VCI_KCBIT_HARDCPU_IDLE,
+
+	VCI_KCBIT_DEBUG = 16,
+	VCI_KCBIT_HISTORY = 20,
+	VCI_KCBIT_TAGXID = 24,
+};
+
+
+/*
+ * Assemble the compile-time feature word from the kernel .config so
+ * userspace tools can detect which vserver options are built in.
+ */
+static inline uint32_t vci_kernel_config(void)
+{
+	return
+	/* various legacy options */
+#ifdef	CONFIG_VSERVER_LEGACY
+	(1 << VCI_KCBIT_LEGACY) |
+#endif
+#ifdef	CONFIG_VSERVER_LEGACYNET
+	(1 << VCI_KCBIT_LEGACYNET) |
+#endif
+
+	/* configured features */
+#ifdef	CONFIG_VSERVER_PROC_SECURE
+	(1 << VCI_KCBIT_PROC_SECURE) |
+#endif
+#ifdef	CONFIG_VSERVER_HARDCPU
+	(1 << VCI_KCBIT_HARDCPU) |
+#endif
+#ifdef	CONFIG_VSERVER_HARDCPU_IDLE
+	(1 << VCI_KCBIT_HARDCPU_IDLE) |
+#endif
+
+	/* debug options */
+#ifdef	CONFIG_VSERVER_DEBUG
+	(1 << VCI_KCBIT_DEBUG) |
+#endif
+#ifdef	CONFIG_VSERVER_HISTORY
+	(1 << VCI_KCBIT_HISTORY) |
+#endif
+
+	/* inode xid tagging: exactly one method is configured */
+#if	defined(CONFIG_INOXID_NONE)
+	(0 << VCI_KCBIT_TAGXID) |
+#elif	defined(CONFIG_INOXID_UID16)
+	(1 << VCI_KCBIT_TAGXID) |
+#elif	defined(CONFIG_INOXID_GID16)
+	(2 << VCI_KCBIT_TAGXID) |
+#elif	defined(CONFIG_INOXID_UGID24)
+	(3 << VCI_KCBIT_TAGXID) |
+#elif	defined(CONFIG_INOXID_INTERN)
+	(4 << VCI_KCBIT_TAGXID) |
+#elif	defined(CONFIG_INOXID_RUNTIME)
+	(5 << VCI_KCBIT_TAGXID) |
+#else
+	(7 << VCI_KCBIT_TAGXID) |
+#endif
+	0;
+}
+