/* * linux/kernel/vserver/proc.c * * Virtual Context Support * * Copyright (C) 2003-2004 Herbert Pötzl * * V0.01 basic structure * V0.02 adaptation vs1.3.0 * V0.03 proc permissions * V0.04 locking/generic * V0.05 next generation procfs * V0.06 inode validation * V0.07 generic rewrite vid * */ #include #include #include #include #include #include static struct proc_dir_entry *proc_virtual; static struct proc_dir_entry *proc_vnet; enum vid_directory_inos { PROC_XID_INO = 32, PROC_XID_INFO, PROC_XID_STATUS, PROC_XID_LIMIT, PROC_XID_SCHED, PROC_XID_CVIRT, PROC_XID_CACCT, PROC_NID_INO = 64, PROC_NID_INFO, PROC_NID_STATUS, }; #define PROC_VID_MASK 0x60 /* first the actual feeds */ static int proc_virtual_info(int vid, char *buffer) { return sprintf(buffer, "VCIVersion:\t%04x:%04x\n" "VCISyscall:\t%d\n" ,VCI_VERSION >> 16 ,VCI_VERSION & 0xFFFF ,__NR_vserver ); } int proc_xid_info (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = sprintf(buffer, "ID:\t%d\n" "Info:\t%p\n" "Init:\t%d\n" ,vxi->vx_id ,vxi ,vxi->vx_initpid ); put_vx_info(vxi); return length; } int proc_xid_status (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = sprintf(buffer, "RefC:\t%d\n" "Flags:\t%016llx\n" "BCaps:\t%016llx\n" "CCaps:\t%016llx\n" "Ticks:\t%d\n" ,atomic_read(&vxi->vx_refcount) ,vxi->vx_flags ,vxi->vx_bcaps ,vxi->vx_ccaps ,atomic_read(&vxi->limit.ticks) ); put_vx_info(vxi); return length; } int proc_xid_limit (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_limit(&vxi->limit, buffer); put_vx_info(vxi); return length; } int proc_xid_sched (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_sched(&vxi->sched, buffer); put_vx_info(vxi); return length; } int proc_xid_cvirt (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_cvirt(&vxi->cvirt, buffer); put_vx_info(vxi); return length; } int proc_xid_cacct (int vid, char *buffer) { struct vx_info *vxi; int length; vxi = find_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_cacct(&vxi->cacct, buffer); put_vx_info(vxi); return length; } static int proc_vnet_info(int vid, char *buffer) { return sprintf(buffer, "VCIVersion:\t%04x:%04x\n" "VCISyscall:\t%d\n" ,VCI_VERSION >> 16 ,VCI_VERSION & 0xFFFF ,__NR_vserver ); } #define atoquad(a) \ (((a)>>0) & 0xff), (((a)>>8) & 0xff), \ (((a)>>16) & 0xff), (((a)>>24) & 0xff) int proc_nid_info (int vid, char *buffer) { struct nx_info *nxi; int length, i; nxi = find_nx_info(vid); if (!nxi) return 0; length = sprintf(buffer, "ID:\t%d\n" "Info:\t%p\n" ,nxi->nx_id ,nxi ); for (i=0; inbipv4; i++) { length += sprintf(buffer + length, "%d:\t%d.%d.%d.%d/%d.%d.%d.%d\n", i, atoquad(nxi->ipv4[i]), atoquad(nxi->mask[i])); } put_nx_info(nxi); return length; } int proc_nid_status (int vid, char *buffer) { struct nx_info *nxi; int length; nxi = find_nx_info(vid); if (!nxi) return 0; length = sprintf(buffer, "RefC:\t%d\n" ,atomic_read(&nxi->nx_refcount) ); put_nx_info(nxi); return length; } /* here the inode helpers */ #define fake_ino(id,ino) (((id)<<16)|(ino)) #define inode_vid(i) ((i)->i_ino >> 16) #define inode_type(i) ((i)->i_ino & 0xFFFF) #define MAX_MULBY10 ((~0U-9)/10) static struct inode *proc_vid_make_inode(struct super_block * sb, int vid, int ino) { struct inode *inode = new_inode(sb); if (!inode) goto out; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(vid, ino); inode->i_uid = 0; inode->i_gid = 0; // inode->i_xid = xid; out: return inode; } static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * inode = dentry->d_inode; int vid, valid=0; vid = inode_vid(inode); switch (inode_type(inode) & PROC_VID_MASK) { case PROC_XID_INO: valid = vx_info_id_valid(vid); break; case PROC_NID_INO: valid = nx_info_id_valid(vid); break; } if (valid) return 1; d_drop(dentry); return 0; } /* static int proc_vid_delete_dentry(struct dentry * dentry) { return 1; } */ #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) static ssize_t proc_vid_info_read(struct file * file, char * buf, size_t count, loff_t *ppos) { struct inode * inode = file->f_dentry->d_inode; unsigned long page; ssize_t length; ssize_t end; int vid; if (count > PROC_BLOCK_SIZE) count = PROC_BLOCK_SIZE; if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; vid = inode_vid(inode); length = PROC_I(inode)->op.proc_vid_read(vid, (char*)page); if (length < 0) { free_page(page); return length; } /* Static 4kB (or whatever) block capacity */ if (*ppos >= length) { free_page(page); return 0; } if (count + *ppos > length) count = length - *ppos; end = count + *ppos; copy_to_user(buf, (char *) page + *ppos, count); *ppos = end; free_page(page); return count; } /* here comes the lower level (vid) */ static struct file_operations proc_vid_info_file_operations = { read: proc_vid_info_read, }; static struct dentry_operations proc_vid_dentry_operations = { d_revalidate: proc_vid_revalidate, // d_delete: proc_vid_delete_dentry, }; struct vid_entry { int type; int len; char *name; mode_t mode; }; #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} static struct vid_entry vx_base_stuff[] = { E(PROC_XID_INFO, "info", S_IFREG|S_IRUGO), E(PROC_XID_STATUS, "status", S_IFREG|S_IRUGO), E(PROC_XID_LIMIT, "limit", S_IFREG|S_IRUGO), E(PROC_XID_SCHED, "sched", S_IFREG|S_IRUGO), E(PROC_XID_CVIRT, "cvirt", S_IFREG|S_IRUGO), E(PROC_XID_CACCT, "cacct", S_IFREG|S_IRUGO), {0,0,NULL,0} }; static struct vid_entry vn_base_stuff[] = { E(PROC_NID_INFO, "info", S_IFREG|S_IRUGO), E(PROC_NID_STATUS, "status", S_IFREG|S_IRUGO), {0,0,NULL,0} }; static struct dentry *proc_vid_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode; struct vid_entry *p; int error; error = -ENOENT; inode = NULL; switch (inode_type(dir)) { case PROC_XID_INO: p = vx_base_stuff; break; case PROC_NID_INO: p = vn_base_stuff; break; default: goto out; } for (; p->name; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) break; } if (!p->name) goto out; error = -EINVAL; inode = proc_vid_make_inode(dir->i_sb, inode_vid(dir), p->type); if (!inode) goto out; switch(p->type) { case PROC_XID_INFO: PROC_I(inode)->op.proc_vid_read = proc_xid_info; break; case PROC_XID_STATUS: PROC_I(inode)->op.proc_vid_read = proc_xid_status; break; case PROC_XID_LIMIT: PROC_I(inode)->op.proc_vid_read = proc_xid_limit; break; case PROC_XID_SCHED: PROC_I(inode)->op.proc_vid_read = proc_xid_sched; break; case PROC_XID_CVIRT: PROC_I(inode)->op.proc_vid_read = proc_xid_cvirt; break; case PROC_XID_CACCT: PROC_I(inode)->op.proc_vid_read = proc_xid_cacct; break; case PROC_NID_INFO: PROC_I(inode)->op.proc_vid_read = proc_nid_info; break; case PROC_NID_STATUS: PROC_I(inode)->op.proc_vid_read = proc_nid_status; break; default: printk("procfs: impossible type (%d)",p->type); iput(inode); return ERR_PTR(-EINVAL); } inode->i_mode = p->mode; // inode->i_op = &proc_vid_info_inode_operations; inode->i_fop = &proc_vid_info_file_operations; inode->i_nlink = 1; inode->i_flags|=S_IMMUTABLE; dentry->d_op = &proc_vid_dentry_operations; d_add(dentry, inode); error = 0; out: return ERR_PTR(error); } static int proc_vid_readdir(struct file * filp, void * dirent, filldir_t filldir) { int i, size; struct inode *inode = filp->f_dentry->d_inode; struct vid_entry *p; i = filp->f_pos; switch (i) { case 0: if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0) return 0; i++; filp->f_pos++; /* fall through */ case 1: if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0) return 0; i++; filp->f_pos++; /* fall through */ default: i -= 2; switch (inode_type(inode)) { case PROC_XID_INO: size = sizeof(vx_base_stuff); p = vx_base_stuff + i; break; case PROC_NID_INO: size = sizeof(vn_base_stuff); p = vn_base_stuff + i; break; default: return 1; } if (i >= size/sizeof(struct vid_entry)) return 1; while (p->name) { if (filldir(dirent, p->name, p->len, filp->f_pos, fake_ino(inode_vid(inode), p->type), p->mode >> 12) < 0) return 0; filp->f_pos++; p++; } } return 1; } /* now the upper level (virtual) */ static struct file_operations proc_vid_file_operations = { read: generic_read_dir, readdir: proc_vid_readdir, }; static struct inode_operations proc_vid_inode_operations = { lookup: proc_vid_lookup, }; static __inline__ int atovid(const char *str, int len) { int vid, c; vid = 0; while (len-- > 0) { c = *str - '0'; str++; if (c > 9) return -1; if (vid >= MAX_MULBY10) return -1; vid *= 10; vid += c; if (!vid) return -1; } return vid; } struct dentry *proc_virtual_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { int xid, len, ret; struct vx_info *vxi; const char *name; struct inode *inode; name = dentry->d_name.name; len = dentry->d_name.len; ret = -ENOMEM; if (len == 7 && !memcmp(name, "current", 7)) { inode = new_inode(dir->i_sb); if (!inode) goto out; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(1, PROC_XID_INO); inode->i_mode = S_IFLNK|S_IRWXUGO; inode->i_uid = inode->i_gid = 0; inode->i_size = 64; // inode->i_op = &proc_current_inode_operations; d_add(dentry, inode); return NULL; } if (len == 4 && !memcmp(name, "info", 4)) { inode = proc_vid_make_inode(dir->i_sb, 0, PROC_XID_INFO); if (!inode) goto out; inode->i_fop = &proc_vid_info_file_operations; PROC_I(inode)->op.proc_vid_read = proc_virtual_info; inode->i_mode = S_IFREG|S_IRUGO; // inode->i_size = 64; // inode->i_op = &proc_current_inode_operations; d_add(dentry, inode); return NULL; } ret = -ENOENT; xid = atovid(name, len); if (xid < 0) goto out; vxi = find_vx_info(xid); if (!vxi) goto out; inode = NULL; if (vx_check(xid, VX_ADMIN|VX_WATCH|VX_IDENT)) inode = proc_vid_make_inode(dir->i_sb, vxi->vx_id, PROC_XID_INO); if (!inode) goto out_release; inode->i_mode = S_IFDIR|S_IRUGO; inode->i_op = &proc_vid_inode_operations; inode->i_fop = &proc_vid_file_operations; inode->i_nlink = 2; inode->i_flags|=S_IMMUTABLE; dentry->d_op = &proc_vid_dentry_operations; d_add(dentry, inode); ret = 0; out_release: put_vx_info(vxi); out: return ERR_PTR(ret); } struct dentry *proc_vnet_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { int nid, len, ret; struct nx_info *nxi; const char *name; struct inode *inode; name = dentry->d_name.name; len = dentry->d_name.len; ret = -ENOMEM; if (len == 7 && !memcmp(name, "current", 7)) { inode = new_inode(dir->i_sb); if (!inode) goto out; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(1, PROC_NID_INO); inode->i_mode = S_IFLNK|S_IRWXUGO; inode->i_uid = inode->i_gid = 0; inode->i_size = 64; // inode->i_op = &proc_current_inode_operations; d_add(dentry, inode); return NULL; } if (len == 4 && !memcmp(name, "info", 4)) { inode = proc_vid_make_inode(dir->i_sb, 0, PROC_NID_INFO); if (!inode) goto out; inode->i_fop = &proc_vid_info_file_operations; PROC_I(inode)->op.proc_vid_read = proc_vnet_info; inode->i_mode = S_IFREG|S_IRUGO; // inode->i_size = 64; // inode->i_op = &proc_current_inode_operations; d_add(dentry, inode); return NULL; } ret = -ENOENT; nid = atovid(name, len); if (nid < 0) goto out; nxi = find_nx_info(nid); if (!nxi) goto out; inode = NULL; if (1) inode = proc_vid_make_inode(dir->i_sb, nxi->nx_id, PROC_NID_INO); if (!inode) goto out_release; inode->i_mode = S_IFDIR|S_IRUGO; inode->i_op = &proc_vid_inode_operations; inode->i_fop = &proc_vid_file_operations; inode->i_nlink = 2; inode->i_flags|=S_IMMUTABLE; dentry->d_op = &proc_vid_dentry_operations; d_add(dentry, inode); ret = 0; out_release: put_nx_info(nxi); out: return ERR_PTR(ret); } #define PROC_NUMBUF 10 #define PROC_MAXVIDS 32 static int get_xid_list(int index, unsigned int *xids) { struct vx_info *p; int nr_xids = 0; index--; spin_lock(&vxlist_lock); list_for_each_entry(p, &vx_infos, vx_list) { int xid = p->vx_id; if (--index >= 0) continue; xids[nr_xids] = xid; if (++nr_xids >= PROC_MAXVIDS) break; } spin_unlock(&vxlist_lock); return nr_xids; } int proc_virtual_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned int xid_array[PROC_MAXVIDS]; char buf[PROC_NUMBUF]; unsigned int nr = filp->f_pos-3; unsigned int nr_xids, i; ino_t ino; switch ((long)filp->f_pos) { case 0: ino = fake_ino(0, PROC_XID_INO); if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) return 0; filp->f_pos++; /* fall through */ case 1: ino = filp->f_dentry->d_parent->d_inode->i_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) return 0; filp->f_pos++; /* fall through */ case 2: ino = fake_ino(0, PROC_XID_INFO); if (filldir(dirent, "info", 4, filp->f_pos, ino, DT_LNK) < 0) return 0; filp->f_pos++; /* fall through */ case 3: if (current->xid > 1) { ino = fake_ino(1, PROC_XID_INO); if (filldir(dirent, "current", 7, filp->f_pos, ino, DT_LNK) < 0) return 0; } filp->f_pos++; } nr_xids = get_xid_list(nr, xid_array); for (i = 0; i < nr_xids; i++) { int xid = xid_array[i]; ino_t ino = fake_ino(xid, PROC_XID_INO); unsigned long j = PROC_NUMBUF; do buf[--j] = '0' + (xid % 10); while (xid/=10); if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) break; filp->f_pos++; } return 0; } static struct file_operations proc_virtual_dir_operations = { read: generic_read_dir, readdir: proc_virtual_readdir, }; static struct inode_operations proc_virtual_dir_inode_operations = { lookup: proc_virtual_lookup, }; static int get_nid_list(int index, unsigned int *nids) { struct nx_info *p; int nr_nids = 0; index--; spin_lock(&nxlist_lock); list_for_each_entry(p, &nx_infos, nx_list) { int nid = p->nx_id; if (--index >= 0) continue; nids[nr_nids] = nid; if (++nr_nids >= PROC_MAXVIDS) break; } spin_unlock(&nxlist_lock); return nr_nids; } int proc_vnet_readdir(struct file * filp, void * dirent, filldir_t filldir) { unsigned int nid_array[PROC_MAXVIDS]; char buf[PROC_NUMBUF]; unsigned int nr = filp->f_pos-3; unsigned int nr_nids, i; ino_t ino; switch ((long)filp->f_pos) { case 0: ino = fake_ino(0, PROC_NID_INO); if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) return 0; filp->f_pos++; /* fall through */ case 1: ino = filp->f_dentry->d_parent->d_inode->i_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) return 0; filp->f_pos++; /* fall through */ case 2: ino = fake_ino(0, PROC_NID_INFO); if (filldir(dirent, "info", 4, filp->f_pos, ino, DT_LNK) < 0) return 0; filp->f_pos++; /* fall through */ case 3: if (current->xid > 1) { ino = fake_ino(1, PROC_NID_INO); if (filldir(dirent, "current", 7, filp->f_pos, ino, DT_LNK) < 0) return 0; } filp->f_pos++; } nr_nids = get_nid_list(nr, nid_array); for (i = 0; i < nr_nids; i++) { int nid = nid_array[i]; ino_t ino = fake_ino(nid, PROC_NID_INO); unsigned long j = PROC_NUMBUF; do buf[--j] = '0' + (nid % 10); while (nid/=10); if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) break; filp->f_pos++; } return 0; } static struct file_operations proc_vnet_dir_operations = { read: generic_read_dir, readdir: proc_vnet_readdir, }; static struct inode_operations proc_vnet_dir_inode_operations = { lookup: proc_vnet_lookup, }; void proc_vx_init(void) { struct proc_dir_entry *ent; ent = proc_mkdir("virtual", 0); if (ent) { ent->proc_fops = &proc_virtual_dir_operations; ent->proc_iops = &proc_virtual_dir_inode_operations; } proc_virtual = ent; ent = proc_mkdir("vnet", 0); if (ent) { ent->proc_fops = &proc_vnet_dir_operations; ent->proc_iops = &proc_vnet_dir_inode_operations; } proc_vnet = ent; } /* per pid info */ char *task_vx_info(struct task_struct *p, char *buffer) { return buffer + sprintf(buffer, "XID:\t%d\n" ,p->xid); } int proc_pid_vx_info(struct task_struct *p, char *buffer) { char * orig = buffer; buffer = task_vx_info(p, buffer); return buffer - orig; } char *task_nx_info(struct task_struct *p, char *buffer) { return buffer + sprintf(buffer, "NID:\t%d\n" ,p->nid); } int proc_pid_nx_info(struct task_struct *p, char *buffer) { char * orig = buffer; buffer = task_nx_info(p, buffer); return buffer - orig; }