From b5a4675bc5c23e9e41d1bac88e7a210763b763db Mon Sep 17 00:00:00 2001 From: Marc Fiuczynski Date: Fri, 16 Jul 2004 17:47:59 +0000 Subject: [PATCH] ckrm-E15 --- fs/Makefile | 1 + fs/exec.c | 2 + fs/proc/array.c | 18 ++ fs/proc/base.c | 18 ++ fs/rcfs/Makefile | 5 +- fs/rcfs/dir.c | 187 ++++++-------- fs/rcfs/inode.c | 71 +++--- fs/rcfs/magic.c | 202 +++++++-------- fs/rcfs/rootdir.c | 119 ++++----- fs/rcfs/socket_fs.c | 298 +++++++++++----------- fs/rcfs/super.c | 152 ++++++----- fs/rcfs/tc_magic.c | 95 ++++--- include/linux/ckrm.h | 57 +++-- include/linux/ckrm_ce.h | 60 +++-- include/linux/ckrm_net.h | 23 +- include/linux/ckrm_rc.h | 347 +++++++++++++------------ include/linux/ckrm_tc.h | 7 +- include/linux/ckrm_tsk.h | 15 +- include/linux/rcfs.h | 37 ++- include/linux/sched.h | 75 ++++++ include/linux/taskdelays.h | 20 +- include/linux/tcp.h | 36 ++- include/net/sock.h | 3 + include/net/tcp.h | 124 ++++++++- init/Kconfig | 104 ++++++++ init/main.c | 4 + kernel/Makefile | 2 +- kernel/ckrm/Makefile | 17 +- kernel/ckrm/ckrm.c | 491 +++++++++++++++++++----------------- kernel/ckrm/ckrm_listenaq.c | 346 +++++++++++++------------ kernel/ckrm/ckrm_sockc.c | 284 +++++++++++---------- kernel/ckrm/ckrm_tasks.c | 290 +++++++++++---------- kernel/ckrm/ckrm_tc.c | 463 +++++++++++++++++----------------- kernel/ckrm/ckrmutils.c | 58 ++--- kernel/exit.c | 7 + kernel/fork.c | 16 ++ kernel/sched.c | 18 ++ kernel/sys.c | 16 ++ mm/memory.c | 9 +- net/ipv4/Kconfig | 23 ++ net/ipv4/tcp.c | 172 ++++++++++++- net/ipv4/tcp_ipv4.c | 36 ++- net/ipv4/tcp_minisocks.c | 7 + net/ipv4/tcp_timer.c | 24 +- net/ipv6/tcp_ipv6.c | 35 ++- 45 files changed, 2556 insertions(+), 1838 deletions(-) diff --git a/fs/Makefile b/fs/Makefile index 41cac35d1..2b6285030 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -91,3 +91,4 @@ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ +obj-$(CONFIG_RCFS_FS) += rcfs/ diff --git a/fs/exec.c b/fs/exec.c index f30b49540..61fba8f37 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1030,6 +1031,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) fput(bprm->file); bprm->file = NULL; current->did_exec = 1; + ckrm_cb_exec(bprm->filename); return retval; } read_lock(&binfmt_lock); diff --git a/fs/proc/array.c b/fs/proc/array.c index fbcf8bea0..4c12808e9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -427,3 +427,21 @@ int proc_pid_statm(struct task_struct *task, char *buffer) return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, shared, text, lib, data, 0); } + + +int proc_pid_delay(struct task_struct *task, char * buffer) +{ + int res; + + res = sprintf(buffer,"%u %llu %llu %u %llu %u %llu\n", + get_delay(task,runs), + get_delay(task,runcpu_total), + get_delay(task,waitcpu_total), + get_delay(task,num_iowaits), + get_delay(task,iowait_total), + get_delay(task,num_memwaits), + get_delay(task,mem_iowait_total) + ); + return res; +} + diff --git a/fs/proc/base.c b/fs/proc/base.c index 5d6223272..525fce50f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -89,6 +89,10 @@ enum pid_directory_inos { PROC_TID_ATTR_PREV, PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_DELAY_ACCT + PROC_TID_DELAY_ACCT, + PROC_TGID_DELAY_ACCT, #endif PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -120,6 +124,9 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", 
S_IFDIR|S_IRUGO|S_IXUGO), #endif +#ifdef CONFIG_DELAY_ACCT + E(PROC_TGID_DELAY_ACCT,"delay", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_KALLSYMS E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif @@ -142,6 +149,9 @@ static struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif +#ifdef CONFIG_DELAY_ACCT + E(PROC_TGID_DELAY_ACCT,"delay", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_KALLSYMS E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif @@ -181,6 +191,7 @@ int proc_pid_stat(struct task_struct*,char*); int proc_pid_status(struct task_struct*,char*); int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); +int proc_pid_delay(struct task_struct*,char*); static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { @@ -1374,6 +1385,13 @@ static struct dentry *proc_pident_lookup(struct inode *dir, inode->i_fop = &proc_info_file_operations; ei->op.proc_read = proc_pid_wchan; break; +#endif +#ifdef CONFIG_DELAY_ACCT + case PROC_TID_DELAY_ACCT: + case PROC_TGID_DELAY_ACCT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_delay; + break; #endif default: printk("procfs: impossible type (%d)",p->type); diff --git a/fs/rcfs/Makefile b/fs/rcfs/Makefile index 29575223e..13c3d67d3 100644 --- a/fs/rcfs/Makefile +++ b/fs/rcfs/Makefile @@ -2,9 +2,8 @@ # Makefile for rcfs routines. # -obj-$(CONFIG_RCFS_FS) += rcfs.o - -rcfs-objs := super.o inode.o dir.o rootdir.o magic.o tc_magic.o socket_fs.o +obj-$(CONFIG_RCFS_FS) += rcfs.o +rcfs-objs := super.o inode.o dir.o rootdir.o magic.o tc_magic.o socket_fs.o rcfs-objs-$(CONFIG_CKRM_TYPE_TASKCLASS) += tc_magic.o rcfs-objs-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += socket_fs.o diff --git a/fs/rcfs/dir.c b/fs/rcfs/dir.c index 048fe09bd..a72c75448 100644 --- a/fs/rcfs/dir.c +++ b/fs/rcfs/dir.c @@ -22,11 +22,9 @@ * Created. */ - #include #include #include -#include #include #include #include @@ -42,50 +40,43 @@ #include - - #define rcfs_positive(dentry) ((dentry)->d_inode && !d_unhashed((dentry))) int rcfs_empty(struct dentry *dentry) { - struct dentry *child; - int ret = 0; - - spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) - if (!rcfs_is_magic(child) && rcfs_positive(child)) - goto out; - ret = 1; -out: - spin_unlock(&dcache_lock); - return ret; -} - - + struct dentry *child; + int ret = 0; + spin_lock(&dcache_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_child) + if (!rcfs_is_magic(child) && rcfs_positive(child)) + goto out; + ret = 1; + out: + spin_unlock(&dcache_lock); + return ret; +} /* Directory inode operations */ - -int -rcfs_create(struct inode *dir, struct dentry *dentry, int mode, +int +rcfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return rcfs_mknod(dir, dentry, mode | S_IFREG, 0); } -EXPORT_SYMBOL(rcfs_create); +EXPORT_SYMBOL(rcfs_create); /* Symlinks permitted ?? 
*/ -int -rcfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +int rcfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct inode *inode; int error = -ENOSPC; - inode = rcfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + inode = rcfs_get_inode(dir->i_sb, S_IFLNK | S_IRWXUGO, 0); if (inode) { - int l = strlen(symname)+1; + int l = strlen(symname) + 1; error = page_symlink(inode, symname, l); if (!error) { if (dir->i_mode & S_ISGID) @@ -97,10 +88,10 @@ rcfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) } return error; } + EXPORT_SYMBOL(rcfs_symlink); -int -rcfs_create_coredir(struct inode *dir, struct dentry *dentry) +int rcfs_create_coredir(struct inode *dir, struct dentry *dentry) { struct rcfs_inode_info *ripar, *ridir; @@ -111,17 +102,16 @@ rcfs_create_coredir(struct inode *dir, struct dentry *dentry) // Inform RC's - do Core operations if (ckrm_is_core_valid(ripar->core)) { - sz = strlen(ripar->name) + strlen(dentry->d_name.name) + 2 ; + sz = strlen(ripar->name) + strlen(dentry->d_name.name) + 2; ridir->name = kmalloc(sz, GFP_KERNEL); if (!ridir->name) { return -ENOMEM; } - snprintf(ridir->name, sz,"%s/%s", ripar->name, + snprintf(ridir->name, sz, "%s/%s", ripar->name, dentry->d_name.name); ridir->core = (*(ripar->core->classtype->alloc)) - (ripar->core,ridir->name); - } - else { + (ripar->core, ridir->name); + } else { printk(KERN_ERR "rcfs_mkdir: Invalid parent core %p\n", ripar->core); return -EINVAL; @@ -129,24 +119,23 @@ rcfs_create_coredir(struct inode *dir, struct dentry *dentry) return 0; } -EXPORT_SYMBOL(rcfs_create_coredir); +EXPORT_SYMBOL(rcfs_create_coredir); -int -rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int retval = 0; ckrm_classtype_t *clstype; #if 0 - struct dentry *pd = list_entry(dir->i_dentry.next, struct dentry, - d_alias); + struct dentry *pd = list_entry(dir->i_dentry.next, struct dentry, + d_alias); if ((!strcmp(pd->d_name.name, "/") && !strcmp(dentry->d_name.name, "ce"))) { // Call CE's mkdir if it has registered, else fail. 
if (rcfs_eng_callbacks.mkdir) { - return (*rcfs_eng_callbacks.mkdir)(dir, dentry, mode); + return (*rcfs_eng_callbacks.mkdir) (dir, dentry, mode); } else { return -EINVAL; } @@ -164,18 +153,16 @@ rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_inode->i_op = dir->i_op; dentry->d_inode->i_fop = dir->i_fop; - - retval = rcfs_create_coredir(dir, dentry); - if (retval) { - simple_rmdir(dir,dentry); + retval = rcfs_create_coredir(dir, dentry); + if (retval) { + simple_rmdir(dir, dentry); return retval; - // goto mkdir_err; + // goto mkdir_err; } - - // create the default set of magic files + // create the default set of magic files clstype = (RCFS_I(dentry->d_inode))->core->classtype; - rcfs_create_magic(dentry, &(((struct rcfs_magf*)clstype->mfdesc)[1]), - clstype->mfcount-1); + rcfs_create_magic(dentry, &(((struct rcfs_magf *)clstype->mfdesc)[1]), + clstype->mfcount - 2); return retval; @@ -183,37 +170,34 @@ rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) dir->i_nlink--; return retval; } -EXPORT_SYMBOL(rcfs_mkdir); +EXPORT_SYMBOL(rcfs_mkdir); -int -rcfs_rmdir(struct inode * dir, struct dentry * dentry) +int rcfs_rmdir(struct inode *dir, struct dentry *dentry) { struct rcfs_inode_info *ri = RCFS_I(dentry->d_inode); #if 0 - struct dentry *pd = list_entry(dir->i_dentry.next, + struct dentry *pd = list_entry(dir->i_dentry.next, struct dentry, d_alias); if ((!strcmp(pd->d_name.name, "/") && !strcmp(dentry->d_name.name, "ce"))) { // Call CE's mkdir if it has registered, else fail. if (rcfs_eng_callbacks.rmdir) { - return (*rcfs_eng_callbacks.rmdir)(dir, dentry); + return (*rcfs_eng_callbacks.rmdir) (dir, dentry); } else { return simple_rmdir(dir, dentry); } - } - else if ((!strcmp(pd->d_name.name, "/") && - !strcmp(dentry->d_name.name, "network"))) { + } else if ((!strcmp(pd->d_name.name, "/") && + !strcmp(dentry->d_name.name, "network"))) { return -EPERM; } #endif - + if (!rcfs_empty(dentry)) { printk(KERN_ERR "rcfs_rmdir: directory not empty\n"); - goto out; + return -ENOTEMPTY; } - // Core class removal if (ri->core == NULL) { @@ -222,90 +206,81 @@ rcfs_rmdir(struct inode * dir, struct dentry * dentry) return 0; } - if ((*(ri->core->classtype->free))(ri->core)) { + if ((*(ri->core->classtype->free)) (ri->core)) { printk(KERN_ERR "rcfs_rmdir: ckrm_free_core_class failed\n"); goto out; } - ri->core = NULL ; // just to be safe + ri->core = NULL; // just to be safe // Clear magic files only after core successfully removed - rcfs_clear_magic(dentry); + rcfs_clear_magic(dentry); return simple_rmdir(dir, dentry); -out: + out: return -EBUSY; } -EXPORT_SYMBOL(rcfs_rmdir); +EXPORT_SYMBOL(rcfs_rmdir); -int -rcfs_unlink(struct inode *dir, struct dentry *dentry) +int rcfs_unlink(struct inode *dir, struct dentry *dentry) { // -ENOENT and not -ENOPERM to allow rm -rf to work despite // magic files being present return -ENOENT; } + EXPORT_SYMBOL(rcfs_unlink); - + // rename is allowed on directories only int rcfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry) { - if (S_ISDIR(old_dentry->d_inode->i_mode)) + if (S_ISDIR(old_dentry->d_inode->i_mode)) return simple_rename(old_dir, old_dentry, new_dir, new_dentry); else return -EINVAL; } -EXPORT_SYMBOL(rcfs_rename); +EXPORT_SYMBOL(rcfs_rename); struct inode_operations rcfs_dir_inode_operations = { - .create = rcfs_create, - .lookup = simple_lookup, - .link = simple_link, - .unlink = rcfs_unlink, - .symlink = 
rcfs_symlink, - .mkdir = rcfs_mkdir, - .rmdir = rcfs_rmdir, - .mknod = rcfs_mknod, - .rename = rcfs_rename, + .create = rcfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = rcfs_unlink, + .symlink = rcfs_symlink, + .mkdir = rcfs_mkdir, + .rmdir = rcfs_rmdir, + .mknod = rcfs_mknod, + .rename = rcfs_rename, }; - - - - -int -rcfs_root_create(struct inode *dir, struct dentry *dentry, int mode, +int +rcfs_root_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { return -EPERM; } - -int -rcfs_root_symlink(struct inode * dir, struct dentry *dentry, - const char * symname) +int +rcfs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { return -EPERM; } -int -rcfs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int rcfs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) { return -EPERM; } -int -rcfs_root_rmdir(struct inode * dir, struct dentry * dentry) +int rcfs_root_rmdir(struct inode *dir, struct dentry *dentry) { return -EPERM; } -int -rcfs_root_unlink(struct inode *dir, struct dentry *dentry) +int rcfs_root_unlink(struct inode *dir, struct dentry *dentry) { return -EPERM; } @@ -315,22 +290,22 @@ rcfs_root_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { return -EPERM; } - + int rcfs_root_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry) { return -EPERM; } struct inode_operations rcfs_rootdir_inode_operations = { - .create = rcfs_root_create, - .lookup = simple_lookup, - .link = simple_link, - .unlink = rcfs_root_unlink, - .symlink = rcfs_root_symlink, - .mkdir = rcfs_root_mkdir, - .rmdir = rcfs_root_rmdir, - .mknod = rcfs_root_mknod, - .rename = rcfs_root_rename, + .create = rcfs_root_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = rcfs_root_unlink, + .symlink = rcfs_root_symlink, + .mkdir = rcfs_root_mkdir, + .rmdir = rcfs_root_rmdir, + .mknod = rcfs_root_mknod, + .rename = rcfs_root_rename, }; diff --git a/fs/rcfs/inode.c b/fs/rcfs/inode.c index d9be67394..23ef014d9 100644 --- a/fs/rcfs/inode.c +++ b/fs/rcfs/inode.c @@ -25,12 +25,10 @@ * Parsing for shares added */ - #include #include #include #include -#include #include #include #include @@ -45,16 +43,13 @@ #include - - // Address of variable used as flag to indicate a magic file, // ; value unimportant int RCFS_IS_MAGIC; - struct inode *rcfs_get_inode(struct super_block *sb, int mode, dev_t dev) { - struct inode * inode = new_inode(sb); + struct inode *inode = new_inode(sb); if (inode) { inode->i_mode = mode; @@ -79,7 +74,7 @@ struct inode *rcfs_get_inode(struct super_block *sb, int mode, dev_t dev) // directory inodes start off with i_nlink == 2 // (for "." 
entry) - + inode->i_nlink++; break; case S_IFLNK: @@ -90,10 +85,7 @@ struct inode *rcfs_get_inode(struct super_block *sb, int mode, dev_t dev) return inode; } - - -int -_rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int _rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { struct inode *inode; int error = -EPERM; @@ -109,49 +101,49 @@ _rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) inode->i_mode |= S_ISGID; } d_instantiate(dentry, inode); - dget(dentry); + dget(dentry); error = 0; } return error; } -EXPORT_SYMBOL(_rcfs_mknod); +EXPORT_SYMBOL(_rcfs_mknod); -int -rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +int rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { // User can only create directories, not files if ((mode & S_IFMT) != S_IFDIR) return -EINVAL; - return dir->i_op->mkdir(dir, dentry, mode); + return dir->i_op->mkdir(dir, dentry, mode); } -EXPORT_SYMBOL(rcfs_mknod); +EXPORT_SYMBOL(rcfs_mknod); -struct dentry * -rcfs_create_internal(struct dentry *parent, struct rcfs_magf *magf, int magic) +struct dentry *rcfs_create_internal(struct dentry *parent, + struct rcfs_magf *magf, int magic) { struct qstr qstr; - struct dentry *mfdentry ; + struct dentry *mfdentry; // Get new dentry for name - qstr.name = magf->name; - qstr.len = strlen(magf->name); - qstr.hash = full_name_hash(magf->name,qstr.len); - mfdentry = lookup_hash(&qstr,parent); + qstr.name = magf->name; + qstr.len = strlen(magf->name); + qstr.hash = full_name_hash(magf->name, qstr.len); + mfdentry = lookup_hash(&qstr, parent); if (!IS_ERR(mfdentry)) { - int err; + int err; down(&parent->d_inode->i_sem); - if (magic && (magf->mode & S_IFDIR)) - err = parent->d_inode->i_op->mkdir(parent->d_inode, - mfdentry, magf->mode); + if (magic && (magf->mode & S_IFDIR)) + err = parent->d_inode->i_op->mkdir(parent->d_inode, + mfdentry, + magf->mode); else { - err =_rcfs_mknod(parent->d_inode,mfdentry, - magf->mode,0); + err = _rcfs_mknod(parent->d_inode, mfdentry, + magf->mode, 0); // _rcfs_mknod doesn't increment parent's link count, // i_op->mkdir does. 
parent->d_inode->i_nlink++; @@ -163,18 +155,18 @@ rcfs_create_internal(struct dentry *parent, struct rcfs_magf *magf, int magic) return mfdentry; } } - return mfdentry ; + return mfdentry; } + EXPORT_SYMBOL(rcfs_create_internal); -int -rcfs_delete_internal(struct dentry *mfdentry) +int rcfs_delete_internal(struct dentry *mfdentry) { - struct dentry *parent ; + struct dentry *parent; if (!mfdentry || !mfdentry->d_parent) return -EINVAL; - + parent = mfdentry->d_parent; if (!mfdentry->d_inode) { @@ -191,14 +183,9 @@ rcfs_delete_internal(struct dentry *mfdentry) return 0; } + EXPORT_SYMBOL(rcfs_delete_internal); struct inode_operations rcfs_file_inode_operations = { - .getattr = simple_getattr, + .getattr = simple_getattr, }; - - - - - - diff --git a/fs/rcfs/magic.c b/fs/rcfs/magic.c index ad92a07a8..043df6e2d 100644 --- a/fs/rcfs/magic.c +++ b/fs/rcfs/magic.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -41,9 +40,6 @@ #include - - - /****************************************************** * Macros * @@ -51,12 +47,12 @@ * *****************************************************/ - #define MAGIC_SHOW(FUNC) \ static int \ FUNC ## _show(struct seq_file *s, void *v) \ { \ - int rc=0; \ + int rc=0; \ + ssize_t precnt; \ ckrm_core_class_t *core ; \ \ core = (ckrm_core_class_t *) \ @@ -65,13 +61,14 @@ FUNC ## _show(struct seq_file *s, void *v) \ if (!ckrm_is_core_valid(core)) { \ return -EINVAL; \ } \ - \ + precnt = s->count ; \ if (core->classtype->show_ ## FUNC) \ rc = (* core->classtype->show_ ## FUNC)(core, s); \ - \ + \ + if (s->count == precnt) \ + seq_printf(s, "No data to display\n"); \ return rc; \ -}; - +}; #define MAGIC_OPEN(FUNC) \ static int \ @@ -86,25 +83,24 @@ FUNC ## _open(struct inode *inode, struct file *file) \ ret = single_open(file,FUNC ## _show, (void *)ri); \ } \ return ret; \ -} - +} + #define MAGIC_CLOSE(FUNC) \ static int \ FUNC ## _close(struct inode *inode, struct file *file) \ { \ return single_release(inode,file); \ } - - #define MAGIC_PARSE(FUNC) \ static int \ FUNC ## _parse(char *options, char **resstr, char **otherstr) \ { \ char *p; \ + *resstr = NULL; \ \ if (!options) \ - return 1; \ + return -EINVAL; \ \ while ((p = strsep(&options, ",")) != NULL) { \ substring_t args[MAX_OPT_ARGS]; \ @@ -122,10 +118,12 @@ FUNC ## _parse(char *options, char **resstr, char **otherstr) \ *otherstr = match_strdup(args); \ break; \ default: \ - return 0; \ + return -EINVAL; \ } \ } \ - return 1; \ + if (*resstr) \ + return 0; \ + return -EINVAL; \ } #define MAGIC_WRITE(FUNC,CLSTYPEFUN) \ @@ -180,8 +178,7 @@ FUNC ## _write_out: \ kfree(resname); \ return rc ? 
rc : count; \ } - - + #define MAGIC_RD_FILEOPS(FUNC) \ struct file_operations FUNC ## _fileops = { \ .open = FUNC ## _open, \ @@ -191,7 +188,6 @@ struct file_operations FUNC ## _fileops = { \ }; \ EXPORT_SYMBOL(FUNC ## _fileops); - #define MAGIC_RDWR_FILEOPS(FUNC) \ struct file_operations FUNC ## _fileops = { \ .open = FUNC ## _open, \ @@ -202,95 +198,90 @@ struct file_operations FUNC ## _fileops = { \ }; \ EXPORT_SYMBOL(FUNC ## _fileops); - -/******************************************************************************** +/****************************************************************************** * Target * * pseudo file for manually reclassifying members to a class * - *******************************************************************************/ + *****************************************************************************/ #define TARGET_MAX_INPUT_SIZE 100 static ssize_t -target_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +target_write(struct file *file, const char __user * buf, + size_t count, loff_t * ppos) { - struct rcfs_inode_info *ri= RCFS_I(file->f_dentry->d_inode); + struct rcfs_inode_info *ri = RCFS_I(file->f_dentry->d_inode); char *optbuf; int rc = -EINVAL; ckrm_classtype_t *clstype; - if ((ssize_t) count < 0 || (ssize_t) count > TARGET_MAX_INPUT_SIZE) return -EINVAL; - + if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; - + down(&(ri->vfs_inode.i_sem)); - + optbuf = kmalloc(TARGET_MAX_INPUT_SIZE, GFP_KERNEL); __copy_from_user(optbuf, buf, count); - if (optbuf[count-1] == '\n') - optbuf[count-1]='\0'; + if (optbuf[count - 1] == '\n') + optbuf[count - 1] = '\0'; clstype = ri->core->classtype; if (clstype->forced_reclassify) - rc = (* clstype->forced_reclassify)(ri->core,optbuf); + rc = (*clstype->forced_reclassify) (ri->core, optbuf); up(&(ri->vfs_inode.i_sem)); kfree(optbuf); - return !rc ? count : rc; + return (!rc ? count : rc); } struct file_operations target_fileops = { - .write = target_write, + .write = target_write, }; -EXPORT_SYMBOL(target_fileops); - +EXPORT_SYMBOL(target_fileops); -/******************************************************************************** +/****************************************************************************** * Config * * Set/get configuration parameters of a class. * - *******************************************************************************/ + *****************************************************************************/ /* Currently there are no per-class config parameters defined. 
* Use existing code as a template */ - + #define config_max_input_size 300 enum config_token_t { - config_str, config_res_type, config_err + config_str, config_res_type, config_err }; static match_table_t config_tokens = { - {config_res_type,"res=%s"}, + {config_res_type, "res=%s"}, {config_str, "config=%s"}, - {config_err, NULL}, + {config_err, NULL}, }; - MAGIC_PARSE(config); -MAGIC_WRITE(config,set_config); +MAGIC_WRITE(config, set_config); MAGIC_SHOW(config); MAGIC_OPEN(config); MAGIC_CLOSE(config); MAGIC_RDWR_FILEOPS(config); - -/******************************************************************************** +/****************************************************************************** * Members * * List members of a class * - *******************************************************************************/ + *****************************************************************************/ MAGIC_SHOW(members); MAGIC_OPEN(members); @@ -298,46 +289,42 @@ MAGIC_CLOSE(members); MAGIC_RD_FILEOPS(members); - -/******************************************************************************** +/****************************************************************************** * Stats * * Get/reset class statistics * No standard set of stats defined. Each resource controller chooses * its own set of statistics to maintain and export. * - *******************************************************************************/ + *****************************************************************************/ #define stats_max_input_size 50 enum stats_token_t { - stats_res_type, stats_str,stats_err + stats_res_type, stats_str, stats_err }; static match_table_t stats_tokens = { - {stats_res_type,"res=%s"}, + {stats_res_type, "res=%s"}, {stats_str, NULL}, - {stats_err, NULL}, + {stats_err, NULL}, }; - MAGIC_PARSE(stats); -MAGIC_WRITE(stats,reset_stats); +MAGIC_WRITE(stats, reset_stats); MAGIC_SHOW(stats); MAGIC_OPEN(stats); MAGIC_CLOSE(stats); MAGIC_RDWR_FILEOPS(stats); - -/******************************************************************************** +/****************************************************************************** * Shares * * Set/get shares of a taskclass. 
* Share types and semantics are defined by rcfs and ckrm core * - *******************************************************************************/ - + *****************************************************************************/ #define SHARES_MAX_INPUT_SIZE 300 @@ -348,20 +335,19 @@ MAGIC_RDWR_FILEOPS(stats); the remaining ones are for token matching purposes */ enum share_token_t { - MY_GUAR, MY_LIM, TOT_GUAR, MAX_LIM, SHARE_RES_TYPE, SHARE_ERR + MY_GUAR, MY_LIM, TOT_GUAR, MAX_LIM, SHARE_RES_TYPE, SHARE_ERR }; /* Token matching for parsing input to this magic file */ static match_table_t shares_tokens = { {SHARE_RES_TYPE, "res=%s"}, - {MY_GUAR, "guarantee=%d"}, - {MY_LIM, "limit=%d"}, - {TOT_GUAR,"total_guarantee=%d"}, + {MY_GUAR, "guarantee=%d"}, + {MY_LIM, "limit=%d"}, + {TOT_GUAR, "total_guarantee=%d"}, {MAX_LIM, "max_limit=%d"}, - {SHARE_ERR, NULL} + {SHARE_ERR, NULL} }; - static int shares_parse(char *options, char **resstr, struct ckrm_shares *shares) { @@ -370,12 +356,12 @@ shares_parse(char *options, char **resstr, struct ckrm_shares *shares) if (!options) return 1; - + while ((p = strsep(&options, ",")) != NULL) { - + substring_t args[MAX_OPT_ARGS]; int token; - + if (!*p) continue; @@ -410,12 +396,11 @@ shares_parse(char *options, char **resstr, struct ckrm_shares *shares) } return 1; -} - +} static ssize_t -shares_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +shares_write(struct file *file, const char __user * buf, + size_t count, loff_t * ppos) { struct inode *inode = file->f_dentry->d_inode; struct rcfs_inode_info *ri; @@ -423,7 +408,7 @@ shares_write(struct file *file, const char __user *buf, int rc = 0; struct ckrm_core_class *core; int done; - char *resname; + char *resname = NULL; struct ckrm_shares newshares = { CKRM_SHARE_UNCHANGED, @@ -436,24 +421,29 @@ shares_write(struct file *file, const char __user *buf, if ((ssize_t) count < 0 || (ssize_t) count > SHARES_MAX_INPUT_SIZE) return -EINVAL; - + if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; ri = RCFS_I(file->f_dentry->d_parent->d_inode); - if (!ri || !ckrm_is_core_valid((ckrm_core_class_t *)(ri->core))) { + if (!ri || !ckrm_is_core_valid((ckrm_core_class_t *) (ri->core))) { printk(KERN_ERR "shares_write: Error accessing core class\n"); return -EFAULT; } - + down(&inode->i_sem); - - core = ri->core; + + core = ri->core; optbuf = kmalloc(SHARES_MAX_INPUT_SIZE, GFP_KERNEL); + if (!optbuf) { + up(&inode->i_sem); + return -ENOMEM; + } + __copy_from_user(optbuf, buf, count); - if (optbuf[count-1] == '\n') - optbuf[count-1]='\0'; + if (optbuf[count - 1] == '\n') + optbuf[count - 1] = '\0'; done = shares_parse(optbuf, &resname, &newshares); if (!done) { @@ -463,23 +453,23 @@ shares_write(struct file *file, const char __user *buf, } if (core->classtype->set_shares) { - rc = (*core->classtype->set_shares)(core,resname,&newshares); + rc = (*core->classtype->set_shares) (core, resname, &newshares); if (rc) { - printk(KERN_ERR "shares_write: resctlr share set error\n"); + printk(KERN_ERR + "shares_write: resctlr share set error\n"); goto write_out; } } - + printk(KERN_ERR "Set %s shares to %d %d %d %d\n", resname, - newshares.my_guarantee, - newshares.my_limit, - newshares.total_guarantee, - newshares.max_limit); - - rc = count ; + newshares.my_guarantee, + newshares.my_limit, + newshares.total_guarantee, newshares.max_limit); + + rc = count; -write_out: + write_out: up(&inode->i_sem); kfree(optbuf); @@ -487,49 +477,44 @@ write_out: return rc; } - MAGIC_SHOW(shares); MAGIC_OPEN(shares); 
MAGIC_CLOSE(shares); MAGIC_RDWR_FILEOPS(shares); - - /* * magic file creation/deletion * */ - -int -rcfs_clear_magic(struct dentry *parent) +int rcfs_clear_magic(struct dentry *parent) { - struct dentry *mftmp, *mfdentry ; + struct dentry *mftmp, *mfdentry; list_for_each_entry_safe(mfdentry, mftmp, &parent->d_subdirs, d_child) { - + if (!rcfs_is_magic(mfdentry)) - continue ; + continue; - if (rcfs_delete_internal(mfdentry)) - printk(KERN_ERR "rcfs_clear_magic: error deleting one\n"); + if (rcfs_delete_internal(mfdentry)) + printk(KERN_ERR + "rcfs_clear_magic: error deleting one\n"); } return 0; - + } -EXPORT_SYMBOL(rcfs_clear_magic); +EXPORT_SYMBOL(rcfs_clear_magic); -int -rcfs_create_magic(struct dentry *parent, struct rcfs_magf magf[], int count) +int rcfs_create_magic(struct dentry *parent, struct rcfs_magf magf[], int count) { int i; struct dentry *mfdentry; - for (i=0; i #include #include -#include #include #include #include @@ -42,43 +40,39 @@ #include - - rbce_eng_callback_t rcfs_eng_callbacks = { NULL, NULL }; -int -rcfs_register_engine(rbce_eng_callback_t *rcbs) +int rcfs_register_engine(rbce_eng_callback_t * rcbs) { if (!rcbs->mkdir || rcfs_eng_callbacks.mkdir) { return -EINVAL; } rcfs_eng_callbacks = *rcbs; + rcfs_engine_regd++; return 0; } -EXPORT_SYMBOL(rcfs_register_engine); - +EXPORT_SYMBOL(rcfs_register_engine); -int -rcfs_unregister_engine(rbce_eng_callback_t *rcbs) +int rcfs_unregister_engine(rbce_eng_callback_t * rcbs) { if (!rcbs->mkdir || !rcfs_eng_callbacks.mkdir || - (rcbs->mkdir != rcfs_eng_callbacks.mkdir)) { + (rcbs->mkdir != rcfs_eng_callbacks.mkdir)) { return -EINVAL; } rcfs_eng_callbacks.mkdir = NULL; rcfs_eng_callbacks.rmdir = NULL; + rcfs_engine_regd--; return 0; } -EXPORT_SYMBOL(rcfs_unregister_engine); - - +EXPORT_SYMBOL(rcfs_unregister_engine); /* rcfs_mkroot - * Create and return a "root" dentry under /rcfs. Also create associated magic files + * Create and return a "root" dentry under /rcfs. 
+ * Also create associated magic files * * @mfdesc: array of rcfs_magf describing root dir and its magic files * @count: number of entries in mfdesc @@ -86,26 +80,25 @@ EXPORT_SYMBOL(rcfs_unregister_engine); * @rootde: output parameter to return the newly created root dentry */ -int -rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde) +int rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde) { int sz; struct rcfs_magf *rootdesc = &mfdesc[0]; - struct dentry *dentry ; + struct dentry *dentry; struct rcfs_inode_info *rootri; if ((mfcount < 0) || (!mfdesc)) return -EINVAL; - + rootdesc = &mfdesc[0]; - printk("allocating classtype root <%s>\n",rootdesc->name); - dentry = rcfs_create_internal(rcfs_rootde, rootdesc,0); - + printk("allocating classtype root <%s>\n", rootdesc->name); + dentry = rcfs_create_internal(rcfs_rootde, rootdesc, 0); + if (!dentry) { - printk(KERN_ERR "Could not create %s\n",rootdesc->name); + printk(KERN_ERR "Could not create %s\n", rootdesc->name); return -ENOMEM; - } - + } + rootri = RCFS_I(dentry->d_inode); sz = strlen(rootdesc->name) + strlen(RCFS_ROOT) + 2; rootri->name = kmalloc(sz, GFP_KERNEL); @@ -115,8 +108,8 @@ rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde) rcfs_delete_internal(dentry); return -ENOMEM; } - snprintf(rootri->name,sz,"%s/%s",RCFS_ROOT,rootdesc->name); - + snprintf(rootri->name, sz, "%s/%s", RCFS_ROOT, rootdesc->name); + if (rootdesc->i_fop) dentry->d_inode->i_fop = rootdesc->i_fop; if (rootdesc->i_op) @@ -127,41 +120,43 @@ rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde) return 0; } -EXPORT_SYMBOL(rcfs_mkroot); +EXPORT_SYMBOL(rcfs_mkroot); -int -rcfs_rmroot(struct dentry *rootde) +int rcfs_rmroot(struct dentry *rootde) { + struct rcfs_inode_info *ri; + if (!rootde) return -EINVAL; rcfs_clear_magic(rootde); - kfree(RCFS_I(rootde->d_inode)->name); + ri = RCFS_I(rootde->d_inode); + kfree(ri->name); + ri->name = NULL; rcfs_delete_internal(rootde); return 0; } -EXPORT_SYMBOL(rcfs_rmroot); +EXPORT_SYMBOL(rcfs_rmroot); -int -rcfs_register_classtype(ckrm_classtype_t *clstype) +int rcfs_register_classtype(ckrm_classtype_t * clstype) { - int rc ; + int rc; struct rcfs_inode_info *rootri; struct rcfs_magf *mfdesc; // Initialize mfdesc, mfcount - clstype->mfdesc = (void *) genmfdesc[clstype->mfidx]->rootmf; - clstype->mfcount = genmfdesc[clstype->mfidx]->rootmflen; + clstype->mfdesc = (void *)genmfdesc[clstype->mfidx]->rootmf; + clstype->mfcount = genmfdesc[clstype->mfidx]->rootmflen; mfdesc = (struct rcfs_magf *)clstype->mfdesc; - + /* rcfs root entry has the same name as the classtype */ - strncpy(mfdesc[0].name,clstype->name,RCFS_MAGF_NAMELEN) ; + strncpy(mfdesc[0].name, clstype->name, RCFS_MAGF_NAMELEN); - rc = rcfs_mkroot(mfdesc,clstype->mfcount, - (struct dentry **)&(clstype->rootde)); + rc = rcfs_mkroot(mfdesc, clstype->mfcount, + (struct dentry **)&(clstype->rootde)); if (rc) return rc; @@ -169,43 +164,43 @@ rcfs_register_classtype(ckrm_classtype_t *clstype) rootri->core = clstype->default_class; clstype->default_class->name = rootri->name; ckrm_core_grab(clstype->default_class); - + // Create magic files under root - if ((rc = rcfs_create_magic(clstype->rootde, &mfdesc[1], - clstype->mfcount-1))) { + if ((rc = rcfs_create_magic(clstype->rootde, &mfdesc[1], + clstype->mfcount - 1))) { kfree(rootri->name); + rootri->name = NULL; rcfs_delete_internal(clstype->rootde); return rc; } return rc; } -EXPORT_SYMBOL(rcfs_register_classtype); 
+EXPORT_SYMBOL(rcfs_register_classtype); -int -rcfs_deregister_classtype(ckrm_classtype_t *clstype) +int rcfs_deregister_classtype(ckrm_classtype_t * clstype) { int rc; rc = rcfs_rmroot((struct dentry *)clstype->rootde); if (!rc) { - clstype->default_class->name = NULL ; + clstype->default_class->name = NULL; ckrm_core_drop(clstype->default_class); } return rc; } -EXPORT_SYMBOL(rcfs_deregister_classtype); - +EXPORT_SYMBOL(rcfs_deregister_classtype); // Common root and magic file entries. -// root name, root permissions, magic file names and magic file permissions are needed by -// all entities (classtypes and classification engines) existing under the rcfs mount point +// root name, root permissions, magic file names and magic file permissions +// are needed by all entities (classtypes and classification engines) existing +// under the rcfs mount point -// The common sets of these attributes are listed here as a table. Individual classtypes and -// classification engines can simple specify the index into the table to initialize their -// magf entries. +// The common sets of these attributes are listed here as a table. Individual +// classtypes and classification engines can simple specify the index into the +// table to initialize their magf entries. // #ifdef CONFIG_CKRM_TYPE_TASKCLASS @@ -218,8 +213,7 @@ extern struct rcfs_mfdesc sock_mfdesc; // extern struct rcfs_magf rbce_mfdesc; - -struct rcfs_mfdesc *genmfdesc[]={ +struct rcfs_mfdesc *genmfdesc[] = { #ifdef CONFIG_CKRM_TYPE_TASKCLASS &tc_mfdesc, #else @@ -230,15 +224,4 @@ struct rcfs_mfdesc *genmfdesc[]={ #else NULL, #endif -// Create similar entry for RBCE ? -//#ifdef CONFIG_CKRM_CE -// &rbce_mfdesc, -//#else -// NULL, -//#endif - }; - - - - diff --git a/fs/rcfs/socket_fs.c b/fs/rcfs/socket_fs.c index 492fb092c..9d9ba5241 100644 --- a/fs/rcfs/socket_fs.c +++ b/fs/rcfs/socket_fs.c @@ -22,73 +22,69 @@ * connection control is supported based on prioritized accept queues. 
******************************************************************************/ - #include #include -extern int rcfs_create(struct inode *,struct dentry *, int, struct nameidata *); +extern int rcfs_create(struct inode *, struct dentry *, int, + struct nameidata *); extern int rcfs_unlink(struct inode *, struct dentry *); -extern int rcfs_symlink(struct inode *, struct dentry *, const char *); +extern int rcfs_symlink(struct inode *, struct dentry *, const char *); extern int rcfs_mknod(struct inode *, struct dentry *, int mode, dev_t); extern int rcfs_mkdir(struct inode *, struct dentry *, int); extern int rcfs_rmdir(struct inode *, struct dentry *); -extern int rcfs_rename(struct inode *, struct dentry *, struct inode *, - struct dentry *); +extern int rcfs_rename(struct inode *, struct dentry *, struct inode *, + struct dentry *); extern int rcfs_create_coredir(struct inode *, struct dentry *); int sock_mkdir(struct inode *, struct dentry *, int mode); int sock_rmdir(struct inode *, struct dentry *); - -int sock_create_noperm(struct inode *, struct dentry *,int, struct nameidata *); -int sock_unlink_noperm(struct inode *,struct dentry *); -int sock_mkdir_noperm(struct inode *,struct dentry *,int); -int sock_rmdir_noperm(struct inode *,struct dentry *); -int sock_mknod_noperm(struct inode *,struct dentry *,int, dev_t); +int sock_create_noperm(struct inode *, struct dentry *, int, + struct nameidata *); +int sock_unlink_noperm(struct inode *, struct dentry *); +int sock_mkdir_noperm(struct inode *, struct dentry *, int); +int sock_rmdir_noperm(struct inode *, struct dentry *); +int sock_mknod_noperm(struct inode *, struct dentry *, int, dev_t); void sock_set_directory(void); extern struct file_operations config_fileops, - members_fileops, - shares_fileops, - stats_fileops, - target_fileops; - + members_fileops, shares_fileops, stats_fileops, target_fileops; struct inode_operations my_iops = { - .create = rcfs_create, - .lookup = simple_lookup, - .link = simple_link, - .unlink = rcfs_unlink, - .symlink = rcfs_symlink, - .mkdir = sock_mkdir, - .rmdir = sock_rmdir, - .mknod = rcfs_mknod, - .rename = rcfs_rename, + .create = rcfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = rcfs_unlink, + .symlink = rcfs_symlink, + .mkdir = sock_mkdir, + .rmdir = sock_rmdir, + .mknod = rcfs_mknod, + .rename = rcfs_rename, }; struct inode_operations class_iops = { - .create = sock_create_noperm, - .lookup = simple_lookup, - .link = simple_link, - .unlink = sock_unlink_noperm, - .symlink = rcfs_symlink, - .mkdir = sock_mkdir_noperm, - .rmdir = sock_rmdir_noperm, - .mknod = sock_mknod_noperm, - .rename = rcfs_rename, + .create = sock_create_noperm, + .lookup = simple_lookup, + .link = simple_link, + .unlink = sock_unlink_noperm, + .symlink = rcfs_symlink, + .mkdir = sock_mkdir_noperm, + .rmdir = sock_rmdir_noperm, + .mknod = sock_mknod_noperm, + .rename = rcfs_rename, }; struct inode_operations sub_iops = { - .create = sock_create_noperm, - .lookup = simple_lookup, - .link = simple_link, - .unlink = sock_unlink_noperm, - .symlink = rcfs_symlink, - .mkdir = sock_mkdir_noperm, - .rmdir = sock_rmdir_noperm, - .mknod = sock_mknod_noperm, - .rename = rcfs_rename, + .create = sock_create_noperm, + .lookup = simple_lookup, + .link = simple_link, + .unlink = sock_unlink_noperm, + .symlink = rcfs_symlink, + .mkdir = sock_mkdir_noperm, + .rmdir = sock_rmdir_noperm, + .mknod = sock_mknod_noperm, + .rename = rcfs_rename, }; struct rcfs_magf def_magf = { @@ -99,103 +95,105 @@ struct rcfs_magf 
def_magf = { struct rcfs_magf sock_rootdesc[] = { { - // .name = should not be set, copy from classtype name, - .mode = RCFS_DEFAULT_DIR_MODE, - .i_op = &my_iops, - //.i_fop = &simple_dir_operations, - .i_fop = NULL, - }, + // .name = should not be set, copy from classtype name, + .mode = RCFS_DEFAULT_DIR_MODE, + .i_op = &my_iops, + //.i_fop = &simple_dir_operations, + .i_fop = NULL, + }, { - .name = "members", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &members_fileops, - }, + .name = "members", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &members_fileops, + }, { - .name = "target", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &target_fileops, - }, + .name = "target", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &target_fileops, + }, }; struct rcfs_magf sock_magf[] = { { - .name = "config", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &config_fileops, - }, + .name = "config", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &config_fileops, + }, { - .name = "members", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop =&members_fileops, - }, + .name = "members", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &members_fileops, + }, { - .name = "shares", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &shares_fileops, - }, + .name = "shares", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &shares_fileops, + }, { - .name = "stats", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &stats_fileops, - }, + .name = "stats", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &stats_fileops, + }, { - .name = "target", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &target_fileops, - }, + .name = "target", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &target_fileops, + }, }; struct rcfs_magf sub_magf[] = { { - .name = "config", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &config_fileops, - }, + .name = "config", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &config_fileops, + }, { - .name = "shares", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &shares_fileops, - }, + .name = "shares", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &shares_fileops, + }, { - .name = "stats", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &stats_fileops, - }, + .name = "stats", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_op = &my_iops, + .i_fop = &stats_fileops, + }, }; struct rcfs_mfdesc sock_mfdesc = { - .rootmf = sock_rootdesc, - .rootmflen = (sizeof(sock_rootdesc)/sizeof(struct rcfs_magf)), + .rootmf = sock_rootdesc, + .rootmflen = (sizeof(sock_rootdesc) / sizeof(struct rcfs_magf)), }; - #define SOCK_MAX_MAGF (sizeof(sock_magf)/sizeof(struct rcfs_magf)) #define LAQ_MAX_SUBMAGF (sizeof(sub_magf)/sizeof(struct rcfs_magf)) -int -sock_rmdir(struct inode *p, struct dentry *me) +int sock_rmdir(struct inode *p, struct dentry *me) { - struct dentry *mftmp, *mfdentry ; + struct dentry *mftmp, *mfdentry; + int ret = 0; // delete all magic sub directories list_for_each_entry_safe(mfdentry, mftmp, &me->d_subdirs, d_child) { - if (S_ISDIR(mfdentry->d_inode->i_mode)) - rcfs_rmdir(me->d_inode, mfdentry); + if (S_ISDIR(mfdentry->d_inode->i_mode)) { + ret = rcfs_rmdir(me->d_inode, mfdentry); + if (ret) + return ret; + } } // delete ourselves - rcfs_rmdir(p,me); + ret = rcfs_rmdir(p, me); - 
return 0; + return ret; } #ifdef NUM_ACCEPT_QUEUES @@ -204,50 +202,49 @@ sock_rmdir(struct inode *p, struct dentry *me) #define LAQ_NUM_ACCEPT_QUEUES 0 #endif -int -sock_mkdir(struct inode *dir, struct dentry *dentry, int mode) +int sock_mkdir(struct inode *dir, struct dentry *dentry, int mode) { int retval = 0; - int i,j; + int i, j; struct dentry *pentry, *mfdentry; if (_rcfs_mknod(dir, dentry, mode | S_IFDIR, 0)) { printk(KERN_ERR "rcfs_mkdir: error reaching parent\n"); return retval; } - // Needed if only _rcfs_mknod is used instead of i_op->mkdir dir->i_nlink++; retval = rcfs_create_coredir(dir, dentry); - if (retval) + if (retval) goto mkdir_err; /* create the default set of magic files */ - for (i =0; i < SOCK_MAX_MAGF; i++) { - mfdentry = rcfs_create_internal(dentry, &sock_magf[i],0); + for (i = 0; i < SOCK_MAX_MAGF; i++) { + mfdentry = rcfs_create_internal(dentry, &sock_magf[i], 0); mfdentry->d_fsdata = &RCFS_IS_MAGIC; - RCFS_I(mfdentry->d_inode)->core = - RCFS_I(dentry->d_inode)->core; + RCFS_I(mfdentry->d_inode)->core = RCFS_I(dentry->d_inode)->core; if (sock_magf[i].i_fop) mfdentry->d_inode->i_fop = sock_magf[i].i_fop; if (sock_magf[i].i_op) mfdentry->d_inode->i_op = sock_magf[i].i_op; } - - for (i=1; i < LAQ_NUM_ACCEPT_QUEUES; i++) { - j = sprintf(def_magf.name, "%d",i); + + for (i = 1; i < LAQ_NUM_ACCEPT_QUEUES; i++) { + j = sprintf(def_magf.name, "%d", i); def_magf.name[j] = '\0'; - pentry = rcfs_create_internal(dentry, &def_magf,0); + pentry = rcfs_create_internal(dentry, &def_magf, 0); retval = rcfs_create_coredir(dentry->d_inode, pentry); if (retval) goto mkdir_err; - for (j=0; j < LAQ_MAX_SUBMAGF; j++) { - mfdentry = rcfs_create_internal(pentry, &sub_magf[j],0); + pentry->d_fsdata = &RCFS_IS_MAGIC; + for (j = 0; j < LAQ_MAX_SUBMAGF; j++) { + mfdentry = + rcfs_create_internal(pentry, &sub_magf[j], 0); mfdentry->d_fsdata = &RCFS_IS_MAGIC; - RCFS_I(mfdentry->d_inode)->core = - RCFS_I(pentry->d_inode)->core; + RCFS_I(mfdentry->d_inode)->core = + RCFS_I(pentry->d_inode)->core; if (sub_magf[j].i_fop) mfdentry->d_inode->i_fop = sub_magf[j].i_fop; if (sub_magf[j].i_op) @@ -258,81 +255,74 @@ sock_mkdir(struct inode *dir, struct dentry *dentry, int mode) dentry->d_inode->i_op = &class_iops; return 0; -mkdir_err: + mkdir_err: // Needed dir->i_nlink--; return retval; } + #ifndef NUM_ACCEPT_QUEUES #define NUM_ACCEPT_QUEUES 0 #endif -char * -sock_get_name(struct ckrm_core_class *c) +char *sock_get_name(struct ckrm_core_class *c) { char *p = (char *)c->name; - - while(*p) + + while (*p) p++; - while( *p != '/' && p != c->name) + while (*p != '/' && p != c->name) p--; return ++p; } -int -sock_create_noperm(struct inode *dir,struct dentry *dentry,int mode, struct nameidata *nd) +int +sock_create_noperm(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { return -EPERM; } -int -sock_unlink_noperm(struct inode *dir,struct dentry *dentry) +int sock_unlink_noperm(struct inode *dir, struct dentry *dentry) { return -EPERM; } -int -sock_mkdir_noperm(struct inode *dir,struct dentry *dentry, int mode) +int sock_mkdir_noperm(struct inode *dir, struct dentry *dentry, int mode) { return -EPERM; } -int -sock_rmdir_noperm(struct inode *dir,struct dentry *dentry) +int sock_rmdir_noperm(struct inode *dir, struct dentry *dentry) { return -EPERM; } -int -sock_mknod_noperm(struct inode *dir,struct dentry *dentry,int mode, dev_t dev) +int +sock_mknod_noperm(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { return -EPERM; } #if 0 -void -sock_set_directory() +void 
sock_set_directory() { struct dentry *pentry, *dentry; pentry = rcfs_set_magf_byname("listen_aq", (void *)&my_dir_magf[0]); if (pentry) { - dentry = rcfs_create_internal(pentry, &my_dir_magf[1],0); + dentry = rcfs_create_internal(pentry, &my_dir_magf[1], 0); if (my_dir_magf[1].i_fop) dentry->d_inode->i_fop = my_dir_magf[1].i_fop; - RCFS_I(dentry->d_inode)->core = - RCFS_I(pentry->d_inode)->core; - dentry = rcfs_create_internal(pentry, &my_dir_magf[2],0); + RCFS_I(dentry->d_inode)->core = RCFS_I(pentry->d_inode)->core; + dentry = rcfs_create_internal(pentry, &my_dir_magf[2], 0); if (my_dir_magf[2].i_fop) dentry->d_inode->i_fop = my_dir_magf[2].i_fop; - RCFS_I(dentry->d_inode)->core = - RCFS_I(pentry->d_inode)->core; - } - else { + RCFS_I(dentry->d_inode)->core = RCFS_I(pentry->d_inode)->core; + } else { printk(KERN_ERR "Could not create /rcfs/listen_aq\n" - "Perhaps /rcfs needs to be mounted\n"); + "Perhaps /rcfs needs to be mounted\n"); } } #endif - diff --git a/fs/rcfs/super.c b/fs/rcfs/super.c index d0e78c447..871b7fb17 100644 --- a/fs/rcfs/super.c +++ b/fs/rcfs/super.c @@ -22,11 +22,9 @@ * Created. */ - #include #include #include -#include #include #include #include @@ -42,57 +40,55 @@ #include #include - +#include +#include static kmem_cache_t *rcfs_inode_cachep; - inline struct rcfs_inode_info *RCFS_I(struct inode *inode) { return container_of(inode, struct rcfs_inode_info, vfs_inode); } -EXPORT_SYMBOL(RCFS_I); - +EXPORT_SYMBOL(RCFS_I); -static struct inode * -rcfs_alloc_inode(struct super_block *sb) +static struct inode *rcfs_alloc_inode(struct super_block *sb) { struct rcfs_inode_info *ri; - ri = (struct rcfs_inode_info *) kmem_cache_alloc(rcfs_inode_cachep, - SLAB_KERNEL); + ri = (struct rcfs_inode_info *)kmem_cache_alloc(rcfs_inode_cachep, + SLAB_KERNEL); if (!ri) return NULL; ri->name = NULL; return &ri->vfs_inode; } -static void -rcfs_destroy_inode(struct inode *inode) +static void rcfs_destroy_inode(struct inode *inode) { struct rcfs_inode_info *ri = RCFS_I(inode); kfree(ri->name); - kmem_cache_free(rcfs_inode_cachep, RCFS_I(inode)); + kmem_cache_free(rcfs_inode_cachep, ri); } -static void -rcfs_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void +rcfs_init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) { - struct rcfs_inode_info *ri = (struct rcfs_inode_info *) foo; + struct rcfs_inode_info *ri = (struct rcfs_inode_info *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ri->vfs_inode); } -int -rcfs_init_inodecache(void) +int rcfs_init_inodecache(void) { rcfs_inode_cachep = kmem_cache_create("rcfs_inode_cache", - sizeof(struct rcfs_inode_info), - 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT, - rcfs_init_once, NULL); + sizeof(struct rcfs_inode_info), + 0, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT, + rcfs_init_once, NULL); if (rcfs_inode_cachep == NULL) return -ENOMEM; return 0; @@ -102,31 +98,28 @@ void rcfs_destroy_inodecache(void) { printk(KERN_WARNING "destroy inodecache was called\n"); if (kmem_cache_destroy(rcfs_inode_cachep)) - printk(KERN_INFO "rcfs_inode_cache: not all structures were freed\n"); + printk(KERN_INFO + "rcfs_inode_cache: not all structures were freed\n"); } -struct super_operations rcfs_super_ops = -{ - .alloc_inode = rcfs_alloc_inode, - .destroy_inode = rcfs_destroy_inode, - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, +struct super_operations rcfs_super_ops = { + .alloc_inode = 
rcfs_alloc_inode, + .destroy_inode = rcfs_destroy_inode, + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, }; - -struct dentry *rcfs_rootde; /* redundant since one can also get it from sb */ +struct dentry *rcfs_rootde; /* redundant; can also get it from sb */ static struct inode *rcfs_root; static struct rcfs_inode_info *rcfs_rootri; -static int rcfs_mounted; - -static int rcfs_fill_super(struct super_block * sb, void * data, int silent) +static int rcfs_fill_super(struct super_block *sb, void *data, int silent) { - struct inode * inode; - struct dentry * root; + struct inode *inode; + struct dentry *root; struct rcfs_inode_info *rootri; struct ckrm_classtype *clstype; - int i,rc; + int i, rc; sb->s_fs_info = NULL; if (rcfs_mounted) { @@ -135,7 +128,7 @@ static int rcfs_fill_super(struct super_block * sb, void * data, int silent) rcfs_mounted++; sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RCFS_MAGIC; sb->s_op = &rcfs_super_ops; inode = rcfs_get_inode(sb, S_IFDIR | 0755, 0); @@ -150,7 +143,6 @@ static int rcfs_fill_super(struct super_block * sb, void * data, int silent) } sb->s_root = root; - // Link inode and core class rootri = RCFS_I(inode); rootri->name = kmalloc(strlen(RCFS_ROOT) + 1, GFP_KERNEL); @@ -164,20 +156,31 @@ static int rcfs_fill_super(struct super_block * sb, void * data, int silent) rcfs_root = inode; sb->s_fs_info = rcfs_root = inode; - rcfs_rootde = root ; - rcfs_rootri = rootri ; + rcfs_rootde = root; + rcfs_rootri = rootri; // register metatypes - for ( i=0; is_fs_info != rcfs_root) { @@ -206,10 +207,10 @@ rcfs_kill_sb(struct super_block *sb) } rcfs_mounted--; - for ( i=0; i < CKRM_MAX_CLASSTYPES; i++) { + for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { clstype = ckrm_classtypes[i]; - if (clstype == NULL || clstype->rootde == NULL) + if (clstype == NULL || clstype->rootde == NULL) continue; if ((rc = rcfs_deregister_classtype(clstype))) { @@ -218,8 +219,19 @@ rcfs_kill_sb(struct super_block *sb) // return ; // can also choose to stop here } } - - // do not remove comment block until ce directory issue resolved + + // do pre-umount shutdown needed by CE + // this is distinct from CE deregistration done on rcfs module unload + if (rcfs_engine_regd) { + if (rcfs_eng_callbacks.umnt) + if ((rc = (*rcfs_eng_callbacks.umnt) ())) { + printk(KERN_ERR "Error in CE umnt %d\n", rc); + // return ; until error handling improves + } + } + // Following comment handled by code above; keep nonetheless if it + // can be done better + // // deregister CE with rcfs // Check if loaded // if ce is in one directory /rcfs/ce, @@ -236,23 +248,22 @@ rcfs_kill_sb(struct super_block *sb) generic_shutdown_super(sb); // printk(KERN_ERR "Removed all entries\n"); -} - +} static struct file_system_type rcfs_fs_type = { - .name = "rcfs", - .get_sb = rcfs_get_sb, - .kill_sb = rcfs_kill_sb, + .name = "rcfs", + .get_sb = rcfs_get_sb, + .kill_sb = rcfs_kill_sb, }; struct rcfs_functions my_rcfs_fn = { - .mkroot = rcfs_mkroot, - .rmroot = rcfs_rmroot, - .register_classtype = rcfs_register_classtype, + .mkroot = rcfs_mkroot, + .rmroot = rcfs_rmroot, + .register_classtype = rcfs_register_classtype, .deregister_classtype = rcfs_deregister_classtype, }; -extern struct rcfs_functions rcfs_fn ; +extern struct rcfs_functions rcfs_fn; static int __init init_rcfs_fs(void) { @@ -266,13 +277,16 @@ static int __init init_rcfs_fs(void) if (ret) goto init_cache_err; - rcfs_fn = my_rcfs_fn ; - + rcfs_fn = my_rcfs_fn; + + // 
Due to tight coupling of this module with ckrm + // do not allow this module to be removed. + try_module_get(THIS_MODULE); return ret; -init_cache_err: + init_cache_err: unregister_filesystem(&rcfs_fs_type); -init_register_err: + init_register_err: return ret; } @@ -283,6 +297,6 @@ static void __exit exit_rcfs_fs(void) } module_init(init_rcfs_fs) -module_exit(exit_rcfs_fs) + module_exit(exit_rcfs_fs) -MODULE_LICENSE("GPL"); + MODULE_LICENSE("GPL"); diff --git a/fs/rcfs/tc_magic.c b/fs/rcfs/tc_magic.c index 16864094c..1a9f69729 100644 --- a/fs/rcfs/tc_magic.c +++ b/fs/rcfs/tc_magic.c @@ -28,67 +28,66 @@ #include #include - -/******************************************************************************* +/***************************************************************************** * Taskclass general * * Define structures for taskclass root directory and its magic files * In taskclasses, there is one set of magic files, created automatically under * the taskclass root (upon classtype registration) and each directory (class) * created subsequently. However, classtypes can also choose to have different - * sets of magic files created under their root and other directories under root - * using their mkdir function. RCFS only provides helper functions for creating - * the root directory and its magic files + * sets of magic files created under their root and other directories under + * root using their mkdir function. RCFS only provides helper functions for + * creating the root directory and its magic files * - *******************************************************************************/ + *****************************************************************************/ + +#define TC_FILE_MODE (S_IFREG | S_IRUGO | S_IWUSR) -#define TC_FILE_MODE (S_IFREG | S_IRUGO | S_IWUSR) - #define NR_TCROOTMF 6 struct rcfs_magf tc_rootdesc[NR_TCROOTMF] = { /* First entry must be root */ - { -// .name = should not be set, copy from classtype name - .mode = RCFS_DEFAULT_DIR_MODE, - .i_op = &rcfs_dir_inode_operations, - .i_fop = &simple_dir_operations, - }, + { +// .name = should not be set, copy from classtype name + .mode = RCFS_DEFAULT_DIR_MODE, + .i_op = &rcfs_dir_inode_operations, + .i_fop = &simple_dir_operations, + }, /* Rest are root's magic files */ - { - .name = "target", - .mode = TC_FILE_MODE, - .i_fop = &target_fileops, - .i_op = &rcfs_file_inode_operations, - }, - { - .name = "config", - .mode = TC_FILE_MODE, - .i_fop = &config_fileops, - .i_op = &rcfs_file_inode_operations, - }, - { - .name = "members", - .mode = TC_FILE_MODE, - .i_fop = &members_fileops, - .i_op = &rcfs_file_inode_operations, - }, - { - .name = "stats", - .mode = TC_FILE_MODE, - .i_fop = &stats_fileops, - .i_op = &rcfs_file_inode_operations, - }, - { - .name = "shares", - .mode = TC_FILE_MODE, - .i_fop = &shares_fileops, - .i_op = &rcfs_file_inode_operations, - }, + { + .name = "target", + .mode = TC_FILE_MODE, + .i_fop = &target_fileops, + .i_op = &rcfs_file_inode_operations, + }, + { + .name = "members", + .mode = TC_FILE_MODE, + .i_fop = &members_fileops, + .i_op = &rcfs_file_inode_operations, + }, + { + .name = "stats", + .mode = TC_FILE_MODE, + .i_fop = &stats_fileops, + .i_op = &rcfs_file_inode_operations, + }, + { + .name = "shares", + .mode = TC_FILE_MODE, + .i_fop = &shares_fileops, + .i_op = &rcfs_file_inode_operations, + }, + // Config should be made available only at the root level + // Make sure this is the last entry, as rcfs_mkdir depends on it + { + .name = "config", + .mode = TC_FILE_MODE, + .i_fop = 
&config_fileops, + .i_op = &rcfs_file_inode_operations, + }, }; struct rcfs_mfdesc tc_mfdesc = { - .rootmf = tc_rootdesc, - .rootmflen = NR_TCROOTMF, + .rootmf = tc_rootdesc, + .rootmflen = NR_TCROOTMF, }; - - diff --git a/include/linux/ckrm.h b/include/linux/ckrm.h index 99ab97e3a..8dba64362 100644 --- a/include/linux/ckrm.h +++ b/include/linux/ckrm.h @@ -45,16 +45,16 @@ enum ckrm_event { /* we distinguish various events types - * + * * (a) CKRM_LATCHABLE_EVENTS - * events can be latched for event callbacks by classtypes - * + * events can be latched for event callbacks by classtypes + * * (b) CKRM_NONLATACHBLE_EVENTS - * events can not be latched but can be used to call classification - * + * events can not be latched but can be used to call classification + * * (c) event that are used for notification purposes * range: [ CKRM_EVENT_CANNOT_CLASSIFY .. ) - */ + */ /* events (a) */ @@ -83,7 +83,7 @@ enum ckrm_event { CKRM_NOTCLASSIFY_EVENTS, CKRM_EVENT_MANUAL = CKRM_NOTCLASSIFY_EVENTS, - + CKRM_NUM_EVENTS }; #endif @@ -93,7 +93,7 @@ enum ckrm_event { extern void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg); -typedef void (*ckrm_event_cb)(void *arg); +typedef void (*ckrm_event_cb) (void *arg); struct ckrm_hook_cb { ckrm_event_cb fct; @@ -112,7 +112,7 @@ static inline void ckrm_cb_##fct(argtp arg) \ ckrm_invoke_event_cb_chain(CKRM_EVENT_##EV,(void*)arg); \ } -#else // !CONFIG_CKRM +#else // !CONFIG_CKRM #define CKRM_DEF_CB(EV,fct) \ static inline void ckrm_cb_##fct(void) { } @@ -120,7 +120,7 @@ static inline void ckrm_cb_##fct(void) { } #define CKRM_DEF_CB_ARG(EV,fct,argtp) \ static inline void ckrm_cb_##fct(argtp arg) { } -#endif // CONFIG_CKRM +#endif // CONFIG_CKRM /*----------------------------------------------------------------- * define the CKRM event functions @@ -132,25 +132,30 @@ struct task_struct; struct sock; struct user_struct; -CKRM_DEF_CB_ARG( FORK , fork, struct task_struct *); -CKRM_DEF_CB_ARG( EXEC , exec, const char* ); -CKRM_DEF_CB ( UID , uid ); -CKRM_DEF_CB ( GID , gid ); -CKRM_DEF_CB ( APPTAG , apptag ); -CKRM_DEF_CB ( LOGIN , login ); -CKRM_DEF_CB_ARG( USERADD , useradd, struct user_struct *); -CKRM_DEF_CB_ARG( USERDEL , userdel, struct user_struct *); -CKRM_DEF_CB_ARG( LISTEN_START , listen_start, struct sock * ); -CKRM_DEF_CB_ARG( LISTEN_STOP , listen_stop, struct sock * ); - -// and a few special one's -void ckrm_cb_newtask(struct task_struct *); -void ckrm_cb_exit(struct task_struct *); +CKRM_DEF_CB_ARG(FORK, fork, struct task_struct *); +CKRM_DEF_CB_ARG(EXEC, exec, const char *); +CKRM_DEF_CB(UID, uid); +CKRM_DEF_CB(GID, gid); +CKRM_DEF_CB(APPTAG, apptag); +CKRM_DEF_CB(LOGIN, login); +CKRM_DEF_CB_ARG(USERADD, useradd, struct user_struct *); +CKRM_DEF_CB_ARG(USERDEL, userdel, struct user_struct *); +CKRM_DEF_CB_ARG(LISTEN_START, listen_start, struct sock *); +CKRM_DEF_CB_ARG(LISTEN_STOP, listen_stop, struct sock *); // some other functions required +#ifdef CONFIG_CKRM extern void ckrm_init(void); +void ckrm_cb_newtask(struct task_struct *); +void ckrm_cb_exit(struct task_struct *); +#else +#define ckrm_init(x) do { } while (0) +#define ckrm_cb_newtask(x) do { } while (0) +#define ckrm_cb_exit(x) do { } while (0) +#endif + extern int get_exe_path_name(struct task_struct *, char *, int); -#endif // __KERNEL__ +#endif // __KERNEL__ -#endif // _LINUX_CKRM_H +#endif // _LINUX_CKRM_H diff --git a/include/linux/ckrm_ce.h b/include/linux/ckrm_ce.h index 0bde15dd3..f3cbd9132 100644 --- a/include/linux/ckrm_ce.h +++ b/include/linux/ckrm_ce.h @@ -29,49 +29,67 
@@ #ifdef CONFIG_CKRM -#include "ckrm.h" // getting the event names +#include "ckrm.h" // getting the event names /* Action parameters identifying the cause of a task<->class notify callback - * these can perculate up to user daemon consuming records send by the classification - * engine + * these can perculate up to user daemon consuming records send by the + * classification engine */ #ifdef __KERNEL__ -typedef void* (*ce_classify_fct_t)(enum ckrm_event event, void *obj, ... ); -typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj, void *obj); +typedef void *(*ce_classify_fct_t) (enum ckrm_event event, void *obj, ...); +typedef void (*ce_notify_fct_t) (enum ckrm_event event, void *classobj, + void *obj); typedef struct ckrm_eng_callback { /* general state information */ - int always_callback; /* set if CE should always be called back regardless of numclasses */ + int always_callback; /* set if CE should always be called back + regardless of numclasses */ + + + /* callbacks which are called without holding locks */ - unsigned long c_interest; /* set of classification events CE is interested in */ - ce_classify_fct_t classify; /* generic classify */ + unsigned long c_interest; /* set of classification events of + interest to CE + */ + + /* generic classify */ + ce_classify_fct_t classify; + /* class added */ + void (*class_add) (const char *name, void *core, int classtype); + /* class deleted */ + void (*class_delete) (const char *name, void *core, int classtype); + - void (*class_add) (const char *name, void *core); /* class added */ - void (*class_delete)(const char *name, void *core); /* class deleted */ + /* callbacks which are called while holding task_lock(tsk) */ - /* callback which are called while holding task_lock(tsk) */ - unsigned long n_interest; /* set of notification events CE is interested in */ - ce_notify_fct_t notify; /* notify on class switch */ + unsigned long n_interest; /* set of notification events of + interest to CE + */ + /* notify on class switch */ + ce_notify_fct_t notify; } ckrm_eng_callback_t; struct inode; -struct dentry; +struct dentry; typedef struct rbce_eng_callback { - int (*mkdir)(struct inode *, struct dentry *, int); // mkdir - int (*rmdir)(struct inode *, struct dentry *); // rmdir + int (*mkdir) (struct inode *, struct dentry *, int); // mkdir + int (*rmdir) (struct inode *, struct dentry *); // rmdir + int (*mnt) (void); + int (*umnt) (void); } rbce_eng_callback_t; -extern int ckrm_register_engine (const char *name, ckrm_eng_callback_t *); +extern int ckrm_register_engine(const char *name, ckrm_eng_callback_t *); extern int ckrm_unregister_engine(const char *name); extern void *ckrm_classobj(char *, int *classtype); -extern int get_exe_path_name(struct task_struct *t, char *filename, int max_size); +extern int get_exe_path_name(struct task_struct *t, char *filename, + int max_size); extern int rcfs_register_engine(rbce_eng_callback_t *); extern int rcfs_unregister_engine(rbce_eng_callback_t *); @@ -84,8 +102,8 @@ extern void ckrm_core_grab(void *); extern void ckrm_core_drop(void *); #endif -#endif // CONFIG_CKRM +#endif // CONFIG_CKRM -#endif // __KERNEL__ +#endif // __KERNEL__ -#endif // _LINUX_CKRM_CE_H +#endif // _LINUX_CKRM_CE_H diff --git a/include/linux/ckrm_net.h b/include/linux/ckrm_net.h index 0cbf784bb..bb4bdbb84 100644 --- a/include/linux/ckrm_net.h +++ b/include/linux/ckrm_net.h @@ -20,20 +20,21 @@ struct ckrm_sock_class; struct ckrm_net_struct { - int ns_type; // type of net class - struct sock *ns_sk; // pointer 
to socket - pid_t ns_tgid; // real process id - pid_t ns_pid; // calling thread's pid - int ns_family; // IPPROTO_IPV4 || IPPROTO_IPV6 - // Currently only IPV4 is supported + int ns_type; // type of net class + struct sock *ns_sk; // pointer to socket + pid_t ns_tgid; // real process id + pid_t ns_pid; // calling thread's pid + struct task_struct *ns_tsk; + int ns_family; // IPPROTO_IPV4 || IPPROTO_IPV6 + // Currently only IPV4 is supported union { - __u32 ns_dipv4; // V4 listener's address + __u32 ns_dipv4; // V4 listener's address } ns_daddr; - __u16 ns_dport; // listener's port - __u16 ns_sport; // sender's port + __u16 ns_dport; // listener's port + __u16 ns_sport; // sender's port atomic_t ns_refcnt; - struct ckrm_sock_class *core; - struct list_head ckrm_link; + struct ckrm_sock_class *core; + struct list_head ckrm_link; }; #define ns_daddrv4 ns_daddr.ns_dipv4 diff --git a/include/linux/ckrm_rc.h b/include/linux/ckrm_rc.h index e514f1c72..b46cfd9f3 100644 --- a/include/linux/ckrm_rc.h +++ b/include/linux/ckrm_rc.h @@ -32,152 +32,152 @@ #include #include -#include +#include #include - /* maximum number of class types */ -#define CKRM_MAX_CLASSTYPES 32 +#define CKRM_MAX_CLASSTYPES 32 /* maximum classtype name length */ -#define CKRM_MAX_CLASSTYPE_NAME 32 +#define CKRM_MAX_CLASSTYPE_NAME 32 /* maximum resource controllers per classtype */ -#define CKRM_MAX_RES_CTLRS 8 +#define CKRM_MAX_RES_CTLRS 8 /* maximum resource controller name length */ -#define CKRM_MAX_RES_NAME 128 - +#define CKRM_MAX_RES_NAME 128 struct ckrm_core_class; struct ckrm_classtype; -/******************************************************************************** +/***************************************************************************** * Share specifications - *******************************************************************************/ + *****************************************************************************/ typedef struct ckrm_shares { int my_guarantee; int my_limit; int total_guarantee; int max_limit; - int unused_guarantee; // not used as parameters - int cur_max_limit; // not used as parameters + int unused_guarantee; // not used as parameters + int cur_max_limit; // not used as parameters } ckrm_shares_t; -#define CKRM_SHARE_UNCHANGED (-1) // value to indicate no change -#define CKRM_SHARE_DONTCARE (-2) // value to indicate don't care. 
-#define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) // Start off with these values -#define CKRM_SHARE_DFLT_MAX_LIMIT (100) // to simplify set_res_shares logic - +#define CKRM_SHARE_UNCHANGED (-1) +#define CKRM_SHARE_DONTCARE (-2) +#define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) +#define CKRM_SHARE_DFLT_MAX_LIMIT (100) -/******************************************************************************** +/****************************************************************************** * RESOURCE CONTROLLERS - *******************************************************************************/ + *****************************************************************************/ /* resource controller callback structure */ typedef struct ckrm_res_ctlr { char res_name[CKRM_MAX_RES_NAME]; - int res_hdepth; // maximum hierarchy - int resid; // (for now) same as the enum resid - struct ckrm_classtype *classtype; // classtype owning this resource controller + int res_hdepth; // maximum hierarchy + int resid; // (for now) same as the enum resid + struct ckrm_classtype *classtype; // classtype owning this res ctlr /* allocate/free new resource class object for resource controller */ - void *(*res_alloc) (struct ckrm_core_class *this, struct ckrm_core_class *parent); - void (*res_free) (void *); + void *(*res_alloc) (struct ckrm_core_class * this, + struct ckrm_core_class * parent); + void (*res_free) (void *); /* set/get limits/guarantees for a resource controller class */ - int (*set_share_values) (void* , struct ckrm_shares *shares); - int (*get_share_values) (void* , struct ckrm_shares *shares); + int (*set_share_values) (void *, struct ckrm_shares * shares); + int (*get_share_values) (void *, struct ckrm_shares * shares); /* statistics and configuration access */ - int (*get_stats) (void* , struct seq_file *); - int (*reset_stats) (void *); - int (*show_config) (void* , struct seq_file *); - int (*set_config) (void* , const char *cfgstr); + int (*get_stats) (void *, struct seq_file *); + int (*reset_stats) (void *); + int (*show_config) (void *, struct seq_file *); + int (*set_config) (void *, const char *cfgstr); - void (*change_resclass)(void *, void *, void *); + void (*change_resclass) (void *, void *, void *); } ckrm_res_ctlr_t; -/*************************************************************************************** +/****************************************************************************** * CKRM_CLASSTYPE * - * A object describes a dimension for CKRM to classify - * along. I needs to provide methods to create and manipulate class objects in - * this dimension - ***************************************************************************************/ + * A object describes a dimension for CKRM to classify + * along. 
Need to provide methods to create and manipulate class objects in + * this dimension + *****************************************************************************/ /* list of predefined class types, we always recognize */ #define CKRM_CLASSTYPE_TASK_CLASS 0 -#define CKRM_CLASSTYPE_SOCKET_CLASS 1 -#define CKRM_RESV_CLASSTYPES 2 /* always +1 of last known type */ +#define CKRM_CLASSTYPE_SOCKET_CLASS 1 +#define CKRM_RESV_CLASSTYPES 2 /* always +1 of last known type */ #define CKRM_MAX_TYPENAME_LEN 32 - typedef struct ckrm_classtype { - /* Hubertus: Rearrange slots so that they are more cache friendly during access */ + /* Hubertus: Rearrange slots later for cache friendliness */ /* resource controllers */ - spinlock_t res_ctlrs_lock; /* protect data below (other than atomics) */ - int max_res_ctlrs; /* maximum number of resource controller allowed */ - int max_resid; /* maximum resid used */ - int resid_reserved; /* maximum number of reserved controllers */ - long bit_res_ctlrs; /* bitmap of resource ID used */ - atomic_t nr_resusers[CKRM_MAX_RES_CTLRS]; - ckrm_res_ctlr_t* res_ctlrs[CKRM_MAX_RES_CTLRS]; + spinlock_t res_ctlrs_lock; // protect res ctlr related data + int max_res_ctlrs; // max number of res ctlrs allowed + int max_resid; // max resid used + int resid_reserved; // max number of reserved controllers + long bit_res_ctlrs; // bitmap of resource ID used + atomic_t nr_resusers[CKRM_MAX_RES_CTLRS]; + ckrm_res_ctlr_t *res_ctlrs[CKRM_MAX_RES_CTLRS]; + /* state about my classes */ - struct ckrm_core_class *default_class; // pointer to default class - struct list_head classes; // listhead to link up all classes of this classtype - int num_classes; // how many classes do exist + struct ckrm_core_class *default_class; + struct list_head classes; // link all classes of this classtype + int num_classes; /* state about my ce interaction */ - int ce_regd; // Has a CE been registered for this classtype - int ce_cb_active; // are callbacks active - atomic_t ce_nr_users; // how many transient calls active - struct ckrm_eng_callback ce_callbacks; // callback engine - - // Begin classtype-rcfs private data. No rcfs/fs specific types used. - int mfidx; // Index into genmfdesc array used to initialize - // mfdesc and mfcount - void *mfdesc; // Array of descriptors of root and magic files - int mfcount; // length of above array - void *rootde; // root dentry created by rcfs - // End rcfs private data - - char name[CKRM_MAX_TYPENAME_LEN]; // currently same as mfdesc[0]->name but could be different - int typeID; /* unique TypeID */ - int maxdepth; /* maximum depth supported */ + int ce_regd; // if CE registered + int ce_cb_active; // if Callbacks active + atomic_t ce_nr_users; // number of active transient calls + struct ckrm_eng_callback ce_callbacks; // callback engine + + // Begin classtype-rcfs private data. No rcfs/fs specific types used. 
+ int mfidx; // Index into genmfdesc array used to initialize + void *mfdesc; // Array of descriptors of root and magic files + int mfcount; // length of above array + void *rootde; // root dentry created by rcfs + // End rcfs private data + + char name[CKRM_MAX_TYPENAME_LEN]; // currently same as mfdesc[0]->name + // but could be different + int typeID; // unique TypeID + int maxdepth; // maximum depth supported /* functions to be called on any class type by external API's */ - struct ckrm_core_class* (*alloc)(struct ckrm_core_class *parent, const char *name); /* alloc class instance */ - int (*free) (struct ckrm_core_class *cls); /* free class instance */ - - int (*show_members)(struct ckrm_core_class *, struct seq_file *); - int (*show_stats) (struct ckrm_core_class *, struct seq_file *); - int (*show_config) (struct ckrm_core_class *, struct seq_file *); - int (*show_shares) (struct ckrm_core_class *, struct seq_file *); - - int (*reset_stats) (struct ckrm_core_class *, const char *resname, - const char *); - int (*set_config) (struct ckrm_core_class *, const char *resname, - const char *cfgstr); - int (*set_shares) (struct ckrm_core_class *, const char *resname, - struct ckrm_shares *shares); - int (*forced_reclassify)(struct ckrm_core_class *, const char *); - - + + struct ckrm_core_class *(*alloc) (struct ckrm_core_class * parent, + const char *name); + int (*free) (struct ckrm_core_class * cls); + int (*show_members) (struct ckrm_core_class *, struct seq_file *); + int (*show_stats) (struct ckrm_core_class *, struct seq_file *); + int (*show_config) (struct ckrm_core_class *, struct seq_file *); + int (*show_shares) (struct ckrm_core_class *, struct seq_file *); + + int (*reset_stats) (struct ckrm_core_class *, const char *resname, + const char *); + int (*set_config) (struct ckrm_core_class *, const char *resname, + const char *cfgstr); + int (*set_shares) (struct ckrm_core_class *, const char *resname, + struct ckrm_shares * shares); + int (*forced_reclassify) (struct ckrm_core_class *, const char *); + /* functions to be called on a class type by ckrm internals */ - void (*add_resctrl)(struct ckrm_core_class *, int resid); // class initialization for new RC - + + /* class initialization for new RC */ + void (*add_resctrl) (struct ckrm_core_class *, int resid); + } ckrm_classtype_t; -/****************************************************************************************** +/****************************************************************************** * CKRM CORE CLASS * common part to any class structure (i.e. 
instance of a classtype) - ******************************************************************************************/ + ******************************************************************************/ /* basic definition of a hierarchy that is to be used by the the CORE classes * and can be used by the resource class objects @@ -186,24 +186,28 @@ typedef struct ckrm_classtype { #define CKRM_CORE_MAGIC 0xBADCAFFE typedef struct ckrm_hnode { - struct ckrm_core_class *parent; - struct list_head siblings; /* linked list of siblings */ - struct list_head children; /* anchor for children */ + struct ckrm_core_class *parent; + struct list_head siblings; + struct list_head children; } ckrm_hnode_t; typedef struct ckrm_core_class { - struct ckrm_classtype *classtype; // what type does this core class belong to - void* res_class[CKRM_MAX_RES_CTLRS]; // pointer to array of resource classes - spinlock_t class_lock; // to protect the list and the array above - struct list_head objlist; // generic list for any object list to be maintained by class - struct list_head clslist; // to link up all classes in a single list type wrt to type - struct dentry *dentry; // dentry of inode in the RCFS + struct ckrm_classtype *classtype; + void *res_class[CKRM_MAX_RES_CTLRS]; // resource classes + spinlock_t class_lock; // protects list,array above + + + struct list_head objlist; // generic object list + struct list_head clslist; // peer classtype classes + struct dentry *dentry; // dentry of inode in the RCFS int magic; - struct ckrm_hnode hnode; // hierarchy - rwlock_t hnode_rwlock; // rw_clock protecting the hnode above. + + struct ckrm_hnode hnode; // hierarchy + rwlock_t hnode_rwlock; // protects hnode above. atomic_t refcnt; const char *name; - int delayed; // core deletion delayed because of race conditions + int delayed; // core deletion delayed + // because of race conditions } ckrm_core_class_t; /* type coerce between derived class types and ckrm core class type */ @@ -215,59 +219,72 @@ typedef struct ckrm_core_class { /* what type is a class of ISA */ #define class_isa(clsptr) (class_core(clsptr)->classtype) - -/****************************************************************************************** +/****************************************************************************** * OTHER - ******************************************************************************************/ + ******************************************************************************/ #define ckrm_get_res_class(rescls,resid,type) ((type*)((rescls)->res_class[resid])) -extern int ckrm_register_res_ctlr (struct ckrm_classtype *, ckrm_res_ctlr_t *); -extern int ckrm_unregister_res_ctlr (ckrm_res_ctlr_t *); +extern int ckrm_register_res_ctlr(struct ckrm_classtype *, ckrm_res_ctlr_t *); +extern int ckrm_unregister_res_ctlr(ckrm_res_ctlr_t *); extern int ckrm_validate_and_grab_core(struct ckrm_core_class *core); -extern int ckrm_init_core_class(struct ckrm_classtype *clstype,struct ckrm_core_class *dcore, - struct ckrm_core_class *parent, const char *name); -extern int ckrm_release_core_class(struct ckrm_core_class *); // Hubertus .. can disappear after cls del debugging -extern struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *type, const char *resname); +extern int ckrm_init_core_class(struct ckrm_classtype *clstype, + struct ckrm_core_class *dcore, + struct ckrm_core_class *parent, + const char *name); +extern int ckrm_release_core_class(struct ckrm_core_class *); +// Hubertus .. 
can disappear after cls del debugging +extern struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *type, + const char *resname); #if 0 -// Hubertus ... need to straighten out all these I don't think we will even call thsie ore are we +// Hubertus ... need to straighten out all these I don't think we will even +// call this or are we /* interface to the RCFS filesystem */ -extern struct ckrm_core_class *ckrm_alloc_core_class(struct ckrm_core_class *, const char *, int); +extern struct ckrm_core_class *ckrm_alloc_core_class(struct ckrm_core_class *, + const char *, int); // Reclassify the given pid to the given core class by force extern void ckrm_forced_reclassify_pid(int, struct ckrm_core_class *); // Reclassify the given net_struct to the given core class by force -extern void ckrm_forced_reclassify_laq(struct ckrm_net_struct *, - struct ckrm_core_class *); +extern void ckrm_forced_reclassify_laq(struct ckrm_net_struct *, + struct ckrm_core_class *); #endif extern void ckrm_lock_hier(struct ckrm_core_class *); extern void ckrm_unlock_hier(struct ckrm_core_class *); -extern struct ckrm_core_class * ckrm_get_next_child(struct ckrm_core_class *, - struct ckrm_core_class *); +extern struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *, + struct ckrm_core_class *); extern void child_guarantee_changed(struct ckrm_shares *, int, int); extern void child_maxlimit_changed(struct ckrm_shares *, int); -extern int set_shares(struct ckrm_shares *, struct ckrm_shares *, struct ckrm_shares *); +extern int set_shares(struct ckrm_shares *, struct ckrm_shares *, + struct ckrm_shares *); /* classtype registration and lookup */ -extern int ckrm_register_classtype (struct ckrm_classtype *clstype); +extern int ckrm_register_classtype(struct ckrm_classtype *clstype); extern int ckrm_unregister_classtype(struct ckrm_classtype *clstype); -extern struct ckrm_classtype* ckrm_find_classtype_by_name(const char *name); +extern struct ckrm_classtype *ckrm_find_classtype_by_name(const char *name); /* default functions that can be used in classtypes's function table */ -extern int ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq); -extern int ckrm_class_show_stats(struct ckrm_core_class *core, struct seq_file *seq); -extern int ckrm_class_show_config(struct ckrm_core_class *core, struct seq_file *seq); -extern int ckrm_class_set_config(struct ckrm_core_class *core, const char *resname, const char *cfgstr); -extern int ckrm_class_set_shares(struct ckrm_core_class *core, const char *resname, struct ckrm_shares *shares); -extern int ckrm_class_reset_stats(struct ckrm_core_class *core, const char *resname, const char *unused); +extern int ckrm_class_show_shares(struct ckrm_core_class *core, + struct seq_file *seq); +extern int ckrm_class_show_stats(struct ckrm_core_class *core, + struct seq_file *seq); +extern int ckrm_class_show_config(struct ckrm_core_class *core, + struct seq_file *seq); +extern int ckrm_class_set_config(struct ckrm_core_class *core, + const char *resname, const char *cfgstr); +extern int ckrm_class_set_shares(struct ckrm_core_class *core, + const char *resname, + struct ckrm_shares *shares); +extern int ckrm_class_reset_stats(struct ckrm_core_class *core, + const char *resname, const char *unused); #if 0 extern void ckrm_ns_hold(struct ckrm_net_struct *); @@ -275,21 +292,21 @@ extern void ckrm_ns_put(struct ckrm_net_struct *); extern void *ckrm_set_rootcore_byname(char *, void *); #endif -static inline void ckrm_core_grab(struct ckrm_core_class *core) 
-{ - if (core) atomic_inc(&core->refcnt); +static inline void ckrm_core_grab(struct ckrm_core_class *core) +{ + if (core) + atomic_inc(&core->refcnt); } -static inline void ckrm_core_drop(struct ckrm_core_class *core) -{ +static inline void ckrm_core_drop(struct ckrm_core_class *core) +{ // only make definition available in this context - extern void ckrm_free_core_class(struct ckrm_core_class *core); + extern void ckrm_free_core_class(struct ckrm_core_class *core); if (core && (atomic_dec_and_test(&core->refcnt))) - ckrm_free_core_class(core); + ckrm_free_core_class(core); } -static inline unsigned int -ckrm_is_core_valid(ckrm_core_class_t *core) +static inline unsigned int ckrm_is_core_valid(ckrm_core_class_t * core) { return (core && (core->magic == CKRM_CORE_MAGIC)); } @@ -299,14 +316,16 @@ ckrm_is_core_valid(ckrm_core_class_t *core) // ckrm_res_ctrl *ctlr, // void *robj, // int bmap) -#define forall_class_resobjs(cls,rcbs,robj,bmap) \ - for ( bmap=((cls->classtype)->bit_res_ctlrs) ; \ - ({ int rid; ((rid=ffs(bmap)-1) >= 0) && \ - (bmap&=~(1<classtype->res_ctlrs[rid]) && (robj=cls->res_class[rid]))); }) ; \ +#define forall_class_resobjs(cls,rcbs,robj,bmap) \ + for ( bmap=((cls->classtype)->bit_res_ctlrs) ; \ + ({ int rid; ((rid=ffs(bmap)-1) >= 0) && \ + (bmap &= ~(1<classtype->res_ctlrs[rid]) \ + && (robj=cls->res_class[rid]))); }); \ ) -extern struct ckrm_classtype* ckrm_classtypes[]; /* should provide a different interface */ - +extern struct ckrm_classtype *ckrm_classtypes[]; +/* should provide a different interface */ /*----------------------------------------------------------------------------- * CKRM event callback specification for the classtypes or resource controllers @@ -317,51 +336,61 @@ extern struct ckrm_classtype* ckrm_classtypes[]; /* should provide a different i *-----------------------------------------------------------------------------*/ struct ckrm_event_spec { - enum ckrm_event ev; + enum ckrm_event ev; struct ckrm_hook_cb cb; }; -#define CKRM_EVENT_SPEC(EV,FCT) { CKRM_EVENT_##EV, { (ckrm_event_cb)FCT, NULL } } +#define CKRM_EVENT_SPEC(EV,FCT) { CKRM_EVENT_##EV, \ + { (ckrm_event_cb)FCT, NULL } } int ckrm_register_event_set(struct ckrm_event_spec especs[]); int ckrm_unregister_event_set(struct ckrm_event_spec especs[]); int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb); int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb); -/****************************************************************************************** +/****************************************************************************** * CE Invocation interface - ******************************************************************************************/ + ******************************************************************************/ #define ce_protect(ctype) (atomic_inc(&((ctype)->ce_nr_users))) #define ce_release(ctype) (atomic_dec(&((ctype)->ce_nr_users))) // CE Classification callbacks with -#define CE_CLASSIFY_NORET(ctype, event, objs_to_classify...) \ -do { \ - if ((ctype)->ce_cb_active && (test_bit(event,&(ctype)->ce_callbacks.c_interest))) \ - (*(ctype)->ce_callbacks.classify)(event, objs_to_classify); \ +#define CE_CLASSIFY_NORET(ctype, event, objs_to_classify...) \ +do { \ + if ((ctype)->ce_cb_active \ + && (test_bit(event,&(ctype)->ce_callbacks.c_interest))) \ + (*(ctype)->ce_callbacks.classify)(event, \ + objs_to_classify); \ } while (0) -#define CE_CLASSIFY_RET(ret, ctype, event, objs_to_classify...) 
\ -do { \ - if ((ctype)->ce_cb_active && (test_bit(event,&(ctype)->ce_callbacks.c_interest))) \ - ret = (*(ctype)->ce_callbacks.classify)(event, objs_to_classify); \ +#define CE_CLASSIFY_RET(ret, ctype, event, objs_to_classify...) \ +do { \ + if ((ctype)->ce_cb_active \ + && (test_bit(event,&(ctype)->ce_callbacks.c_interest))) \ + ret = (*(ctype)->ce_callbacks.classify)(event, \ + objs_to_classify);\ } while (0) -#define CE_NOTIFY(ctype, event, cls, objs_to_classify) \ -do { \ - if ((ctype)->ce_cb_active && (test_bit(event,&(ctype)->ce_callbacks.n_interest))) \ - (*(ctype)->ce_callbacks.notify)(event,cls,objs_to_classify); \ +#define CE_NOTIFY(ctype, event, cls, objs_to_classify) \ +do { \ + if ((ctype)->ce_cb_active \ + && (test_bit(event,&(ctype)->ce_callbacks.n_interest))) \ + (*(ctype)->ce_callbacks.notify)(event, \ + cls,objs_to_classify); \ } while (0) +/*************** + * RCFS related + ***************/ -#endif // CONFIG_CKRM - -#endif // __KERNEL__ - -#endif // _LINUX_CKRM_RC_H - +/* vars needed by other modules/core */ +extern int rcfs_mounted; +extern int rcfs_engine_regd; +#endif // CONFIG_CKRM +#endif // __KERNEL__ +#endif // _LINUX_CKRM_RC_H diff --git a/include/linux/ckrm_tc.h b/include/linux/ckrm_tc.h index 6a570252b..5650dd3c3 100644 --- a/include/linux/ckrm_tc.h +++ b/include/linux/ckrm_tc.h @@ -1,18 +1,13 @@ #include - - #define TASK_CLASS_TYPE_NAME "taskclass" typedef struct ckrm_task_class { - struct ckrm_core_class core; + struct ckrm_core_class core; } ckrm_task_class_t; - // Index into genmfdesc array, defined in rcfs/dir_modules.c, // which has the mfdesc entry that taskclass wants to use #define TC_MF_IDX 0 - extern int ckrm_forced_reclassify_pid(int pid, struct ckrm_task_class *cls); - diff --git a/include/linux/ckrm_tsk.h b/include/linux/ckrm_tsk.h index 64d20dd27..d0b4530d4 100644 --- a/include/linux/ckrm_tsk.h +++ b/include/linux/ckrm_tsk.h @@ -24,18 +24,11 @@ #include -#ifdef CONFIG_CKRM_RES_NUMTASKS +typedef int (*get_ref_t) (void *, int); +typedef void (*put_ref_t) (void *); extern int numtasks_get_ref(void *, int); -extern int numtasks_get_ref_resid(void *, int, int); extern void numtasks_put_ref(void *); +extern void ckrm_numtasks_register(get_ref_t, put_ref_t); -#else - -#define numtasks_get_ref(a, b) 1 -#define numtasks_get_ref_resid(a, b, c) 1 -#define numtasks_put_ref(a) - -#endif - -#endif // _LINUX_CKRM_RES_H +#endif // _LINUX_CKRM_RES_H diff --git a/include/linux/rcfs.h b/include/linux/rcfs.h index a2a65e8a6..232d58ef1 100644 --- a/include/linux/rcfs.h +++ b/include/linux/rcfs.h @@ -7,10 +7,8 @@ #include #include - - -/* The following declarations cannot be included in any of ckrm*.h files without - jumping hoops. Remove later when rearrangements done */ +/* The following declarations cannot be included in any of ckrm*.h files + without jumping hoops. Remove later when rearrangements done */ // Hubertus .. 
taken out //extern ckrm_res_callback_t ckrm_res_ctlrs[CKRM_MAX_RES_CTLRS]; @@ -30,7 +28,6 @@ typedef struct rcfs_inode_info { #define RCFS_DEFAULT_DIR_MODE (S_IFDIR | S_IRUGO | S_IXUGO) #define RCFS_DEFAULT_FILE_MODE (S_IFREG | S_IRUSR | S_IWUSR | S_IRGRP |S_IROTH) - struct rcfs_magf { char name[RCFS_MAGF_NAMELEN]; int mode; @@ -39,9 +36,10 @@ struct rcfs_magf { }; struct rcfs_mfdesc { - struct rcfs_magf *rootmf; // Root directory and its magic files - int rootmflen; // length of above array - // Can have a different magf describing magic files for non-root entries too + struct rcfs_magf *rootmf; // Root directory and its magic files + int rootmflen; // length of above array + // Can have a different magf describing magic files + // for non-root entries too }; extern struct rcfs_mfdesc *genmfdesc[]; @@ -51,17 +49,16 @@ inline struct rcfs_inode_info *RCFS_I(struct inode *inode); int rcfs_empty(struct dentry *); struct inode *rcfs_get_inode(struct super_block *, int, dev_t); int rcfs_mknod(struct inode *, struct dentry *, int, dev_t); -int _rcfs_mknod(struct inode *, struct dentry *, int , dev_t); +int _rcfs_mknod(struct inode *, struct dentry *, int, dev_t); int rcfs_mkdir(struct inode *, struct dentry *, int); ckrm_core_class_t *rcfs_make_core(struct dentry *, struct ckrm_core_class *); struct dentry *rcfs_set_magf_byname(char *, void *); -struct dentry * rcfs_create_internal(struct dentry *, struct rcfs_magf *, int); +struct dentry *rcfs_create_internal(struct dentry *, struct rcfs_magf *, int); int rcfs_delete_internal(struct dentry *); int rcfs_create_magic(struct dentry *, struct rcfs_magf *, int); int rcfs_clear_magic(struct dentry *); - extern struct super_operations rcfs_super_ops; extern struct address_space_operations rcfs_aops; @@ -69,7 +66,6 @@ extern struct inode_operations rcfs_dir_inode_operations; extern struct inode_operations rcfs_rootdir_inode_operations; extern struct inode_operations rcfs_file_inode_operations; - extern struct file_operations target_fileops; extern struct file_operations shares_fileops; extern struct file_operations stats_fileops; @@ -80,19 +76,20 @@ extern struct file_operations rcfs_file_operations; // Callbacks into rcfs from ckrm typedef struct rcfs_functions { - int (* mkroot)(struct rcfs_magf *,int, struct dentry **); - int (* rmroot)(struct dentry *); - int (* register_classtype)(ckrm_classtype_t *); - int (* deregister_classtype)(ckrm_classtype_t *); + int (*mkroot) (struct rcfs_magf *, int, struct dentry **); + int (*rmroot) (struct dentry *); + int (*register_classtype) (ckrm_classtype_t *); + int (*deregister_classtype) (ckrm_classtype_t *); } rcfs_fn_t; int rcfs_register_classtype(ckrm_classtype_t *); int rcfs_deregister_classtype(ckrm_classtype_t *); -int rcfs_mkroot(struct rcfs_magf *, int , struct dentry **); +int rcfs_mkroot(struct rcfs_magf *, int, struct dentry **); int rcfs_rmroot(struct dentry *); -#define RCFS_ROOT "/rcfs" // Hubertus .. we should use the mount point instead of hardcoded +#define RCFS_ROOT "/rcfs" // Hubertus .. 
we should use the mount point + // instead of hardcoded extern struct dentry *rcfs_rootde; +extern rbce_eng_callback_t rcfs_eng_callbacks; - -#endif /* _LINUX_RCFS_H */ +#endif /* _LINUX_RCFS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d8d111ec..82fa3bea0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -133,6 +133,7 @@ struct sched_param { #ifdef __KERNEL__ +#include #include /* @@ -513,6 +514,18 @@ struct task_struct { struct mempolicy *mempolicy; short il_next; /* could be shared with used_math */ #endif + +#ifdef CONFIG_CKRM + spinlock_t ckrm_tsklock; + void *ce_data; +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + // .. Hubertus should change to CONFIG_CKRM_TYPE_TASKCLASS + struct ckrm_task_class *taskclass; + struct list_head taskclass_link; +#endif // CONFIG_CKRM_TYPE_TASKCLASS +#endif // CONFIG_CKRM + + struct task_delay_info delays; }; static inline pid_t process_group(struct task_struct *tsk) @@ -550,6 +563,9 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ +#define PF_MEMIO 0x00400000 /* I am potentially doing I/O for mem */ +#define PF_IOWAIT 0x00800000 /* I am waiting on disk I/O */ + #ifdef CONFIG_SMP #define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ @@ -1067,6 +1083,65 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ + +/* API for registering delay info */ +#ifdef CONFIG_DELAY_ACCT + +#define test_delay_flag(tsk,flg) ((tsk)->flags & (flg)) +#define set_delay_flag(tsk,flg) ((tsk)->flags |= (flg)) +#define clear_delay_flag(tsk,flg) ((tsk)->flags &= ~(flg)) + +#define def_delay_var(var) unsigned long long var +#define get_delay(tsk,field) ((tsk)->delays.field) +#define delay_value(x) (((unsigned long)(x))/1000) + +#define start_delay(var) ((var) = sched_clock()) +#define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock()) + +#define inc_delay(tsk,field) (((tsk)->delays.field)++) +#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts))) +#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg)) + +static inline void add_io_delay(unsigned long dstart) +{ + struct task_struct * tsk = current; + unsigned long val = delay_value(sched_clock()-dstart); + if (test_delay_flag(tsk,PF_MEMIO)) { + tsk->delays.mem_iowait_total += val; + tsk->delays.num_memwaits++; + } else { + tsk->delays.iowait_total += val; + tsk->delays.num_iowaits++; + } + clear_delay_flag(tsk,PF_IOWAIT); +} + +inline static void init_delays(struct task_struct *tsk) +{ + memset((void*)&tsk->delays,0,sizeof(tsk->delays)); +} + +#else + +#define test_delay_flag(tsk,flg) (0) +#define set_delay_flag(tsk,flg) do { } while (0) +#define clear_delay_flag(tsk,flg) do { } while (0) + +#define def_delay_var(var) +#define get_delay(tsk,field) (0) + +#define start_delay(var) do { } while (0) +#define start_delay_set(var,flg) do { } while (0) + +#define inc_delay(tsk,field) do { } while (0) +#define add_delay_ts(tsk,field,start_ts,now) do { } while (0) +#define add_delay_clear(tsk,field,start_ts,flg) do { } while (0) +#define add_io_delay(dstart) do { } while (0) +#define init_delays(tsk) do { } while (0) +#endif + + + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/taskdelays.h b/include/linux/taskdelays.h index 
698b23b61..eafb1e77f 100644 --- a/include/linux/taskdelays.h +++ b/include/linux/taskdelays.h @@ -2,19 +2,19 @@ #define _LINUX_TASKDELAYS_H #include +#include struct task_delay_info { #ifdef CONFIG_DELAY_ACCT - /* delay statistics in usecs */ - unsigned long runs; - unsigned long waitcpu_total; - unsigned long runcpu_total; - unsigned long iowait_total; - unsigned long mem_iowait_total; - unsigned long num_iowaits; - unsigned long num_memwaits; + /* delay statistics in usecs */ + uint64_t waitcpu_total; + uint64_t runcpu_total; + uint64_t iowait_total; + uint64_t mem_iowait_total; + uint32_t runs; + uint32_t num_iowaits; + uint32_t num_memwaits; #endif }; -#endif // _LINUX_TASKDELAYS_H - +#endif // _LINUX_TASKDELAYS_H diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 39e2d2261..9ceaf7393 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -128,6 +128,10 @@ enum { #define TCP_INFO 11 /* Information about this connection. */ #define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#ifdef CONFIG_ACCEPT_QUEUES +#define TCP_ACCEPTQ_SHARE 13 /* Set accept queue share */ +#endif + #define TCPI_OPT_TIMESTAMPS 1 #define TCPI_OPT_SACK 2 #define TCPI_OPT_WSCALE 4 @@ -188,6 +192,18 @@ struct tcp_info __u32 tcpi_rcv_space; }; +#ifdef CONFIG_ACCEPT_QUEUES + +#define NUM_ACCEPT_QUEUES 8 /* Must be power of 2 */ + +struct tcp_acceptq_info { + unsigned char acceptq_shares; + unsigned long acceptq_wait_time; + unsigned int acceptq_qcount; + unsigned int acceptq_count; +}; +#endif + #ifdef __KERNEL__ #include @@ -369,8 +385,9 @@ struct tcp_opt { /* FIFO of established children */ struct open_request *accept_queue; - struct open_request *accept_queue_tail; - +#ifndef CONFIG_ACCEPT_QUEUES + struct open_request *accept_queue_tail; +#endif int write_pending; /* A write to socket waits to start. */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -424,6 +441,21 @@ struct tcp_opt { __u32 last_max_cwnd; /* last maximium snd_cwnd */ __u32 last_cwnd; /* the last snd_cwnd */ } bictcp; + +#ifdef CONFIG_ACCEPT_QUEUES + /* move to listen opt... */ + char class_index; + struct { + struct open_request *aq_head; + struct open_request *aq_tail; + unsigned int aq_cnt; + unsigned int aq_ratio; + unsigned int aq_count; + unsigned int aq_qcount; + unsigned int aq_backlog; + unsigned int aq_wait_time; + } acceptq[NUM_ACCEPT_QUEUES]; +#endif }; /* WARNING: don't change the layout of the members in tcp_sock! */ diff --git a/include/net/sock.h b/include/net/sock.h index eb7580c94..c578bf795 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -245,6 +245,7 @@ struct sock { struct timeval sk_stamp; struct socket *sk_socket; void *sk_user_data; + void *sk_ns; // For use by CKRM struct module *sk_owner; void *sk_security; void (*sk_state_change)(struct sock *sk); @@ -398,6 +399,7 @@ static inline int sock_flag(struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +#ifndef CONFIG_ACCEPT_QUEUES static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; @@ -412,6 +414,7 @@ static inline int sk_acceptq_is_full(struct sock *sk) { return sk->sk_ack_backlog > sk->sk_max_ack_backlog; } +#endif /* The per-socket spinlock must be held here. 
*/ #define sk_add_backlog(__sk, __skb) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 86e9f9add..c03a2c418 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -671,6 +671,10 @@ struct open_request { struct tcp_v6_open_req v6_req; #endif } af; +#ifdef CONFIG_ACCEPT_QUEUES + unsigned long acceptq_time_stamp; + int acceptq_class; +#endif }; /* SLAB cache for open requests. */ @@ -1794,6 +1798,83 @@ static inline int tcp_full_space( struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } +struct tcp_listen_opt +{ + u8 max_qlen_log; /* log_2 of maximal queued SYNs */ + int qlen; +#ifdef CONFIG_ACCEPT_QUEUES + int qlen_young[NUM_ACCEPT_QUEUES]; +#else + int qlen_young; +#endif + int clock_hand; + u32 hash_rnd; + struct open_request *syn_table[TCP_SYNQ_HSIZE]; +}; + +#ifdef CONFIG_ACCEPT_QUEUES +static inline void sk_acceptq_removed(struct sock *sk, int class) +{ + tcp_sk(sk)->acceptq[class].aq_backlog--; +} + +static inline void sk_acceptq_added(struct sock *sk, int class) +{ + tcp_sk(sk)->acceptq[class].aq_backlog++; +} + +static inline int sk_acceptq_is_full(struct sock *sk, int class) +{ + return tcp_sk(sk)->acceptq[class].aq_backlog > + sk->sk_max_ack_backlog; +} + +static inline void tcp_set_acceptq(struct tcp_opt *tp, struct open_request *req) +{ + int class = req->acceptq_class; + int prev_class; + + if (!tp->acceptq[class].aq_ratio) { + req->acceptq_class = 0; + class = 0; + } + + tp->acceptq[class].aq_qcount++; + req->acceptq_time_stamp = jiffies; + + if (tp->acceptq[class].aq_tail) { + req->dl_next = tp->acceptq[class].aq_tail->dl_next; + tp->acceptq[class].aq_tail->dl_next = req; + tp->acceptq[class].aq_tail = req; + } else { /* if first request in the class */ + tp->acceptq[class].aq_head = req; + tp->acceptq[class].aq_tail = req; + + prev_class = class - 1; + while (prev_class >= 0) { + if (tp->acceptq[prev_class].aq_tail) + break; + prev_class--; + } + if (prev_class < 0) { + req->dl_next = tp->accept_queue; + tp->accept_queue = req; + } + else { + req->dl_next = tp->acceptq[prev_class].aq_tail->dl_next; + tp->acceptq[prev_class].aq_tail->dl_next = req; + } + } +} +static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, + struct sock *child) +{ + tcp_set_acceptq(tcp_sk(sk),req); + req->sk = child; + sk_acceptq_added(sk,req->acceptq_class); +} + +#else static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, struct sock *child) { @@ -1811,15 +1892,41 @@ static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, req->dl_next = NULL; } -struct tcp_listen_opt +#endif + + +#ifdef CONFIG_ACCEPT_QUEUES +static inline void +tcp_synq_removed(struct sock *sk, struct open_request *req) { - u8 max_qlen_log; /* log_2 of maximal queued SYNs */ - int qlen; - int qlen_young; - int clock_hand; - u32 hash_rnd; - struct open_request *syn_table[TCP_SYNQ_HSIZE]; -}; + struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; + + if (--lopt->qlen == 0) + tcp_delete_keepalive_timer(sk); + if (req->retrans == 0) + lopt->qlen_young[req->acceptq_class]--; +} + +static inline void tcp_synq_added(struct sock *sk, struct open_request *req) +{ + struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; + + if (lopt->qlen++ == 0) + tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + lopt->qlen_young[req->acceptq_class]++; +} + +static inline int tcp_synq_len(struct sock *sk) +{ + return tcp_sk(sk)->listen_opt->qlen; +} + +static inline int tcp_synq_young(struct sock *sk, int class) +{ + return 
tcp_sk(sk)->listen_opt->qlen_young[class]; +} + +#else static inline void tcp_synq_removed(struct sock *sk, struct open_request *req) @@ -1850,6 +1957,7 @@ static inline int tcp_synq_young(struct sock *sk) { return tcp_sk(sk)->listen_opt->qlen_young; } +#endif static inline int tcp_synq_is_full(struct sock *sk) { diff --git a/init/Kconfig b/init/Kconfig index 210ca2cb7..2d20a5e13 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -85,6 +85,8 @@ config SYSVIPC you want to run the DOS emulator dosemu under Linux (read the DOSEMU-HOWTO, available from ), you'll need to say Y here. + DOSEMU-HOWTO, available from ), + you'll need to say Y here. You can find documentation about IPC with "info ipc" and also in section 6.4 of the Linux Programmer's Guide, available from @@ -121,6 +123,99 @@ config BSD_PROCESS_ACCT up to the user level program to do useful things with this information. This is generally a good idea, so say Y. +menu "Class Based Kernel Resource Management" + +config CKRM + bool "Class Based Kernel Resource Management Core" + depends on EXPERIMENTAL + help + Class-based Kernel Resource Management is a framework for controlling + and monitoring resource allocation of user-defined groups of tasks or + incoming socket connections. For more information, please visit + http://ckrm.sf.net. + + If you say Y here, enable the Resource Class File System and atleast + one of the resource controllers below. Say N if you are unsure. + +config RCFS_FS + tristate "Resource Class File System (User API)" + depends on CKRM + help + RCFS is the filesystem API for CKRM. This separate configuration + option is provided only for debugging and will eventually disappear + since rcfs will be automounted whenever CKRM is configured. + + Say N if unsure, Y if you've enabled CKRM, M to debug rcfs + initialization. + +config CKRM_TYPE_TASKCLASS + bool "Class Manager for Task Groups" + depends on CKRM + help + TASKCLASS provides the extensions for CKRM to track task classes + This is the base to enable task class based resource control for + cpu, memory and disk I/O. + + Say N if unsure + +config CKRM_RES_NUMTASKS + tristate "Number of Tasks Resource Manager" + depends on CKRM_TYPE_TASKCLASS + default m + help + Provides a Resource Controller for CKRM that allows limiting no of + tasks a task class can have. + + Say N if unsure, Y to use the feature. + +config CKRM_TYPE_SOCKETCLASS + bool "Class Manager for socket groups" + depends on CKRM + help + SOCKET provides the extensions for CKRM to track per socket + classes. This is the base to enable socket based resource + control for inbound connection control, bandwidth control etc. + + Say N if unsure. + +config CKRM_RES_LISTENAQ + tristate "Multiple Accept Queues Resource Manager" + depends on CKRM_TYPE_SOCKETCLASS && ACCEPT_QUEUES + default m + help + Provides a resource controller for CKRM to prioritize inbound + connection requests. See inbound control description for + "IP: TCP Multiple accept queues support". If you choose that + option choose this option to control the queue weights. + + If unsure, say N. + +config CKRM_RBCE + tristate "Vanilla Rule-based Classification Engine (RBCE)" + depends on CKRM && RCFS_FS + default m + help + Provides an optional module to support creation of rules for automatic + classification of kernel objects. Rules are created/deleted/modified + through an rcfs interface. RBCE is not required for CKRM. + + If unsure, say N. 
+ +config CKRM_CRBCE + tristate "Enhanced Rule-based Classification Engine (RBCE)" + depends on CKRM && RCFS_FS && RELAYFS_FS && DELAY_ACCT + default m + help + Provides an optional module to support creation of rules for automatic + classification of kernel objects, just like RBCE above. In addition, + CRBCE provides per-process delay data (requires DELAY_ACCT configured) + enabled) and makes information on significant kernel events available + to userspace tools through relayfs (requires RELAYFS_FS configured). + + If unsure, say N. + +endmenu + config SYSCTL bool "Sysctl support" ---help--- @@ -227,6 +322,15 @@ menuconfig EMBEDDED environments which can tolerate a "non-standard" kernel. Only use this if you really know what you are doing. +config DELAY_ACCT + bool "Enable delay accounting (EXPERIMENTAL)" + help + In addition to counting frequency the total delay in ns is also + recorded. CPU delays are specified as cpu-wait and cpu-run. + I/O delays are recorded for memory and regular I/O. + Information is accessible through /proc//delay. + + config KALLSYMS bool "Load all symbols for debugging/kksymoops" if EMBEDDED default y diff --git a/init/main.c b/init/main.c index 613aaaba8..7080522cd 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,8 @@ #include #include +#include + /* * This is one of the first .c files built. Error out early * if we have compiler trouble.. @@ -432,6 +434,7 @@ asmlinkage void __init start_kernel(void) rcu_init(); init_IRQ(); pidhash_init(); + ckrm_init(); init_timers(); softirq_init(); time_init(); @@ -480,6 +483,7 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_PROC_FS proc_root_init(); #endif + check_bugs(); /* diff --git a/kernel/Makefile b/kernel/Makefile index 238c65f60..107df8eb1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o + kthread.o ckrm/ obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index 58b9aad74..4ace47acd 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -1,14 +1,11 @@ # -# Makefile for CKRM +# Makefile for CKRM # ifeq ($(CONFIG_CKRM),y) - obj-y = ckrm.o ckrmutils.o -endif - -obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o -obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o - -obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o -obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o - + obj-y = ckrm.o ckrmutils.o ckrm_tasks_stub.o rbce/ +endif + obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o + obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o + obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o + obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o diff --git a/kernel/ckrm/ckrm.c b/kernel/ckrm/ckrm.c index 43d14a8a9..5217ea003 100644 --- a/kernel/ckrm/ckrm.c +++ b/kernel/ckrm/ckrm.c @@ -52,12 +52,17 @@ #include #include +rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED; // protect classlists -rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED; // protect classlists - -struct rcfs_functions rcfs_fn ; +struct rcfs_functions rcfs_fn; EXPORT_SYMBOL(rcfs_fn); +// rcfs state needed by another module +int rcfs_engine_regd; +EXPORT_SYMBOL(rcfs_engine_regd); + +int rcfs_mounted; +EXPORT_SYMBOL(rcfs_mounted); /************************************************************************** * Helper Functions * @@ -70,33 +75,36 
@@ EXPORT_SYMBOL(rcfs_fn); /* * Return TRUE if the given resource is registered. */ -inline unsigned int -is_res_regd(struct ckrm_classtype *clstype, int resid) +inline unsigned int is_res_regd(struct ckrm_classtype *clstype, int resid) { - return ( (resid>=0) && (resid < clstype->max_resid) && - test_bit(resid, &clstype->bit_res_ctlrs) - ); + return ((resid >= 0) && (resid < clstype->max_resid) && + test_bit(resid, &clstype->bit_res_ctlrs) + ); } -struct ckrm_res_ctlr* -ckrm_resctlr_lookup(struct ckrm_classtype *clstype, const char *resname) +struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype, + const char *resname) { int resid = -1; - - for (resid=0; resid < clstype->max_resid; resid++) { + + if (!clstype || !resname) { + return NULL; + } + for (resid = 0; resid < clstype->max_resid; resid++) { if (test_bit(resid, &clstype->bit_res_ctlrs)) { struct ckrm_res_ctlr *rctrl = clstype->res_ctlrs[resid]; - if (!strncmp(resname, rctrl->res_name,CKRM_MAX_RES_NAME)) + if (!strncmp(resname, rctrl->res_name, + CKRM_MAX_RES_NAME)) return rctrl; } } return NULL; } + EXPORT_SYMBOL(ckrm_resctlr_lookup); /* given a classname return the class handle and its classtype*/ -void * -ckrm_classobj(char *classname, int *classTypeID) +void *ckrm_classobj(char *classname, int *classTypeID) { int i; @@ -106,11 +114,11 @@ ckrm_classobj(char *classname, int *classTypeID) } read_lock(&ckrm_class_lock); - for ( i=0 ; iclasses, clslist) { if (core->name && !strcmp(core->name, classname)) { @@ -132,15 +140,14 @@ EXPORT_SYMBOL(ckrm_classobj); * Internal Functions/macros * **************************************************************************/ -static inline void -set_callbacks_active(struct ckrm_classtype *ctype) +static inline void set_callbacks_active(struct ckrm_classtype *ctype) { ctype->ce_cb_active = ((atomic_read(&ctype->ce_nr_users) > 0) && - (ctype->ce_callbacks.always_callback || (ctype->num_classes > 1))); + (ctype->ce_callbacks.always_callback + || (ctype->num_classes > 1))); } -int -ckrm_validate_and_grab_core(struct ckrm_core_class *core) +int ckrm_validate_and_grab_core(struct ckrm_core_class *core) { int rc = 0; read_lock(&ckrm_class_lock); @@ -161,13 +168,12 @@ ckrm_validate_and_grab_core(struct ckrm_core_class *core) * * Returns typeId of class on success -errno for failure. */ -int -ckrm_register_engine(const char *typename, ckrm_eng_callback_t *ecbs) +int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs) { struct ckrm_classtype *ctype; ctype = ckrm_find_classtype_by_name(typename); - if (ctype == NULL) + if (ctype == NULL) return (-ENOENT); ce_protect(ctype); @@ -176,31 +182,41 @@ ckrm_register_engine(const char *typename, ckrm_eng_callback_t *ecbs) ce_release(ctype); return (-EBUSY); } - - /* we require that either classify and class_delete are set (due to object reference) - * or that notify is set (in case no real classification is supported only notification - * also require that the function pointer be set the momement the mask is non-null + + /* One of the following must be set: + classify, class_delete (due to object reference) or + notify (case where notification supported but not classification) + The function pointer must be set the momement the mask is non-null */ - if ( ! 
(((ecbs->classify) && (ecbs->class_delete)) || (ecbs->notify)) || - (ecbs->c_interest && ecbs->classify == NULL) || - (ecbs->n_interest && ecbs->notify == NULL) ) - { + + if (!(((ecbs->classify) && (ecbs->class_delete)) || (ecbs->notify)) || + (ecbs->c_interest && ecbs->classify == NULL) || + (ecbs->n_interest && ecbs->notify == NULL)) { ce_release(ctype); return (-EINVAL); } - /* Is any other engine registered for this classtype ? */ if (ctype->ce_regd) { ce_release(ctype); return (-EINVAL); } - + ctype->ce_regd = 1; ctype->ce_callbacks = *ecbs; set_callbacks_active(ctype); - if (ctype->ce_callbacks.class_add) - (*ctype->ce_callbacks.class_add)(ctype->default_class->name,ctype->default_class); + + if (ctype->ce_callbacks.class_add) { + struct ckrm_core_class *core; + + read_lock(&ckrm_class_lock); + + list_for_each_entry(core, &ctype->classes, clslist) { + (*ctype->ce_callbacks.class_add) (core->name, core, + ctype->typeID); + } + read_unlock(&ckrm_class_lock); + } return ctype->typeID; } @@ -209,16 +225,15 @@ ckrm_register_engine(const char *typename, ckrm_eng_callback_t *ecbs) * * Returns 0 on success -errno for failure. */ -int -ckrm_unregister_engine(const char *typename) +int ckrm_unregister_engine(const char *typename) { struct ckrm_classtype *ctype; ctype = ckrm_find_classtype_by_name(typename); - if (ctype == NULL) + if (ctype == NULL) return (-ENOENT); - ctype->ce_cb_active = 0; + ctype->ce_cb_active = 0; if (atomic_dec_and_test(&ctype->ce_nr_users) != 1) { // Somebody is currently using the engine, cannot deregister. @@ -243,20 +258,22 @@ ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child) struct ckrm_hnode *cnode = &child->hnode; if (!ckrm_is_core_valid(child)) { - printk(KERN_ERR "Invalid child %p given in ckrm_add_child\n", child); + printk(KERN_ERR "Invalid child %p given in ckrm_add_child\n", + child); return; } - + class_lock(child); INIT_LIST_HEAD(&cnode->children); INIT_LIST_HEAD(&cnode->siblings); - if (parent) { + if (parent) { struct ckrm_hnode *pnode; if (!ckrm_is_core_valid(parent)) { - printk(KERN_ERR "Invalid parent %p given in ckrm_add_child\n", - parent); + printk(KERN_ERR + "Invalid parent %p given in ckrm_add_child\n", + parent); parent = NULL; } else { pnode = &parent->hnode; @@ -272,21 +289,23 @@ ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child) /* */ -static int -ckrm_remove_child(struct ckrm_core_class *child) +static int ckrm_remove_child(struct ckrm_core_class *child) { struct ckrm_hnode *cnode, *pnode; struct ckrm_core_class *parent; if (!ckrm_is_core_valid(child)) { - printk(KERN_ERR "Invalid child %p given in ckrm_remove_child\n", child); + printk(KERN_ERR "Invalid child %p given" + " in ckrm_remove_child\n", + child); return 0; } cnode = &child->hnode; parent = cnode->parent; if (!ckrm_is_core_valid(parent)) { - printk(KERN_ERR "Invalid parent %p in ckrm_remove_child\n", parent); + printk(KERN_ERR "Invalid parent %p in ckrm_remove_child\n", + parent); return 0; } @@ -306,16 +325,14 @@ ckrm_remove_child(struct ckrm_core_class *child) return 1; } -void -ckrm_lock_hier(struct ckrm_core_class *parent) +void ckrm_lock_hier(struct ckrm_core_class *parent) { if (ckrm_is_core_valid(parent)) { read_lock(&parent->hnode_rwlock); } } -void -ckrm_unlock_hier(struct ckrm_core_class *parent) +void ckrm_unlock_hier(struct ckrm_core_class *parent) { if (ckrm_is_core_valid(parent)) { read_unlock(&parent->hnode_rwlock); @@ -327,18 +344,19 @@ ckrm_unlock_hier(struct ckrm_core_class *parent) * external callers should 
've called ckrm_lock_hier before calling this * function. */ -#define hnode_2_core(ptr) ((ptr) ? container_of(ptr, struct ckrm_core_class, hnode) : NULL) +#define hnode_2_core(ptr) \ +((ptr)? container_of(ptr, struct ckrm_core_class, hnode) : NULL) -struct ckrm_core_class * -ckrm_get_next_child(struct ckrm_core_class *parent, - struct ckrm_core_class *child) +struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *parent, + struct ckrm_core_class *child) { struct list_head *cnode; struct ckrm_hnode *next_cnode; struct ckrm_core_class *next_childcore; if (!ckrm_is_core_valid(parent)) { - printk(KERN_ERR "Invalid parent %p in ckrm_get_next_child\n", parent); + printk(KERN_ERR "Invalid parent %p in ckrm_get_next_child\n", + parent); return NULL; } if (list_empty(&parent->hnode.children)) { @@ -347,7 +365,9 @@ ckrm_get_next_child(struct ckrm_core_class *parent, if (child) { if (!ckrm_is_core_valid(child)) { - printk(KERN_ERR "Invalid child %p in ckrm_get_next_child\n", child); + printk(KERN_ERR + "Invalid child %p in ckrm_get_next_child\n", + child); return NULL; } cnode = child->hnode.siblings.next; @@ -355,7 +375,7 @@ ckrm_get_next_child(struct ckrm_core_class *parent, cnode = parent->hnode.children.next; } - if (cnode == &parent->hnode.children) { // back at the anchor + if (cnode == &parent->hnode.children) { // back at the anchor return NULL; } @@ -363,8 +383,9 @@ ckrm_get_next_child(struct ckrm_core_class *parent, next_childcore = hnode_2_core(next_cnode); if (!ckrm_is_core_valid(next_childcore)) { - printk(KERN_ERR "Invalid next child %p in ckrm_get_next_child\n", - next_childcore); + printk(KERN_ERR + "Invalid next child %p in ckrm_get_next_child\n", + next_childcore); return NULL; } return next_childcore; @@ -374,10 +395,9 @@ EXPORT_SYMBOL(ckrm_lock_hier); EXPORT_SYMBOL(ckrm_unlock_hier); EXPORT_SYMBOL(ckrm_get_next_child); -static void +static void ckrm_alloc_res_class(struct ckrm_core_class *core, - struct ckrm_core_class *parent, - int resid) + struct ckrm_core_class *parent, int resid) { struct ckrm_classtype *clstype; @@ -388,7 +408,7 @@ ckrm_alloc_res_class(struct ckrm_core_class *core, */ if (!ckrm_is_core_valid(core)) - return ; + return; clstype = core->classtype; core->res_class[resid] = NULL; @@ -398,9 +418,10 @@ ckrm_alloc_res_class(struct ckrm_core_class *core, atomic_inc(&clstype->nr_resusers[resid]); rcbs = clstype->res_ctlrs[resid]; - + if (rcbs && rcbs->res_alloc) { - core->res_class[resid] =(*rcbs->res_alloc)(core,parent); + core->res_class[resid] = + (*rcbs->res_alloc) (core, parent); if (core->res_class[resid]) return; printk(KERN_ERR "Error creating res class\n"); @@ -414,30 +435,33 @@ ckrm_alloc_res_class(struct ckrm_core_class *core, * */ -#define CLS_DEBUG(fmt, args...) do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0) - +#define CLS_DEBUG(fmt, args...) \ +do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0) int -ckrm_init_core_class(struct ckrm_classtype *clstype, +ckrm_init_core_class(struct ckrm_classtype *clstype, struct ckrm_core_class *dcore, - struct ckrm_core_class *parent, - const char *name) + struct ckrm_core_class *parent, const char *name) { // Hubertus ... should replace name with dentry or add dentry ? int i; // Hubertus .. how is this used in initialization - CLS_DEBUG("name %s => %p\n", name?name:"default",dcore); - - if ((dcore != clstype->default_class) && ( !ckrm_is_core_valid(parent))) { + CLS_DEBUG("name %s => %p\n", name ? 
name : "default", dcore); + + if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){ printk("error not a valid parent %p\n", parent); return -EINVAL; } -#if 0 // Hubertus .. dynamic allocation still breaks when RCs registers. See def in ckrm_rc.h +#if 0 +// Hubertus .. dynamic allocation still breaks when RCs registers. +// See def in ckrm_rc.h dcore->res_class = NULL; if (clstype->max_resid > 0) { - dcore->res_class = (void**)kmalloc(clstype->max_resid * sizeof(void*) , GFP_KERNEL); + dcore->res_class = + (void **)kmalloc(clstype->max_resid * sizeof(void *), + GFP_KERNEL); if (dcore->res_class == NULL) { printk("error no mem\n"); return -ENOMEM; @@ -445,45 +469,42 @@ ckrm_init_core_class(struct ckrm_classtype *clstype, } #endif - dcore->classtype = clstype; - dcore->magic = CKRM_CORE_MAGIC; - dcore->name = name; - dcore->class_lock = SPIN_LOCK_UNLOCKED; + dcore->classtype = clstype; + dcore->magic = CKRM_CORE_MAGIC; + dcore->name = name; + dcore->class_lock = SPIN_LOCK_UNLOCKED; dcore->hnode_rwlock = RW_LOCK_UNLOCKED; - dcore->delayed = 0; + dcore->delayed = 0; atomic_set(&dcore->refcnt, 0); write_lock(&ckrm_class_lock); INIT_LIST_HEAD(&dcore->objlist); - list_add(&dcore->clslist,&clstype->classes); + list_add_tail(&dcore->clslist, &clstype->classes); clstype->num_classes++; set_callbacks_active(clstype); write_unlock(&ckrm_class_lock); - ckrm_add_child(parent, dcore); + ckrm_add_child(parent, dcore); - for (i = 0; i < clstype->max_resid; i++) - ckrm_alloc_res_class(dcore,parent,i); + for (i = 0; i < clstype->max_resid; i++) + ckrm_alloc_res_class(dcore, parent, i); // fix for race condition seen in stress with numtasks - if (parent) + if (parent) ckrm_core_grab(parent); - ckrm_core_grab( dcore ); + ckrm_core_grab(dcore); return 0; } - -static void -ckrm_free_res_class(struct ckrm_core_class *core, int resid) +static void ckrm_free_res_class(struct ckrm_core_class *core, int resid) { /* * Free a resource class only if the resource controller has * registered with core */ - if (core->res_class[resid]) { ckrm_res_ctlr_t *rcbs; struct ckrm_classtype *clstype = core->classtype; @@ -492,15 +513,15 @@ ckrm_free_res_class(struct ckrm_core_class *core, int resid) rcbs = clstype->res_ctlrs[resid]; if (rcbs->res_free) { - (*rcbs->res_free)(core->res_class[resid]); - atomic_dec(&clstype->nr_resusers[resid]); // for inc in alloc - core->res_class[resid] = NULL; + (*rcbs->res_free) (core->res_class[resid]); + // compensate inc in alloc + atomic_dec(&clstype->nr_resusers[resid]); } atomic_dec(&clstype->nr_resusers[resid]); } + core->res_class[resid] = NULL; } - /* * Free a core class * requires that all tasks were previously reassigned to another class @@ -508,31 +529,31 @@ ckrm_free_res_class(struct ckrm_core_class *core, int resid) * Returns 0 on success -errno on failure. */ -void -ckrm_free_core_class(struct ckrm_core_class *core) +void ckrm_free_core_class(struct ckrm_core_class *core) { int i; struct ckrm_classtype *clstype = core->classtype; struct ckrm_core_class *parent = core->hnode.parent; - - CLS_DEBUG("core=%p:%s parent=%p:%s\n",core,core->name,parent,parent->name); + + CLS_DEBUG("core=%p:%s parent=%p:%s\n", core, core->name, parent, + parent->name); if (core->delayed) { /* this core was marked as late */ - printk("class <%s> finally deleted %lu\n",core->name,jiffies); + printk("class <%s> finally deleted %lu\n", core->name, jiffies); } if (ckrm_remove_child(core) == 0) { printk("Core class removal failed. 
Chilren present\n"); } for (i = 0; i < clstype->max_resid; i++) { - ckrm_free_res_class(core,i); + ckrm_free_res_class(core, i); } write_lock(&ckrm_class_lock); // Clear the magic, so we would know if this core is reused. core->magic = 0; -#if 0 // Dynamic not yet enabled +#if 0 // Dynamic not yet enabled core->res_class = NULL; #endif // Remove this core class from its linked list. @@ -542,14 +563,13 @@ ckrm_free_core_class(struct ckrm_core_class *core) write_unlock(&ckrm_class_lock); // fix for race condition seen in stress with numtasks - if (parent) + if (parent) ckrm_core_drop(parent); - + kfree(core); } -int -ckrm_release_core_class(struct ckrm_core_class *core) +int ckrm_release_core_class(struct ckrm_core_class *core) { if (!ckrm_is_core_valid(core)) { // Invalid core @@ -557,13 +577,13 @@ ckrm_release_core_class(struct ckrm_core_class *core) } if (core == core->classtype->default_class) - return 0; + return 0; /* need to make sure that the classgot really dropped */ if (atomic_read(&core->refcnt) != 1) { CLS_DEBUG("class <%s> deletion delayed refcnt=%d jif=%ld\n", - core->name,atomic_read(&core->refcnt),jiffies); - core->delayed = 1; /* just so we have a ref point */ + core->name, atomic_read(&core->refcnt), jiffies); + core->delayed = 1; /* just so we have a ref point */ } ckrm_core_drop(core); return 0; @@ -578,25 +598,27 @@ ckrm_release_core_class(struct ckrm_core_class *core) * Returns the resource id(0 or +ve) on success, -errno for failure. */ static int -ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype, ckrm_res_ctlr_t *rcbs) +ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype, + ckrm_res_ctlr_t * rcbs) { - int resid, ret,i; - + int resid, ret, i; + if (!rcbs) return -EINVAL; resid = rcbs->resid; - + spin_lock(&clstype->res_ctlrs_lock); - - printk(KERN_WARNING "resid is %d name is %s %s\n", - resid, rcbs->res_name,clstype->res_ctlrs[resid]->res_name); + + printk(KERN_WARNING "resid is %d name is %s %s\n", + resid, rcbs->res_name, clstype->res_ctlrs[resid]->res_name); if (resid >= 0) { - if ((resid < CKRM_MAX_RES_CTLRS) && (clstype->res_ctlrs[resid] == NULL)) { + if ((resid < CKRM_MAX_RES_CTLRS) + && (clstype->res_ctlrs[resid] == NULL)) { clstype->res_ctlrs[resid] = rcbs; atomic_set(&clstype->nr_resusers[resid], 0); - set_bit(resid, &clstype->bit_res_ctlrs); + set_bit(resid, &clstype->bit_res_ctlrs); ret = resid; if (resid >= clstype->max_resid) { clstype->max_resid = resid + 1; @@ -613,7 +635,7 @@ ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype, ckrm_res_ctlr_t *r clstype->res_ctlrs[i] = rcbs; rcbs->resid = i; atomic_set(&clstype->nr_resusers[i], 0); - set_bit(i, &clstype->bit_res_ctlrs); + set_bit(i, &clstype->bit_res_ctlrs); if (i >= clstype->max_resid) { clstype->max_resid = i + 1; } @@ -621,30 +643,35 @@ ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype, ckrm_res_ctlr_t *r return i; } } - + spin_unlock(&clstype->res_ctlrs_lock); return (-ENOMEM); } int -ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t *rcbs) +ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t * rcbs) { struct ckrm_core_class *core; int resid; - - resid = ckrm_register_res_ctlr_intern(clstype,rcbs); - + + resid = ckrm_register_res_ctlr_intern(clstype, rcbs); + if (resid >= 0) { - /* run through all classes and create the resource class object and - * if necessary "initialize" class in context of this resource + /* run through all classes and create the resource class + * object and if necessary "initialize" class in 
context + * of this resource */ read_lock(&ckrm_class_lock); list_for_each_entry(core, &clstype->classes, clslist) { - printk("CKRM .. create res clsobj for resouce <%s> class <%s> par=%p\n", - rcbs->res_name, core->name, core->hnode.parent); + printk("CKRM .. create res clsobj for resouce <%s>" + "class <%s> par=%p\n", rcbs->res_name, + core->name, core->hnode.parent); ckrm_alloc_res_class(core, core->hnode.parent, resid); - if (clstype->add_resctrl) // FIXME: this should be mandatory - (*clstype->add_resctrl)(core,resid); + + if (clstype->add_resctrl) { + // FIXME: this should be mandatory + (*clstype->add_resctrl) (core, resid); + } } read_unlock(&ckrm_class_lock); } @@ -656,27 +683,35 @@ ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t *rcbs) * * Returns 0 on success -errno for failure. */ -int -ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs) -{ +int ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs) +{ struct ckrm_classtype *clstype = rcbs->classtype; + struct ckrm_core_class *core = NULL; int resid = rcbs->resid; - if ((clstype == NULL) || (resid < 0)) + if ((clstype == NULL) || (resid < 0)) { return -EINVAL; - - if (atomic_read(&clstype->nr_resusers[resid])) - return -EBUSY; - + } // FIXME: probably need to also call deregistration function + read_lock(&ckrm_class_lock); + // free up this resource from all the classes + list_for_each_entry(core, &clstype->classes, clslist) { + ckrm_free_res_class(core, resid); + } + read_unlock(&ckrm_class_lock); + + if (atomic_read(&clstype->nr_resusers[resid])) { + return -EBUSY; + } + spin_lock(&clstype->res_ctlrs_lock); clstype->res_ctlrs[resid] = NULL; - clear_bit(resid, &clstype->bit_res_ctlrs); + clear_bit(resid, &clstype->bit_res_ctlrs); clstype->max_resid = fls(clstype->bit_res_ctlrs); rcbs->resid = -1; spin_unlock(&clstype->res_ctlrs_lock); - + return 0; } @@ -686,50 +721,52 @@ ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs) /* Hubertus ... we got to do some locking here */ -struct ckrm_classtype* ckrm_classtypes[CKRM_MAX_CLASSTYPES]; -EXPORT_SYMBOL(ckrm_classtypes); // really should build a better interface for this -int -ckrm_register_classtype(struct ckrm_classtype *clstype) +struct ckrm_classtype *ckrm_classtypes[CKRM_MAX_CLASSTYPES]; +// really should build a better interface for this +EXPORT_SYMBOL(ckrm_classtypes); + +int ckrm_register_classtype(struct ckrm_classtype *clstype) { int tid = clstype->typeID; if (tid != -1) { - if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES) || (ckrm_classtypes[tid])) + if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES) + || (ckrm_classtypes[tid])) return -EINVAL; } else { int i; - for ( i=CKRM_RESV_CLASSTYPES ; itypeID = tid; ckrm_classtypes[tid] = clstype; - + /* Hubertus .. 
we need to call the callbacks of the RCFS client */ if (rcfs_fn.register_classtype) { - (* rcfs_fn.register_classtype)(clstype); + (*rcfs_fn.register_classtype) (clstype); // No error return for now ; } return tid; } -int -ckrm_unregister_classtype(struct ckrm_classtype *clstype) +int ckrm_unregister_classtype(struct ckrm_classtype *clstype) { int tid = clstype->typeID; - if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES) || (ckrm_classtypes[tid] != clstype)) + if ((tid < 0) || (tid > CKRM_MAX_CLASSTYPES) + || (ckrm_classtypes[tid] != clstype)) return -EINVAL; if (rcfs_fn.deregister_classtype) { - (* rcfs_fn.deregister_classtype)(clstype); + (*rcfs_fn.deregister_classtype) (clstype); // No error return for now } @@ -738,43 +775,39 @@ ckrm_unregister_classtype(struct ckrm_classtype *clstype) return 0; } -struct ckrm_classtype* -ckrm_find_classtype_by_name(const char *name) +struct ckrm_classtype *ckrm_find_classtype_by_name(const char *name) { int i; - for ( i=0 ; iname,name,CKRM_MAX_TYPENAME_LEN)) + if (ctype && !strncmp(ctype->name, name, CKRM_MAX_TYPENAME_LEN)) return ctype; } return NULL; } - /******************************************************************* * Event callback invocation *******************************************************************/ -struct ckrm_hook_cb* ckrm_event_callbacks[CKRM_NONLATCHABLE_EVENTS]; +struct ckrm_hook_cb *ckrm_event_callbacks[CKRM_NONLATCHABLE_EVENTS]; /* Registration / Deregistration / Invocation functions */ -int -ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb) +int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb) { struct ckrm_hook_cb **cbptr; if ((ev < CKRM_LATCHABLE_EVENTS) || (ev >= CKRM_NONLATCHABLE_EVENTS)) return 1; cbptr = &ckrm_event_callbacks[ev]; - while (*cbptr != NULL) + while (*cbptr != NULL) cbptr = &((*cbptr)->next); *cbptr = cb; return 0; } -int -ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb) +int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb) { struct ckrm_hook_cb **cbptr; @@ -788,37 +821,35 @@ ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb) return (*cbptr == NULL); } -int -ckrm_register_event_set(struct ckrm_event_spec especs[]) +int ckrm_register_event_set(struct ckrm_event_spec especs[]) { struct ckrm_event_spec *espec = especs; - for ( espec = especs ; espec->ev != -1 ; espec++ ) - ckrm_register_event_cb(espec->ev,&espec->cb); + for (espec = especs; espec->ev != -1; espec++) + ckrm_register_event_cb(espec->ev, &espec->cb); return 0; } -int -ckrm_unregister_event_set(struct ckrm_event_spec especs[]) +int ckrm_unregister_event_set(struct ckrm_event_spec especs[]) { struct ckrm_event_spec *espec = especs; - for ( espec = especs ; espec->ev != -1 ; espec++ ) - ckrm_unregister_event_cb(espec->ev,&espec->cb); + for (espec = especs; espec->ev != -1; espec++) + ckrm_unregister_event_cb(espec->ev, &espec->cb); return 0; } -#define ECC_PRINTK(fmt, args...) // printk("%s: " fmt, __FUNCTION__ , ## args) +#define ECC_PRINTK(fmt, args...) 
\ +// printk("%s: " fmt, __FUNCTION__ , ## args) -void -ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg) +void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg) { struct ckrm_hook_cb *cb, *anchor; - ECC_PRINTK("%d %x\n",current,ev,arg); + ECC_PRINTK("%d %x\n", current, ev, arg); if ((anchor = ckrm_event_callbacks[ev]) != NULL) { - for ( cb = anchor ; cb ; cb = cb->next ) - (*cb->fct)(arg); + for (cb = anchor; cb; cb = cb->next) + (*cb->fct) (arg); } } @@ -829,8 +860,7 @@ ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg) * (b) function invoker on a named resource *******************************************************************/ -int -ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq) +int ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq) { int i; struct ckrm_res_ctlr *rcbs; @@ -841,12 +871,11 @@ ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq) atomic_inc(&clstype->nr_resusers[i]); rcbs = clstype->res_ctlrs[i]; if (rcbs && rcbs->get_share_values) { - (*rcbs->get_share_values)(core->res_class[i], &shares); - seq_printf(seq,"res=%s,guarantee=%d,limit=%d,total_guarantee=%d,max_limit=%d\n", - rcbs->res_name, - shares.my_guarantee, - shares.my_limit, - shares.total_guarantee, + (*rcbs->get_share_values) (core->res_class[i], &shares); + seq_printf(seq,"res=%s,guarantee=%d,limit=%d," + "total_guarantee=%d,max_limit=%d\n", + rcbs->res_name, shares.my_guarantee, + shares.my_limit, shares.total_guarantee, shares.max_limit); } atomic_dec(&clstype->nr_resusers[i]); @@ -854,8 +883,7 @@ ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq) return 0; } -int -ckrm_class_show_stats(struct ckrm_core_class *core, struct seq_file *seq) +int ckrm_class_show_stats(struct ckrm_core_class *core, struct seq_file *seq) { int i; struct ckrm_res_ctlr *rcbs; @@ -864,15 +892,14 @@ ckrm_class_show_stats(struct ckrm_core_class *core, struct seq_file *seq) for (i = 0; i < clstype->max_resid; i++) { atomic_inc(&clstype->nr_resusers[i]); rcbs = clstype->res_ctlrs[i]; - if (rcbs && rcbs->get_stats) - (*rcbs->get_stats)(core->res_class[i], seq); + if (rcbs && rcbs->get_stats) + (*rcbs->get_stats) (core->res_class[i], seq); atomic_dec(&clstype->nr_resusers[i]); } return 0; } -int -ckrm_class_show_config(struct ckrm_core_class *core, struct seq_file *seq) +int ckrm_class_show_config(struct ckrm_core_class *core, struct seq_file *seq) { int i; struct ckrm_res_ctlr *rcbs; @@ -881,89 +908,95 @@ ckrm_class_show_config(struct ckrm_core_class *core, struct seq_file *seq) for (i = 0; i < clstype->max_resid; i++) { atomic_inc(&clstype->nr_resusers[i]); rcbs = clstype->res_ctlrs[i]; - if (rcbs && rcbs->show_config) - (*rcbs->show_config)(core->res_class[i], seq); + if (rcbs && rcbs->show_config) + (*rcbs->show_config) (core->res_class[i], seq); atomic_dec(&clstype->nr_resusers[i]); } return 0; } -int -ckrm_class_set_config(struct ckrm_core_class *core, const char *resname, const char *cfgstr) +int ckrm_class_set_config(struct ckrm_core_class *core, const char *resname, + const char *cfgstr) { struct ckrm_classtype *clstype = core->classtype; - struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype,resname); + struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype, resname); int rc; if (rcbs == NULL || rcbs->set_config == NULL) - return -EINVAL; - rc = (*rcbs->set_config)(core->res_class[rcbs->resid],cfgstr); + return -EINVAL; + rc = (*rcbs->set_config) (core->res_class[rcbs->resid], cfgstr); return rc; } -int 
-ckrm_class_set_shares(struct ckrm_core_class *core, const char *resname, - struct ckrm_shares *shares) +#define legalshare(a) \ + ( ((a) >=0) \ + || ((a) == CKRM_SHARE_UNCHANGED) \ + || ((a) == CKRM_SHARE_DONTCARE) ) + +int ckrm_class_set_shares(struct ckrm_core_class *core, const char *resname, + struct ckrm_shares *shares) { struct ckrm_classtype *clstype = core->classtype; struct ckrm_res_ctlr *rcbs; int rc; - printk("ckrm_class_set_shares(%s,%s)\n",core->name,resname); - rcbs = ckrm_resctlr_lookup(clstype,resname); + // Check for legal values + if (!legalshare(shares->my_guarantee) || !legalshare(shares->my_limit) + || !legalshare(shares->total_guarantee) + || !legalshare(shares->max_limit)) + return -EINVAL; + + rcbs = ckrm_resctlr_lookup(clstype, resname); if (rcbs == NULL || rcbs->set_share_values == NULL) - return -EINVAL; - rc = (*rcbs->set_share_values)(core->res_class[rcbs->resid],shares); + return -EINVAL; + rc = (*rcbs->set_share_values) (core->res_class[rcbs->resid], shares); return rc; } -int -ckrm_class_reset_stats(struct ckrm_core_class *core, const char *resname, const char *unused) +int ckrm_class_reset_stats(struct ckrm_core_class *core, const char *resname, + const char *unused) { struct ckrm_classtype *clstype = core->classtype; - struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype,resname); + struct ckrm_res_ctlr *rcbs = ckrm_resctlr_lookup(clstype, resname); int rc; if (rcbs == NULL || rcbs->reset_stats == NULL) - return -EINVAL; - rc = (*rcbs->reset_stats)(core->res_class[rcbs->resid]); + return -EINVAL; + rc = (*rcbs->reset_stats) (core->res_class[rcbs->resid]); return rc; -} +} /******************************************************************* * Initialization *******************************************************************/ -void -ckrm_cb_newtask(struct task_struct *tsk) +void ckrm_cb_newtask(struct task_struct *tsk) { - tsk->ce_data = NULL; + tsk->ce_data = NULL; spin_lock_init(&tsk->ckrm_tsklock); - ckrm_invoke_event_cb_chain(CKRM_EVENT_NEWTASK,tsk); + ckrm_invoke_event_cb_chain(CKRM_EVENT_NEWTASK, tsk); } -void -ckrm_cb_exit(struct task_struct *tsk) +void ckrm_cb_exit(struct task_struct *tsk) { - ckrm_invoke_event_cb_chain(CKRM_EVENT_EXIT,tsk); + ckrm_invoke_event_cb_chain(CKRM_EVENT_EXIT, tsk); tsk->ce_data = NULL; } -void __init -ckrm_init(void) +void __init ckrm_init(void) { printk("CKRM Initialization\n"); - + // register/initialize the Metatypes - + #ifdef CONFIG_CKRM_TYPE_TASKCLASS - { + { extern void ckrm_meta_init_taskclass(void); ckrm_meta_init_taskclass(); } #endif #ifdef CONFIG_CKRM_TYPE_SOCKETCLASS - { + { extern void ckrm_meta_init_sockclass(void); ckrm_meta_init_sockclass(); } @@ -1005,5 +1038,3 @@ EXPORT_SYMBOL(ckrm_class_set_config); EXPORT_SYMBOL(ckrm_class_set_shares); EXPORT_SYMBOL(ckrm_class_reset_stats); - - diff --git a/kernel/ckrm/ckrm_listenaq.c b/kernel/ckrm/ckrm_listenaq.c index 235ac0699..0fe858633 100644 --- a/kernel/ckrm/ckrm_listenaq.c +++ b/kernel/ckrm/ckrm_listenaq.c @@ -32,40 +32,38 @@ #include #define hnode_2_core(ptr) \ - ((ptr) ? container_of(ptr, struct ckrm_core_class, hnode) : NULL) + ((ptr) ? 
container_of(ptr, struct ckrm_core_class, hnode) : NULL) - -#define CKRM_SAQ_MAX_DEPTH 3 // 0 => /rcfs +#define CKRM_SAQ_MAX_DEPTH 3 // 0 => /rcfs // 1 => socket_aq // 2 => socket_aq/listen_class // 3 => socket_aq/listen_class/accept_queues // 4 => Not allowed typedef struct ckrm_laq_res { - spinlock_t reslock; - atomic_t refcnt; - struct ckrm_shares shares; + spinlock_t reslock; + atomic_t refcnt; + struct ckrm_shares shares; struct ckrm_core_class *core; struct ckrm_core_class *pcore; - int my_depth; - int my_id; + int my_depth; + int my_id; + unsigned int min_ratio; } ckrm_laq_res_t; static int my_resid = -1; -extern struct ckrm_core_class *rcfs_create_under_netroot(char *, int, int); -extern struct ckrm_core_class *rcfs_make_core(struct dentry *, - struct ckrm_core_class * ) ; +extern struct ckrm_core_class *rcfs_create_under_netroot(char *, int, int); +extern struct ckrm_core_class *rcfs_make_core(struct dentry *, + struct ckrm_core_class *); -void -laq_res_hold(struct ckrm_laq_res *res) +void laq_res_hold(struct ckrm_laq_res *res) { - atomic_inc(&res->refcnt); + atomic_inc(&res->refcnt); return; } -void -laq_res_put(struct ckrm_laq_res *res) +void laq_res_put(struct ckrm_laq_res *res) { if (atomic_dec_and_test(&res->refcnt)) kfree(res); @@ -74,43 +72,40 @@ laq_res_put(struct ckrm_laq_res *res) /* Initialize rescls values */ -static void -laq_res_initcls(void *my_res) +static void laq_res_initcls(void *my_res) { ckrm_laq_res_t *res = my_res; - res->shares.my_guarantee = CKRM_SHARE_DONTCARE; - res->shares.my_limit = CKRM_SHARE_DONTCARE; - res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + res->shares.my_guarantee = CKRM_SHARE_DONTCARE; + res->shares.my_limit = CKRM_SHARE_DONTCARE; + res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.cur_max_limit = 0; + res->shares.cur_max_limit = 0; } -static int -atoi(char *s) +static int atoi(char *s) { int k = 0; - while(*s) + while (*s) k = *s++ - '0' + (k * 10); return k; } -static char * -laq_get_name(struct ckrm_core_class *c) +static char *laq_get_name(struct ckrm_core_class *c) { - char *p = (char *)c->name; + char *p = (char *)c->name; - while(*p) - p++; - while( *p != '/' && p != c->name) - p--; + while (*p) + p++; + while (*p != '/' && p != c->name) + p--; - return ++p; + return ++p; } -static void * -laq_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) +static void *laq_res_alloc(struct ckrm_core_class *core, + struct ckrm_core_class *parent) { ckrm_laq_res_t *res, *pres; int pdepth; @@ -120,7 +115,7 @@ laq_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) else pres = NULL; - if (core == core->classtype->default_class) + if (core == core->classtype->default_class) pdepth = 1; else { if (!parent) @@ -133,7 +128,7 @@ laq_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) memset(res, 0, sizeof(res)); spin_lock_init(&res->reslock); laq_res_hold(res); - res->my_depth = pdepth; + res->my_depth = pdepth; if (pdepth == 2) // listen class res->my_id = 0; else if (pdepth == 3) @@ -143,19 +138,18 @@ laq_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) // rescls in place, now initialize contents other than // hierarchy pointers - laq_res_initcls(res); // acts as initialising value + laq_res_initcls(res); // acts as initialising value } return res; } -static 
void -laq_res_free(void *my_res) +static void laq_res_free(void *my_res) { - ckrm_laq_res_t *res = (ckrm_laq_res_t *)my_res; + ckrm_laq_res_t *res = (ckrm_laq_res_t *) my_res; ckrm_laq_res_t *parent; - if (!res) + if (!res) return; if (res->my_depth != 3) { @@ -164,7 +158,7 @@ laq_res_free(void *my_res) } parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) // Should never happen + if (!parent) // Should never happen return; spin_lock(&parent->reslock); @@ -175,7 +169,7 @@ laq_res_free(void *my_res) child_guarantee_changed(&parent->shares, res->shares.my_guarantee, 0); spin_unlock(&res->reslock); - laq_res_put(res); + laq_res_put(res); spin_unlock(&parent->reslock); return; } @@ -184,112 +178,126 @@ laq_res_free(void *my_res) * SHARES *** **************************************************************************/ -void -laq_set_aq_values(ckrm_laq_res_t *my_res, ckrm_laq_res_t *parent, int updatep) +void laq_set_aq_value(struct ckrm_net_struct *ns, unsigned int *aq_ratio) +{ + int i; + struct tcp_opt *tp; + + tp = tcp_sk(ns->ns_sk); + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) + tp->acceptq[i].aq_ratio = aq_ratio[i]; + return; +} +void laq_set_aq_values(ckrm_laq_res_t * parent, unsigned int *aq_ratio) { struct ckrm_net_struct *ns; struct ckrm_core_class *core = parent->core; - struct tcp_opt *tp; - - if (my_res->my_depth < 2) - return; - - // XXX Instead of holding a class_lock introduce a rw - // lock to be write locked by listen callbacks and read locked here. - // - VK + class_lock(core); - list_for_each_entry(ns, &core->objlist,ckrm_link) { - tp = tcp_sk(ns->ns_sk); - if (updatep) - tp->acceptq[0].aq_ratio = - parent->shares.total_guarantee/ - parent->shares.unused_guarantee; - - tp->acceptq[my_res->my_id].aq_ratio = - my_res->shares.total_guarantee/ - parent->shares.my_guarantee; + list_for_each_entry(ns, &core->objlist, ckrm_link) { + laq_set_aq_value(ns, aq_ratio); } class_unlock(core); return; } -static int -laq_set_share_values(void *my_res, struct ckrm_shares *shares) +static void calculate_aq_ratios(ckrm_laq_res_t * res, unsigned int *aq_ratio) +{ + struct ckrm_hnode *chnode; + ckrm_laq_res_t *child; + unsigned int min; + int i; + + min = aq_ratio[0] = (unsigned int)res->shares.unused_guarantee; + + list_for_each_entry(chnode, &res->core->hnode.children, siblings) { + child = hnode_2_core(chnode)->res_class[my_resid]; + + aq_ratio[child->my_id] = + (unsigned int)child->shares.my_guarantee; + if (aq_ratio[child->my_id] == CKRM_SHARE_DONTCARE) + aq_ratio[child->my_id] = 0; + if (aq_ratio[child->my_id] && + ((unsigned int)aq_ratio[child->my_id] < min)) + min = (unsigned int)child->shares.my_guarantee; + } + + if (min == 0) { + min = 1; + // default takes all if nothing specified + aq_ratio[0] = 1; + } + res->min_ratio = min; + + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) + aq_ratio[i] = aq_ratio[i] / min; +} + +static int laq_set_share_values(void *my_res, struct ckrm_shares *shares) { ckrm_laq_res_t *res = my_res; - ckrm_laq_res_t *parent, *child; - struct ckrm_hnode *chnode; + ckrm_laq_res_t *parent; + unsigned int aq_ratio[NUM_ACCEPT_QUEUES]; int rc = 0; - if (!res) + if (!res) return -EINVAL; - if (!res->pcore) { + if (!res->pcore) { // something is badly wrong printk(KERN_ERR "socketaq internal inconsistency\n"); return -EBADF; } parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) // socket_class does not have a share interface + if (!parent) // socket_class does not have a share interface return -EINVAL; // Ensure that we 
ignore limit values - shares->my_limit = shares->max_limit = CKRM_SHARE_UNCHANGED; - - switch (res->my_depth) { - - case 0: printk(KERN_ERR "socketaq bad entry\n"); - rc = -EBADF; - break; + shares->my_limit = CKRM_SHARE_DONTCARE; + shares->max_limit = CKRM_SHARE_UNCHANGED; - case 1: // can't be written to. this is internal default. - // return -EINVAL - rc = -EINVAL; - break; - - case 2: // nothing to inherit + if (res->my_depth == 0) { + printk(KERN_ERR "socketaq bad entry\n"); + return -EBADF; + } else if (res->my_depth == 1) { + // can't be written to. This is an internal default. + return -EINVAL; + } else if (res->my_depth == 2) { + //nothin to inherit if (!shares->total_guarantee) { - rc = -EINVAL; - break; + return -EINVAL; } + parent = res; + shares->my_guarantee = CKRM_SHARE_DONTCARE; + } else if (res->my_depth == 3) { + // accept queue itself. + shares->total_guarantee = CKRM_SHARE_UNCHANGED; + } - ckrm_lock_hier(res->pcore); - spin_lock(&res->reslock); - rc = set_shares(shares, &res->shares, NULL); - if (!rc) { - list_for_each_entry(chnode, - &res->core->hnode.children,siblings){ - child=hnode_2_core(chnode)->res_class[my_resid]; - laq_set_aq_values(child,res,(child->my_id==1)); - } - } + ckrm_lock_hier(parent->pcore); + spin_lock(&parent->reslock); + rc = set_shares(shares, &res->shares, + (parent == res) ? NULL : &parent->shares); + if (rc) { spin_unlock(&res->reslock); ckrm_unlock_hier(res->pcore); - break; - - case 3: // accept queue itself. Check against parent. - ckrm_lock_hier(parent->pcore); - spin_lock(&parent->reslock); - rc = set_shares(shares, &res->shares, &parent->shares); - if (!rc) { - laq_set_aq_values(res,parent,1); - } - spin_unlock(&parent->reslock); - ckrm_unlock_hier(parent->pcore); - break; + return rc; } + calculate_aq_ratios(parent, aq_ratio); + laq_set_aq_values(parent, aq_ratio); + spin_unlock(&parent->reslock); + ckrm_unlock_hier(parent->pcore); return rc; } -static int -laq_get_share_values(void *my_res, struct ckrm_shares *shares) +static int laq_get_share_values(void *my_res, struct ckrm_shares *shares) { ckrm_laq_res_t *res = my_res; - if (!res) + if (!res) return -EINVAL; *shares = res->shares; return 0; @@ -303,9 +311,9 @@ void laq_print_aq_stats(struct seq_file *sfile, struct tcp_acceptq_info *taq, int i) { seq_printf(sfile, "Class %d connections:\n\taccepted: %u\n\t" - "queued: %u\n\twait_time: %lu\n\t", - i, taq->acceptq_count, taq->acceptq_qcount, - taq->acceptq_wait_time); + "queued: %u\n\twait_time: %u\n", + i, taq->acceptq_count, taq->acceptq_qcount, + jiffies_to_msecs(taq->acceptq_wait_time)); if (i) return; @@ -317,16 +325,16 @@ laq_print_aq_stats(struct seq_file *sfile, struct tcp_acceptq_info *taq, int i) } seq_printf(sfile, "Totals :\n\taccepted: %u\n\t" - "queued: %u\n\twait_time: %lu\n", - taq->acceptq_count, taq->acceptq_qcount, - taq->acceptq_wait_time); + "queued: %u\n\twait_time: %u\n", + taq->acceptq_count, taq->acceptq_qcount, + jiffies_to_msecs(taq->acceptq_wait_time)); return; } void -laq_get_aq_stats(ckrm_laq_res_t *pres, ckrm_laq_res_t *mres, - struct tcp_acceptq_info *taq) +laq_get_aq_stats(ckrm_laq_res_t * pres, ckrm_laq_res_t * mres, + struct tcp_acceptq_info *taq) { struct ckrm_net_struct *ns; struct ckrm_core_class *core = pres->core; @@ -337,15 +345,15 @@ laq_get_aq_stats(ckrm_laq_res_t *pres, ckrm_laq_res_t *mres, if (a == 0) z = NUM_ACCEPT_QUEUES; else - z = a+1; + z = a + 1; // XXX Instead of holding a class_lock introduce a rw // lock to be write locked by listen callbacks and read locked here. 
// - VK class_lock(pres->core); - list_for_each_entry(ns, &core->objlist,ckrm_link) { + list_for_each_entry(ns, &core->objlist, ckrm_link) { tp = tcp_sk(ns->ns_sk); - for (; a< z; a++) { + for (; a < z; a++) { taq->acceptq_wait_time += tp->acceptq[a].aq_wait_time; taq->acceptq_qcount += tp->acceptq[a].aq_qcount; taq->acceptq_count += tp->acceptq[a].aq_count; @@ -355,26 +363,24 @@ laq_get_aq_stats(ckrm_laq_res_t *pres, ckrm_laq_res_t *mres, class_unlock(pres->core); } - -static int -laq_get_stats(void *my_res, struct seq_file *sfile) +static int laq_get_stats(void *my_res, struct seq_file *sfile) { ckrm_laq_res_t *res = my_res; ckrm_laq_res_t *parent; struct tcp_acceptq_info taq[NUM_ACCEPT_QUEUES]; int rc = 0; - if (!res) + if (!res) return -EINVAL; - - if (!res->pcore) { + + if (!res->pcore) { // something is badly wrong printk(KERN_ERR "socketaq internal inconsistency\n"); return -EBADF; } parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) { // socket_class does not have a stat interface + if (!parent) { // socket_class does not have a stat interface printk(KERN_ERR "socketaq internal fs inconsistency\n"); return -EINVAL; } @@ -384,23 +390,24 @@ laq_get_stats(void *my_res, struct seq_file *sfile) switch (res->my_depth) { default: - case 0: printk(KERN_ERR "socket class bad entry\n"); + case 0: + printk(KERN_ERR "socket class bad entry\n"); rc = -EBADF; break; - case 1: // can't be read from. this is internal default. + case 1: // can't be read from. this is internal default. // return -EINVAL rc = -EINVAL; break; - case 2: // return the default and total + case 2: // return the default and total ckrm_lock_hier(res->core); // block any deletes laq_get_aq_stats(res, res, &taq[0]); laq_print_aq_stats(sfile, &taq[0], 0); ckrm_unlock_hier(res->core); // block any deletes break; - case 3: + case 3: ckrm_lock_hier(parent->core); // block any deletes laq_get_aq_stats(parent, res, &taq[res->my_id]); laq_print_aq_stats(sfile, &taq[res->my_id], res->my_id); @@ -415,59 +422,47 @@ laq_get_stats(void *my_res, struct seq_file *sfile) * The network connection is reclassified to this class. Update its shares. * The socket lock is held. */ -static void -laq_change_resclass(void *n, void *old, void *r) +static void laq_change_resclass(void *n, void *old, void *r) { struct ckrm_net_struct *ns = (struct ckrm_net_struct *)n; struct ckrm_laq_res *res = (struct ckrm_laq_res *)r; - struct ckrm_hnode *chnode = NULL; + unsigned int aq_ratio[NUM_ACCEPT_QUEUES]; - - if (res->my_depth != 2) - return; + if (res->my_depth != 2) + return; // a change to my_depth == 3 ie. the accept classes cannot happen. // there is no target file - if (res->my_depth == 2) { // it is one of the socket classes - struct ckrm_laq_res *reschild; - struct sock *sk = ns->ns_sk; - struct tcp_opt *tp = tcp_sk(sk); - + if (res->my_depth == 2) { // it is one of the socket classes + ckrm_lock_hier(res->pcore); // share rule: hold parent resource lock. then self. // However, since my_depth == 1 is a generic class it is not // needed here. Self lock is enough. 
spin_lock(&res->reslock); - tp->acceptq[0].aq_ratio = res->shares.total_guarantee/ - res->shares.unused_guarantee; - list_for_each_entry(chnode,&res->core->hnode.children,siblings){ - reschild = hnode_2_core(chnode)->res_class[my_resid]; - - spin_lock(&reschild->reslock); - tp->acceptq[reschild->my_id].aq_ratio= - reschild->shares.total_guarantee/ - res->shares.my_guarantee; - spin_unlock(&reschild->reslock); - } + calculate_aq_ratios(res, aq_ratio); + class_lock(res->pcore); + laq_set_aq_value(ns, aq_ratio); + class_unlock(res->pcore); spin_unlock(&res->reslock); + ckrm_unlock_hier(res->pcore); } - + return; } struct ckrm_res_ctlr laq_rcbs = { - .res_name = "laq", - .resid = -1 , // dynamically assigned - .res_alloc = laq_res_alloc, - .res_free = laq_res_free, - .set_share_values = laq_set_share_values, - .get_share_values = laq_get_share_values, - .get_stats = laq_get_stats, - .change_resclass = laq_change_resclass, - // .res_initcls = laq_res_initcls, // LAQ_HUBERTUS: no need for this !! + .res_name = "laq", + .resid = -1, // dynamically assigned + .res_alloc = laq_res_alloc, + .res_free = laq_res_free, + .set_share_values = laq_set_share_values, + .get_share_values = laq_get_share_values, + .get_stats = laq_get_stats, + .change_resclass = laq_change_resclass, + //.res_initcls = laq_res_initcls, //HUBERTUS: unnecessary !! }; -int __init -init_ckrm_laq_res(void) +int __init init_ckrm_laq_res(void) { struct ckrm_classtype *clstype; int resid; @@ -479,25 +474,22 @@ init_ckrm_laq_res(void) } if (my_resid == -1) { - resid = ckrm_register_res_ctlr(clstype,&laq_rcbs); + resid = ckrm_register_res_ctlr(clstype, &laq_rcbs); if (resid >= 0) my_resid = resid; - printk("........init_ckrm_listen_aq_res -> %d\n",my_resid); + printk("........init_ckrm_listen_aq_res -> %d\n", my_resid); } return 0; -} +} -void __exit -exit_ckrm_laq_res(void) +void __exit exit_ckrm_laq_res(void) { ckrm_unregister_res_ctlr(&laq_rcbs); my_resid = -1; } - module_init(init_ckrm_laq_res) -module_exit(exit_ckrm_laq_res) - -MODULE_LICENSE("GPL"); + module_exit(exit_ckrm_laq_res) + MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_sockc.c b/kernel/ckrm/ckrm_sockc.c index 26731bb20..a8a3b4bd5 100644 --- a/kernel/ckrm/ckrm_sockc.c +++ b/kernel/ckrm/ckrm_sockc.c @@ -56,70 +56,71 @@ struct ckrm_sock_class { struct ckrm_core_class core; }; -static struct ckrm_sock_class sockclass_dflt_class = { +static struct ckrm_sock_class sockclass_dflt_class = { }; #define SOCKET_CLASS_TYPE_NAME "socket_class" const char *dflt_sockclass_name = SOCKET_CLASS_TYPE_NAME; -static struct ckrm_core_class *sock_alloc_class(struct ckrm_core_class *parent, const char *name); -static int sock_free_class(struct ckrm_core_class *core); +static struct ckrm_core_class *sock_alloc_class(struct ckrm_core_class *parent, + const char *name); +static int sock_free_class(struct ckrm_core_class *core); -static int sock_forced_reclassify(ckrm_core_class_t *target, const char *resname); -static int sock_show_members(struct ckrm_core_class *core, struct seq_file *seq); +static int sock_forced_reclassify(ckrm_core_class_t * target, + const char *resname); +static int sock_show_members(struct ckrm_core_class *core, + struct seq_file *seq); static void sock_add_resctrl(struct ckrm_core_class *core, int resid); static void sock_reclassify_class(struct ckrm_sock_class *cls); struct ckrm_classtype CT_sockclass = { - .mfidx = 1, - .name = SOCKET_CLASS_TYPE_NAME, - .typeID = CKRM_CLASSTYPE_SOCKET_CLASS, - .maxdepth = 3, - .resid_reserved = 0, - .max_res_ctlrs = 
CKRM_MAX_RES_CTLRS, - .max_resid = 0, - .bit_res_ctlrs = 0L, + .mfidx = 1, + .name = SOCKET_CLASS_TYPE_NAME, + .typeID = CKRM_CLASSTYPE_SOCKET_CLASS, + .maxdepth = 3, + .resid_reserved = 0, + .max_res_ctlrs = CKRM_MAX_RES_CTLRS, + .max_resid = 0, + .bit_res_ctlrs = 0L, .res_ctlrs_lock = SPIN_LOCK_UNLOCKED, - .classes = LIST_HEAD_INIT(CT_sockclass.classes), + .classes = LIST_HEAD_INIT(CT_sockclass.classes), + + .default_class = &sockclass_dflt_class.core, - .default_class = &sockclass_dflt_class.core, - // private version of functions - .alloc = &sock_alloc_class, - .free = &sock_free_class, - .show_members = &sock_show_members, + .alloc = &sock_alloc_class, + .free = &sock_free_class, + .show_members = &sock_show_members, .forced_reclassify = &sock_forced_reclassify, // use of default functions - .show_shares = &ckrm_class_show_shares, - .show_stats = &ckrm_class_show_stats, - .show_config = &ckrm_class_show_config, - .set_config = &ckrm_class_set_config, - .set_shares = &ckrm_class_set_shares, - .reset_stats = &ckrm_class_reset_stats, + .show_shares = &ckrm_class_show_shares, + .show_stats = &ckrm_class_show_stats, + .show_config = &ckrm_class_show_config, + .set_config = &ckrm_class_set_config, + .set_shares = &ckrm_class_set_shares, + .reset_stats = &ckrm_class_reset_stats, // mandatory private version .. no dflt available - .add_resctrl = &sock_add_resctrl, + .add_resctrl = &sock_add_resctrl, }; /* helper functions */ -void -ckrm_ns_hold(struct ckrm_net_struct *ns) +void ckrm_ns_hold(struct ckrm_net_struct *ns) { - atomic_inc(&ns->ns_refcnt); - return; + atomic_inc(&ns->ns_refcnt); + return; } -void -ckrm_ns_put(struct ckrm_net_struct *ns) +void ckrm_ns_put(struct ckrm_net_struct *ns) { - if (atomic_dec_and_test(&ns->ns_refcnt)) - kfree(ns); - - return; + if (atomic_dec_and_test(&ns->ns_refcnt)) + kfree(ns); + return; } + /* * Change the class of a netstruct * @@ -130,12 +131,12 @@ ckrm_ns_put(struct ckrm_net_struct *ns) static void sock_set_class(struct ckrm_net_struct *ns, struct ckrm_sock_class *newcls, - struct ckrm_sock_class *oldcls, enum ckrm_event event) + struct ckrm_sock_class *oldcls, enum ckrm_event event) { int i; struct ckrm_res_ctlr *rcbs; struct ckrm_classtype *clstype; - void *old_res_class, *new_res_class; + void *old_res_class, *new_res_class; if ((newcls == oldcls) || (newcls == NULL)) { ns->core = (void *)oldcls; @@ -147,43 +148,46 @@ sock_set_class(struct ckrm_net_struct *ns, struct ckrm_sock_class *newcls, list_add(&ns->ckrm_link, &class_core(newcls)->objlist); class_unlock(class_core(newcls)); - clstype = class_isa(newcls); + clstype = class_isa(newcls); for (i = 0; i < clstype->max_resid; i++) { atomic_inc(&clstype->nr_resusers[i]); - old_res_class = oldcls ? class_core(oldcls)->res_class[i] : NULL; - new_res_class = newcls ? class_core(newcls)->res_class[i] : NULL; + old_res_class = + oldcls ? class_core(oldcls)->res_class[i] : NULL; + new_res_class = + newcls ? 
class_core(newcls)->res_class[i] : NULL; rcbs = clstype->res_ctlrs[i]; - if (rcbs && rcbs->change_resclass && (old_res_class != new_res_class)) - (*rcbs->change_resclass)(ns, old_res_class, new_res_class); + if (rcbs && rcbs->change_resclass + && (old_res_class != new_res_class)) + (*rcbs->change_resclass) (ns, old_res_class, + new_res_class); atomic_dec(&clstype->nr_resusers[i]); } return; } -static void -sock_add_resctrl(struct ckrm_core_class *core, int resid) +static void sock_add_resctrl(struct ckrm_core_class *core, int resid) { struct ckrm_net_struct *ns; struct ckrm_res_ctlr *rcbs; - if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS) || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL)) + if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS) + || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL)) return; class_lock(core); list_for_each_entry(ns, &core->objlist, ckrm_link) { if (rcbs->change_resclass) - (*rcbs->change_resclass)(ns, NULL, core->res_class[resid]); + (*rcbs->change_resclass) (ns, NULL, + core->res_class[resid]); } class_unlock(core); } - /************************************************************************** * Functions called from classification points * **************************************************************************/ -static void -cb_sockclass_listen_start(struct sock *sk) +static void cb_sockclass_listen_start(struct sock *sk) { struct ckrm_net_struct *ns = NULL; struct ckrm_sock_class *newcls = NULL; @@ -192,43 +196,43 @@ cb_sockclass_listen_start(struct sock *sk) int i = 0; // XXX - TBD ipv6 - if (sk->sk_family == IPPROTO_IPV6) + if (sk->sk_family == AF_INET6) return; // to store the socket address ns = (struct ckrm_net_struct *) - kmalloc(sizeof(struct ckrm_net_struct), GFP_ATOMIC); + kmalloc(sizeof(struct ckrm_net_struct), GFP_ATOMIC); if (!ns) return; - memset(ns,0, sizeof(ns)); + memset(ns, 0, sizeof(*ns)); INIT_LIST_HEAD(&ns->ckrm_link); + ckrm_ns_hold(ns); ns->ns_family = sk->sk_family; - if (ns->ns_family == IPPROTO_IPV6) // IPv6 not supported yet. + if (ns->ns_family == AF_INET6) // IPv6 not supported yet. 
return; ns->ns_daddrv4 = inet_sk(sk)->rcv_saddr; ns->ns_dport = inet_sk(sk)->num; - + ns->ns_pid = current->pid; ns->ns_tgid = current->tgid; - + ns->ns_tsk = current; ce_protect(&CT_sockclass); - CE_CLASSIFY_RET(newcls,&CT_sockclass,CKRM_EVENT_LISTEN_START,ns,current); + CE_CLASSIFY_RET(newcls, &CT_sockclass, CKRM_EVENT_LISTEN_START, ns, + current); ce_release(&CT_sockclass); - if (newcls == NULL) { + if (newcls == NULL) { newcls = &sockclass_dflt_class; ckrm_core_grab(class_core(newcls)); } class_lock(class_core(newcls)); list_add(&ns->ckrm_link, &class_core(newcls)->objlist); - ckrm_ns_put(ns); ns->core = newcls; class_unlock(class_core(newcls)); - // the socket is already locked // take a reference on socket on our behalf @@ -242,26 +246,27 @@ cb_sockclass_listen_start(struct sock *sk) atomic_inc(&clstype->nr_resusers[i]); rcbs = clstype->res_ctlrs[i]; if (rcbs && rcbs->change_resclass) { - (*rcbs->change_resclass)((void *)ns, - NULL,class_core(newcls)->res_class[i]); + (*rcbs->change_resclass) ((void *)ns, + NULL, + class_core(newcls)-> + res_class[i]); } atomic_dec(&clstype->nr_resusers[i]); } return; } -static void -cb_sockclass_listen_stop(struct sock *sk) +static void cb_sockclass_listen_stop(struct sock *sk) { struct ckrm_net_struct *ns = NULL; struct ckrm_sock_class *newcls = NULL; // XXX - TBD ipv6 - if (sk->sk_family == IPPROTO_IPV6) + if (sk->sk_family == AF_INET6) return; - ns = (struct ckrm_net_struct *)sk->sk_ns; - if (!ns) // listen_start called before socket_aq was loaded + ns = (struct ckrm_net_struct *)sk->sk_ns; + if (!ns) // listen_start called before socket_aq was loaded return; newcls = ns->core; @@ -272,7 +277,6 @@ cb_sockclass_listen_stop(struct sock *sk) class_unlock(class_core(newcls)); ckrm_core_drop(class_core(newcls)); } - // the socket is already locked sk->sk_ns = NULL; sock_put(sk); @@ -283,35 +287,36 @@ cb_sockclass_listen_stop(struct sock *sk) } static struct ckrm_event_spec sock_events_callbacks[] = { - CKRM_EVENT_SPEC( LISTEN_START, cb_sockclass_listen_start ), - CKRM_EVENT_SPEC( LISTEN_STOP, cb_sockclass_listen_stop ), - { -1 } + CKRM_EVENT_SPEC(LISTEN_START, cb_sockclass_listen_start), + CKRM_EVENT_SPEC(LISTEN_STOP, cb_sockclass_listen_stop), + {-1} }; /************************************************************************** * Class Object Creation / Destruction **************************************************************************/ -static struct ckrm_core_class * -sock_alloc_class(struct ckrm_core_class *parent, const char *name) +static struct ckrm_core_class *sock_alloc_class(struct ckrm_core_class *parent, + const char *name) { struct ckrm_sock_class *sockcls; sockcls = kmalloc(sizeof(struct ckrm_sock_class), GFP_KERNEL); - if (sockcls == NULL) + if (sockcls == NULL) return NULL; + memset(sockcls, 0, sizeof(struct ckrm_sock_class)); - ckrm_init_core_class(&CT_sockclass,class_core(sockcls),parent,name); + ckrm_init_core_class(&CT_sockclass, class_core(sockcls), parent, name); ce_protect(&CT_sockclass); if (CT_sockclass.ce_cb_active && CT_sockclass.ce_callbacks.class_add) - (*CT_sockclass.ce_callbacks.class_add)(name,sockcls); + (*CT_sockclass.ce_callbacks.class_add) (name, sockcls, + CT_sockclass.typeID); ce_release(&CT_sockclass); return class_core(sockcls); } -static int -sock_free_class(struct ckrm_core_class *core) +static int sock_free_class(struct ckrm_core_class *core) { struct ckrm_sock_class *sockcls; @@ -322,7 +327,7 @@ sock_free_class(struct ckrm_core_class *core) if (core == core->classtype->default_class) { // reset the name 
tag core->name = dflt_sockclass_name; - return 0; + return 0; } sockcls = class_type(struct ckrm_sock_class, core); @@ -330,28 +335,29 @@ sock_free_class(struct ckrm_core_class *core) ce_protect(&CT_sockclass); if (CT_sockclass.ce_cb_active && CT_sockclass.ce_callbacks.class_delete) - (*CT_sockclass.ce_callbacks.class_delete)(core->name,sockcls); + (*CT_sockclass.ce_callbacks.class_delete) (core->name, sockcls, + CT_sockclass.typeID); - sock_reclassify_class ( sockcls ); + sock_reclassify_class(sockcls); ce_release(&CT_sockclass); - ckrm_release_core_class(core); // Hubertus .... could just drop the class .. error message + ckrm_release_core_class(core); + // Hubertus .... could just drop the class .. error message + return 0; } - -static int -sock_show_members(struct ckrm_core_class *core, struct seq_file *seq) +static int sock_show_members(struct ckrm_core_class *core, struct seq_file *seq) { struct list_head *lh; struct ckrm_net_struct *ns = NULL; class_lock(core); list_for_each(lh, &core->objlist) { - ns = container_of(lh, struct ckrm_net_struct,ckrm_link); - seq_printf(seq, "%d.%d.%d.%d\\%d\n", - NIPQUAD(ns->ns_daddrv4),ns->ns_dport); + ns = container_of(lh, struct ckrm_net_struct, ckrm_link); + seq_printf(seq, "%d.%d.%d.%d\\%d\n", + NIPQUAD(ns->ns_daddrv4), ns->ns_dport); } class_unlock(core); @@ -359,7 +365,8 @@ sock_show_members(struct ckrm_core_class *core, struct seq_file *seq) } static int -sock_forced_reclassify_ns(struct ckrm_net_struct *tns, struct ckrm_core_class *core) +sock_forced_reclassify_ns(struct ckrm_net_struct *tns, + struct ckrm_core_class *core) { struct ckrm_net_struct *ns = NULL; struct sock *sk = NULL; @@ -373,10 +380,13 @@ sock_forced_reclassify_ns(struct ckrm_net_struct *tns, struct ckrm_core_class *c newcls = class_type(struct ckrm_sock_class, core); // lookup the listening sockets // returns with a reference count set on socket - sk = tcp_v4_lookup_listener(tns->ns_daddrv4,tns->ns_dport,0); + if (tns->ns_family == AF_INET6) + return -EOPNOTSUPP; + + sk = tcp_v4_lookup_listener(tns->ns_daddrv4, tns->ns_dport, 0); if (!sk) { printk(KERN_INFO "No such listener 0x%x:%d\n", - tns->ns_daddrv4, tns->ns_dport); + tns->ns_daddrv4, tns->ns_dport); return rc; } lock_sock(sk); @@ -385,12 +395,17 @@ sock_forced_reclassify_ns(struct ckrm_net_struct *tns, struct ckrm_core_class *c } ns = sk->sk_ns; ckrm_ns_hold(ns); + if (!capable(CAP_NET_ADMIN) && (ns->ns_tsk->user != current->user)) { + ckrm_ns_put(ns); + rc = -EPERM; + goto out; + } + oldcls = ns->core; if ((oldcls == NULL) || (oldcls == newcls)) { ckrm_ns_put(ns); goto out; } - // remove the net_struct from the current class class_lock(class_core(oldcls)); list_del(&ns->ckrm_link); @@ -401,37 +416,35 @@ sock_forced_reclassify_ns(struct ckrm_net_struct *tns, struct ckrm_core_class *c sock_set_class(ns, newcls, oldcls, CKRM_EVENT_MANUAL); ckrm_ns_put(ns); rc = 0; -out: + out: release_sock(sk); sock_put(sk); return rc; -} +} enum sock_target_token_t { - IPV4, IPV6, SOCKC_TARGET_ERR + IPV4, IPV6, SOCKC_TARGET_ERR }; static match_table_t sock_target_tokens = { {IPV4, "ipv4=%s"}, {IPV6, "ipv6=%s"}, - {SOCKC_TARGET_ERR, NULL}, + {SOCKC_TARGET_ERR, NULL}, }; -char * -v4toi(char *s, char c, __u32 *v) +char *v4toi(char *s, char c, __u32 * v) { - unsigned int k = 0, n = 0; + unsigned int k = 0, n = 0; - while(*s && (*s != c)) { + while (*s && (*s != c)) { if (*s == '.') { n <<= 8; n |= k; k = 0; - } - else - k = k *10 + *s - '0'; + } else + k = k * 10 + *s - '0'; s++; } @@ -442,19 +455,19 @@ v4toi(char *s, char c, __u32 *v) } 
static int -sock_forced_reclassify(struct ckrm_core_class *target,const char *options) -{ - char *p,*p2; +sock_forced_reclassify(struct ckrm_core_class *target, const char *options) +{ + char *p, *p2; struct ckrm_net_struct ns; __u32 v4addr, tmp; if (!options) - return 1; - - while ((p = strsep((char**)&options, ",")) != NULL) { + return -EINVAL; + + while ((p = strsep((char **)&options, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; int token; - + if (!*p) continue; token = match_token(p, sock_target_tokens, args); @@ -463,33 +476,33 @@ sock_forced_reclassify(struct ckrm_core_class *target,const char *options) case IPV4: p2 = p; - while(*p2 && (*p2 != '=')) + while (*p2 && (*p2 != '=')) ++p2; p2++; - p2 = v4toi(p2, '\\',&(v4addr)); + p2 = v4toi(p2, '\\', &(v4addr)); ns.ns_daddrv4 = htonl(v4addr); - ns.ns_family = 4; //IPPROTO_IPV4 - p2 = v4toi(++p2, ':',&tmp); ns.ns_dport = (__u16)tmp; - p2 = v4toi(++p2,'\0',&ns.ns_pid); - - sock_forced_reclassify_ns(&ns,target); + ns.ns_family = AF_INET; + p2 = v4toi(++p2, ':', &tmp); + ns.ns_dport = (__u16) tmp; + if (*p2) + p2 = v4toi(++p2, '\0', &ns.ns_pid); + sock_forced_reclassify_ns(&ns, target); break; case IPV6: printk(KERN_INFO "rcfs: IPV6 not supported yet\n"); - return 0; + return -ENOSYS; default: - return 0; + return -EINVAL; } } - return 1; -} + return -EINVAL; +} /* * Listen_aq reclassification. */ -static void -sock_reclassify_class(struct ckrm_sock_class *cls) +static void sock_reclassify_class(struct ckrm_sock_class *cls) { struct ckrm_net_struct *ns, *tns; struct ckrm_core_class *core = class_core(cls); @@ -513,42 +526,41 @@ sock_reclassify_class(struct ckrm_sock_class *cls) list_splice_init(&core->objlist, &local_list); class_unlock(core); ckrm_core_drop(core); - + list_for_each_entry_safe(ns, tns, &local_list, ckrm_link) { ckrm_ns_hold(ns); list_del(&ns->ckrm_link); if (ns->ns_sk) { lock_sock(ns->ns_sk); - sock_set_class(ns, &sockclass_dflt_class, NULL, CKRM_EVENT_MANUAL); + sock_set_class(ns, &sockclass_dflt_class, NULL, + CKRM_EVENT_MANUAL); release_sock(ns->ns_sk); } ckrm_ns_put(ns); } - return ; + return; } -void __init -ckrm_meta_init_sockclass(void) +void __init ckrm_meta_init_sockclass(void) { - printk("...... Initializing ClassType<%s> ........\n",CT_sockclass.name); + printk("...... 
Initializing ClassType<%s> ........\n", + CT_sockclass.name); // intialize the default class ckrm_init_core_class(&CT_sockclass, class_core(&sockclass_dflt_class), - NULL,dflt_sockclass_name); + NULL, dflt_sockclass_name); // register classtype and initialize default task class ckrm_register_classtype(&CT_sockclass); ckrm_register_event_set(sock_events_callbacks); - // note registeration of all resource controllers will be done later dynamically - // as these are specified as modules + // note registeration of all resource controllers will be done + // later dynamically as these are specified as modules } - - #if 1 -/*************************************************************************************** +/***************************************************************************** * Debugging Network Classes: Utility functions - **************************************************************************************/ + *****************************************************************************/ #endif diff --git a/kernel/ckrm/ckrm_tasks.c b/kernel/ckrm/ckrm_tasks.c index dcc7ee341..ee539216e 100644 --- a/kernel/ckrm/ckrm_tasks.c +++ b/kernel/ckrm/ckrm_tasks.c @@ -30,28 +30,30 @@ #include #include #include +#include -#define TOTAL_NUM_TASKS (131072) // 128 K +#define TOTAL_NUM_TASKS (131072) // 128 K #define NUMTASKS_DEBUG #define NUMTASKS_NAME "numtasks" typedef struct ckrm_numtasks { - struct ckrm_core_class *core; // the core i am part of... - struct ckrm_core_class *parent; // parent of the core above. + struct ckrm_core_class *core; // the core i am part of... + struct ckrm_core_class *parent; // parent of the core above. struct ckrm_shares shares; - spinlock_t cnt_lock; // always grab parent's lock first and then child's - int cnt_guarantee; // num_tasks guarantee in local units - int cnt_unused; // has to borrow if more than this is needed - int cnt_limit; // no tasks over this limit. - atomic_t cnt_cur_alloc; // current alloc from self - atomic_t cnt_borrowed; // borrowed from the parent + spinlock_t cnt_lock; // always grab parent's lock before child's + int cnt_guarantee; // num_tasks guarantee in local units + int cnt_unused; // has to borrow if more than this is needed + int cnt_limit; // no tasks over this limit. + atomic_t cnt_cur_alloc; // current alloc from self + atomic_t cnt_borrowed; // borrowed from the parent - int over_guarantee; //turn on/off when cur_alloc goes over/under guarantee + int over_guarantee; // turn on/off when cur_alloc goes + // over/under guarantee // internally maintained statictics to compare with max numbers - int limit_failures; // no. of failures 'cause the request was over the limit - int borrow_sucesses; // no. of successful borrows - int borrow_failures; // no. of borrow faileures + int limit_failures; // # failures as request was over the limit + int borrow_sucesses; // # successful borrows + int borrow_failures; // # borrow failures // Maximum the specific statictics has reached. int max_limit_failures; @@ -71,53 +73,50 @@ struct ckrm_res_ctlr numtasks_rcbs; * to make share values sane. * Does not traverse hierarchy reinitializing children. 
*/ -static void -numtasks_res_initcls_one(ckrm_numtasks_t *res) +static void numtasks_res_initcls_one(ckrm_numtasks_t * res) { - res->shares.my_guarantee = CKRM_SHARE_DONTCARE; - res->shares.my_limit = CKRM_SHARE_DONTCARE; - res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + res->shares.my_guarantee = CKRM_SHARE_DONTCARE; + res->shares.my_limit = CKRM_SHARE_DONTCARE; + res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.cur_max_limit = 0; + res->shares.cur_max_limit = 0; - res->cnt_guarantee = CKRM_SHARE_DONTCARE; - res->cnt_unused = CKRM_SHARE_DONTCARE; - res->cnt_limit = CKRM_SHARE_DONTCARE; + res->cnt_guarantee = CKRM_SHARE_DONTCARE; + res->cnt_unused = CKRM_SHARE_DONTCARE; + res->cnt_limit = CKRM_SHARE_DONTCARE; - res->over_guarantee = 0; + res->over_guarantee = 0; - res->limit_failures = 0; - res->borrow_sucesses = 0; - res->borrow_failures = 0; + res->limit_failures = 0; + res->borrow_sucesses = 0; + res->borrow_failures = 0; - res->max_limit_failures = 0; - res->max_borrow_sucesses = 0; - res->max_borrow_failures = 0; + res->max_limit_failures = 0; + res->max_borrow_sucesses = 0; + res->max_borrow_failures = 0; - res->tot_limit_failures = 0; - res->tot_borrow_sucesses = 0; - res->tot_borrow_failures = 0; + res->tot_limit_failures = 0; + res->tot_borrow_sucesses = 0; + res->tot_borrow_failures = 0; atomic_set(&res->cnt_cur_alloc, 0); atomic_set(&res->cnt_borrowed, 0); return; } -#if 0 -static void -numtasks_res_initcls(void *my_res) +#if 0 +static void numtasks_res_initcls(void *my_res) { ckrm_numtasks_t *res = my_res; /* Write a version which propagates values all the way down and replace rcbs callback with that version */ - + } #endif -int -numtasks_get_ref(void *arg, int force) +static int numtasks_get_ref_local(void *arg, int force) { int rc, resid = numtasks_rcbs.resid; ckrm_numtasks_t *res; @@ -127,22 +126,23 @@ numtasks_get_ref(void *arg, int force) return 1; res = ckrm_get_res_class(core, resid, ckrm_numtasks_t); - if (res == NULL) + if (res == NULL) return 1; atomic_inc(&res->cnt_cur_alloc); rc = 1; if (((res->parent) && (res->cnt_unused == CKRM_SHARE_DONTCARE)) || - (atomic_read(&res->cnt_cur_alloc) > res->cnt_unused)) { + (atomic_read(&res->cnt_cur_alloc) > res->cnt_unused)) { rc = 0; - if (!force && (res->cnt_limit != CKRM_SHARE_DONTCARE) && - (atomic_read(&res->cnt_cur_alloc) > res->cnt_limit)) { + if (!force && (res->cnt_limit != CKRM_SHARE_DONTCARE) && + (atomic_read(&res->cnt_cur_alloc) > res->cnt_limit)) { res->limit_failures++; res->tot_limit_failures++; } else if (res->parent != NULL) { - if ((rc = numtasks_get_ref(res->parent, force)) == 1) { + if ((rc = + numtasks_get_ref_local(res->parent, force)) == 1) { atomic_inc(&res->cnt_borrowed); res->borrow_sucesses++; res->tot_borrow_sucesses++; @@ -177,8 +177,7 @@ numtasks_get_ref(void *arg, int force) return rc; } -void -numtasks_put_ref(void *arg) +static void numtasks_put_ref_local(void *arg) { int resid = numtasks_rcbs.resid; ckrm_numtasks_t *res; @@ -189,37 +188,40 @@ numtasks_put_ref(void *arg) } res = ckrm_get_res_class(core, resid, ckrm_numtasks_t); - if (res == NULL) + if (res == NULL) return; atomic_dec(&res->cnt_cur_alloc); if (atomic_read(&res->cnt_borrowed) > 0) { atomic_dec(&res->cnt_borrowed); - numtasks_put_ref(res->parent); + numtasks_put_ref_local(res->parent); } return; } -static void * 
-numtasks_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) +static void *numtasks_res_alloc(struct ckrm_core_class *core, + struct ckrm_core_class *parent) { ckrm_numtasks_t *res; - + res = kmalloc(sizeof(ckrm_numtasks_t), GFP_ATOMIC); - + if (res) { + memset(res, 0, sizeof(ckrm_numtasks_t)); res->core = core; res->parent = parent; numtasks_res_initcls_one(res); res->cnt_lock = SPIN_LOCK_UNLOCKED; if (parent == NULL) { - // I am part of root class. so set the max tasks to available - // default + // I am part of root class. So set the max tasks + // to available default res->cnt_guarantee = TOTAL_NUM_TASKS; - res->cnt_unused = TOTAL_NUM_TASKS; + res->cnt_unused = TOTAL_NUM_TASKS; res->cnt_limit = TOTAL_NUM_TASKS; } + try_module_get(THIS_MODULE); } else { - printk(KERN_ERR "numtasks_res_alloc: failed GFP_ATOMIC alloc\n"); + printk(KERN_ERR + "numtasks_res_alloc: failed GFP_ATOMIC alloc\n"); } return res; } @@ -228,30 +230,29 @@ numtasks_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent) * No locking of this resource class object necessary as we are not * supposed to be assigned (or used) when/after this function is called. */ -static void -numtasks_res_free(void *my_res) +static void numtasks_res_free(void *my_res) { ckrm_numtasks_t *res = my_res, *parres, *childres; ckrm_core_class_t *child = NULL; int i, borrowed, maxlimit, resid = numtasks_rcbs.resid; - if (!res) + if (!res) return; // Assuming there will be no children when this function is called - + parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); if (unlikely(atomic_read(&res->cnt_cur_alloc) != 0 || - atomic_read(&res->cnt_borrowed))) { - printk(KERN_ERR "numtasks_res_free: resource still alloc'd %p\n", res); + atomic_read(&res->cnt_borrowed))) { + printk(KERN_ERR + "numtasks_res_free: resource still alloc'd %p\n", res); if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) { for (i = 0; i < borrowed; i++) { - numtasks_put_ref(parres->core); + numtasks_put_ref_local(parres->core); } } } - // return child's limit/guarantee to parent node spin_lock(&parres->cnt_lock); child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0); @@ -272,15 +273,17 @@ numtasks_res_free(void *my_res) spin_unlock(&parres->cnt_lock); kfree(res); + module_put(THIS_MODULE); return; } + /* * Recalculate the guarantee and limit in real units... and propagate the * same to children. 
* Caller is responsible for protecting res and for the integrity of parres */ static void -recalc_and_propagate(ckrm_numtasks_t *res, ckrm_numtasks_t *parres) +recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres) { ckrm_core_class_t *child = NULL; ckrm_numtasks_t *childres; @@ -294,26 +297,34 @@ recalc_and_propagate(ckrm_numtasks_t *res, ckrm_numtasks_t *parres) // if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_guarantee = CKRM_SHARE_DONTCARE; + } else if (par->total_guarantee) { + res->cnt_guarantee = + (self->my_guarantee * parres->cnt_guarantee) + / par->total_guarantee; } else { - res->cnt_guarantee = (self->my_guarantee * parres->cnt_guarantee) - / par->total_guarantee; + res->cnt_guarantee = 0; } + if (parres->cnt_limit == CKRM_SHARE_DONTCARE) { res->cnt_limit = CKRM_SHARE_DONTCARE; - } else { + } else if (par->max_limit) { res->cnt_limit = (self->my_limit * parres->cnt_limit) - / par->max_limit; + / par->max_limit; + } else { + res->cnt_limit = 0; } // Calculate unused units if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_unused = CKRM_SHARE_DONTCARE; - } else { + } else if (self->total_guarantee) { res->cnt_unused = (self->unused_guarantee * - res->cnt_guarantee) / self->total_guarantee; + res->cnt_guarantee) / + self->total_guarantee; + } else { + res->cnt_unused = 0; } } - // propagate to children ckrm_lock_hier(res->core); while ((child = ckrm_get_next_child(res->core, child)) != NULL) { @@ -327,18 +338,18 @@ recalc_and_propagate(ckrm_numtasks_t *res, ckrm_numtasks_t *parres) return; } -static int -numtasks_set_share_values(void *my_res, struct ckrm_shares *new) +static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new) { ckrm_numtasks_t *parres, *res = my_res; struct ckrm_shares *cur = &res->shares, *par; int rc = -EINVAL, resid = numtasks_rcbs.resid; - if (!res) + if (!res) return rc; if (res->parent) { - parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); + parres = + ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); spin_lock(&parres->cnt_lock); spin_lock(&res->cnt_lock); par = &parres->shares; @@ -354,11 +365,13 @@ numtasks_set_share_values(void *my_res, struct ckrm_shares *new) // Calculate parent's unused units if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { parres->cnt_unused = CKRM_SHARE_DONTCARE; - } else { + } else if (par->total_guarantee) { parres->cnt_unused = (par->unused_guarantee * - parres->cnt_guarantee) / par->total_guarantee; + parres->cnt_guarantee) / + par->total_guarantee; + } else { + parres->cnt_unused = 0; } - recalc_and_propagate(res, parres); } spin_unlock(&res->cnt_lock); @@ -368,113 +381,107 @@ numtasks_set_share_values(void *my_res, struct ckrm_shares *new) return rc; } - -static int -numtasks_get_share_values(void *my_res, struct ckrm_shares *shares) +static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares) { ckrm_numtasks_t *res = my_res; - if (!res) + if (!res) return -EINVAL; *shares = res->shares; return 0; } -static int -numtasks_get_stats(void *my_res, struct seq_file *sfile) +static int numtasks_get_stats(void *my_res, struct seq_file *sfile) { ckrm_numtasks_t *res = my_res; - if (!res) + if (!res) return -EINVAL; seq_printf(sfile, "Number of tasks resource:\n"); seq_printf(sfile, "Total Over limit failures: %d\n", - res->tot_limit_failures); + res->tot_limit_failures); seq_printf(sfile, "Total Over guarantee sucesses: %d\n", - res->tot_borrow_sucesses); + res->tot_borrow_sucesses); seq_printf(sfile, "Total Over guarantee failures: 
%d\n", - res->tot_borrow_failures); + res->tot_borrow_failures); seq_printf(sfile, "Maximum Over limit failures: %d\n", - res->max_limit_failures); + res->max_limit_failures); seq_printf(sfile, "Maximum Over guarantee sucesses: %d\n", - res->max_borrow_sucesses); + res->max_borrow_sucesses); seq_printf(sfile, "Maximum Over guarantee failures: %d\n", - res->max_borrow_failures); + res->max_borrow_failures); #ifdef NUMTASKS_DEBUG - seq_printf(sfile, "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d " - "unused_guarantee %d, cur_max_limit %d\n", - atomic_read(&res->cnt_cur_alloc), - atomic_read(&res->cnt_borrowed), - res->cnt_guarantee, - res->cnt_limit, - res->shares.unused_guarantee, - res->shares.cur_max_limit); + seq_printf(sfile, + "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d " + "unused_guarantee %d, cur_max_limit %d\n", + atomic_read(&res->cnt_cur_alloc), + atomic_read(&res->cnt_borrowed), res->cnt_guarantee, + res->cnt_limit, res->shares.unused_guarantee, + res->shares.cur_max_limit); #endif return 0; } -static int -numtasks_show_config(void *my_res, struct seq_file *sfile) +static int numtasks_show_config(void *my_res, struct seq_file *sfile) { ckrm_numtasks_t *res = my_res; - if (!res) + if (!res) return -EINVAL; - seq_printf(sfile, "res=%s,parameter=somevalue\n",NUMTASKS_NAME); + seq_printf(sfile, "res=%s,parameter=somevalue\n", NUMTASKS_NAME); return 0; } -static int -numtasks_set_config(void *my_res, const char *cfgstr) +static int numtasks_set_config(void *my_res, const char *cfgstr) { ckrm_numtasks_t *res = my_res; - if (!res) + if (!res) return -EINVAL; - printk("numtasks config='%s'\n",cfgstr); + printk("numtasks config='%s'\n", cfgstr); return 0; } -static void -numtasks_change_resclass(void *task, void *old, void *new) +static void numtasks_change_resclass(void *task, void *old, void *new) { ckrm_numtasks_t *oldres = old; ckrm_numtasks_t *newres = new; - if (oldres != (void *) -1) { + if (oldres != (void *)-1) { struct task_struct *tsk = task; if (!oldres) { - struct ckrm_core_class *old_core = &(tsk->parent->taskclass->core); - oldres = ckrm_get_res_class(old_core, numtasks_rcbs.resid, - ckrm_numtasks_t); + struct ckrm_core_class *old_core = + &(tsk->parent->taskclass->core); + oldres = + ckrm_get_res_class(old_core, numtasks_rcbs.resid, + ckrm_numtasks_t); } - numtasks_put_ref(oldres->core); + numtasks_put_ref_local(oldres->core); } if (newres) { - (void) numtasks_get_ref(newres->core, 1); + (void)numtasks_get_ref_local(newres->core, 1); } } struct ckrm_res_ctlr numtasks_rcbs = { - .res_name = NUMTASKS_NAME, - .res_hdepth = 1, - .resid = -1, - .res_alloc = numtasks_res_alloc, - .res_free = numtasks_res_free, - .set_share_values = numtasks_set_share_values, - .get_share_values = numtasks_get_share_values, - .get_stats = numtasks_get_stats, - .show_config = numtasks_show_config, - .set_config = numtasks_set_config, - .change_resclass = numtasks_change_resclass, + .res_name = NUMTASKS_NAME, + .res_hdepth = 1, + .resid = -1, + .res_alloc = numtasks_res_alloc, + .res_free = numtasks_res_free, + .set_share_values = numtasks_set_share_values, + .get_share_values = numtasks_get_share_values, + .get_stats = numtasks_get_stats, + .show_config = numtasks_show_config, + .set_config = numtasks_set_config, + .change_resclass = numtasks_change_resclass, }; -int __init -init_ckrm_numtasks_res(void) +int __init init_ckrm_numtasks_res(void) { struct ckrm_classtype *clstype; int resid = numtasks_rcbs.resid; @@ -486,24 +493,27 @@ init_ckrm_numtasks_res(void) } if (resid == -1) { 
- resid = ckrm_register_res_ctlr(clstype,&numtasks_rcbs); - printk("........init_ckrm_numtasks_res -> %d\n",resid); + resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs); + printk("........init_ckrm_numtasks_res -> %d\n", resid); + if (resid != -1) { + ckrm_numtasks_register(numtasks_get_ref_local, + numtasks_put_ref_local); + numtasks_rcbs.classtype = clstype; + } } return 0; -} +} -void __exit -exit_ckrm_numtasks_res(void) +void __exit exit_ckrm_numtasks_res(void) { + if (numtasks_rcbs.resid != -1) { + ckrm_numtasks_register(NULL, NULL); + } ckrm_unregister_res_ctlr(&numtasks_rcbs); numtasks_rcbs.resid = -1; } module_init(init_ckrm_numtasks_res) -module_exit(exit_ckrm_numtasks_res) - -EXPORT_SYMBOL(numtasks_get_ref); -EXPORT_SYMBOL(numtasks_put_ref); - -MODULE_LICENSE("GPL"); + module_exit(exit_ckrm_numtasks_res) + MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_tc.c b/kernel/ckrm/ckrm_tc.c index cc0377887..83c6b0bd8 100644 --- a/kernel/ckrm/ckrm_tc.c +++ b/kernel/ckrm/ckrm_tc.c @@ -50,77 +50,74 @@ #include +#define TC_DEBUG(fmt, args...) do { \ +/* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0) - -#define TC_DEBUG(fmt, args...) do { /* printk("%s: " fmt, __FUNCTION__ , ## args); */ } while (0) - - -static struct ckrm_task_class taskclass_dflt_class = { +static struct ckrm_task_class taskclass_dflt_class = { }; const char *dflt_taskclass_name = TASK_CLASS_TYPE_NAME; -static struct ckrm_core_class *ckrm_alloc_task_class(struct ckrm_core_class *parent, const char *name); +static struct ckrm_core_class *ckrm_alloc_task_class(struct ckrm_core_class + *parent, const char *name); static int ckrm_free_task_class(struct ckrm_core_class *core); -static int tc_forced_reclassify(ckrm_core_class_t *target, const char *resname); -static int tc_show_members(struct ckrm_core_class *core, struct seq_file *seq); +static int tc_forced_reclassify(ckrm_core_class_t * target, + const char *resname); +static int tc_show_members(struct ckrm_core_class *core, struct seq_file *seq); static void tc_add_resctrl(struct ckrm_core_class *core, int resid); struct ckrm_classtype CT_taskclass = { - .mfidx = TC_MF_IDX, - .name = TASK_CLASS_TYPE_NAME, - .typeID = CKRM_CLASSTYPE_TASK_CLASS, - .maxdepth = 3, // Hubertus .. just to start - .resid_reserved = 4, // Hubertus .. reservation - .max_res_ctlrs = CKRM_MAX_RES_CTLRS, - .max_resid = 0, - .bit_res_ctlrs = 0L, + .mfidx = TC_MF_IDX, + .name = TASK_CLASS_TYPE_NAME, + .typeID = CKRM_CLASSTYPE_TASK_CLASS, + .maxdepth = 3, // Hubertus .. just to start + .resid_reserved = 4, // Hubertus .. 
reservation + .max_res_ctlrs = CKRM_MAX_RES_CTLRS, + .max_resid = 0, + .bit_res_ctlrs = 0L, .res_ctlrs_lock = SPIN_LOCK_UNLOCKED, - .classes = LIST_HEAD_INIT(CT_taskclass.classes), + .classes = LIST_HEAD_INIT(CT_taskclass.classes), + + .default_class = &taskclass_dflt_class.core, - .default_class = &taskclass_dflt_class.core, - // private version of functions - .alloc = &ckrm_alloc_task_class, - .free = &ckrm_free_task_class, - .show_members = &tc_show_members, + .alloc = &ckrm_alloc_task_class, + .free = &ckrm_free_task_class, + .show_members = &tc_show_members, .forced_reclassify = &tc_forced_reclassify, // use of default functions - .show_shares = &ckrm_class_show_shares, - .show_stats = &ckrm_class_show_stats, - .show_config = &ckrm_class_show_config, - .set_config = &ckrm_class_set_config, - .set_shares = &ckrm_class_set_shares, - .reset_stats = &ckrm_class_reset_stats, + .show_shares = &ckrm_class_show_shares, + .show_stats = &ckrm_class_show_stats, + .show_config = &ckrm_class_show_config, + .set_config = &ckrm_class_set_config, + .set_shares = &ckrm_class_set_shares, + .reset_stats = &ckrm_class_reset_stats, // mandatory private version .. no dflt available - .add_resctrl = &tc_add_resctrl, + .add_resctrl = &tc_add_resctrl, }; /************************************************************************** * Helper Functions * **************************************************************************/ -static inline void -ckrm_init_task_lock(struct task_struct *tsk) +static inline void ckrm_init_task_lock(struct task_struct *tsk) { tsk->ckrm_tsklock = SPIN_LOCK_UNLOCKED; } // Hubertus .. following functions should move to ckrm_rc.h -static inline void -ckrm_task_lock(struct task_struct *tsk) +static inline void ckrm_task_lock(struct task_struct *tsk) { - spin_lock(&tsk->ckrm_tsklock); + spin_lock(&tsk->ckrm_tsklock); } -static inline void -ckrm_task_unlock(struct task_struct *tsk) +static inline void ckrm_task_unlock(struct task_struct *tsk) { - spin_unlock(&tsk->ckrm_tsklock); + spin_unlock(&tsk->ckrm_tsklock); } /* @@ -140,14 +137,13 @@ ckrm_task_unlock(struct task_struct *tsk) * Function is also called with a ckrm_core_grab on the new core, hence * it needs to be dropped if no assignment takes place. */ - static void -ckrm_set_taskclass(struct task_struct *tsk, ckrm_task_class_t *newcls, - ckrm_task_class_t *oldcls, enum ckrm_event event) +ckrm_set_taskclass(struct task_struct *tsk, ckrm_task_class_t * newcls, + ckrm_task_class_t * oldcls, enum ckrm_event event) { int i; - ckrm_classtype_t *clstype; - ckrm_res_ctlr_t *rcbs; + ckrm_classtype_t *clstype; + ckrm_res_ctlr_t *rcbs; ckrm_task_class_t *curcls; void *old_res_class, *new_res_class; int drop_old_cls; @@ -155,24 +151,37 @@ ckrm_set_taskclass(struct task_struct *tsk, ckrm_task_class_t *newcls, ckrm_task_lock(tsk); curcls = tsk->taskclass; + if ((void *)-1 == curcls) { + // task is disassociated from ckrm... don't bother it. + ckrm_task_unlock(tsk); + ckrm_core_drop(class_core(newcls)); + return; + } + + if ((curcls == NULL) && (newcls == (void *)-1)) { + // task need to disassociated from ckrm and has no curcls + // just disassociate and return. 
+ tsk->taskclass = newcls; + ckrm_task_unlock(tsk); + return; + } // check whether compare_and_exchange should if (oldcls && (oldcls != curcls)) { ckrm_task_unlock(tsk); if (newcls) { /* compensate for previous grab */ TC_DEBUG("(%s:%d): Race-condition caught <%s> %d\n", - tsk->comm,tsk->pid,class_core(newcls)->name,event); + tsk->comm, tsk->pid, class_core(newcls)->name, + event); ckrm_core_drop(class_core(newcls)); } return; } - // make sure we have a real destination core if (!newcls) { newcls = &taskclass_dflt_class; ckrm_core_grab(class_core(newcls)); } - // take out of old class // remember that we need to drop the oldcore if ((drop_old_cls = (curcls != NULL))) { @@ -188,8 +197,14 @@ ckrm_set_taskclass(struct task_struct *tsk, ckrm_task_class_t *newcls, INIT_LIST_HEAD(&tsk->taskclass_link); tsk->taskclass = NULL; class_unlock(class_core(curcls)); - } - + if (newcls == (void *)-1) { + tsk->taskclass = newcls; + ckrm_task_unlock(tsk); + // still need to get out of old class + newcls = NULL; + goto rc_handling; + } + } // put into new class class_lock(class_core(newcls)); tsk->taskclass = newcls; @@ -201,69 +216,80 @@ ckrm_set_taskclass(struct task_struct *tsk, ckrm_task_class_t *newcls, goto out; } - CE_NOTIFY(&CT_taskclass,event,newcls,tsk); + CE_NOTIFY(&CT_taskclass, event, newcls, tsk); ckrm_task_unlock(tsk); - clstype = class_isa(newcls); // Hubertus .. can hardcode ckrm_CT_taskclass - if (clstype->bit_res_ctlrs) { // avoid running through the entire list if non is registered + rc_handling: + clstype = &CT_taskclass; + if (clstype->bit_res_ctlrs) { + // avoid running through the entire list if non is registered for (i = 0; i < clstype->max_resid; i++) { - if (clstype->res_ctlrs[i] == NULL) + if (clstype->res_ctlrs[i] == NULL) continue; atomic_inc(&clstype->nr_resusers[i]); - old_res_class = curcls ? class_core(curcls)->res_class[i] : NULL; - new_res_class = newcls ? class_core(newcls)->res_class[i] : NULL; + old_res_class = + curcls ? class_core(curcls)->res_class[i] : NULL; + new_res_class = + newcls ? 
class_core(newcls)->res_class[i] : NULL; rcbs = clstype->res_ctlrs[i]; - if (rcbs && rcbs->change_resclass && (old_res_class != new_res_class)) - (*rcbs->change_resclass)(tsk, old_res_class, new_res_class); + if (rcbs && rcbs->change_resclass + && (old_res_class != new_res_class)) + (*rcbs->change_resclass) (tsk, old_res_class, + new_res_class); atomic_dec(&clstype->nr_resusers[i]); } } - out: - if (drop_old_cls) + out: + if (drop_old_cls) ckrm_core_drop(class_core(curcls)); return; } -// HF SUGGEST: we could macro-tize this for other types DEF_FUNC_ADD_RESCTRL(funcname,link) +// HF SUGGEST: we could macro-tize this for other types +// DEF_FUNC_ADD_RESCTRL(funcname,link) // would DEF_FUNC_ADD_RESCTRL(tc_add_resctrl,taskclass_link) -static void -tc_add_resctrl(struct ckrm_core_class *core, int resid) +static void tc_add_resctrl(struct ckrm_core_class *core, int resid) { struct task_struct *tsk; struct ckrm_res_ctlr *rcbs; - if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS) || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL)) + if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS) + || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL)) return; class_lock(core); list_for_each_entry(tsk, &core->objlist, taskclass_link) { if (rcbs->change_resclass) - (*rcbs->change_resclass)(tsk, (void *) -1, core->res_class[resid]); + (*rcbs->change_resclass) (tsk, (void *)-1, + core->res_class[resid]); } class_unlock(core); } - /************************************************************************** * Functions called from classification points * **************************************************************************/ -#define ECB_PRINTK(fmt, args...) // do { if (CT_taskclass.ce_regd) printk("%s: " fmt, __FUNCTION__ , ## args); } while (0) - -#define CE_CLASSIFY_TASK(event, tsk) \ -do { \ - struct ckrm_task_class *newcls = NULL, *oldcls = tsk->taskclass; \ - \ - CE_CLASSIFY_RET(newcls,&CT_taskclass,event,tsk); \ - if (newcls) { \ - /* called synchrously. no need to get task struct */ \ - ckrm_set_taskclass(tsk, newcls, oldcls, event); \ - } \ +#define ECB_PRINTK(fmt, args...) \ +// do { if (CT_taskclass.ce_regd) +// printk("%s: " fmt, __FUNCTION__ , ## args); } while (0) + +#define CE_CLASSIFY_TASK(event, tsk) \ +do { \ + struct ckrm_task_class *newcls = NULL; \ + struct ckrm_task_class *oldcls = tsk->taskclass; \ + \ + CE_CLASSIFY_RET(newcls,&CT_taskclass,event,tsk); \ + if (newcls) { \ + /* called synchrously. no need to get task struct */ \ + ckrm_set_taskclass(tsk, newcls, oldcls, event); \ + } \ } while (0) + #define CE_CLASSIFY_TASK_PROTECT(event, tsk) \ do { \ ce_protect(&CT_taskclass); \ @@ -271,26 +297,20 @@ do { \ ce_release(&CT_taskclass); \ } while (0) - - - -static void -cb_taskclass_newtask(struct task_struct *tsk) +static void cb_taskclass_newtask(struct task_struct *tsk) { tsk->taskclass = NULL; INIT_LIST_HEAD(&tsk->taskclass_link); } - -static void -cb_taskclass_fork(struct task_struct *tsk) +static void cb_taskclass_fork(struct task_struct *tsk) { struct ckrm_task_class *cls = NULL; - ECB_PRINTK("%p:%d:%s\n",tsk,tsk->pid,tsk->comm); + ECB_PRINTK("%p:%d:%s\n", tsk, tsk->pid, tsk->comm); ce_protect(&CT_taskclass); - CE_CLASSIFY_RET(cls,&CT_taskclass,CKRM_EVENT_FORK,tsk); + CE_CLASSIFY_RET(cls, &CT_taskclass, CKRM_EVENT_FORK, tsk); if (cls == NULL) { ckrm_task_lock(tsk->parent); cls = tsk->parent->taskclass; @@ -299,65 +319,45 @@ cb_taskclass_fork(struct task_struct *tsk) } if (!list_empty(&tsk->taskclass_link)) printk("BUG in cb_fork.. 
tsk (%s:%d> already linked\n", - tsk->comm,tsk->pid); + tsk->comm, tsk->pid); ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_FORK); ce_release(&CT_taskclass); } -static void -cb_taskclass_exit(struct task_struct *tsk) +static void cb_taskclass_exit(struct task_struct *tsk) { - ckrm_task_class_t *cls; - - // Remove the task from the current core class - - ECB_PRINTK("%p:%d:%s\n",tsk,tsk->pid,tsk->comm); - ckrm_task_lock(tsk); - - CE_CLASSIFY_NORET( &CT_taskclass, CKRM_EVENT_EXIT, tsk); - - if ((cls = tsk->taskclass) != NULL) { - class_lock(class_core(cls)); - tsk->taskclass = NULL; - list_del(&tsk->taskclass_link); - class_unlock(class_core(cls)); - ckrm_core_drop(class_core(cls)); - } else { - INIT_LIST_HEAD(&tsk->taskclass_link); - } - ckrm_task_unlock(tsk); + CE_CLASSIFY_NORET(&CT_taskclass, CKRM_EVENT_EXIT, tsk); + ckrm_set_taskclass(tsk, (void *)-1, NULL, CKRM_EVENT_EXIT); } -static void -cb_taskclass_exec(const char *filename) +static void cb_taskclass_exec(const char *filename) { - ECB_PRINTK("%p:%d:%s <%s>\n",current,current->pid,current->comm,filename); + ECB_PRINTK("%p:%d:%s <%s>\n", current, current->pid, current->comm, + filename); CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_EXEC, current); } -static void -cb_taskclass_uid(void) +static void cb_taskclass_uid(void) { - ECB_PRINTK("%p:%d:%s\n",current,current->pid,current->comm); + ECB_PRINTK("%p:%d:%s\n", current, current->pid, current->comm); CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_UID, current); } -static void -cb_taskclass_gid(void) +static void cb_taskclass_gid(void) { - ECB_PRINTK("%p:%d:%s\n",current,current->pid,current->comm); + ECB_PRINTK("%p:%d:%s\n", current, current->pid, current->comm); CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_GID, current); } static struct ckrm_event_spec taskclass_events_callbacks[] = { - CKRM_EVENT_SPEC( NEWTASK, cb_taskclass_newtask ), - CKRM_EVENT_SPEC( EXEC , cb_taskclass_exec ), - CKRM_EVENT_SPEC( FORK , cb_taskclass_fork ), - CKRM_EVENT_SPEC( EXIT , cb_taskclass_exit ), - CKRM_EVENT_SPEC( UID , cb_taskclass_uid ), - CKRM_EVENT_SPEC( GID , cb_taskclass_gid ), - { -1 } + CKRM_EVENT_SPEC(NEWTASK, cb_taskclass_newtask), + CKRM_EVENT_SPEC(EXEC, cb_taskclass_exec), + CKRM_EVENT_SPEC(FORK, cb_taskclass_fork), + CKRM_EVENT_SPEC(EXIT, cb_taskclass_exit), + CKRM_EVENT_SPEC(UID, cb_taskclass_uid), + CKRM_EVENT_SPEC(GID, cb_taskclass_gid), + {-1} }; /*********************************************************************** @@ -372,8 +372,7 @@ static struct ckrm_event_spec taskclass_events_callbacks[] = { * ***********************************************************************/ -DECLARE_MUTEX(async_serializer); // serialize all async functions - +DECLARE_MUTEX(async_serializer); // serialize all async functions /* * Go through the task list and reclassify all tasks according to the current @@ -390,8 +389,7 @@ DECLARE_MUTEX(async_serializer); // serialize all async functions * We use a hybrid by comparing ratio nr_threads/pidmax */ -static void -ckrm_reclassify_all_tasks(void) +static void ckrm_reclassify_all_tasks(void) { extern int pid_max; @@ -401,22 +399,21 @@ ckrm_reclassify_all_tasks(void) int ratio; int use_bitmap; - ratio = curpidmax / nr_threads; if (curpidmax <= PID_MAX_DEFAULT) { - use_bitmap = 1; + use_bitmap = 1; } else { - use_bitmap = (ratio >= 2); + use_bitmap = (ratio >= 2); } ce_protect(&CT_taskclass); - retry: + retry: if (use_bitmap == 0) { // go through it in one walk read_lock(&tasklist_lock); - for ( i=0 ; icore)) return; - down(&async_serializer); // protect again race condition - - - TC_DEBUG("start 
%p:%s:%d:%d\n",cls,cls->core.name, - atomic_read(&cls->core.refcnt),atomic_read(&cls->core.hnode.parent->refcnt)); - // If no CE registered for this classtype, following will be needed repeatedly; - ce_regd = class_core(cls)->classtype->ce_regd; + down(&async_serializer); // protect again race condition + TC_DEBUG("start %p:%s:%d:%d\n", cls, cls->core.name, + atomic_read(&cls->core.refcnt), + atomic_read(&cls->core.hnode.parent->refcnt)); + // If no CE registered for this classtype, following will be needed + // repeatedly; + ce_regd = class_core(cls)->classtype->ce_regd; cnode = &(class_core(cls)->hnode); parcls = class_type(ckrm_task_class_t, cnode->parent); -next_task: + next_task: class_lock(class_core(cls)); if (!list_empty(&class_core(cls)->objlist)) { struct ckrm_task_class *newcls = NULL; - struct task_struct *tsk = - list_entry(class_core(cls)->objlist.next, - struct task_struct, taskclass_link); - + struct task_struct *tsk = + list_entry(class_core(cls)->objlist.next, + struct task_struct, taskclass_link); + get_task_struct(tsk); class_unlock(class_core(cls)); if (ce_regd) { - CE_CLASSIFY_RET(newcls,&CT_taskclass,CKRM_EVENT_RECLASSIFY,tsk); + CE_CLASSIFY_RET(newcls, &CT_taskclass, + CKRM_EVENT_RECLASSIFY, tsk); if (cls == newcls) { // don't allow reclassifying to the same class - // as we are in the process of cleaning up this class - ckrm_core_drop(class_core(newcls)); // to compensate CE's grab + // as we are in the process of cleaning up + // this class + + // compensate CE's grab + ckrm_core_drop(class_core(newcls)); newcls = NULL; } } @@ -575,27 +575,27 @@ next_task: num++; goto next_task; } - TC_DEBUG("stop %p:%s:%d:%d %d\n",cls,cls->core.name, - atomic_read(&cls->core.refcnt),atomic_read(&cls->core.hnode.parent->refcnt),num); + TC_DEBUG("stop %p:%s:%d:%d %d\n", cls, cls->core.name, + atomic_read(&cls->core.refcnt), + atomic_read(&cls->core.hnode.parent->refcnt), num); class_unlock(class_core(cls)); ckrm_core_drop(class_core(cls)); up(&async_serializer); - return ; + return; } /* * Change the core class of the given task. 
*/ -int -ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) +int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) { struct task_struct *tsk; if (!ckrm_validate_and_grab_core(class_core(cls))) - return - EINVAL; + return -EINVAL; read_lock(&tasklist_lock); if ((tsk = find_task_by_pid(pid)) == NULL) { @@ -605,39 +605,47 @@ ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) } get_task_struct(tsk); read_unlock(&tasklist_lock); - - down(&async_serializer); // protect again race condition - + + /* Check permissions */ + if ((!capable(CAP_SYS_NICE)) && + (!capable(CAP_SYS_RESOURCE)) && (current->user != tsk->user)) { + ckrm_core_drop(class_core(cls)); + put_task_struct(tsk); + return -EPERM; + } + + down(&async_serializer); // protect again race condition + ce_protect(&CT_taskclass); ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL); ce_release(&CT_taskclass); put_task_struct(tsk); - + up(&async_serializer); return 0; } -static struct ckrm_core_class * -ckrm_alloc_task_class(struct ckrm_core_class *parent, const char *name) +static struct ckrm_core_class *ckrm_alloc_task_class(struct ckrm_core_class + *parent, const char *name) { struct ckrm_task_class *taskcls; taskcls = kmalloc(sizeof(struct ckrm_task_class), GFP_KERNEL); - if (taskcls == NULL) + if (taskcls == NULL) return NULL; + memset(taskcls, 0, sizeof(struct ckrm_task_class)); - ckrm_init_core_class(&CT_taskclass, - class_core(taskcls),parent,name); + ckrm_init_core_class(&CT_taskclass, class_core(taskcls), parent, name); ce_protect(&CT_taskclass); if (CT_taskclass.ce_cb_active && CT_taskclass.ce_callbacks.class_add) - (*CT_taskclass.ce_callbacks.class_add)(name,taskcls); + (*CT_taskclass.ce_callbacks.class_add) (name, taskcls, + CT_taskclass.typeID); ce_release(&CT_taskclass); return class_core(taskcls); } -static int -ckrm_free_task_class(struct ckrm_core_class *core) +static int ckrm_free_task_class(struct ckrm_core_class *core) { struct ckrm_task_class *taskcls; @@ -648,82 +656,79 @@ ckrm_free_task_class(struct ckrm_core_class *core) if (core == core->classtype->default_class) { // reset the name tag core->name = dflt_taskclass_name; - return 0; + return 0; } - TC_DEBUG("%p:%s:%d\n",core,core->name,atomic_read(&core->refcnt)); + TC_DEBUG("%p:%s:%d\n", core, core->name, atomic_read(&core->refcnt)); taskcls = class_type(struct ckrm_task_class, core); ce_protect(&CT_taskclass); if (CT_taskclass.ce_cb_active && CT_taskclass.ce_callbacks.class_delete) - (*CT_taskclass.ce_callbacks.class_delete)(core->name,taskcls); - ckrm_reclassify_class_tasks( taskcls ); + (*CT_taskclass.ce_callbacks.class_delete) (core->name, taskcls, + CT_taskclass.typeID); + ckrm_reclassify_class_tasks(taskcls); ce_release(&CT_taskclass); - ckrm_release_core_class(core); // Hubertus .... could just drop the class .. error message + ckrm_release_core_class(core); + // Hubertus .... could just drop the class .. error message return 0; } - -void __init -ckrm_meta_init_taskclass(void) +void __init ckrm_meta_init_taskclass(void) { - printk("...... Initializing ClassType<%s> ........\n",CT_taskclass.name); + printk("...... 
Initializing ClassType<%s> ........\n", + CT_taskclass.name); // intialize the default class ckrm_init_core_class(&CT_taskclass, class_core(&taskclass_dflt_class), - NULL,dflt_taskclass_name); + NULL, dflt_taskclass_name); // register classtype and initialize default task class ckrm_register_classtype(&CT_taskclass); ckrm_register_event_set(taskclass_events_callbacks); - // note registeration of all resource controllers will be done later dynamically - // as these are specified as modules + // note registeration of all resource controllers will be done + // later dynamically as these are specified as modules } - - -static int -tc_show_members(struct ckrm_core_class *core, struct seq_file *seq) +static int tc_show_members(struct ckrm_core_class *core, struct seq_file *seq) { struct list_head *lh; struct task_struct *tsk; class_lock(core); - list_for_each(lh, &core->objlist) { + list_for_each(lh, &core->objlist) { tsk = container_of(lh, struct task_struct, taskclass_link); - seq_printf(seq,"%ld\n", (long)tsk->pid); + seq_printf(seq, "%ld\n", (long)tsk->pid); } class_unlock(core); return 0; } -static int -tc_forced_reclassify(struct ckrm_core_class *target,const char *obj) -{ +static int tc_forced_reclassify(struct ckrm_core_class *target, const char *obj) +{ pid_t pid; int rc = -EINVAL; - pid = (pid_t) simple_strtoul(obj,NULL,10); + pid = (pid_t) simple_strtoul(obj, NULL, 10); if (pid > 0) { rc = ckrm_forced_reclassify_pid(pid, - class_type(ckrm_task_class_t,target)); + class_type(ckrm_task_class_t, + target)); } return rc; -} - +} + #if 1 -/*************************************************************************************** +/****************************************************************************** * Debugging Task Classes: Utility functions - **************************************************************************************/ + ******************************************************************************/ -void -check_tasklist_sanity(struct ckrm_task_class *cls) +void check_tasklist_sanity(struct ckrm_task_class *cls) { struct ckrm_core_class *core = class_core(cls); struct list_head *lh1, *lh2; @@ -734,35 +739,38 @@ check_tasklist_sanity(struct ckrm_task_class *cls) if (list_empty(&core->objlist)) { class_lock(core); printk("check_tasklist_sanity: class %s empty list\n", - core->name); + core->name); return; } list_for_each_safe(lh1, lh2, &core->objlist) { - struct task_struct *tsk = container_of(lh1, struct task_struct, taskclass_link); + struct task_struct *tsk = + container_of(lh1, struct task_struct, + taskclass_link); if (count++ > 20000) { printk("list is CORRUPTED\n"); break; } if (tsk->taskclass != cls) { const char *tclsname; - tclsname = (tsk->taskclass) ? class_core(tsk->taskclass)->name - : "NULL"; - printk("sanity: task %s:%d has ckrm_core |%s| but in list |%s|\n", - tsk->comm,tsk->pid,tclsname,core->name); + tclsname = (tsk->taskclass) ? 
+ class_core(tsk->taskclass)->name:"NULL"; + printk("sanity: task %s:%d has ckrm_core " + "|%s| but in list |%s|\n", tsk->comm, + tsk->pid, tclsname, core->name); } } class_unlock(core); } } -void -ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) +void ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) { struct task_struct *proc, *thread; int count = 0; printk("Analyze Error <%s> %d\n", - class_core(tskcls)->name,atomic_read(&(class_core(tskcls)->refcnt))); + class_core(tskcls)->name, + atomic_read(&(class_core(tskcls)->refcnt))); read_lock(&tasklist_lock); class_lock(class_core(tskcls)); @@ -770,16 +778,19 @@ ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) count += (tskcls == thread->taskclass); if ((thread->taskclass == tskcls) || (tskcls == NULL)) { const char *tclsname; - tclsname = (thread->taskclass) ? class_core(thread->taskclass)->name : "NULL"; - printk("%d thread=<%s:%d> -> <%s> <%lx>\n", - count,thread->comm,thread->pid,tclsname, thread->flags & PF_EXITING); + tclsname = (thread->taskclass) ? + class_core(thread->taskclass)->name :"NULL"; + printk("%d thread=<%s:%d> -> <%s> <%lx>\n", count, + thread->comm, thread->pid, tclsname, + thread->flags & PF_EXITING); } } while_each_thread(proc, thread); class_unlock(class_core(tskcls)); read_unlock(&tasklist_lock); printk("End Analyze Error <%s> %d\n", - class_core(tskcls)->name,atomic_read(&(class_core(tskcls)->refcnt))); -} + class_core(tskcls)->name, + atomic_read(&(class_core(tskcls)->refcnt))); +} #endif diff --git a/kernel/ckrm/ckrmutils.c b/kernel/ckrm/ckrmutils.c index c0d873cb4..d54e7b563 100644 --- a/kernel/ckrm/ckrmutils.c +++ b/kernel/ckrm/ckrmutils.c @@ -27,12 +27,11 @@ #include #include -int -get_exe_path_name(struct task_struct *tsk, char *buf, int buflen) +int get_exe_path_name(struct task_struct *tsk, char *buf, int buflen) { - struct vm_area_struct * vma; + struct vm_area_struct *vma; struct vfsmount *mnt; - struct mm_struct * mm = get_task_mm(tsk); + struct mm_struct *mm = get_task_mm(tsk); struct dentry *dentry; char *lname; int rc = 0; @@ -45,15 +44,14 @@ get_exe_path_name(struct task_struct *tsk, char *buf, int buflen) down_read(&mm->mmap_sem); vma = mm->mmap; while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { + if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { dentry = dget(vma->vm_file->f_dentry); mnt = mntget(vma->vm_file->f_vfsmnt); lname = d_path(dentry, mnt, buf, buflen); - if (! IS_ERR(lname)) { + if (!IS_ERR(lname)) { strncpy(buf, lname, strlen(lname) + 1); } else { - rc = (int) PTR_ERR(lname); + rc = (int)PTR_ERR(lname); } mntput(mnt); dput(dentry); @@ -66,14 +64,12 @@ get_exe_path_name(struct task_struct *tsk, char *buf, int buflen) return rc; } - /* * must be called with cnt_lock of parres held * Caller is responsible for making sure that the new guarantee doesn't * overflow parent's total guarantee. 
*/ -void -child_guarantee_changed(struct ckrm_shares *parent, int cur, int new) +void child_guarantee_changed(struct ckrm_shares *parent, int cur, int new) { if (new == cur || !parent) { return; @@ -92,8 +88,7 @@ child_guarantee_changed(struct ckrm_shares *parent, int cur, int new) * Caller is responsible for making sure that the new limit is not more * than parent's max_limit */ -void -child_maxlimit_changed(struct ckrm_shares *parent, int new_limit) +void child_maxlimit_changed(struct ckrm_shares *parent, int new_limit) { if (parent && parent->cur_max_limit < new_limit) { parent->cur_max_limit = new_limit; @@ -107,7 +102,7 @@ child_maxlimit_changed(struct ckrm_shares *parent, int new_limit) */ int set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, - struct ckrm_shares *par) + struct ckrm_shares *par) { int rc = -EINVAL; int cur_usage_guar = cur->total_guarantee - cur->unused_guarantee; @@ -117,54 +112,51 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, if (new->total_guarantee <= CKRM_SHARE_DONTCARE) { goto set_share_err; } else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) { - ;// do nothing + ; // do nothing } else if (cur_usage_guar > new->total_guarantee) { goto set_share_err; } - // Check max_limit for correctness if (new->max_limit <= CKRM_SHARE_DONTCARE) { goto set_share_err; } else if (new->max_limit == CKRM_SHARE_UNCHANGED) { - ; // do nothing + ; // do nothing } else if (cur->cur_max_limit > new->max_limit) { goto set_share_err; } - // Check my_guarantee for correctness if (new->my_guarantee == CKRM_SHARE_UNCHANGED) { - ; // do nothing + ; // do nothing } else if (new->my_guarantee == CKRM_SHARE_DONTCARE) { - ; // do nothing + ; // do nothing } else if (par && increase_by > par->unused_guarantee) { goto set_share_err; } - // Check my_limit for correctness if (new->my_limit == CKRM_SHARE_UNCHANGED) { - ; // do nothing + ; // do nothing } else if (new->my_limit == CKRM_SHARE_DONTCARE) { - ; // do nothing + ; // do nothing } else if (par && new->my_limit > par->max_limit) { // I can't get more limit than my parent's limit goto set_share_err; - - } + } // make sure guarantee is lesser than limit if (new->my_limit == CKRM_SHARE_DONTCARE) { - ; // do nothing + ; // do nothing } else if (new->my_limit == CKRM_SHARE_UNCHANGED) { if (new->my_guarantee == CKRM_SHARE_DONTCARE) { - ; // do nothing + ; // do nothing } else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) { - ; // do nothing earlier setting would 've taken care of it + ; // do nothing earlier setting would've + // taken care of it } else if (new->my_guarantee > cur->my_limit) { goto set_share_err; } - } else { // new->my_limit has a valid value + } else { // new->my_limit has a valid value if (new->my_guarantee == CKRM_SHARE_DONTCARE) { - ; // do nothing + ; // do nothing } else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) { if (cur->my_guarantee > new->my_limit) { goto set_share_err; @@ -176,7 +168,7 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, if (new->my_guarantee != CKRM_SHARE_UNCHANGED) { child_guarantee_changed(par, cur->my_guarantee, - new->my_guarantee); + new->my_guarantee); cur->my_guarantee = new->my_guarantee; } @@ -195,7 +187,7 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, } rc = 0; -set_share_err: + set_share_err: return rc; } @@ -203,5 +195,3 @@ EXPORT_SYMBOL(get_exe_path_name); EXPORT_SYMBOL(child_guarantee_changed); EXPORT_SYMBOL(child_maxlimit_changed); EXPORT_SYMBOL(set_shares); - - diff --git a/kernel/exit.c b/kernel/exit.c index 
3a51fec0a..f53583e2b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include @@ -639,6 +641,8 @@ static void exit_notify(struct task_struct *tsk) int state; struct task_struct *t; + ckrm_cb_exit(tsk); + if (signal_pending(tsk) && !tsk->signal->group_exit && !thread_group_empty(tsk)) { /* @@ -812,6 +816,9 @@ asmlinkage NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + numtasks_put_ref(tsk->taskclass); +#endif exit_notify(tsk); schedule(); BUG(); diff --git a/kernel/fork.c b/kernel/fork.c index 0cbc27f48..4af488db0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -260,6 +262,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->thread_info = ti; ti->task = tsk; + ckrm_cb_newtask(tsk); /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); return tsk; @@ -927,6 +930,7 @@ struct task_struct *copy_process(unsigned long clone_flags, if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; + init_delays(p); p->did_exec = 0; copy_flags(clone_flags, p); if (clone_flags & CLONE_IDLETASK) @@ -1181,6 +1185,12 @@ long do_fork(unsigned long clone_flags, clone_flags |= CLONE_PTRACE; } +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + if (numtasks_get_ref(current->taskclass, 0) == 0) { + return -ENOMEM; + } +#endif + p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr); /* * Do this prior waking up the new thread - the thread pointer @@ -1191,6 +1201,8 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; + ckrm_cb_fork(p); + if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); @@ -1246,6 +1258,10 @@ long do_fork(unsigned long clone_flags, * COW overhead when the child exec()s afterwards. 
*/ set_need_resched(); + } else { +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + numtasks_put_ref(current->taskclass); +#endif } return pid; } diff --git a/kernel/sched.c b/kernel/sched.c index 10c2581f4..b5d3eb51d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2295,9 +2295,12 @@ switch_tasks: if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev))) prev->interactive_credit--; } + add_delay_ts(prev,runcpu_total,prev->timestamp,now); prev->timestamp = now; if (likely(prev != next)) { + add_delay_ts(next,waitcpu_total,next->timestamp,now); + inc_delay(next,runs); next->timestamp = now; rq->nr_switches++; rq->curr = next; @@ -3050,10 +3053,13 @@ EXPORT_SYMBOL(yield); void __sched io_schedule(void) { struct runqueue *rq = this_rq(); + def_delay_var(dstart); + start_delay_set(dstart,PF_IOWAIT); atomic_inc(&rq->nr_iowait); schedule(); atomic_dec(&rq->nr_iowait); + add_io_delay(dstart); } EXPORT_SYMBOL(io_schedule); @@ -3062,10 +3068,13 @@ long __sched io_schedule_timeout(long timeout) { struct runqueue *rq = this_rq(); long ret; + def_delay_var(dstart); + start_delay_set(dstart,PF_IOWAIT); atomic_inc(&rq->nr_iowait); ret = schedule_timeout(timeout); atomic_dec(&rq->nr_iowait); + add_io_delay(dstart); return ret; } @@ -4029,3 +4038,12 @@ void __sched __preempt_write_lock(rwlock_t *lock) EXPORT_SYMBOL(__preempt_write_lock); #endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */ + +#ifdef CONFIG_DELAY_ACCT +int task_running_sys(struct task_struct *p) +{ + return task_running(task_rq(p),p); +} +EXPORT_SYMBOL(task_running_sys); +#endif + diff --git a/kernel/sys.c b/kernel/sys.c index 1bbc66a60..00026ea2c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -601,6 +602,9 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->fsgid = new_egid; current->egid = new_egid; current->gid = new_rgid; + + ckrm_cb_gid(); + return 0; } @@ -638,6 +642,9 @@ asmlinkage long sys_setgid(gid_t gid) } else return -EPERM; + + ckrm_cb_gid(); + return 0; } @@ -726,6 +733,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->suid = current->euid; current->fsuid = current->euid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -771,6 +780,8 @@ asmlinkage long sys_setuid(uid_t uid) current->fsuid = current->euid = uid; current->suid = new_suid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -817,6 +828,8 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (suid != (uid_t) -1) current->suid = suid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -866,6 +879,9 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->gid = rgid; if (sgid != (gid_t) -1) current->sgid = sgid; + + ckrm_cb_gid(); + return 0; } diff --git a/mm/memory.c b/mm/memory.c index 3df1f05e7..ddf7049ff 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1670,15 +1670,20 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. 
*/ + set_delay_flag(current,PF_MEMIO); spin_lock(&mm->page_table_lock); pmd = pmd_alloc(mm, pgd, address); if (pmd) { pte_t * pte = pte_alloc_map(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte, pmd); + if (pte) { + int rc = handle_pte_fault(mm, vma, address, write_access, pte, pmd); + clear_delay_flag(current,PF_MEMIO); + return rc; + } } spin_unlock(&mm->page_table_lock); + clear_delay_flag(current,PF_MEMIO); return VM_FAULT_OOM; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0bb1b8808..d59797c7e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -360,5 +360,28 @@ config INET_IPCOMP If unsure, say Y. +config ACCEPT_QUEUES + bool "IP: TCP Multiple accept queues support" + depends on INET && NETFILTER + ---help--- + Support multiple accept queues per listening socket. If you say Y + here, multiple accept queues will be configured per listening + socket. + + Each queue is mapped to a priority class. Incoming connection + requests can be classified (see iptables(8), MARK target), depending + on the packet's src/dest address or other parameters, into one of + the priority classes. The requests are then queued to the relevant + accept queue. + + Each of the queues can be assigned a weight. The accept()ance + of packets is then scheduled in accordance with the weight + assigned to the priority class. + + Be sure to enable "Network packet filtering" if you wish + to use this feature. + + If unsure, say N. + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 260cb4032..443669cb8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -257,6 +257,10 @@ #include #include +#ifdef CONFIG_CKRM +#include +#endif + #include #include #include @@ -536,13 +540,20 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { +#ifdef CONFIG_ACCEPT_QUEUES + int i = 0; +#endif struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_listen_opt *lopt; sk->sk_max_ack_backlog = 0; sk->sk_ack_backlog = 0; +#ifdef CONFIG_ACCEPT_QUEUES + tp->accept_queue = NULL; +#else tp->accept_queue = tp->accept_queue_tail = NULL; +#endif tp->syn_wait_lock = RW_LOCK_UNLOCKED; tcp_delack_init(tp); @@ -556,6 +567,23 @@ int tcp_listen_start(struct sock *sk) break; get_random_bytes(&lopt->hash_rnd, 4); +#ifdef CONFIG_ACCEPT_QUEUES + tp->class_index = 0; + for (i=0; i < NUM_ACCEPT_QUEUES; i++) { + tp->acceptq[i].aq_tail = NULL; + tp->acceptq[i].aq_head = NULL; + tp->acceptq[i].aq_wait_time = 0; + tp->acceptq[i].aq_qcount = 0; + tp->acceptq[i].aq_count = 0; + if (i == 0) { + tp->acceptq[i].aq_ratio = 1; + } + else { + tp->acceptq[i].aq_ratio = 0; + } + } +#endif + write_lock_bh(&tp->syn_wait_lock); tp->listen_opt = lopt; write_unlock_bh(&tp->syn_wait_lock); @@ -572,6 +600,10 @@ int tcp_listen_start(struct sock *sk) sk_dst_reset(sk); sk->sk_prot->hash(sk); +#ifdef CONFIG_CKRM + ckrm_cb_listen_start(sk); +#endif + return 0; } @@ -602,7 +634,18 @@ static void tcp_listen_stop (struct sock *sk) write_lock_bh(&tp->syn_wait_lock); tp->listen_opt = NULL; write_unlock_bh(&tp->syn_wait_lock); - tp->accept_queue = tp->accept_queue_tail = NULL; + +#ifdef CONFIG_CKRM + ckrm_cb_listen_stop(sk); +#endif + +#ifdef CONFIG_ACCEPT_QUEUES + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) + tp->acceptq[i].aq_head = tp->acceptq[i].aq_tail = NULL; +#else + tp->accept_queue_tail = NULL; +#endif + tp->accept_queue = NULL; if (lopt->qlen) { for (i = 0; i < TCP_SYNQ_HSIZE; i++) { @@ -648,7 +691,11 @@ static void 
tcp_listen_stop (struct sock *sk) local_bh_enable(); sock_put(child); +#ifdef CONFIG_ACCEPT_QUEUES + sk_acceptq_removed(sk, req->acceptq_class); +#else sk_acceptq_removed(sk); +#endif tcp_openreq_fastfree(req); } BUG_TRAP(!sk->sk_ack_backlog); @@ -2196,6 +2243,10 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) struct open_request *req; struct sock *newsk; int error; +#ifdef CONFIG_ACCEPT_QUEUES + int prev_class = 0; + int first; +#endif lock_sock(sk); @@ -2209,7 +2260,6 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) /* Find already established connection */ if (!tp->accept_queue) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) @@ -2220,12 +2270,46 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out; } +#ifndef CONFIG_ACCEPT_QUEUES req = tp->accept_queue; if ((tp->accept_queue = req->dl_next) == NULL) tp->accept_queue_tail = NULL; + newsk = req->sk; + sk_acceptq_removed(sk); +#else + first = tp->class_index; + /* We should always have request queued here. The accept_queue + * is already checked for NULL above. + */ + while(!tp->acceptq[first].aq_head) { + tp->acceptq[first].aq_cnt = 0; + first = (first+1) & ~NUM_ACCEPT_QUEUES; + } + req = tp->acceptq[first].aq_head; + tp->acceptq[first].aq_qcount--; + tp->acceptq[first].aq_count++; + tp->acceptq[first].aq_wait_time+=(jiffies - req->acceptq_time_stamp); + for (prev_class= first-1 ; prev_class >=0; prev_class--) + if (tp->acceptq[prev_class].aq_tail) + break; + if (prev_class>=0) + tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next; + else + tp->accept_queue = req->dl_next; + + if (req == tp->acceptq[first].aq_tail) + tp->acceptq[first].aq_head = tp->acceptq[first].aq_tail = NULL; + else + tp->acceptq[first].aq_head = req->dl_next; + + if((++(tp->acceptq[first].aq_cnt)) >= tp->acceptq[first].aq_ratio){ + tp->acceptq[first].aq_cnt = 0; + tp->class_index = ++first & (NUM_ACCEPT_QUEUES-1); + } newsk = req->sk; - sk_acceptq_removed(sk); + sk_acceptq_removed(sk, req->acceptq_class); +#endif tcp_openreq_fastfree(req); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); release_sock(sk); @@ -2237,6 +2321,7 @@ out: return NULL; } + /* * Socket option code for TCP. */ @@ -2395,7 +2480,54 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } } break; + +#ifdef CONFIG_ACCEPT_QUEUES + case TCP_ACCEPTQ_SHARE: +#ifdef CONFIG_CKRM + // If CKRM is set then the shares are set through rcfs. + // Get shares will still succeed. + err = -EOPNOTSUPP; + break; +#else + { + char share_wt[NUM_ACCEPT_QUEUES]; + int i,j; + if (sk->sk_state != TCP_LISTEN) + return -EOPNOTSUPP; + + if (copy_from_user(share_wt,optval, optlen)) { + err = -EFAULT; + break; + } + j = 0; + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) { + if (share_wt[i]) { + if (!j) + j = share_wt[i]; + else if (share_wt[i] < j) { + j = share_wt[i]; + } + } + else + tp->acceptq[i].aq_ratio = 0; + + } + if (j == 0) { + /* Class 0 is always valid. If nothing is + * specified set class 0 as 1. 
+ */ + share_wt[0] = 1; + j = 1; + } + for (i=0; i < NUM_ACCEPT_QUEUES; i++) { + tp->acceptq[i].aq_ratio = share_wt[i]/j; + tp->acceptq[i].aq_cnt = 0; + } + } + break; +#endif +#endif default: err = -ENOPROTOOPT; break; @@ -2476,6 +2608,40 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, case TCP_QUICKACK: val = !tp->ack.pingpong; break; + +#ifdef CONFIG_ACCEPT_QUEUES + case TCP_ACCEPTQ_SHARE: + { + struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES]; + int i; + + if (sk->sk_state != TCP_LISTEN) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + memset(tinfo, 0, sizeof(tinfo)); + + for(i=0; i < NUM_ACCEPT_QUEUES; i++) { + tinfo[i].acceptq_wait_time = + jiffies_to_msecs(tp->acceptq[i].aq_wait_time); + tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount; + tinfo[i].acceptq_count = tp->acceptq[i].aq_count; + tinfo[i].acceptq_shares=tp->acceptq[i].aq_ratio; + } + + len = min_t(unsigned int, len, sizeof(tinfo)); + if (put_user(len, optlen)) + return -EFAULT; + + if (copy_to_user(optval, (char *)tinfo, len)) + return -EFAULT; + + return 0; + } + break; +#endif default: return -ENOPROTOOPT; }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e5d1364dd..20e2fa7d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -458,7 +458,6 @@ inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, head = &tcp_listening_hash[tcp_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_opt *inet = inet_sk((sk = __sk_head(head))); - if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && @@ -916,7 +915,11 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) lopt->syn_table[h] = req; write_unlock(&tp->syn_wait_lock); +#ifdef CONFIG_ACCEPT_QUEUES + tcp_synq_added(sk, req); +#else tcp_synq_added(sk); +#endif } @@ -1413,6 +1416,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __u32 daddr = skb->nh.iph->daddr; __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; +#ifdef CONFIG_ACCEPT_QUEUES + int class = 0; +#endif #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1437,12 +1443,31 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } +#ifdef CONFIG_ACCEPT_QUEUES + class = (skb->nfmark <= 0) ? 0 : + ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark); + /* + * Accept only if the class has shares set or if the default class + * i.e. class 0 has shares + */ + if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) { + if (tcp_sk(sk)->acceptq[0].aq_ratio) + class = 0; + else + goto drop; + } +#endif + /* Accept backlog is full. If we have already queued enough * of warm entries in syn queue, drop request. It is better than * clogging syn queue with openreqs with exponentially increasing * timeout. 
*/ +#ifdef CONFIG_ACCEPT_QUEUES + if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1) +#else if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) +#endif goto drop; req = tcp_openreq_alloc(); @@ -1472,7 +1497,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tp.tstamp_ok = tp.saw_tstamp; tcp_openreq_init(req, &tp, skb); - +#ifdef CONFIG_ACCEPT_QUEUES + req->acceptq_class = class; + req->acceptq_time_stamp = jiffies; +#endif req->af.v4_req.loc_addr = daddr; req->af.v4_req.rmt_addr = saddr; req->af.v4_req.opt = tcp_v4_save_options(sk, skb); @@ -1567,7 +1595,11 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct tcp_opt *newtp; struct sock *newsk; +#ifdef CONFIG_ACCEPT_QUEUES + if (sk_acceptq_is_full(sk, req->acceptq_class)) +#else if (sk_acceptq_is_full(sk)) +#endif goto exit_overflow; if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d4c0d84d1..6a4578035 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -790,7 +790,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->num_sacks = 0; newtp->urg_data = 0; newtp->listen_opt = NULL; +#ifdef CONFIG_ACCEPT_QUEUES + newtp->accept_queue = NULL; + memset(newtp->acceptq, 0,sizeof(newtp->acceptq)); + newtp->class_index = 0; + +#else newtp->accept_queue = newtp->accept_queue_tail = NULL; +#endif /* Deinitialize syn_wait_lock to trap illegal accesses. */ memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cab2678b1..bfef4c1b0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -489,7 +489,16 @@ static void tcp_synack_timer(struct sock *sk) * ones are about to clog our table. 
*/ if (lopt->qlen>>(lopt->max_qlen_log-1)) { +#ifdef CONFIG_ACCEPT_QUEUES + int young = 0; + + for(i=0; i < NUM_ACCEPT_QUEUES; i++) + young += lopt->qlen_young[i]; + + young <<= 1; +#else int young = (lopt->qlen_young<<1); +#endif while (thresh > 2) { if (lopt->qlen < young) @@ -515,9 +524,12 @@ static void tcp_synack_timer(struct sock *sk) unsigned long timeo; if (req->retrans++ == 0) - lopt->qlen_young--; - timeo = min((TCP_TIMEOUT_INIT << req->retrans), - TCP_RTO_MAX); +#ifdef CONFIG_ACCEPT_QUEUES + lopt->qlen_young[req->acceptq_class]--; +#else + lopt->qlen_young--; +#endif + timeo = min((TCP_TIMEOUT_INIT << req->retrans), TCP_RTO_MAX); req->expires = now + timeo; reqp = &req->dl_next; continue; @@ -529,7 +541,11 @@ static void tcp_synack_timer(struct sock *sk) write_unlock(&tp->syn_wait_lock); lopt->qlen--; if (req->retrans == 0) - lopt->qlen_young--; +#ifdef CONFIG_ACCEPT_QUEUES + lopt->qlen_young[req->acceptq_class]--; +#else + lopt->qlen_young--; +#endif tcp_openreq_free(req); continue; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2668fc87c..c9d44e39d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1153,7 +1153,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req) lopt->syn_table[h] = req; write_unlock(&tp->syn_wait_lock); +#ifdef CONFIG_ACCEPT_QUEUES + tcp_synq_added(sk, req); +#else tcp_synq_added(sk); +#endif } @@ -1166,6 +1170,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_opt tmptp, *tp = tcp_sk(sk); struct open_request *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; +#ifdef CONFIG_ACCEPT_QUEUES + int class = 0; +#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1173,6 +1180,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; + /* * There are no SYN attacks on IPv6, yet... */ @@ -1182,9 +1190,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } +#ifdef CONFIG_ACCEPT_QUEUES + class = (skb->nfmark <= 0) ? 0 : + ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark); + /* + * Accept only if the class has shares set or if the default class + * i.e. class 0 has shares + */ + if (!(tcp_sk(sk)->acceptq[class].aq_ratio)) { + if (tcp_sk(sk)->acceptq[0].aq_ratio) + class = 0; + else + goto drop; + } + + if (sk_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1) +#else if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) +#endif goto drop; + req = tcp_openreq_alloc(); if (req == NULL) goto drop; @@ -1197,7 +1223,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmptp.tstamp_ok = tmptp.saw_tstamp; tcp_openreq_init(req, &tmptp, skb); - +#ifdef CONFIG_ACCEPT_QUEUES + req->acceptq_class = class; + req->acceptq_time_stamp = jiffies; +#endif req->class = &or_ipv6; ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); @@ -1299,7 +1328,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, opt = np->opt; +#ifdef CONFIG_ACCEPT_QUEUES + if (sk_acceptq_is_full(sk, req->acceptq_class)) +#else if (sk_acceptq_is_full(sk)) +#endif goto out_overflow; if (np->rxopt.bits.srcrt == 2 && -- 2.47.0
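A few userspace sketches follow for reviewers who want to see the intended behaviour of the accept-queue changes outside the kernel.

First, the weighted round-robin class selection that tcp_accept() performs under CONFIG_ACCEPT_QUEUES: established connections are handed out per class, with aq_ratio consecutive accepts taken from a class before class_index rotates onward. The model below only borrows the field names from the patch (aq_head, aq_ratio, aq_cnt, class_index); struct queue_model, pick_class() and the sample weights are illustrative scaffolding, not kernel code. The wrap here uses the power-of-two mask (NUM_ACCEPT_QUEUES - 1) that the patch applies when advancing class_index; the skip loop in tcp_accept() itself masks with ~NUM_ACCEPT_QUEUES.

/*
 * Userspace model of the weighted round-robin class selection done by
 * tcp_accept() under CONFIG_ACCEPT_QUEUES.  Field names mirror the patch;
 * everything else is illustrative scaffolding.
 */
#include <stdio.h>

#define NUM_ACCEPT_QUEUES 8

struct queue_model {
	int aq_head;	/* nonzero while the class has pending requests */
	int aq_ratio;	/* share weight relative to the smallest share  */
	int aq_cnt;	/* accepts served from this class this round    */
};

static struct queue_model q[NUM_ACCEPT_QUEUES];
static int class_index;

/* Pick the class the next accept() is served from.  Assumes at least one
 * class is backlogged, as tcp_accept() does when accept_queue is set. */
static int pick_class(void)
{
	int first = class_index;

	/* Skip classes with nothing queued, wrapping modulo the queue count. */
	while (!q[first].aq_head) {
		q[first].aq_cnt = 0;
		first = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
	}

	/* After aq_ratio consecutive accepts, rotate to the next class. */
	if (++q[first].aq_cnt >= q[first].aq_ratio) {
		q[first].aq_cnt = 0;
		class_index = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
	}
	return first;
}

int main(void)
{
	int i;

	q[1].aq_head = q[2].aq_head = 1;	/* two permanently busy classes */
	q[1].aq_ratio = 3;			/* class 1 gets a 3:1 share     */
	q[2].aq_ratio = 1;

	for (i = 0; i < 8; i++)
		printf("accept %d served from class %d\n", i, pick_class());
	return 0;
}

With both classes kept busy the model serves classes 1,1,1,2,1,1,1,2, i.e. the 3:1 split the aq_ratio rotation is meant to give while every class stays backlogged.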
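Second, the share normalization in the TCP_ACCEPTQ_SHARE setsockopt case: the caller passes one byte of weight per class, every weight is divided (integer division) by the smallest nonzero weight to form aq_ratio, and if all weights are zero class 0 is forced to weight 1 so the default class always stays usable. A minimal sketch of just that arithmetic, with made-up sample weights:

#include <stdio.h>

#define NUM_ACCEPT_QUEUES 8

/* Normalize share weights the way the TCP_ACCEPTQ_SHARE setsockopt does:
 * each weight is divided by the smallest nonzero weight; zero weights
 * keep a ratio of 0 (the class receives no accepts). */
static void normalize_shares(char share_wt[NUM_ACCEPT_QUEUES],
			     int aq_ratio[NUM_ACCEPT_QUEUES])
{
	int i, j = 0;

	/* j becomes the smallest nonzero weight. */
	for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
		if (share_wt[i] && (!j || share_wt[i] < j))
			j = share_wt[i];

	/* Class 0 is always valid: if nothing was specified, give it weight 1. */
	if (!j) {
		share_wt[0] = 1;
		j = 1;
	}

	for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
		aq_ratio[i] = share_wt[i] / j;
}

int main(void)
{
	char wt[NUM_ACCEPT_QUEUES] = { 2, 6, 3 };	/* remaining classes: 0 */
	int ratio[NUM_ACCEPT_QUEUES];
	int i;

	normalize_shares(wt, ratio);
	for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
		printf("class %d: weight %d -> ratio %d\n", i, wt[i], ratio[i]);
	return 0;
}

With weights {2, 6, 3} the minimum is 2, giving ratios {1, 3, 1}: the integer division floors 3/2 to 1, so weights that are not multiples of the smallest share lose precision.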
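Third, how a server could drive the two socket options from userspace. This assumes the patched <linux/tcp.h> exports TCP_ACCEPTQ_SHARE, NUM_ACCEPT_QUEUES and struct tcp_acceptq_info to applications; only the option name and the field names used below appear in the patch, the header layout and the error handling are assumptions. Both calls require a socket already in TCP_LISTEN, and when CONFIG_CKRM is enabled the set path returns EOPNOTSUPP because shares are managed through rcfs instead.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>	/* patched header: TCP_ACCEPTQ_SHARE, tcp_acceptq_info */

/* Give class 1 three times the accept share of class 0. */
static int set_shares(int lsk)
{
	char wt[NUM_ACCEPT_QUEUES] = { 1, 3 };	/* remaining classes: 0 */

	return setsockopt(lsk, IPPROTO_TCP, TCP_ACCEPTQ_SHARE,
			  wt, sizeof(wt));
}

/* Print the per-class statistics filled in by the getsockopt case above. */
static void dump_shares(int lsk)
{
	struct tcp_acceptq_info info[NUM_ACCEPT_QUEUES];
	socklen_t len = sizeof(info);
	unsigned int i;

	memset(info, 0, sizeof(info));
	if (getsockopt(lsk, IPPROTO_TCP, TCP_ACCEPTQ_SHARE, info, &len) < 0) {
		perror("getsockopt(TCP_ACCEPTQ_SHARE)");
		return;
	}

	for (i = 0; i < len / sizeof(info[0]); i++)
		printf("class %u: shares %u queued %u accepted %u wait %u ms\n",
		       i, (unsigned)info[i].acceptq_shares,
		       (unsigned)info[i].acceptq_qcount,
		       (unsigned)info[i].acceptq_count,
		       (unsigned)info[i].acceptq_wait_time);
}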
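Fourth, the classification of incoming SYNs in tcp_v4_conn_request() and tcp_v6_conn_request(): the accept class is taken from skb->nfmark (normally set by a netfilter/iptables MARK rule on the listener's port), out-of-range marks fall back to class 0, and a class with no shares configured is only admitted via class 0, otherwise the SYN is dropped. The standalone restatement below uses an illustrative share table and a hypothetical classify() helper:

/*
 * Restatement of the nfmark -> accept-class mapping used by the
 * conn_request paths; classify() and the sample values are illustrative.
 */
#include <stdio.h>

#define NUM_ACCEPT_QUEUES 8

/* aq_ratio per class, as configured via TCP_ACCEPTQ_SHARE (or rcfs). */
static int aq_ratio[NUM_ACCEPT_QUEUES] = { 1, 3, 0, 0, 0, 0, 0, 0 };

/* Returns the accept class for a SYN carrying the given firewall mark,
 * or -1 when the request would be dropped. */
static int classify(long nfmark)
{
	int class;

	/* Marks outside 1..NUM_ACCEPT_QUEUES-1 map to the default class 0. */
	class = (nfmark <= 0) ? 0 :
		((nfmark >= NUM_ACCEPT_QUEUES) ? 0 : (int)nfmark);

	/* Accept only if the class has shares, falling back to class 0. */
	if (!aq_ratio[class]) {
		if (aq_ratio[0])
			class = 0;
		else
			return -1;	/* drop */
	}
	return class;
}

int main(void)
{
	long marks[] = { 0, 1, 2, 9 };
	int i;

	for (i = 0; i < 4; i++)
		printf("nfmark %ld -> class %d\n", marks[i], classify(marks[i]));
	return 0;
}

With these shares, marks 2 and 9 both land in class 0: mark 2 because class 2 has no shares, mark 9 because it is out of range.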
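Finally, the SYN-ACK retransmission pruning in tcp_synack_timer(): with per-class queues, the count of "young" (never retransmitted) requests is summed across qlen_young[] before the usual heuristic lowers the retry threshold as the listen queue fills with old entries. The sketch below restates that computation; TCP_SYNACK_MAX_RETRIES stands in for the tp->syn_retries / sysctl-derived limit, and the sample numbers are made up.

#include <stdio.h>

#define NUM_ACCEPT_QUEUES 8
#define TCP_SYNACK_MAX_RETRIES 5	/* stand-in for the real retry limit */

/* How many SYN-ACK retransmissions a request is allowed before it is
 * dropped, given the total queue length and the per-class young counts.
 * Mirrors the thresh computation in tcp_synack_timer(). */
static int synack_thresh(int qlen, int max_qlen_log,
			 const int qlen_young[NUM_ACCEPT_QUEUES])
{
	int thresh = TCP_SYNACK_MAX_RETRIES;
	int i;

	if (qlen >> (max_qlen_log - 1)) {	/* queue more than half full */
		int young = 0;

		for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
			young += qlen_young[i];
		young <<= 1;

		/* Halve the allowance while old entries dominate the queue. */
		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	return thresh;
}

int main(void)
{
	int young[NUM_ACCEPT_QUEUES] = { 10, 6 };	/* 16 young requests */

	/* 200 queued requests, max_qlen_log = 8 (queue limit 256). */
	printf("thresh = %d\n", synack_thresh(200, 8, young));
	return 0;
}

With 200 of 256 slots used and only 16 young requests, the threshold drops from 5 to 2 retransmissions, so stale entries are expired quickly.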