From b4268b9c7120e92cad30a76386e7b8afef10e49f Mon Sep 17 00:00:00 2001 From: Marc Fiuczynski Date: Thu, 23 Sep 2004 00:19:46 +0000 Subject: [PATCH] ckrm_E16 release candidate 1 merge --- include/linux/ckrm.h | 11 ++- include/linux/sched.h | 34 +++++++-- include/linux/tcp.h | 1 - init/Kconfig | 24 +++--- kernel/ckrm/Makefile | 6 +- kernel/ckrm/ckrm_tc.c | 81 +++++++++----------- kernel/ckrm/rbce/rbcemod.c | 132 ++++++++++++++++----------------- kernel/ckrm/rbce/rbcemod_ext.c | 7 +- kernel/ckrm/rbce/token.c | 23 +++++- kernel/exit.c | 3 - kernel/sched.c | 1 - 11 files changed, 179 insertions(+), 144 deletions(-) diff --git a/include/linux/ckrm.h b/include/linux/ckrm.h index 04f4ec00f..a29bf282a 100644 --- a/include/linux/ckrm.h +++ b/include/linux/ckrm.h @@ -9,10 +9,13 @@ * * Latest version, more details at http://ckrm.sf.net * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
* */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2253b0cc6..ee1bd330d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1249,19 +1249,43 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #define def_delay_var(var) unsigned long long var #define get_delay(tsk,field) ((tsk)->delays.field) -#define delay_value(x) (((unsigned long)(x))/1000) #define start_delay(var) ((var) = sched_clock()) #define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock()) #define inc_delay(tsk,field) (((tsk)->delays.field)++) -#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts))) -#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg)) -static inline void add_io_delay(unsigned long dstart) +/* because of hardware timer drifts in SMPs and task continue on different cpu + * than where the start_ts was taken there is a possibility that + * end_ts < start_ts by some usecs. In this case we ignore the diff + * and add nothing to the total. 
+ */ +#ifdef CONFIG_SMP +#define test_ts_integrity(start_ts,end_ts) (likely((end_ts) > (start_ts))) +#else +#define test_ts_integrity(start_ts,end_ts) (1) +#endif + +#define add_delay_ts(tsk,field,start_ts,end_ts) \ + do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0) + +#define add_delay_clear(tsk,field,start_ts,flg) \ + do { \ + unsigned long long now = sched_clock();\ + add_delay_ts(tsk,field,start_ts,now); \ + clear_delay_flag(tsk,flg); \ + } while (0) + +static inline void add_io_delay(unsigned long long dstart) { struct task_struct * tsk = current; - unsigned long val = delay_value(sched_clock()-dstart); + unsigned long long now = sched_clock(); + unsigned long long val; + + if (test_ts_integrity(dstart,now)) + val = now - dstart; + else + val = 0; if (test_delay_flag(tsk,PF_MEMIO)) { tsk->delays.mem_iowait_total += val; tsk->delays.num_memwaits++; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9cdf6963e..9ed5fac6c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -387,7 +387,6 @@ struct tcp_opt { #ifndef CONFIG_ACCEPT_QUEUES struct open_request *accept_queue_tail; #endif - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/init/Kconfig b/init/Kconfig index b73e02043..26615b43a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -115,6 +115,18 @@ config BSD_PROCESS_ACCT up to the user level program to do useful things with this information. This is generally a good idea, so say Y. +config BSD_PROCESS_ACCT_V3 + bool "BSD Process Accounting version 3 file format" + depends on BSD_PROCESS_ACCT + default n + help + If you say Y here, the process accounting information is written + in a new file format that also logs the process IDs of each + process and its parent. 
Note that this file format is incompatible + with previous v0/v1/v2 file formats, so you will need updated tools + for processing it. A preliminary version of these tools is available + at . + menu "Class Based Kernel Resource Management" config CKRM @@ -239,18 +251,6 @@ config CKRM_CRBCE endmenu -config BSD_PROCESS_ACCT_V3 - bool "BSD Process Accounting version 3 file format" - depends on BSD_PROCESS_ACCT - default n - help - If you say Y here, the process accounting information is written - in a new file format that also logs the process IDs of each - process and it's parent. Note that this file format is incompatible - with previous v0/v1/v2 file formats, so you will need updated tools - for processing it. A preliminary version of these tools is available - at . - config SYSCTL bool "Sysctl support" ---help--- diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index 3da88775d..8f5e2fbf1 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -3,11 +3,11 @@ # ifeq ($(CONFIG_CKRM),y) - obj-y = ckrm.o ckrmutils.o ckrm_tasks_stub.o rbce/ + obj-y = ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/ endif obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o - obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o + obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o - obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o + obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_laq.o obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o diff --git a/kernel/ckrm/ckrm_tc.c b/kernel/ckrm/ckrm_tc.c index 316266494..23ebb3a20 100644 --- a/kernel/ckrm/ckrm_tc.c +++ b/kernel/ckrm/ckrm_tc.c @@ -397,7 +397,7 @@ DECLARE_MUTEX(async_serializer); // serialize all async functions * We use a hybrid by comparing ratio nr_threads/pidmax */ -static void ckrm_reclassify_all_tasks(void) +static int ckrm_reclassify_all_tasks(void) { extern int pid_max; @@ -407,6 +407,11 @@ static void 
ckrm_reclassify_all_tasks(void) int ratio; int use_bitmap; + /* Check permissions */ + if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE))) { + return -EPERM; + } + ratio = curpidmax / nr_threads; if (curpidmax <= PID_MAX_DEFAULT) { use_bitmap = 1; @@ -417,6 +422,7 @@ static void ckrm_reclassify_all_tasks(void) ce_protect(&CT_taskclass); retry: + if (use_bitmap == 0) { // go through it in one walk read_lock(&tasklist_lock); @@ -490,40 +496,13 @@ static void ckrm_reclassify_all_tasks(void) } else { read_unlock(&tasklist_lock); } + pos++; } } } ce_release(&CT_taskclass); -} - -int ckrm_reclassify(int pid) -{ - struct task_struct *tsk; - int rc = 0; - - down(&async_serializer); // protect again race condition - if (pid < 0) { - // do we want to treat this as process group .. should YES ToDo - rc = -EINVAL; - } else if (pid == 0) { - // reclassify all tasks in the system - ckrm_reclassify_all_tasks(); - } else { - // reclassify particular pid - read_lock(&tasklist_lock); - if ((tsk = find_task_by_pid(pid)) != NULL) { - get_task_struct(tsk); - read_unlock(&tasklist_lock); - CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_RECLASSIFY, tsk); - put_task_struct(tsk); - } else { - read_unlock(&tasklist_lock); - rc = -EINVAL; - } - } - up(&async_serializer); - return rc; + return 0; } /* @@ -546,7 +525,7 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls) atomic_read(&cls->core.hnode.parent->refcnt)); // If no CE registered for this classtype, following will be needed // repeatedly; - ce_regd = class_core(cls)->classtype->ce_regd; + ce_regd = atomic_read(&class_core(cls)->classtype->ce_regd); cnode = &(class_core(cls)->hnode); parcls = class_type(ckrm_task_class_t, cnode->parent); @@ -595,20 +574,21 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls) } /* - * Change the core class of the given task. 
+ * Change the core class of the given task */ int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) { struct task_struct *tsk; - if (!ckrm_validate_and_grab_core(class_core(cls))) + if (cls && !ckrm_validate_and_grab_core(class_core(cls))) return -EINVAL; read_lock(&tasklist_lock); if ((tsk = find_task_by_pid(pid)) == NULL) { read_unlock(&tasklist_lock); - ckrm_core_drop(class_core(cls)); + if (cls) + ckrm_core_drop(class_core(cls)); return -EINVAL; } get_task_struct(tsk); @@ -617,19 +597,21 @@ int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) /* Check permissions */ if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE)) && (current->user != tsk->user)) { - ckrm_core_drop(class_core(cls)); + if (cls) + ckrm_core_drop(class_core(cls)); put_task_struct(tsk); return -EPERM; } - down(&async_serializer); // protect again race condition - ce_protect(&CT_taskclass); - ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL); + if (cls == NULL) + CE_CLASSIFY_TASK(CKRM_EVENT_RECLASSIFY,tsk); + else + ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL); + ce_release(&CT_taskclass); put_task_struct(tsk); - up(&async_serializer); return 0; } @@ -721,16 +703,25 @@ static int tc_forced_reclassify(struct ckrm_core_class *target, const char *obj) pid_t pid; int rc = -EINVAL; - pid = (pid_t) simple_strtoul(obj, NULL, 10); - if (pid > 0) { - rc = ckrm_forced_reclassify_pid(pid, - class_type(ckrm_task_class_t, - target)); + pid = (pid_t) simple_strtol(obj, NULL, 0); + + down(&async_serializer); // protect again race condition with reclassify_class + if (pid < 0) { + // do we want to treat this as process group .. TBD + rc = -EINVAL; + } else if (pid == 0) { + rc = (target == NULL) ? 
ckrm_reclassify_all_tasks() : -EINVAL; + } else { + struct ckrm_task_class *cls = NULL; + if (target) + cls = class_type(ckrm_task_class_t,target); + rc = ckrm_forced_reclassify_pid(pid,cls); } + up(&async_serializer); return rc; } -#if 1 +#if 0 /****************************************************************************** * Debugging Task Classes: Utility functions diff --git a/kernel/ckrm/rbce/rbcemod.c b/kernel/ckrm/rbce/rbcemod.c index fa8d2c470..4d5f40aef 100644 --- a/kernel/ckrm/rbce/rbcemod.c +++ b/kernel/ckrm/rbce/rbcemod.c @@ -1,4 +1,5 @@ -/* Rule-based Classification Engine (RBCE) module +/* Rule-based Classification Engine (RBCE) and + * Consolidated RBCE module code (combined) * * Copyright (C) Hubertus Franke, IBM Corp. 2003 * (C) Chandra Seetharaman, IBM Corp. 2003 @@ -14,6 +15,10 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * */ /* Changes @@ -49,7 +54,7 @@ #include #include #include "bitvector.h" -#include "rbce.h" +#include #define DEBUG @@ -174,6 +179,8 @@ int termop_2_vecidx[RBCE_RULE_INVALID] = { #define POLICY_ACTION_REDO_ALL 0x02 // Recompute all rule flags #define POLICY_ACTION_PACK_TERMS 0x04 // Time to pack the terms +const int use_persistent_state = 1; + struct ckrm_eng_callback ckrm_ecbs; // Term vector state @@ -510,7 +517,6 @@ rbce_class_deletecb(const char *classname, void *classobj, int classtype) } } } - put_class(cls); if ((cls = find_class_name(classname)) != NULL) { printk(KERN_ERR "rbce ERROR: class %s exists in rbce after " @@ -1337,65 +1343,49 @@ int rule_exists(const char *rname) static struct rbce_private_data *create_private_data(struct rbce_private_data *, int); -int rbce_ckrm_reclassify(int pid) +static inline +void reset_evaluation(struct rbce_private_data *pdata,int termflag) { - printk("ckrm_reclassify_pid ignored\n"); - return -EINVAL; -} - -int reclassify_pid(int pid) -{ - struct task_struct *tsk; - - // FIXME: Need to treat -pid as process group - if (pid < 0) { - return -EINVAL; - } - - if (pid == 0) { - rbce_ckrm_reclassify(0); // just reclassify all tasks. 
- } - // if pid is +ve take control of the task, start evaluating it - if ((tsk = find_task_by_pid(pid)) == NULL) { - return -EINVAL; - } - - if (unlikely(!RBCE_DATA(tsk))) { - RBCE_DATAP(tsk) = create_private_data(NULL, 0); - if (!RBCE_DATA(tsk)) { - return -ENOMEM; - } - } - RBCE_DATA(tsk)->evaluate = 1; - rbce_ckrm_reclassify(pid); - return 0; + /* reset TAG ruleterm evaluation results to pick up + * on next classification event + */ + if (use_persistent_state && gl_mask_vecs[termflag]) { + bitvector_and_not( pdata->eval, pdata->eval, + gl_mask_vecs[termflag] ); + bitvector_and_not( pdata->true, pdata->true, + gl_mask_vecs[termflag] ); + } } - + int set_tasktag(int pid, char *tag) { char *tp; + int rc = 0; struct task_struct *tsk; struct rbce_private_data *pdata; + int len; if (!tag) { return -EINVAL; } - - if ((tsk = find_task_by_pid(pid)) == NULL) { - return -EINVAL; - } - - tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC); - + len = strlen(tag) + 1; + tp = kmalloc(len, GFP_ATOMIC); if (!tp) { return -ENOMEM; } + strncpy(tp,tag,len); + + read_lock(&tasklist_lock); + if ((tsk = find_task_by_pid(pid)) == NULL) { + rc = -EINVAL; + goto out; + } if (unlikely(!RBCE_DATA(tsk))) { RBCE_DATAP(tsk) = create_private_data(NULL, 0); if (!RBCE_DATA(tsk)) { - kfree(tp); - return -ENOMEM; + rc = -ENOMEM; + goto out; } } pdata = RBCE_DATA(tsk); @@ -1403,10 +1393,13 @@ int set_tasktag(int pid, char *tag) kfree(pdata->app_tag); } pdata->app_tag = tp; - strcpy(pdata->app_tag, tag); - rbce_ckrm_reclassify(pid); - - return 0; + reset_evaluation(pdata,RBCE_TERMFLAG_TAG); + + out: + read_unlock(&tasklist_lock); + if (rc != 0) + kfree(tp); + return rc; } /*====================== Classification Functions =======================*/ @@ -1888,8 +1881,6 @@ static inline void unstore_pdata(struct rbce_private_data *pdata) #endif // PDATA_DEBUG -const int use_persistent_state = 1; - /* * Allocate and initialize a rbce_private_data data structure. 
* @@ -2261,6 +2252,7 @@ void *rbce_tc_classify(enum ckrm_event event, ...) va_list args; void *cls = NULL; struct task_struct *tsk; + struct rbce_private_data *pdata; va_start(args, event); tsk = va_arg(args, struct task_struct *); @@ -2315,6 +2307,9 @@ void *rbce_tc_classify(enum ckrm_event event, ...) break; case CKRM_EVENT_RECLASSIFY: + if ((pdata = (RBCE_DATA(tsk)))) { + pdata->evaluate = 1; + } cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype); break; @@ -2407,38 +2402,40 @@ struct ce_regtable_struct ce_regtable[] = { {NULL} }; -static int register_classtype_engines(void) -{ +static void unregister_classtype_engines(void) + { int rc; struct ce_regtable_struct *ceptr = ce_regtable; while (ceptr->name) { - rc = ckrm_register_engine(ceptr->name, ceptr->cbs); - printk("ce register with <%s> typeId=%d\n", ceptr->name, rc); - if ((rc < 0) && (rc != -ENOENT)) - return (rc); - if (rc != -ENOENT) - *ceptr->clsvar = rc; + if (*ceptr->clsvar >= 0) { + printk("ce unregister with <%s>\n",ceptr->name); + while ((rc = ckrm_unregister_engine(ceptr->name)) == -EAGAIN) + ; + printk("ce unregister with <%s> rc=%d\n",ceptr->name,rc); + *ceptr->clsvar = -1; + } ceptr++; } - return 0; -} + } -static void unregister_classtype_engines(void) +static int register_classtype_engines(void) { int rc; struct ce_regtable_struct *ceptr = ce_regtable; while (ceptr->name) { - if (*ceptr->clsvar >= 0) { - printk("ce unregister with <%s>\n", ceptr->name); - rc = ckrm_unregister_engine(ceptr->name); - printk("ce unregister with <%s> rc=%d\n", ceptr->name, - rc); - *ceptr->clsvar = -1; + rc = ckrm_register_engine(ceptr->name, ceptr->cbs); + printk("ce register with <%s> typeId=%d\n",ceptr->name,rc); + if ((rc < 0) && (rc != -ENOENT)) { + unregister_classtype_engines(); + return (rc); } + if (rc != -ENOENT) + *ceptr->clsvar = rc; ceptr++; } + return 0; } // =========== /proc/sysctl/debug/rbce debug stuff ============= @@ -2597,7 +2594,6 @@ EXPORT_SYMBOL(rule_exists); 
EXPORT_SYMBOL(change_rule); EXPORT_SYMBOL(delete_rule); EXPORT_SYMBOL(rename_rule); -EXPORT_SYMBOL(reclassify_pid); EXPORT_SYMBOL(set_tasktag); module_init(init_rbce); diff --git a/kernel/ckrm/rbce/rbcemod_ext.c b/kernel/ckrm/rbce/rbcemod_ext.c index b7886ebf4..b0c6ee9aa 100644 --- a/kernel/ckrm/rbce/rbcemod_ext.c +++ b/kernel/ckrm/rbce/rbcemod_ext.c @@ -3,7 +3,7 @@ * Copyright (C) Hubertus Franke, IBM Corp. 2003 * * Extension to be included into RBCE to collect delay and sample information - * requires user daemon to activate. + * Requires user daemon e.g. crbcedmn to activate. * * Latest version, more details at http://ckrm.sf.net * @@ -12,8 +12,13 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * */ + /******************************************************************************* * * User-Kernel Communication Channel (UKCC) diff --git a/kernel/ckrm/rbce/token.c b/kernel/ckrm/rbce/token.c index 0ace80a50..7bcdf5492 100644 --- a/kernel/ckrm/rbce/token.c +++ b/kernel/ckrm/rbce/token.c @@ -1,3 +1,24 @@ +/* Tokens for Rule-based Classification Engine (RBCE) and + * Consolidated RBCE module code (combined) + * + * Copyright (C) Hubertus Franke, IBM Corp. 2003 + * (C) Chandra Seetharaman, IBM Corp. 2003 + * (C) Vivek Kashyap, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * + */ + #include #include @@ -176,7 +197,7 @@ rules_parse(char *rule_defn, struct rbce_rule_term **rterms, int *term_mask) nterms = 0; while (*rp++) { - if (*rp == '>' || *rp == '<' || *rp == '=') { + if (*rp == '>' || *rp == '<' || *rp == '=' || *rp == '!') { nterms++; } } diff --git a/kernel/exit.c b/kernel/exit.c index 5bc8fff46..2f136029c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -859,9 +859,6 @@ asmlinkage NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; -#ifdef CONFIG_CKRM_TYPE_TASKCLASS - numtasks_put_ref(tsk->taskclass); -#endif exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/sched.c b/kernel/sched.c index b4512b77b..74a53bf05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2370,7 +2370,6 @@ void scheduler_tick(int user_ticks, int sys_ticks) } goto out_unlock; } -#warning MEF PLANETLAB: "if (vx_need_resched(p)) was if (!--p->time_slice) */" if (vx_need_resched(p)) { #ifdef CONFIG_CKRM_CPU_SCHEDULE /* Hubertus ... we can abstract this out */ -- 2.47.0