ckrm_E16 release candidate 1 merge
authorMarc Fiuczynski <mef@cs.princeton.edu>
Thu, 23 Sep 2004 00:19:46 +0000 (00:19 +0000)
committerMarc Fiuczynski <mef@cs.princeton.edu>
Thu, 23 Sep 2004 00:19:46 +0000 (00:19 +0000)
include/linux/ckrm.h
include/linux/sched.h
include/linux/tcp.h
init/Kconfig
kernel/ckrm/Makefile
kernel/ckrm/ckrm_tc.c
kernel/ckrm/rbce/rbcemod.c
kernel/ckrm/rbce/rbcemod_ext.c
kernel/ckrm/rbce/token.c
kernel/exit.c
kernel/sched.c

index 04f4ec0..a29bf28 100644 (file)
@@ -9,10 +9,13 @@
  *
  * Latest version, more details at http://ckrm.sf.net
  * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  *
  */
 
index 2253b0c..ee1bd33 100644 (file)
@@ -1249,19 +1249,43 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #define def_delay_var(var)                     unsigned long long var
 #define get_delay(tsk,field)                    ((tsk)->delays.field)
-#define delay_value(x)                         (((unsigned long)(x))/1000)
 
 #define start_delay(var)                        ((var) = sched_clock())
 #define start_delay_set(var,flg)                (set_delay_flag(current,flg),(var) = sched_clock())
 
 #define inc_delay(tsk,field) (((tsk)->delays.field)++)
-#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts)))
-#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg))
 
-static inline void add_io_delay(unsigned long dstart) 
+/* Because of hardware timer drift on SMP systems, a task may continue on a
+ * different cpu than the one where start_ts was taken, so it is possible that
+ * end_ts < start_ts by some usecs. In this case we ignore the diff
+ * and add nothing to the total.
+ */
+#ifdef CONFIG_SMP
+#define test_ts_integrity(start_ts,end_ts)  (likely((end_ts) > (start_ts)))
+#else
+#define test_ts_integrity(start_ts,end_ts)  (1)
+#endif
+
+#define add_delay_ts(tsk,field,start_ts,end_ts) \
+       do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0)
+
+#define add_delay_clear(tsk,field,start_ts,flg)        \
+       do {                                           \
+               unsigned long long now = sched_clock();\
+               add_delay_ts(tsk,field,start_ts,now);  \
+               clear_delay_flag(tsk,flg);             \
+        } while (0)
+
+static inline void add_io_delay(unsigned long long dstart) 
 {
        struct task_struct * tsk = current;
-       unsigned long val = delay_value(sched_clock()-dstart);
+       unsigned long long now = sched_clock();
+       unsigned long long val;
+
+       if (test_ts_integrity(dstart,now))
+               val = now - dstart;
+       else
+               val = 0;
        if (test_delay_flag(tsk,PF_MEMIO)) {
                tsk->delays.mem_iowait_total += val;
                tsk->delays.num_memwaits++;
index 9cdf696..9ed5fac 100644 (file)
@@ -387,7 +387,6 @@ struct tcp_opt {
 #ifndef CONFIG_ACCEPT_QUEUES
        struct open_request     *accept_queue_tail;
 #endif
-
        unsigned int            keepalive_time;   /* time before keep alive takes place */
        unsigned int            keepalive_intvl;  /* time interval between keep alive probes */
        int                     linger2;
index b73e020..26615b4 100644 (file)
@@ -115,6 +115,18 @@ config BSD_PROCESS_ACCT
          up to the user level program to do useful things with this
          information.  This is generally a good idea, so say Y.
 
+config BSD_PROCESS_ACCT_V3
+       bool "BSD Process Accounting version 3 file format"
+       depends on BSD_PROCESS_ACCT
+       default n
+       help
+         If you say Y here, the process accounting information is written
+         in a new file format that also logs the process IDs of each
+         process and its parent. Note that this file format is incompatible
+         with previous v0/v1/v2 file formats, so you will need updated tools
+         for processing it. A preliminary version of these tools is available
+         at <http://www.de.kernel.org/pub/linux/utils/acct/>.
+
 menu "Class Based Kernel Resource Management"
 
 config CKRM
@@ -239,18 +251,6 @@ config CKRM_CRBCE
 
 endmenu
 
-config BSD_PROCESS_ACCT_V3
-       bool "BSD Process Accounting version 3 file format"
-       depends on BSD_PROCESS_ACCT
-       default n
-       help
-         If you say Y here, the process accounting information is written
-         in a new file format that also logs the process IDs of each
-         process and it's parent. Note that this file format is incompatible
-         with previous v0/v1/v2 file formats, so you will need updated tools
-         for processing it. A preliminary version of these tools is available
-         at <http://http://www.de.kernel.org/pub/linux/utils/acct/>.
-
 config SYSCTL
        bool "Sysctl support"
        ---help---
index 3da8877..8f5e2fb 100644 (file)
@@ -3,11 +3,11 @@
 #
 
 ifeq ($(CONFIG_CKRM),y)
-    obj-y = ckrm.o ckrmutils.o ckrm_tasks_stub.o rbce/
+    obj-y = ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/
 endif  
     obj-$(CONFIG_CKRM_TYPE_TASKCLASS)  += ckrm_tc.o
-    obj-$(CONFIG_CKRM_RES_NUMTASKS)    += ckrm_tasks.o
+    obj-$(CONFIG_CKRM_RES_NUMTASKS)    += ckrm_numtasks.o
     obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
-    obj-$(CONFIG_CKRM_RES_LISTENAQ)    += ckrm_listenaq.o
+    obj-$(CONFIG_CKRM_RES_LISTENAQ)    += ckrm_laq.o
     obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o
     obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o
index 3162664..23ebb3a 100644 (file)
@@ -397,7 +397,7 @@ DECLARE_MUTEX(async_serializer);    // serialize all async functions
  * We use a hybrid by comparing ratio nr_threads/pidmax
  */
 
-static void ckrm_reclassify_all_tasks(void)
+static int ckrm_reclassify_all_tasks(void)
 {
        extern int pid_max;
 
@@ -407,6 +407,11 @@ static void ckrm_reclassify_all_tasks(void)
        int ratio;
        int use_bitmap;
 
+       /* Check permissions */
+       if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE))) {
+               return -EPERM;
+       }
+
        ratio = curpidmax / nr_threads;
        if (curpidmax <= PID_MAX_DEFAULT) {
                use_bitmap = 1;
@@ -417,6 +422,7 @@ static void ckrm_reclassify_all_tasks(void)
        ce_protect(&CT_taskclass);
 
       retry:
+
        if (use_bitmap == 0) {
                // go through it in one walk
                read_lock(&tasklist_lock);
@@ -490,40 +496,13 @@ static void ckrm_reclassify_all_tasks(void)
                                } else {
                                        read_unlock(&tasklist_lock);
                                }
+                               pos++;
                        }
                }
 
        }
        ce_release(&CT_taskclass);
-}
-
-int ckrm_reclassify(int pid)
-{
-       struct task_struct *tsk;
-       int rc = 0;
-
-       down(&async_serializer);        // protect again race condition
-       if (pid < 0) {
-               // do we want to treat this as process group .. should YES ToDo
-               rc = -EINVAL;
-       } else if (pid == 0) {
-               // reclassify all tasks in the system
-               ckrm_reclassify_all_tasks();
-       } else {
-               // reclassify particular pid
-               read_lock(&tasklist_lock);
-               if ((tsk = find_task_by_pid(pid)) != NULL) {
-                       get_task_struct(tsk);
-                       read_unlock(&tasklist_lock);
-                       CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_RECLASSIFY, tsk);
-                       put_task_struct(tsk);
-               } else {
-                       read_unlock(&tasklist_lock);
-                       rc = -EINVAL;
-               }
-       }
-       up(&async_serializer);
-       return rc;
+       return 0;
 }
 
 /*
@@ -546,7 +525,7 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls)
                 atomic_read(&cls->core.hnode.parent->refcnt));
        // If no CE registered for this classtype, following will be needed 
        // repeatedly;
-       ce_regd = class_core(cls)->classtype->ce_regd;
+       ce_regd = atomic_read(&class_core(cls)->classtype->ce_regd);
        cnode = &(class_core(cls)->hnode);
        parcls = class_type(ckrm_task_class_t, cnode->parent);
 
@@ -595,20 +574,21 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls)
 }
 
 /*
- * Change the core class of the given task.
+ * Change the core class of the given task
  */
 
 int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls)
 {
        struct task_struct *tsk;
 
-       if (!ckrm_validate_and_grab_core(class_core(cls)))
+       if (cls && !ckrm_validate_and_grab_core(class_core(cls)))
                return -EINVAL;
 
        read_lock(&tasklist_lock);
        if ((tsk = find_task_by_pid(pid)) == NULL) {
                read_unlock(&tasklist_lock);
-               ckrm_core_drop(class_core(cls));
+               if (cls) 
+                       ckrm_core_drop(class_core(cls));
                return -EINVAL;
        }
        get_task_struct(tsk);
@@ -617,19 +597,21 @@ int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls)
        /* Check permissions */
        if ((!capable(CAP_SYS_NICE)) &&
            (!capable(CAP_SYS_RESOURCE)) && (current->user != tsk->user)) {
-               ckrm_core_drop(class_core(cls));
+               if (cls) 
+                       ckrm_core_drop(class_core(cls));
                put_task_struct(tsk);
                return -EPERM;
        }
 
-       down(&async_serializer);        // protect again race condition
-
        ce_protect(&CT_taskclass);
-       ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL);
+       if (cls == NULL)
+               CE_CLASSIFY_TASK(CKRM_EVENT_RECLASSIFY,tsk);
+       else 
+               ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL);
+
        ce_release(&CT_taskclass);
        put_task_struct(tsk);
 
-       up(&async_serializer);
        return 0;
 }
 
@@ -721,16 +703,25 @@ static int tc_forced_reclassify(struct ckrm_core_class *target, const char *obj)
        pid_t pid;
        int rc = -EINVAL;
 
-       pid = (pid_t) simple_strtoul(obj, NULL, 10);
-       if (pid > 0) {
-               rc = ckrm_forced_reclassify_pid(pid,
-                                               class_type(ckrm_task_class_t,
-                                                          target));
+       pid = (pid_t) simple_strtol(obj, NULL, 0);
+
+       down(&async_serializer);        // protect again race condition with reclassify_class
+       if (pid < 0) {
+               // do we want to treat this as process group .. TBD
+               rc = -EINVAL;
+       } else if (pid == 0) {
+               rc = (target == NULL) ? ckrm_reclassify_all_tasks() : -EINVAL;
+       } else {
+               struct ckrm_task_class *cls = NULL;
+               if (target) 
+                       cls = class_type(ckrm_task_class_t,target);
+               rc = ckrm_forced_reclassify_pid(pid,cls);
        }
+       up(&async_serializer);
        return rc;
 }
 
-#if 1
+#if 0
 
 /******************************************************************************
  * Debugging Task Classes:  Utility functions
index fa8d2c4..4d5f40a 100644 (file)
@@ -1,4 +1,5 @@
-/* Rule-based Classification Engine (RBCE) module
+/* Rule-based Classification Engine (RBCE) and
+ * Consolidated RBCE module code (combined)
  *
  * Copyright (C) Hubertus Franke, IBM Corp. 2003
  *           (C) Chandra Seetharaman, IBM Corp. 2003
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
  */
 
 /* Changes
@@ -49,7 +54,7 @@
 #include <linux/ckrm_ce.h>
 #include <linux/ckrm_net.h>
 #include "bitvector.h"
-#include "rbce.h"
+#include <linux/rbce.h>
 
 #define DEBUG
 
@@ -174,6 +179,8 @@ int termop_2_vecidx[RBCE_RULE_INVALID] = {
 #define POLICY_ACTION_REDO_ALL         0x02    // Recompute all rule flags
 #define POLICY_ACTION_PACK_TERMS       0x04    // Time to pack the terms
 
+const int use_persistent_state = 1;
+
 struct ckrm_eng_callback ckrm_ecbs;
 
 // Term vector state
@@ -510,7 +517,6 @@ rbce_class_deletecb(const char *classname, void *classobj, int classtype)
                                }
                        }
                }
-               put_class(cls);
                if ((cls = find_class_name(classname)) != NULL) {
                        printk(KERN_ERR
                               "rbce ERROR: class %s exists in rbce after "
@@ -1337,65 +1343,49 @@ int rule_exists(const char *rname)
 static struct rbce_private_data *create_private_data(struct rbce_private_data *,
                                                     int);
 
-int rbce_ckrm_reclassify(int pid)
+static inline
+void reset_evaluation(struct rbce_private_data *pdata,int termflag)
 {
-       printk("ckrm_reclassify_pid ignored\n");
-       return -EINVAL;
-}
-
-int reclassify_pid(int pid)
-{
-       struct task_struct *tsk;
-
-       // FIXME: Need to treat -pid as process group
-       if (pid < 0) {
-               return -EINVAL;
-       }
-
-       if (pid == 0) {
-               rbce_ckrm_reclassify(0);        // just reclassify all tasks.
-       }
-       // if pid is +ve take control of the task, start evaluating it
-       if ((tsk = find_task_by_pid(pid)) == NULL) {
-               return -EINVAL;
-       }
-
-       if (unlikely(!RBCE_DATA(tsk))) {
-               RBCE_DATAP(tsk) = create_private_data(NULL, 0);
-               if (!RBCE_DATA(tsk)) {
-                       return -ENOMEM;
-               }
-       }
-       RBCE_DATA(tsk)->evaluate = 1;
-       rbce_ckrm_reclassify(pid);
-       return 0;
+       /* reset TAG ruleterm evaluation results to pick up 
+        * on next classification event
+        */
+       if (use_persistent_state && gl_mask_vecs[termflag]) {
+               bitvector_and_not( pdata->eval, pdata->eval, 
+                                  gl_mask_vecs[termflag] );
+               bitvector_and_not( pdata->true, pdata->true, 
+                                  gl_mask_vecs[termflag] );
+       }
 }
-
+  
 int set_tasktag(int pid, char *tag)
 {
        char *tp;
+       int rc = 0;
        struct task_struct *tsk;
        struct rbce_private_data *pdata;
+       int len;
 
        if (!tag) {
                return -EINVAL;
        }
-
-       if ((tsk = find_task_by_pid(pid)) == NULL) {
-               return -EINVAL;
-       }
-
-       tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC);
-
+       len = strlen(tag) + 1;
+       tp = kmalloc(len, GFP_ATOMIC);
        if (!tp) {
                return -ENOMEM;
        }
+       strncpy(tp,tag,len);
+
+       read_lock(&tasklist_lock);
+       if ((tsk = find_task_by_pid(pid)) == NULL) {
+               rc = -EINVAL;
+               goto out;
+       }
 
        if (unlikely(!RBCE_DATA(tsk))) {
                RBCE_DATAP(tsk) = create_private_data(NULL, 0);
                if (!RBCE_DATA(tsk)) {
-                       kfree(tp);
-                       return -ENOMEM;
+                       rc = -ENOMEM;
+                       goto out;
                }
        }
        pdata = RBCE_DATA(tsk);
@@ -1403,10 +1393,13 @@ int set_tasktag(int pid, char *tag)
                kfree(pdata->app_tag);
        }
        pdata->app_tag = tp;
-       strcpy(pdata->app_tag, tag);
-       rbce_ckrm_reclassify(pid);
-
-       return 0;
+       reset_evaluation(pdata,RBCE_TERMFLAG_TAG);
+       
+ out:
+       read_unlock(&tasklist_lock);
+       if (rc != 0) 
+               kfree(tp);
+       return rc;
 }
 
 /*====================== Classification Functions =======================*/
@@ -1888,8 +1881,6 @@ static inline void unstore_pdata(struct rbce_private_data *pdata)
 
 #endif                         // PDATA_DEBUG
 
-const int use_persistent_state = 1;
-
 /*
  * Allocate and initialize a rbce_private_data data structure.
  *
@@ -2261,6 +2252,7 @@ void *rbce_tc_classify(enum ckrm_event event, ...)
        va_list args;
        void *cls = NULL;
        struct task_struct *tsk;
+       struct rbce_private_data *pdata;
 
        va_start(args, event);
        tsk = va_arg(args, struct task_struct *);
@@ -2315,6 +2307,9 @@ void *rbce_tc_classify(enum ckrm_event event, ...)
                break;
 
        case CKRM_EVENT_RECLASSIFY:
+               if ((pdata = (RBCE_DATA(tsk)))) {
+                       pdata->evaluate = 1;
+               }
                cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype);
                break;
 
@@ -2407,38 +2402,40 @@ struct ce_regtable_struct ce_regtable[] = {
        {NULL}
 };
 
-static int register_classtype_engines(void)
-{
+static void unregister_classtype_engines(void)
+  {
        int rc;
        struct ce_regtable_struct *ceptr = ce_regtable;
 
        while (ceptr->name) {
-               rc = ckrm_register_engine(ceptr->name, ceptr->cbs);
-               printk("ce register with <%s> typeId=%d\n", ceptr->name, rc);
-               if ((rc < 0) && (rc != -ENOENT))
-                       return (rc);
-               if (rc != -ENOENT)
-                       *ceptr->clsvar = rc;
+               if (*ceptr->clsvar >= 0) {
+                       printk("ce unregister with <%s>\n",ceptr->name);
+                       while ((rc = ckrm_unregister_engine(ceptr->name)) == -EAGAIN)
+                               ;
+                       printk("ce unregister with <%s> rc=%d\n",ceptr->name,rc);
+                       *ceptr->clsvar = -1;
+               }
                ceptr++;
        }
-       return 0;
-}
+  }
 
-static void unregister_classtype_engines(void)
+static int register_classtype_engines(void)
 {
        int rc;
        struct ce_regtable_struct *ceptr = ce_regtable;
 
        while (ceptr->name) {
-               if (*ceptr->clsvar >= 0) {
-                       printk("ce unregister with <%s>\n", ceptr->name);
-                       rc = ckrm_unregister_engine(ceptr->name);
-                       printk("ce unregister with <%s> rc=%d\n", ceptr->name,
-                              rc);
-                       *ceptr->clsvar = -1;
+               rc = ckrm_register_engine(ceptr->name, ceptr->cbs);
+               printk("ce register with <%s> typeId=%d\n",ceptr->name,rc);
+               if ((rc < 0) && (rc != -ENOENT)) {
+                       unregister_classtype_engines();
+                       return (rc);
                }
+               if (rc != -ENOENT) 
+                       *ceptr->clsvar = rc;
                ceptr++;
        }
+       return 0;
 }
 
 // =========== /proc/sysctl/debug/rbce debug stuff =============
@@ -2597,7 +2594,6 @@ EXPORT_SYMBOL(rule_exists);
 EXPORT_SYMBOL(change_rule);
 EXPORT_SYMBOL(delete_rule);
 EXPORT_SYMBOL(rename_rule);
-EXPORT_SYMBOL(reclassify_pid);
 EXPORT_SYMBOL(set_tasktag);
 
 module_init(init_rbce);
index b7886eb..b0c6ee9 100644 (file)
@@ -3,7 +3,7 @@
  * Copyright (C) Hubertus Franke, IBM Corp. 2003
  *
  * Extension to be included into RBCE to collect delay and sample information
- * requires user daemon <crbcedmn> to activate.
+ * Requires user daemon e.g. crbcedmn to activate.
  *
  * Latest version, more details at http://ckrm.sf.net
  *
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
  */
 
+
 /*******************************************************************************
  *
  *   User-Kernel Communication Channel (UKCC)
index 0ace80a..7bcdf54 100644 (file)
@@ -1,3 +1,24 @@
+/* Tokens for Rule-based Classification Engine (RBCE) and
+ * Consolidated RBCE module code (combined)
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003
+ *           (C) Chandra Seetharaman, IBM Corp. 2003
+ *           (C) Vivek Kashyap, IBM Corp. 2004 
+ * 
+ * Latest version, more details at http://ckrm.sf.net
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * 
+ *
+ */
+
 #include <linux/parser.h>
 #include <linux/ctype.h>
 
@@ -176,7 +197,7 @@ rules_parse(char *rule_defn, struct rbce_rule_term **rterms, int *term_mask)
 
        nterms = 0;
        while (*rp++) {
-               if (*rp == '>' || *rp == '<' || *rp == '=') {
+               if (*rp == '>' || *rp == '<' || *rp == '=' || *rp == '!') {
                        nterms++;
                }
        }
index 5bc8fff..2f13602 100644 (file)
@@ -859,9 +859,6 @@ asmlinkage NORET_TYPE void do_exit(long code)
                module_put(tsk->binfmt->module);
 
        tsk->exit_code = code;
-#ifdef CONFIG_CKRM_TYPE_TASKCLASS
-       numtasks_put_ref(tsk->taskclass);
-#endif
        exit_notify(tsk);
 #ifdef CONFIG_NUMA
        mpol_free(tsk->mempolicy);
index b4512b7..74a53bf 100644 (file)
@@ -2370,7 +2370,6 @@ void scheduler_tick(int user_ticks, int sys_ticks)
                }
                goto out_unlock;
        }
-#warning MEF PLANETLAB: "if (vx_need_resched(p)) was if (!--p->time_slice) */"
        if (vx_need_resched(p)) {
 #ifdef CONFIG_CKRM_CPU_SCHEDULE
                /* Hubertus ... we can abstract this out */