*
* Latest version, more details at http://ckrm.sf.net
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#define def_delay_var(var) unsigned long long var
#define get_delay(tsk,field) ((tsk)->delays.field)
-#define delay_value(x) (((unsigned long)(x))/1000)
#define start_delay(var) ((var) = sched_clock())
#define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock())
#define inc_delay(tsk,field) (((tsk)->delays.field)++)
-#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts)))
-#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg))
-static inline void add_io_delay(unsigned long dstart)
+/* Because of hardware timer drift on SMP systems, a task may continue on a
+ * different CPU than the one where start_ts was taken, so there is a
+ * possibility that end_ts < start_ts by some usecs. In this case we ignore
+ * the diff and add nothing to the total.
+ */
+#ifdef CONFIG_SMP
+#define test_ts_integrity(start_ts,end_ts) (likely((end_ts) > (start_ts)))
+#else
+#define test_ts_integrity(start_ts,end_ts) (1)
+#endif
+
+#define add_delay_ts(tsk,field,start_ts,end_ts) \
+ do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0)
+
+#define add_delay_clear(tsk,field,start_ts,flg) \
+ do { \
+ unsigned long long now = sched_clock();\
+ add_delay_ts(tsk,field,start_ts,now); \
+ clear_delay_flag(tsk,flg); \
+ } while (0)
+
+static inline void add_io_delay(unsigned long long dstart)
{
struct task_struct * tsk = current;
- unsigned long val = delay_value(sched_clock()-dstart);
+ unsigned long long now = sched_clock();
+ unsigned long long val;
+
+ if (test_ts_integrity(dstart,now))
+ val = now - dstart;
+ else
+ val = 0;
if (test_delay_flag(tsk,PF_MEMIO)) {
tsk->delays.mem_iowait_total += val;
tsk->delays.num_memwaits++;
#ifndef CONFIG_ACCEPT_QUEUES
struct open_request *accept_queue_tail;
#endif
-
unsigned int keepalive_time; /* time before keep alive takes place */
unsigned int keepalive_intvl; /* time interval between keep alive probes */
int linger2;
up to the user level program to do useful things with this
information. This is generally a good idea, so say Y.
+config BSD_PROCESS_ACCT_V3
+ bool "BSD Process Accounting version 3 file format"
+ depends on BSD_PROCESS_ACCT
+ default n
+ help
+ If you say Y here, the process accounting information is written
+ in a new file format that also logs the process IDs of each
+ process and its parent. Note that this file format is incompatible
+ with previous v0/v1/v2 file formats, so you will need updated tools
+ for processing it. A preliminary version of these tools is available
+ at <http://www.de.kernel.org/pub/linux/utils/acct/>.
+
menu "Class Based Kernel Resource Management"
config CKRM
endmenu
-config BSD_PROCESS_ACCT_V3
- bool "BSD Process Accounting version 3 file format"
- depends on BSD_PROCESS_ACCT
- default n
- help
- If you say Y here, the process accounting information is written
- in a new file format that also logs the process IDs of each
- process and it's parent. Note that this file format is incompatible
- with previous v0/v1/v2 file formats, so you will need updated tools
- for processing it. A preliminary version of these tools is available
- at <http://http://www.de.kernel.org/pub/linux/utils/acct/>.
-
config SYSCTL
bool "Sysctl support"
---help---
#
ifeq ($(CONFIG_CKRM),y)
- obj-y = ckrm.o ckrmutils.o ckrm_tasks_stub.o rbce/
+ obj-y = ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/
endif
obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o
- obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o
+ obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o
obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
- obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o
+ obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_laq.o
obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o
obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o
* We use a hybrid by comparing ratio nr_threads/pidmax
*/
-static void ckrm_reclassify_all_tasks(void)
+static int ckrm_reclassify_all_tasks(void)
{
extern int pid_max;
int ratio;
int use_bitmap;
+ /* Check permissions */
+ if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE))) {
+ return -EPERM;
+ }
+
ratio = curpidmax / nr_threads;
if (curpidmax <= PID_MAX_DEFAULT) {
use_bitmap = 1;
ce_protect(&CT_taskclass);
retry:
+
if (use_bitmap == 0) {
// go through it in one walk
read_lock(&tasklist_lock);
} else {
read_unlock(&tasklist_lock);
}
+ pos++;
}
}
}
ce_release(&CT_taskclass);
-}
-
-int ckrm_reclassify(int pid)
-{
- struct task_struct *tsk;
- int rc = 0;
-
- down(&async_serializer); // protect again race condition
- if (pid < 0) {
- // do we want to treat this as process group .. should YES ToDo
- rc = -EINVAL;
- } else if (pid == 0) {
- // reclassify all tasks in the system
- ckrm_reclassify_all_tasks();
- } else {
- // reclassify particular pid
- read_lock(&tasklist_lock);
- if ((tsk = find_task_by_pid(pid)) != NULL) {
- get_task_struct(tsk);
- read_unlock(&tasklist_lock);
- CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_RECLASSIFY, tsk);
- put_task_struct(tsk);
- } else {
- read_unlock(&tasklist_lock);
- rc = -EINVAL;
- }
- }
- up(&async_serializer);
- return rc;
+ return 0;
}
/*
atomic_read(&cls->core.hnode.parent->refcnt));
// If no CE registered for this classtype, following will be needed
// repeatedly;
- ce_regd = class_core(cls)->classtype->ce_regd;
+ ce_regd = atomic_read(&class_core(cls)->classtype->ce_regd);
cnode = &(class_core(cls)->hnode);
parcls = class_type(ckrm_task_class_t, cnode->parent);
}
/*
- * Change the core class of the given task.
+ * Change the core class of the given task
*/
int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls)
{
struct task_struct *tsk;
- if (!ckrm_validate_and_grab_core(class_core(cls)))
+ if (cls && !ckrm_validate_and_grab_core(class_core(cls)))
return -EINVAL;
read_lock(&tasklist_lock);
if ((tsk = find_task_by_pid(pid)) == NULL) {
read_unlock(&tasklist_lock);
- ckrm_core_drop(class_core(cls));
+ if (cls)
+ ckrm_core_drop(class_core(cls));
return -EINVAL;
}
get_task_struct(tsk);
/* Check permissions */
if ((!capable(CAP_SYS_NICE)) &&
(!capable(CAP_SYS_RESOURCE)) && (current->user != tsk->user)) {
- ckrm_core_drop(class_core(cls));
+ if (cls)
+ ckrm_core_drop(class_core(cls));
put_task_struct(tsk);
return -EPERM;
}
- down(&async_serializer); // protect again race condition
-
ce_protect(&CT_taskclass);
- ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL);
+ if (cls == NULL)
+ CE_CLASSIFY_TASK(CKRM_EVENT_RECLASSIFY,tsk);
+ else
+ ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL);
+
ce_release(&CT_taskclass);
put_task_struct(tsk);
- up(&async_serializer);
return 0;
}
pid_t pid;
int rc = -EINVAL;
- pid = (pid_t) simple_strtoul(obj, NULL, 10);
- if (pid > 0) {
- rc = ckrm_forced_reclassify_pid(pid,
- class_type(ckrm_task_class_t,
- target));
+ pid = (pid_t) simple_strtol(obj, NULL, 0);
+
+ down(&async_serializer); // protect again race condition with reclassify_class
+ if (pid < 0) {
+ // do we want to treat this as process group .. TBD
+ rc = -EINVAL;
+ } else if (pid == 0) {
+ rc = (target == NULL) ? ckrm_reclassify_all_tasks() : -EINVAL;
+ } else {
+ struct ckrm_task_class *cls = NULL;
+ if (target)
+ cls = class_type(ckrm_task_class_t,target);
+ rc = ckrm_forced_reclassify_pid(pid,cls);
}
+ up(&async_serializer);
return rc;
}
-#if 1
+#if 0
/******************************************************************************
* Debugging Task Classes: Utility functions
-/* Rule-based Classification Engine (RBCE) module
+/* Rule-based Classification Engine (RBCE) and
+ * Consolidated RBCE module code (combined)
*
* Copyright (C) Hubertus Franke, IBM Corp. 2003
* (C) Chandra Seetharaman, IBM Corp. 2003
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
*/
/* Changes
#include <linux/ckrm_ce.h>
#include <linux/ckrm_net.h>
#include "bitvector.h"
-#include "rbce.h"
+#include <linux/rbce.h>
#define DEBUG
#define POLICY_ACTION_REDO_ALL 0x02 // Recompute all rule flags
#define POLICY_ACTION_PACK_TERMS 0x04 // Time to pack the terms
+const int use_persistent_state = 1;
+
struct ckrm_eng_callback ckrm_ecbs;
// Term vector state
}
}
}
- put_class(cls);
if ((cls = find_class_name(classname)) != NULL) {
printk(KERN_ERR
"rbce ERROR: class %s exists in rbce after "
static struct rbce_private_data *create_private_data(struct rbce_private_data *,
int);
-int rbce_ckrm_reclassify(int pid)
+static inline
+void reset_evaluation(struct rbce_private_data *pdata,int termflag)
{
- printk("ckrm_reclassify_pid ignored\n");
- return -EINVAL;
-}
-
-int reclassify_pid(int pid)
-{
- struct task_struct *tsk;
-
- // FIXME: Need to treat -pid as process group
- if (pid < 0) {
- return -EINVAL;
- }
-
- if (pid == 0) {
- rbce_ckrm_reclassify(0); // just reclassify all tasks.
- }
- // if pid is +ve take control of the task, start evaluating it
- if ((tsk = find_task_by_pid(pid)) == NULL) {
- return -EINVAL;
- }
-
- if (unlikely(!RBCE_DATA(tsk))) {
- RBCE_DATAP(tsk) = create_private_data(NULL, 0);
- if (!RBCE_DATA(tsk)) {
- return -ENOMEM;
- }
- }
- RBCE_DATA(tsk)->evaluate = 1;
- rbce_ckrm_reclassify(pid);
- return 0;
+ /* reset TAG ruleterm evaluation results to pick up
+ * on next classification event
+ */
+ if (use_persistent_state && gl_mask_vecs[termflag]) {
+ bitvector_and_not( pdata->eval, pdata->eval,
+ gl_mask_vecs[termflag] );
+ bitvector_and_not( pdata->true, pdata->true,
+ gl_mask_vecs[termflag] );
+ }
}
-
+
int set_tasktag(int pid, char *tag)
{
char *tp;
+ int rc = 0;
struct task_struct *tsk;
struct rbce_private_data *pdata;
+ int len;
if (!tag) {
return -EINVAL;
}
-
- if ((tsk = find_task_by_pid(pid)) == NULL) {
- return -EINVAL;
- }
-
- tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC);
-
+ len = strlen(tag) + 1;
+ tp = kmalloc(len, GFP_ATOMIC);
if (!tp) {
return -ENOMEM;
}
+ strncpy(tp,tag,len);
+
+ read_lock(&tasklist_lock);
+ if ((tsk = find_task_by_pid(pid)) == NULL) {
+ rc = -EINVAL;
+ goto out;
+ }
if (unlikely(!RBCE_DATA(tsk))) {
RBCE_DATAP(tsk) = create_private_data(NULL, 0);
if (!RBCE_DATA(tsk)) {
- kfree(tp);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out;
}
}
pdata = RBCE_DATA(tsk);
kfree(pdata->app_tag);
}
pdata->app_tag = tp;
- strcpy(pdata->app_tag, tag);
- rbce_ckrm_reclassify(pid);
-
- return 0;
+ reset_evaluation(pdata,RBCE_TERMFLAG_TAG);
+
+ out:
+ read_unlock(&tasklist_lock);
+ if (rc != 0)
+ kfree(tp);
+ return rc;
}
/*====================== Classification Functions =======================*/
#endif // PDATA_DEBUG
-const int use_persistent_state = 1;
-
/*
* Allocate and initialize a rbce_private_data data structure.
*
va_list args;
void *cls = NULL;
struct task_struct *tsk;
+ struct rbce_private_data *pdata;
va_start(args, event);
tsk = va_arg(args, struct task_struct *);
break;
case CKRM_EVENT_RECLASSIFY:
+ if ((pdata = (RBCE_DATA(tsk)))) {
+ pdata->evaluate = 1;
+ }
cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype);
break;
{NULL}
};
-static int register_classtype_engines(void)
-{
+static void unregister_classtype_engines(void)
+ {
int rc;
struct ce_regtable_struct *ceptr = ce_regtable;
while (ceptr->name) {
- rc = ckrm_register_engine(ceptr->name, ceptr->cbs);
- printk("ce register with <%s> typeId=%d\n", ceptr->name, rc);
- if ((rc < 0) && (rc != -ENOENT))
- return (rc);
- if (rc != -ENOENT)
- *ceptr->clsvar = rc;
+ if (*ceptr->clsvar >= 0) {
+ printk("ce unregister with <%s>\n",ceptr->name);
+ while ((rc = ckrm_unregister_engine(ceptr->name)) == -EAGAIN)
+ ;
+ printk("ce unregister with <%s> rc=%d\n",ceptr->name,rc);
+ *ceptr->clsvar = -1;
+ }
ceptr++;
}
- return 0;
-}
+ }
-static void unregister_classtype_engines(void)
+static int register_classtype_engines(void)
{
int rc;
struct ce_regtable_struct *ceptr = ce_regtable;
while (ceptr->name) {
- if (*ceptr->clsvar >= 0) {
- printk("ce unregister with <%s>\n", ceptr->name);
- rc = ckrm_unregister_engine(ceptr->name);
- printk("ce unregister with <%s> rc=%d\n", ceptr->name,
- rc);
- *ceptr->clsvar = -1;
+ rc = ckrm_register_engine(ceptr->name, ceptr->cbs);
+ printk("ce register with <%s> typeId=%d\n",ceptr->name,rc);
+ if ((rc < 0) && (rc != -ENOENT)) {
+ unregister_classtype_engines();
+ return (rc);
}
+ if (rc != -ENOENT)
+ *ceptr->clsvar = rc;
ceptr++;
}
+ return 0;
}
// =========== /proc/sysctl/debug/rbce debug stuff =============
EXPORT_SYMBOL(change_rule);
EXPORT_SYMBOL(delete_rule);
EXPORT_SYMBOL(rename_rule);
-EXPORT_SYMBOL(reclassify_pid);
EXPORT_SYMBOL(set_tasktag);
module_init(init_rbce);
* Copyright (C) Hubertus Franke, IBM Corp. 2003
*
* Extension to be included into RBCE to collect delay and sample information
- * requires user daemon <crbcedmn> to activate.
+ * Requires a user daemon (e.g. crbcedmn) to activate.
*
* Latest version, more details at http://ckrm.sf.net
*
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
*/
+
/*******************************************************************************
*
* User-Kernel Communication Channel (UKCC)
+/* Tokens for Rule-based Classification Engine (RBCE) and
+ * Consolidated RBCE module code (combined)
+ *
+ * Copyright (C) Hubertus Franke, IBM Corp. 2003
+ * (C) Chandra Seetharaman, IBM Corp. 2003
+ * (C) Vivek Kashyap, IBM Corp. 2004
+ *
+ * Latest version, more details at http://ckrm.sf.net
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ *
+ */
+
#include <linux/parser.h>
#include <linux/ctype.h>
nterms = 0;
while (*rp++) {
- if (*rp == '>' || *rp == '<' || *rp == '=') {
+ if (*rp == '>' || *rp == '<' || *rp == '=' || *rp == '!') {
nterms++;
}
}
module_put(tsk->binfmt->module);
tsk->exit_code = code;
-#ifdef CONFIG_CKRM_TYPE_TASKCLASS
- numtasks_put_ref(tsk->taskclass);
-#endif
exit_notify(tsk);
#ifdef CONFIG_NUMA
mpol_free(tsk->mempolicy);
}
goto out_unlock;
}
-#warning MEF PLANETLAB: "if (vx_need_resched(p)) was if (!--p->time_slice) */"
if (vx_need_resched(p)) {
#ifdef CONFIG_CKRM_CPU_SCHEDULE
/* Hubertus ... we can abstract this out */