1 /* Data Collection Extension to Rule-based Classification Engine (RBCE) module
3 * Copyright (C) Hubertus Franke, IBM Corp. 2003
5 * Extension to be included into RBCE to collect delay and sample information
6 * Requires user daemon e.g. crbcedmn to activate.
8 * Latest version, more details at http://ckrm.sf.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it would be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22 /*******************************************************************************
24 * User-Kernel Communication Channel (UKCC)
25 * Protocol and communication handling
27 ******************************************************************************/
29 #include <linux/relayfs_fs.h>
// PSAMPLE(pdata): address of the ext_data.sample member reached through pdata.
31 #define PSAMPLE(pdata) (&((pdata)->ext_data.sample))
// Channel geometry: 4 sub-buffers of 1<<15 (32768) bytes each, i.e. 128 KB in total.
32 #define UKCC_N_SUB_BUFFERS (4)
33 #define UKCC_SUB_BUFFER_SIZE (1<<15)
34 #define UKCC_TOTAL_BUFFER_SIZE (UKCC_N_SUB_BUFFERS * UKCC_SUB_BUFFER_SIZE)
36 #define CHANNEL_AUTO_CONT 0 /* this is during debugging only. It allows
37 the module to continue sending data through
38 the UKCC if space frees up vs. going into
39 the recovery driven mode
// Relay channel id of the UKCC; -1 until create_ukcc_channel() stores the relay_open() result.
48 int ukcc_channel = -1;
// Current protocol state of the channel; starts out in UKCC_STANDBY.
49 static enum ukcc_state chan_state = UKCC_STANDBY;
// Returns non-zero iff the channel is currently in state UKCC_OK.
51 inline static int ukcc_ok(void)
53 return (chan_state == UKCC_OK);
// Forward declarations; the definitions appear further down in this file.
56 static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len);
57 static void client_attached(void);
58 static void client_detached(void);
// relayfs file-operation callback for the UKCC (registered as .fileop_notify in
// create_ukcc_channel()). Only RELAY_FILE_OPEN and RELAY_FILE_CLOSE are handled; any
// other fileop value matches neither branch.
60 static int ukcc_fileop_notify(int rchan_id,
61 struct file *filp, enum relay_fileop fileop)
// Function-static, so the value persists across calls.
// NOTE(review): presumably counts attached clients; the lines updating it are not visible here.
63 static int readers = 0;
64 if (fileop == RELAY_FILE_OPEN) {
65 // printk(KERN_DEBUG "got fileop_notify RELAY_FILE_OPEN for file %p\n",
// Logged on the "second client" path; the guarding condition is not visible in this view.
68 printk(KERN_DEBUG "only one client allowed, backoff .... \n");
// Take a module reference on open; it is dropped again with module_put() on close.
71 if (!try_module_get(THIS_MODULE))
76 } else if (fileop == RELAY_FILE_CLOSE) {
77 // printk(KERN_DEBUG "got fileop_notify RELAY_FILE_CLOSE for file %p\n",
// Drop the module reference taken on open.
81 module_put(THIS_MODULE);
// Opens the relay channel used as UKCC and stores its id in ukcc_channel.
// The outcome (failure code or the created size in KB) is logged at KERN_DEBUG.
// NOTE(review): the return statement is not visible here; init_rbce_ext_pre() treats a
// negative return value as an error code.
86 static int create_ukcc_channel(void)
// Callback table handed to relay_open(): user writes go to ukcc_cmd_deliver,
// file open/close events go to ukcc_fileop_notify.
88 static struct rchan_callbacks ukcc_callbacks = {
92 .user_deliver = ukcc_cmd_deliver,
94 .fileop_notify = ukcc_fileop_notify,
98 RELAY_USAGE_GLOBAL | RELAY_SCHEME_ANY | RELAY_TIMESTAMP_ANY;
100 // notify on subbuffer full (through poll)
101 channel_flags |= RELAY_DELIVERY_BULK;
102 // channel_flags |= RELAY_DELIVERY_PACKET;
103 // avoid overwrite, otherwise recovery will be nasty...
104 channel_flags |= RELAY_MODE_NO_OVERWRITE;
106 ukcc_channel = relay_open(CRBCE_UKCC_NAME,
107 UKCC_SUB_BUFFER_SIZE,
110 &ukcc_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0);
// A negative channel id signals that relay_open() failed.
111 if (ukcc_channel < 0)
112 printk(KERN_DEBUG "crbce: ukcc creation failed, errcode: %d\n",
115 printk(KERN_DEBUG "crbce: ukcc created (%u KB)\n",
116 UKCC_TOTAL_BUFFER_SIZE >> 10);
// Closes the relay channel when one is open (ukcc_channel >= 0) and puts the
// protocol state back to UKCC_STANDBY.
120 static inline void close_ukcc_channel(void)
122 if (ukcc_channel >= 0) {
123 relay_close(ukcc_channel);
125 chan_state = UKCC_STANDBY;
// rec_set_hdr(r,t,p): store record type t and pid p in r->hdr.
// Note: r is evaluated more than once, so pass a side-effect-free expression.
129 #define rec_set_hdr(r,t,p) ((r)->hdr.type = (t), (r)->hdr.pid = (p))
// rec_set_timehdr(r,t,p,c): like rec_set_hdr, and additionally stamp r->hdr.jiffies with
// the current jiffies value and store c, cast to unsigned long, in r->hdr.cls.
130 #define rec_set_timehdr(r,t,p,c) (rec_set_hdr(r,t,p), \
131 (r)->hdr.jiffies = jiffies, (r)->hdr.cls=(unsigned long)(c) )
133 #if CHANNEL_AUTO_CONT
135 /* we only provide this for debugging.. it allows us to send records
136 * based on availability in the channel when the UKCC stalls rather than
137 * going through the UKCC recovery protocol
// Debug-only (CHANNEL_AUTO_CONT) variant of rec_send_len(r,l): writes l bytes of r with
// relay_write() and sets chan_state to UKCC_OK when the write returns > 0, otherwise to
// UKCC_STANDBY. Transitions between "ok" and "stalled" are logged at KERN_DEBUG.
// rec_send(r) is the fixed-size form and sends sizeof(*(r)) bytes.
140 #define rec_send_len(r,l) \
142 int chan_wasok = (chan_state == UKCC_OK); \
143 int chan_isok = (relay_write(ukcc_channel, \
144 (r),(l),-1,NULL) > 0); \
145 chan_state = chan_isok ? UKCC_OK : UKCC_STANDBY; \
146 if (chan_wasok && !chan_isok) { \
147 printk(KERN_DEBUG "Channel stalled\n"); \
148 } else if (!chan_wasok && chan_isok) { \
149 printk(KERN_DEBUG "Channel continues\n"); \
153 #define rec_send(r) rec_send_len(r,sizeof(*(r)))
157 /* Default UKCC channel protocol.
158 * Though a UKCC buffer overflow should not happen ever, it is possible iff
159 * the user daemon stops reading for some reason. Hence we provide a simple
160 * protocol based on 3 states
161 * UKCC_OK := channel is active and properly working. When a channel
162 * write fails we move to state UKCC_FULL.
163 * UKCC_FULL := channel is active, but the last rec_send has failed. As
164 * a result we will try to send an indication to the daemon
165 * that this has happened. When that succeeds, we move to
166 * state UKCC_STANDBY.
167 * UKCC_STANDBY := we are waiting to be restarted by the user daemon
// Handles the "channel full" situation of the UKCC protocol: under ukcc_state_lock any
// state other than UKCC_STANDBY is moved to UKCC_STANDBY, and a CRBCE_REC_UKCC_FULL
// record is written to the channel. If that write fails too (relay_write() <= 0) the
// state becomes UKCC_FULL.
// NOTE(review): the guard around the record send (presumably `if (send)`) is not visible here.
171 static void ukcc_full(void)
173 static spinlock_t ukcc_state_lock = SPIN_LOCK_UNLOCKED;
174 /* protect transition from OK -> FULL to ensure only one record is sent,
175 rest we do not need to protect, protocol implies that. we keep the
179 spin_lock(&ukcc_state_lock);
// `send` is true only for the caller that found the channel not yet in STANDBY.
180 if ((send = (chan_state != UKCC_STANDBY)))
181 chan_state = UKCC_STANDBY; /* assume we can send */
182 spin_unlock(&ukcc_state_lock);
185 struct crbce_ukcc_full rec;
186 rec_set_timehdr(&rec, CRBCE_REC_UKCC_FULL, 0, 0);
187 if (relay_write(ukcc_channel, &rec,
188 sizeof(rec), -1, NULL) <= 0) {
189 /* channel remains full .. try with next one */
190 chan_state = UKCC_FULL;
// Default (protocol-driven) variant of rec_send_len(r,l): dispatches on chan_state and
// writes the record with relay_write(); the individual cases and the failure handling
// are not visible in this view. rec_send(r) sends sizeof(*(r)) bytes.
195 #define rec_send_len(r,l) \
197 switch (chan_state) { \
199 if (relay_write(ukcc_channel,(r), \
210 #define rec_send(r) rec_send_len(r,sizeof(*(r)))
214 /******************************************************************************
216 * Callbacks for the CKRM engine.
217 * In each we do the necessary classification and event record generation
218 * We generate 3 kinds of records in the callback
219 * (a) FORK send the pid, the class and the ppid
220 * (b) RECLASSIFICATION send the pid, the class and < sample data +
222 * (c) EXIT send the pid
224 ******************************************************************************/
// Copies the task's delay information (tsk->delays) into *delay by struct assignment.
228 static inline void copy_delay(struct task_delay_info *delay,
229 struct task_struct *tsk)
231 *delay = tsk->delays;
// Clears every byte of *delay with memset(); no locking or atomicity is applied here.
234 static inline void zero_delay(struct task_delay_info *delay)
236 memset(delay, 0, sizeof(struct task_delay_info));
237 /* we need to think about doing this 64-bit atomic */
// Clears every byte of *sample with memset(); no locking or atomicity is applied here.
240 static inline void zero_sample(struct task_sample_info *sample)
242 memset(sample, 0, sizeof(struct task_sample_info));
243 /* we need to think about doing this 64-bit atomic */
// Scans the buffer at ptr as an array of unsigned long words and tracks in `iszero`
// whether all words seen so far are zero; the loop stops at the first non-zero word.
// Only len / sizeof(unsigned long) whole words are examined, so trailing bytes are
// ignored when len is not a multiple of the word size.
// NOTE(review): the return statement is not visible here; presumably returns iszero.
246 static inline int check_zero(void *ptr, int len)
250 unsigned long *uptr = (unsigned long *)ptr;
252 for (i = len / sizeof(unsigned long); i-- && iszero; uptr++)
253 // assumes len is a multiple of sizeof(unsigned long)
254 iszero &= (*uptr == 0);
// Walks the buffer at ptr as len / sizeof(unsigned long) unsigned long words.
// NOTE(review): the loop body and return are not visible here; presumably reports
// whether any word is non-zero (see sample_changed()/delay_changed()) - confirm.
258 static inline int check_not_zero(void *ptr, int len)
261 unsigned long *uptr = (unsigned long *)ptr;
263 for (i = len / sizeof(unsigned long); i--; uptr++)
264 // assumes len is a multiple of sizeof(unsigned long)
// Returns the result of check_not_zero() applied to the whole sample structure.
270 static inline int sample_changed(struct task_sample_info *s)
272 return check_not_zero(s, sizeof(*s));
// Returns the result of check_not_zero() applied to the whole delay structure.
274 static inline int delay_changed(struct task_delay_info *d)
276 return check_not_zero(d, sizeof(*d));
// Builds a task data record (sample counters plus delay info) for tsk.
//   tsk         - task whose counters are reported
//   event       - record type stored in the header
//   core        - class stored in the header; when NULL a per-task value is used instead
//   send_forced - non-zero: build the record regardless of delta_mode / changed counters
// NOTE(review): the actual send and the return value are not visible in this view;
// send_task_data() adds the return value to a running send count.
280 send_task_record(struct task_struct *tsk, int event,
281 struct ckrm_core_class *core, int send_forced)
283 struct crbce_rec_task_data rec;
284 struct rbce_private_data *pdata;
289 pdata = RBCE_DATA(tsk);
291 // printk(KERN_DEBUG "send [%d]<%s>: no pdata\n",tsk->pid,tsk->comm);
// A record is built when forced, when not in delta mode, or when either the sample
// counters or the delay info are reported as changed.
294 if (send_forced || (delta_mode == 0)
295 || sample_changed(PSAMPLE(RBCE_DATA(tsk)))
296 || delay_changed(&tsk->delays)) {
297 rec_set_timehdr(&rec, event, tsk->pid,
298 core ? core : (struct ckrm_core_class *)tsk->
300 rec.sample = *PSAMPLE(RBCE_DATA(tsk));
301 copy_delay(&rec.delay, tsk);
303 if (delta_mode || send_forced) {
304 // on reclassify or delta mode reset the counters
305 zero_sample(PSAMPLE(RBCE_DATA(tsk)));
306 zero_delay(&tsk->delays);
// Sends a forced CRBCE_REC_EXIT task record for tsk (core = NULL, send_forced = 1).
313 static inline void send_exit_notification(struct task_struct *tsk)
315 send_task_record(tsk, CRBCE_REC_EXIT, NULL, 1);
// Event hook: turns CKRM events for task tsk (with class `core`) into UKCC records.
// Only some of the handled cases are visible in this view.
319 rbce_tc_ext_notify(int event, void *core, struct task_struct *tsk)
321 struct crbce_rec_fork rec;
324 case CKRM_EVENT_FORK:
// Fork record: carries the parent's pid in addition to the header.
326 rec.ppid = tsk->parent->pid;
// NOTE(review): the header type is the CKRM event code here, whereas other records in
// this file use CRBCE_REC_* codes - confirm this is intended.
327 rec_set_timehdr(&rec, CKRM_EVENT_FORK, tsk->pid, core);
331 case CKRM_EVENT_MANUAL:
// Forced task record using the event code as record type.
335 send_task_record(tsk, event, (struct ckrm_core_class *)core, 1);
340 /*====================== end classification engine =======================*/
342 static void sample_task_data(unsigned long unused);
// Sampling timer; expires == 0 is used as the "not armed" marker by stop_sample_timer().
344 struct timer_list sample_timer = {.expires = 0,.function = sample_task_data };
// Sampling interval. NOTE(review): the initial value is 250 ms converted to jiffies, but
// start_sample_timer() applies HZ / 1000 to this variable again and CRBCE_CMD_SET_TIMER
// stores the user-supplied interval unconverted - the units look inconsistent unless
// HZ == 1000; confirm.
345 unsigned long timer_interval_length = (250 * HZ) / 1000;
// Cancels the sampling timer if it is marked as armed (expires > 0) using
// del_timer_sync(), then resets expires to 0 to mark it idle.
347 inline void stop_sample_timer(void)
349 if (sample_timer.expires > 0) {
350 del_timer_sync(&sample_timer);
351 sample_timer.expires = 0;
// Arms the sampling timer to fire (timer_interval_length * HZ) / 1000 jiffies from now.
// Does nothing when timer_interval_length is 0.
355 inline void start_sample_timer(void)
357 if (timer_interval_length > 0) {
358 sample_timer.expires =
359 jiffies + (timer_interval_length * HZ) / 1000;
360 add_timer(&sample_timer);
// Walks every thread in the system under the tasklist_lock read lock and issues a
// non-forced CRBCE_REC_SAMPLE task record for each one. A CRBCE_REC_DATA_DELIMITER
// header is prepared both before and after the walk.
// NOTE(review): the lines that send limrec are not visible in this view.
364 static void send_task_data(void)
366 struct crbce_rec_data_delim limrec;
367 struct task_struct *proc, *thread;
371 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
374 read_lock(&tasklist_lock);
375 do_each_thread(proc, thread) {
// Accumulate the per-thread return values of send_task_record().
378 sendcnt += send_task_record(thread, CRBCE_REC_SAMPLE, NULL, 0);
380 } while_each_thread(proc, thread);
381 read_unlock(&tasklist_lock);
384 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
387 // printk(KERN_DEBUG "send_task_data mode=%d t#=%d s#=%d\n",
388 // delta_mode,taskcnt,sendcnt);
// Sends a CRBCE_REC_CLASS_INFO record describing class cls and the given action code.
// The record is sent with a trimmed length rather than sizeof(cinfo).
391 static void notify_class_action(struct rbce_class *cls, int action)
393 struct crbce_class_info cinfo;
396 rec_set_timehdr(&cinfo, CRBCE_REC_CLASS_INFO, 0, cls->classobj);
397 cinfo.action = action;
// Copy at most CRBCE_MAX_CLASS_NAME_LEN - 1 name bytes and NUL-terminate explicitly.
398 len = strnlen(cls->obj.name, CRBCE_MAX_CLASS_NAME_LEN - 1);
399 memcpy(&cinfo.name, cls->obj.name, len);
400 cinfo.name[len] = '\0';
// Record length = struct size minus CRBCE_MAX_CLASS_NAME_LEN plus the bytes counted in len.
// NOTE(review): assumes cinfo.name is CRBCE_MAX_CLASS_NAME_LEN bytes; lines between the
// copy and this computation are not visible and may adjust len.
404 len += sizeof(cinfo) - CRBCE_MAX_CLASS_NAME_LEN;
405 rec_send_len(&cinfo, len);
// Sends a class-info record with action code 1 for every entry on class_list, holding
// the global_rwlock read lock for the duration of the walk.
408 static void send_classlist(void)
410 struct rbce_class *cls;
412 read_lock(&global_rwlock);
413 list_for_each_entry(cls, &class_list, obj.link) {
414 notify_class_action(cls, 1);
416 read_unlock(&global_rwlock);
422 * This function resends all essential task information to the client.
// Re-sends the state a client needs: first the class list, then one CRBCE_REC_TASKINFO
// record (a fork-style record carrying the parent pid) per thread. A
// CRBCE_REC_DATA_DELIMITER header is prepared before and after the thread walk.
// NOTE(review): the lines that actually send limrec/rec are not visible in this view.
425 static void resend_task_info(void)
427 struct crbce_rec_data_delim limrec;
428 struct crbce_rec_fork rec;
429 struct task_struct *proc, *thread;
431 send_classlist(); // first send available class information
434 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
// Unlike send_task_data(), this walk takes the write side of tasklist_lock.
437 write_lock(&tasklist_lock); // avoid any mods during this phase
438 do_each_thread(proc, thread) {
440 rec.ppid = thread->parent->pid;
441 rec_set_timehdr(&rec, CRBCE_REC_TASKINFO, thread->pid,
446 while_each_thread(proc, thread);
447 write_unlock(&tasklist_lock);
450 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
// Defined outside this file; sample_task_data() uses it to tell "running on a CPU"
// apart from "runnable but waiting" for TASK_RUNNING threads.
454 extern int task_running_sys(struct task_struct *);
// Attaches freshly created private data to every thread that has none yet
// (RBCE_DATA(thread) == NULL), holding the tasklist_lock write lock during the walk.
// NOTE(review): create_private_data() is called with the write lock held and its result
// is stored unchecked - confirm it cannot sleep and that a NULL result is tolerated.
456 static void add_all_private_data(void)
458 struct task_struct *proc, *thread;
460 write_lock(&tasklist_lock);
461 do_each_thread(proc, thread) {
462 if (RBCE_DATA(thread) == NULL)
463 RBCE_DATAP(thread) = create_private_data(NULL, 0);
465 while_each_thread(proc, thread);
466 write_unlock(&tasklist_lock);
// Timer callback of sample_timer: takes one sample of every thread's state under the
// tasklist_lock read lock, bumping per-task counters in the private sample data, and
// then re-arms the timer through start_sample_timer(). The argument is unused.
469 static void sample_task_data(unsigned long unused)
471 struct task_struct *proc, *thread;
475 read_lock(&tasklist_lock);
476 do_each_thread(proc, thread) {
477 struct rbce_private_data *pdata = RBCE_DATA(thread);
480 // some weird race condition .. simply ignore
// CPU state: TASK_RUNNING threads count as cpu_running when task_running_sys() is
// true, otherwise as cpu_waiting.
// NOTE(review): the counters are incremented through an atomic_t cast; this assumes the
// fields are layout-compatible with atomic_t - confirm.
483 if (thread->state == TASK_RUNNING) {
484 if (task_running_sys(thread)) {
485 atomic_inc((atomic_t *) &
486 (PSAMPLE(pdata)->cpu_running));
489 atomic_inc((atomic_t *) &
490 (PSAMPLE(pdata)->cpu_waiting));
494 /* update IO state */
// IO state: threads flagged PF_IOWAIT count as memio_delayed when PF_MEMIO is also set,
// otherwise as io_delayed.
495 if (thread->flags & PF_IOWAIT) {
496 if (thread->flags & PF_MEMIO)
497 atomic_inc((atomic_t *) &
498 (PSAMPLE(pdata)->memio_delayed));
500 atomic_inc((atomic_t *) &
501 (PSAMPLE(pdata)->io_delayed));
504 while_each_thread(proc, thread);
505 read_unlock(&tasklist_lock);
506 // printk(KERN_DEBUG "sample_timer: run=%d wait=%d\n",run,wait);
// Re-arm for the next sampling period.
507 start_sample_timer();
// Handler for commands the user daemon writes into the channel (registered as
// .user_deliver in create_ukcc_channel()). The buffer from/len is interpreted as a
// struct crbce_command; a CRBCE_REC_KERNEL_CMD_DONE record echoing the command code is
// prepared as acknowledgement.
// NOTE(review): cmdrec fields are accessed before any length check visible in this view;
// confirm len is guaranteed to cover the command header. The lines that set and send the
// result code are not visible here.
510 static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len)
512 struct crbce_command *cmdrec = (struct crbce_command *)from;
513 struct crbce_cmd_done cmdret;
516 // printk(KERN_DEBUG "ukcc_cmd_deliver: %d %d len=%d:%d\n",cmdrec->type,
517 // cmdrec->cmd,cmdrec->len,len);
519 cmdrec->len = len; // record the delivered length in the command so a wrong
520 // length written by the user cannot accidentally
521 // break the protocol
// Anything that is not a kernel command record is rejected.
523 if (cmdrec->type != CRBCE_REC_KERNEL_CMD) {
528 switch (cmdrec->cmd) {
529 case CRBCE_CMD_SET_TIMER:
531 struct crbce_cmd_settimer *cptr =
532 (struct crbce_cmd_settimer *)cmdrec;
// The command must have exactly the size of its structure.
533 if (len != sizeof(*cptr)) {
// Non-zero intervals below 10 are raised to 10; 0 is kept as is.
538 timer_interval_length = cptr->interval;
539 if ((timer_interval_length > 0)
540 && (timer_interval_length < 10))
541 timer_interval_length = 10;
542 // anything finer can create problems
543 printk(KERN_INFO "CRBCE set sample collect timer %lu\n",
544 timer_interval_length);
545 start_sample_timer();
548 case CRBCE_CMD_SEND_DATA:
550 struct crbce_cmd_send_data *cptr =
551 (struct crbce_cmd_send_data *)cmdrec;
552 if (len != sizeof(*cptr)) {
// Takes over the delta mode requested by the daemon.
556 delta_mode = cptr->delta_mode;
// START: make sure every task has private data, then open the channel for records.
560 case CRBCE_CMD_START:
561 add_all_private_data();
562 chan_state = UKCC_OK;
// NOTE(review): case label not visible here - presumably the stop command: go back to
// STANDBY and release all private data.
567 chan_state = UKCC_STANDBY;
568 free_all_private_data();
// Acknowledgement header: DONE record echoing the command code.
577 cmdret.hdr.type = CRBCE_REC_KERNEL_CMD_DONE;
578 cmdret.hdr.cmd = cmdrec->cmd;
581 // printk(KERN_DEBUG "ukcc_cmd_deliver ACK: %d %d rc=%d %d\n",cmdret.hdr.type,
582 // cmdret.hdr.cmd,rc,sizeof(cmdret));
// Logs the attaching process (current->pid plus a further string argument not visible
// here) and resets the relay channel.
585 static void client_attached(void)
587 printk(KERN_DEBUG "client [%d]<%s> attached to UKCC\n", current->pid,
589 relay_reset(ukcc_channel);
// Logs the detaching process, puts the protocol back into UKCC_STANDBY, resets the
// relay channel and releases all per-task private data.
592 static void client_detached(void)
594 printk(KERN_DEBUG "client [%d]<%s> detached to UKCC\n", current->pid,
596 chan_state = UKCC_STANDBY;
598 relay_reset(ukcc_channel);
599 free_all_private_data();
// Early init step: creates the UKCC channel. Returns the negative value from
// create_ukcc_channel() on failure and 0 otherwise.
602 static int init_rbce_ext_pre(void)
606 rc = create_ukcc_channel();
607 return ((rc < 0) ? rc : 0);
// Late init step: initialises the sampling timer structure with init_timer().
// NOTE(review): the return statement is not visible in this view.
610 static int init_rbce_ext_post(void)
612 init_timer(&sample_timer);
// Module exit step: closes the UKCC channel.
// NOTE(review): a line between the signature and this call is not visible here.
616 static void exit_rbce_ext(void)
619 close_ukcc_channel();