1 /* Data Collection Extension to Rule-based Classification Engine (RBCE) module
3 * Copyright (C) Hubertus Franke, IBM Corp. 2003
5 * Extension to be included into RBCE to collect delay and sample information
6 * requires user daemon <crbcedmn> to activate.
8 * Latest version, more details at http://ckrm.sf.net
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
17 /*******************************************************************************
19 * User-Kernel Communication Channel (UKCC)
20 * Protocol and communication handling
22 ******************************************************************************/
24 #include <linux/relayfs_fs.h>
// PSAMPLE(pdata): address of the sample member inside pdata->ext_data.
26 #define PSAMPLE(pdata) (&((pdata)->ext_data.sample))
// Channel geometry: 4 sub-buffers of 1<<15 (32768) bytes each; the total
// buffer size is their product (131072 bytes).
27 #define UKCC_N_SUB_BUFFERS (4)
28 #define UKCC_SUB_BUFFER_SIZE (1<<15)
29 #define UKCC_TOTAL_BUFFER_SIZE (UKCC_N_SUB_BUFFERS * UKCC_SUB_BUFFER_SIZE)
// CHANNEL_AUTO_CONT selects, via #if further down, which rec_send_len()
// variant is compiled; 0 selects the default (non-debug) one.
31 #define CHANNEL_AUTO_CONT 0 /* this is during debugging only. It allows
32 the module to continue sending data through
33 the UKCC if space frees up vs. going into
34 the recovery driven mode
// ukcc_channel holds the id returned by relay_open(); it starts at -1 and
// a negative value is treated as "no channel" by close_ukcc_channel().
43 int ukcc_channel = -1;
// Current protocol state of the channel; starts out as UKCC_STANDBY.
44 static enum ukcc_state chan_state = UKCC_STANDBY;
46 inline static int ukcc_ok(void)
48 return (chan_state == UKCC_OK);
// Forward declarations. ukcc_cmd_deliver is installed as the channel's
// user_deliver callback before its definition; client_attached and
// client_detached are defined further down in this file.
51 static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len);
52 static void client_attached(void);
53 static void client_detached(void);
// ukcc_fileop_notify - fileop_notify callback of the UKCC relay channel.
//   rchan_id  channel id (not referenced in the visible code)
//   filp      file being operated on (only named in commented-out debug)
//   fileop    operation; RELAY_FILE_OPEN and RELAY_FILE_CLOSE are handled
// The function-local static "readers" backs the single-client policy named
// in the log message below.
// NOTE(review): the return statements, the updates of "readers" and any
// calls made on attach/detach are not visible in this listing - confirm
// against the complete file.
55 static int ukcc_fileop_notify(int rchan_id,
56 struct file *filp, enum relay_fileop fileop)
58 static int readers = 0;
59 if (fileop == RELAY_FILE_OPEN) {
60 // printk("got fileop_notify RELAY_FILE_OPEN for file %p\n",
63 printk("only one client allowed, backoff .... \n");
// take a module reference on open; released again in the CLOSE branch
66 if (!try_module_get(THIS_MODULE))
71 } else if (fileop == RELAY_FILE_CLOSE) {
72 // printk("got fileop_notify RELAY_FILE_CLOSE for file %p\n",
// drop the module reference taken on open
76 module_put(THIS_MODULE);
// create_ukcc_channel - open the relay channel that serves as the UKCC.
// Installs ukcc_cmd_deliver and ukcc_fileop_notify as channel callbacks,
// requests bulk delivery in no-overwrite mode, stores the id returned by
// relay_open() in ukcc_channel and logs failure (negative id) or success
// together with the total buffer size in KB.
// NOTE(review): the declaration of channel_flags, part of the relay_open()
// argument list and the return statement are not visible in this listing;
// init_rbce_ext_pre() treats a negative return as an error code.
81 static int create_ukcc_channel(void)
83 static struct rchan_callbacks ukcc_callbacks = {
87 .user_deliver = ukcc_cmd_deliver,
89 .fileop_notify = ukcc_fileop_notify,
93 RELAY_USAGE_GLOBAL | RELAY_SCHEME_ANY | RELAY_TIMESTAMP_ANY;
95 // notify on subbuffer full (through poll)
96 channel_flags |= RELAY_DELIVERY_BULK;
97 // channel_flags |= RELAY_DELIVERY_PACKET;
98 // avoid overwrite, otherwise recovery will be nasty...
99 channel_flags |= RELAY_MODE_NO_OVERWRITE;
101 ukcc_channel = relay_open(CRBCE_UKCC_NAME,
102 UKCC_SUB_BUFFER_SIZE,
105 &ukcc_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0);
106 if (ukcc_channel < 0)
107 printk("crbce: ukcc creation failed, errcode: %d\n",
110 printk("crbce: ukcc created (%u KB)\n",
111 UKCC_TOTAL_BUFFER_SIZE >> 10);
115 static inline void close_ukcc_channel(void)
117 if (ukcc_channel >= 0) {
118 relay_close(ukcc_channel);
120 chan_state = UKCC_STANDBY;
// rec_set_hdr(r,t,p): store record type t and pid p in r->hdr.
// rec_set_timehdr(r,t,p,c): as rec_set_hdr, and additionally store the
// current jiffies value and the class c (cast to unsigned long) in r->hdr.
// Both expand to comma expressions that evaluate r more than once, so r
// must be free of side effects.
124 #define rec_set_hdr(r,t,p) ((r)->hdr.type = (t), (r)->hdr.pid = (p))
125 #define rec_set_timehdr(r,t,p,c) (rec_set_hdr(r,t,p), \
126 (r)->hdr.jiffies = jiffies, (r)->hdr.cls=(unsigned long)(c) )
// Debug-only variant, compiled when CHANNEL_AUTO_CONT is non-zero.
// rec_send_len(r,l) writes l bytes of record r with relay_write() and sets
// chan_state to UKCC_OK when the write returns > 0, otherwise to
// UKCC_STANDBY; a message is logged whenever the outcome differs from the
// state before the write.  rec_send(r) sends sizeof(*(r)) bytes of r.
// NOTE(review): the opening and closing lines of the macro body are not
// visible in this listing.
128 #if CHANNEL_AUTO_CONT
130 /* we only provide this for debugging.. it allows us to send records
131 * based on availability in the channel when the UKCC stalles rather
132 * going through the UKCC recovery protocol
135 #define rec_send_len(r,l) \
137 int chan_wasok = (chan_state == UKCC_OK); \
138 int chan_isok = (relay_write(ukcc_channel, \
139 (r),(l),-1,NULL) > 0); \
140 chan_state = chan_isok ? UKCC_OK : UKCC_STANDBY; \
141 if (chan_wasok && !chan_isok) { \
142 printk("Channel stalled\n"); \
143 } else if (!chan_wasok && chan_isok) { \
144 printk("Channel continues\n"); \
148 #define rec_send(r) rec_send_len(r,sizeof(*(r)))
152 /* Default UKCC channel protocol.
153 * Though a UKCC buffer overflow should not happen ever, it is possible iff
154 * the user daemon stops reading for some reason. Hence we provide a simple
155 * protocol based on 3 states
156 * UKCC_OK := channel is active and properly working. When a channel
157 * write fails we move to state UKCC_FULL.
158 * UKCC_FULL := channel is active, but the last send_rec has failed. As
159 * a result we will try to send an indication to the daemon
160 * that this has happened. When that succeeds, we move to
161 * state UKCC_STANDBY.
162 * UKCC_STANDBY := we are waiting to be restarted by the user daemon
// ukcc_full - try to tell the daemon that a channel write has failed.
// Under ukcc_state_lock the state is moved to UKCC_STANDBY unless it is
// already there, and "send" records whether this caller made that move.
// A CRBCE_REC_UKCC_FULL record is then written; when relay_write() returns
// <= 0 the state is set to UKCC_FULL so a later call tries again.
// NOTE(review): the declaration of "send" and the guard that consumes it
// are not visible in this listing; only the record header is filled in
// before the whole struct is written - confirm the struct has no other
// members.
166 static void ukcc_full(void)
168 static spinlock_t ukcc_state_lock = SPIN_LOCK_UNLOCKED;
169 /* protect transition from OK -> FULL to ensure only one record is sent,
170 rest we do not need to protect, protocol implies that. we keep the
174 spin_lock(&ukcc_state_lock);
175 if ((send = (chan_state != UKCC_STANDBY)))
176 chan_state = UKCC_STANDBY; /* assume we can send */
177 spin_unlock(&ukcc_state_lock);
180 struct crbce_ukcc_full rec;
181 rec_set_timehdr(&rec, CRBCE_REC_UKCC_FULL, 0, 0);
182 if (relay_write(ukcc_channel, &rec,
183 sizeof(rec), -1, NULL) <= 0) {
184 /* channel is remains full .. try with next one */
185 chan_state = UKCC_FULL;
// Second definition of rec_send_len(r,l) - presumably the branch used when
// CHANNEL_AUTO_CONT is 0.  The visible part dispatches on chan_state and
// passes the record to relay_write() on ukcc_channel.
// NOTE(review): most of the macro body (case labels, failure handling) is
// not visible in this listing; check it against the complete file.
// rec_send(r) sends sizeof(*(r)) bytes of r.
190 #define rec_send_len(r,l) \
192 switch (chan_state) { \
194 if (relay_write(ukcc_channel,(r), \
205 #define rec_send(r) rec_send_len(r,sizeof(*(r)))
209 /******************************************************************************
211 * Callbacks for the CKRM engine.
212 * In each we do the necessary classification and event record generation
213 * We generate 3 kinds of records in the callback
214 * (a) FORK send the pid, the class and the ppid
215 * (b) RECLASSIFICATION send the pid, the class and < sample data +
217 * (c) EXIT send the pid
219 ******************************************************************************/
223 static inline void copy_delay(struct task_delay_info *delay,
224 struct task_struct *tsk)
226 *delay = tsk->delays;
229 static inline void zero_delay(struct task_delay_info *delay)
231 memset(delay, 0, sizeof(struct task_delay_info));
232 /* we need to think about doing this 64-bit atomic */
235 static inline void zero_sample(struct task_sample_info *sample)
237 memset(sample, 0, sizeof(struct task_sample_info));
238 /* we need to think about doing this 64-bit atomic */
/*
 * check_zero - test whether a memory area contains only zero bytes.
 * @ptr: start of the area; must be aligned for unsigned long access
 * @len: size of the area in bytes
 *
 * The area is scanned one unsigned long at a time.  Bytes left over when
 * @len is not a multiple of sizeof(unsigned long) are checked one by one
 * instead of being ignored, so a non-zero tail is no longer missed.  A
 * @len of zero or less describes an empty area.
 *
 * Returns 1 if every byte is zero (or the area is empty), 0 otherwise.
 */
static inline int check_zero(void *ptr, int len)
{
	const unsigned long *word = ptr;
	const unsigned char *byte;
	int nwords;
	int ntail;

	if (len <= 0)
		return 1;

	nwords = len / (int)sizeof(unsigned long);
	ntail = len % (int)sizeof(unsigned long);

	for (; nwords > 0; nwords--, word++) {
		if (*word != 0)
			return 0;
	}

	/* leftover bytes behind the last full word */
	byte = (const unsigned char *)word;
	for (; ntail > 0; ntail--, byte++) {
		if (*byte != 0)
			return 0;
	}

	return 1;
}
/*
 * check_not_zero - test whether a memory area contains any non-zero byte.
 * @ptr: start of the area; must be aligned for unsigned long access
 * @len: size of the area in bytes
 *
 * The area is scanned one unsigned long at a time.  Bytes left over when
 * @len is not a multiple of sizeof(unsigned long) are checked one by one
 * instead of being ignored, so a change confined to the tail is still
 * detected.  A @len of zero or less describes an empty area.
 *
 * Returns 1 as soon as a non-zero byte is found, 0 otherwise.
 */
static inline int check_not_zero(void *ptr, int len)
{
	const unsigned long *word = ptr;
	const unsigned char *byte;
	int nwords;
	int ntail;

	if (len <= 0)
		return 0;

	nwords = len / (int)sizeof(unsigned long);
	ntail = len % (int)sizeof(unsigned long);

	for (; nwords > 0; nwords--, word++) {
		if (*word)
			return 1;
	}

	/* leftover bytes behind the last full word */
	byte = (const unsigned char *)word;
	for (; ntail > 0; ntail--, byte++) {
		if (*byte)
			return 1;
	}

	return 0;
}
265 static inline int sample_changed(struct task_sample_info *s)
267 return check_not_zero(s, sizeof(*s));
269 static inline int delay_changed(struct task_delay_info *d)
271 return check_not_zero(d, sizeof(*d));
// send_task_record - build a task data record (sample + delay counters).
//   tsk          task the record describes
//   event        record type placed in the header
//   core         class for the header; when NULL a value taken from tsk
//                is used instead
//   send_forced  non-zero: build the record even if nothing changed
// A record is built when send_forced is set, when delta_mode is 0, or when
// either the task's sample counters or its delay counters are non-zero.
// In delta mode, or on a forced send, the task's sample and delay counters
// are zeroed after they have been copied into the record.
// NOTE(review): the return type, the handling of a NULL pdata, the actual
// send call and the return value are not visible in this listing; callers
// add the return value to a count of sent records.
275 send_task_record(struct task_struct *tsk, int event,
276 struct ckrm_core_class *core, int send_forced)
278 struct crbce_rec_task_data rec;
279 struct rbce_private_data *pdata;
284 pdata = RBCE_DATA(tsk);
286 // printk("send [%d]<%s>: no pdata\n",tsk->pid,tsk->comm);
289 if (send_forced || (delta_mode == 0)
290 || sample_changed(PSAMPLE(RBCE_DATA(tsk)))
291 || delay_changed(&tsk->delays)) {
292 rec_set_timehdr(&rec, event, tsk->pid,
293 core ? core : (struct ckrm_core_class *)tsk->
295 rec.sample = *PSAMPLE(RBCE_DATA(tsk));
296 copy_delay(&rec.delay, tsk);
298 if (delta_mode || send_forced) {
299 // on reclassify or delta mode reset the counters
300 zero_sample(PSAMPLE(RBCE_DATA(tsk)));
301 zero_delay(&tsk->delays);
308 static inline void send_exit_notification(struct task_struct *tsk)
310 send_task_record(tsk, CRBCE_REC_EXIT, NULL, 1);
// rbce_tc_ext_notify - event hook that turns engine events into records.
//   event  event code; the visible cases are CKRM_EVENT_FORK and
//          CKRM_EVENT_MANUAL
//   core   class associated with the event
//   tsk    task the event refers to
// For CKRM_EVENT_FORK a fork record carrying the parent's pid is prepared.
// The CKRM_EVENT_MANUAL case reaches a forced send_task_record() call for
// the event and class.
// NOTE(review): the return type, the switch header, any further case
// labels and the sends/breaks are not visible in this listing.
314 rbce_tc_ext_notify(int event, void *core, struct task_struct *tsk)
316 struct crbce_rec_fork rec;
319 case CKRM_EVENT_FORK:
321 rec.ppid = tsk->parent->pid;
322 rec_set_timehdr(&rec, CKRM_EVENT_FORK, tsk->pid, core);
326 case CKRM_EVENT_MANUAL:
330 send_task_record(tsk, event, (struct ckrm_core_class *)core, 1);
335 /*====================== end classification engine =======================*/
337 static void sample_task_data(unsigned long unused);
339 struct timer_list sample_timer = {.expires = 0,.function = sample_task_data };
340 unsigned long timer_interval_length = (250 * HZ) / 1000;
342 inline void stop_sample_timer(void)
344 if (sample_timer.expires > 0) {
345 del_timer_sync(&sample_timer);
346 sample_timer.expires = 0;
350 inline void start_sample_timer(void)
352 if (timer_interval_length > 0) {
353 sample_timer.expires =
354 jiffies + (timer_interval_length * HZ) / 1000;
355 add_timer(&sample_timer);
// send_task_data - send a sample record for the threads in the system.
// Walks all threads under read_lock(&tasklist_lock) and calls
// send_task_record() with CRBCE_REC_SAMPLE and send_forced == 0, adding
// each return value to sendcnt.  A CRBCE_REC_DATA_DELIMITER header is
// prepared in limrec both before and after the walk.
// NOTE(review): the counter declarations, the sends of limrec and any
// per-thread filtering are not visible in this listing.
359 static void send_task_data(void)
361 struct crbce_rec_data_delim limrec;
362 struct task_struct *proc, *thread;
366 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
369 read_lock(&tasklist_lock);
370 do_each_thread(proc, thread) {
373 sendcnt += send_task_record(thread, CRBCE_REC_SAMPLE, NULL, 0);
375 } while_each_thread(proc, thread);
376 read_unlock(&tasklist_lock);
379 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
382 // printk("send_task_data mode=%d t#=%d s#=%d\n",
383 // delta_mode,taskcnt,sendcnt);
// notify_class_action - send a CRBCE_REC_CLASS_INFO record for a class.
//   cls     class whose classobj and name are reported
//   action  value stored in the record's action field
// At most CRBCE_MAX_CLASS_NAME_LEN - 1 characters of the name are copied
// and the copy is NUL-terminated.  The length handed to rec_send_len() is
// sizeof(cinfo) minus CRBCE_MAX_CLASS_NAME_LEN plus the name length, so
// only the used part of the name array is sent.
// NOTE(review): the declaration of len and the lines between the
// termination and the length computation are not visible; whether the
// terminating NUL is counted cannot be told from this listing.
386 static void notify_class_action(struct rbce_class *cls, int action)
388 struct crbce_class_info cinfo;
391 rec_set_timehdr(&cinfo, CRBCE_REC_CLASS_INFO, 0, cls->classobj);
392 cinfo.action = action;
393 len = strnlen(cls->obj.name, CRBCE_MAX_CLASS_NAME_LEN - 1);
394 memcpy(&cinfo.name, cls->obj.name, len);
395 cinfo.name[len] = '\0';
399 len += sizeof(cinfo) - CRBCE_MAX_CLASS_NAME_LEN;
400 rec_send_len(&cinfo, len);
403 static void send_classlist(void)
405 struct rbce_class *cls;
407 read_lock(&global_rwlock);
408 list_for_each_entry(cls, &class_list, obj.link) {
409 notify_class_action(cls, 1);
411 read_unlock(&global_rwlock);
417 * This function resends all essential task information to the client.
// resend_task_info - replay class and task information to the client.
// Sends the class list first, then walks all threads with tasklist_lock
// held for writing and prepares a CRBCE_REC_TASKINFO record per thread
// carrying the thread's pid and its parent's pid.  A
// CRBCE_REC_DATA_DELIMITER header is prepared in limrec before and after
// the walk.
// NOTE(review): the record sends and the class argument of the per-thread
// header are not visible in this listing.  The visible body only reads
// task fields, yet takes the lock for writing - confirm whether read_lock
// would be sufficient.
420 static void resend_task_info(void)
422 struct crbce_rec_data_delim limrec;
423 struct crbce_rec_fork rec;
424 struct task_struct *proc, *thread;
426 send_classlist(); // first send available class information
429 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
432 write_lock(&tasklist_lock); // avoid any mods during this phase
433 do_each_thread(proc, thread) {
435 rec.ppid = thread->parent->pid;
436 rec_set_timehdr(&rec, CRBCE_REC_TASKINFO, thread->pid,
441 while_each_thread(proc, thread);
442 write_unlock(&tasklist_lock);
445 rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
// Declared extern here; sample_task_data() uses its result to choose
// between the cpu_running and cpu_waiting counters of a runnable task.
449 extern int task_running_sys(struct task_struct *);
451 static void add_all_private_data(void)
453 struct task_struct *proc, *thread;
455 write_lock(&tasklist_lock);
456 do_each_thread(proc, thread) {
457 if (RBCE_DATA(thread) == NULL)
458 RBCE_DATAP(thread) = create_private_data(NULL, 0);
460 while_each_thread(proc, thread);
461 write_unlock(&tasklist_lock);
// sample_task_data - handler of sample_timer; samples every thread.
//   unused  timer argument, not referenced in the visible code
// Under read_lock(&tasklist_lock): a thread in TASK_RUNNING state bumps
// cpu_running when task_running_sys() is true and cpu_waiting otherwise;
// a thread with PF_IOWAIT set bumps memio_delayed when PF_MEMIO is also
// set and io_delayed otherwise.  The counters are incremented through an
// atomic_t cast.  Finally the timer is re-armed via start_sample_timer().
// NOTE(review): the check that skips threads without pdata and the else
// keywords are not visible in this listing.
464 static void sample_task_data(unsigned long unused)
466 struct task_struct *proc, *thread;
470 read_lock(&tasklist_lock);
471 do_each_thread(proc, thread) {
472 struct rbce_private_data *pdata = RBCE_DATA(thread);
475 // some wierdo race condition .. simply ignore
478 if (thread->state == TASK_RUNNING) {
479 if (task_running_sys(thread)) {
480 atomic_inc((atomic_t *) &
481 (PSAMPLE(pdata)->cpu_running));
484 atomic_inc((atomic_t *) &
485 (PSAMPLE(pdata)->cpu_waiting));
489 /* update IO state */
490 if (thread->flags & PF_IOWAIT) {
491 if (thread->flags & PF_MEMIO)
492 atomic_inc((atomic_t *) &
493 (PSAMPLE(pdata)->memio_delayed));
495 atomic_inc((atomic_t *) &
496 (PSAMPLE(pdata)->io_delayed));
499 while_each_thread(proc, thread);
500 read_unlock(&tasklist_lock);
501 // printk("sample_timer: run=%d wait=%d\n",run,wait);
502 start_sample_timer();
// ukcc_cmd_deliver - user_deliver callback: run a command from the daemon.
//   rchan_id  channel id (not referenced in the visible code)
//   from      start of the data written by the client, read as a
//             struct crbce_command
//   len       number of bytes delivered
// The record's len field is overwritten with the delivered length and the
// record type must be CRBCE_REC_KERNEL_CMD.  Visible commands:
//   CRBCE_CMD_SET_TIMER  checks len against the command struct, stores the
//                        interval (non-zero values below 10 are raised to
//                        10) and starts the sample timer
//   CRBCE_CMD_SEND_DATA  checks len and stores the requested delta_mode
//   CRBCE_CMD_START      adds private data to all tasks, state -> UKCC_OK
//   (label not visible)  state -> UKCC_STANDBY, frees all private data
// A CRBCE_REC_KERNEL_CMD_DONE reply echoing the command code is prepared.
// NOTE(review): error codes, break statements, one case label and the send
// of the reply are not visible in this listing.  No check that len covers
// sizeof(struct crbce_command) is visible before cmdrec is dereferenced -
// confirm one exists.
505 static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len)
507 struct crbce_command *cmdrec = (struct crbce_command *)from;
508 struct crbce_cmd_done cmdret;
511 // printk("ukcc_cmd_deliver: %d %d len=%d:%d\n",cmdrec->type,
512 // cmdrec->cmd,cmdrec->len,len);
514 cmdrec->len = len; // add this to reflection so the user doesn't
515 // accidently write the wrong length and the
516 // protocol is getting screwed up
518 if (cmdrec->type != CRBCE_REC_KERNEL_CMD) {
523 switch (cmdrec->cmd) {
524 case CRBCE_CMD_SET_TIMER:
526 struct crbce_cmd_settimer *cptr =
527 (struct crbce_cmd_settimer *)cmdrec;
528 if (len != sizeof(*cptr)) {
533 timer_interval_length = cptr->interval;
534 if ((timer_interval_length > 0)
535 && (timer_interval_length < 10))
536 timer_interval_length = 10;
537 // anything finer can create problems
538 printk(KERN_INFO "CRBCE set sample collect timer %lu\n",
539 timer_interval_length);
540 start_sample_timer();
543 case CRBCE_CMD_SEND_DATA:
545 struct crbce_cmd_send_data *cptr =
546 (struct crbce_cmd_send_data *)cmdrec;
547 if (len != sizeof(*cptr)) {
551 delta_mode = cptr->delta_mode;
555 case CRBCE_CMD_START:
556 add_all_private_data();
557 chan_state = UKCC_OK;
562 chan_state = UKCC_STANDBY;
563 free_all_private_data();
572 cmdret.hdr.type = CRBCE_REC_KERNEL_CMD_DONE;
573 cmdret.hdr.cmd = cmdrec->cmd;
576 // printk("ukcc_cmd_deliver ACK: %d %d rc=%d %d\n",cmdret.hdr.type,
577 // cmdret.hdr.cmd,rc,sizeof(cmdret));
580 static void client_attached(void)
582 printk("client [%d]<%s> attached to UKCC\n", current->pid,
584 relay_reset(ukcc_channel);
// client_detached - log the detaching process and quiesce the UKCC.
// Prints the pid of the current task, puts the protocol into
// UKCC_STANDBY, resets the relay channel and frees all private data.
// NOTE(review): one line between the state change and relay_reset() is not
// visible in this listing; confirm whether the sample timer is stopped
// before the private data it updates is freed.
587 static void client_detached(void)
589 printk("client [%d]<%s> detached to UKCC\n", current->pid,
591 chan_state = UKCC_STANDBY;
593 relay_reset(ukcc_channel);
594 free_all_private_data();
/*
 * init_rbce_ext_pre - first init stage of the extension: create the UKCC.
 *
 * Returns the negative code from create_ukcc_channel() on failure and 0
 * on success (any non-negative result is folded to 0).
 */
static int init_rbce_ext_pre(void)
{
	const int rc = create_ukcc_channel();

	if (rc < 0)
		return rc;
	return 0;
}
// init_rbce_ext_post - second init stage: initialize the sampling timer
// with init_timer().
// NOTE(review): the return statement is not visible in this listing.
605 static int init_rbce_ext_post(void)
607 init_timer(&sample_timer);
// exit_rbce_ext - teardown of the extension; closes the UKCC channel.
// NOTE(review): a line before the close call and the end of the function
// are not visible in this listing.
611 static void exit_rbce_ext(void)
614 close_ukcc_channel();