/* ckrm_cpu_monitor.c - Hierarchical CKRM CPU Resource Monitor
 *
 * Copyright (C) Haoqiang Zheng,  IBM Corp. 2004
 *           (C) Hubertus Franke, IBM Corp. 2004
 *
 * Latest version, more details at http://ckrm.sf.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 */

/* Changes
 *
 * 23 June 2004: Created
 *
 */
#include <linux/module.h>
#include <linux/init.h>
#include <asm/errno.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/ckrm.h>
#include <linux/ckrm_rc.h>
#include <linux/ckrm_tc.h>
#include <asm/div64.h>
#include <linux/ckrm_sched.h>

#define CPU_MONITOR_INTERVAL (4*HZ) /* how often do we adjust the shares */
#define CKRM_SHARE_ACCURACY 7
#define CKRM_SHARE_MAX (1<<CKRM_SHARE_ACCURACY)
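
/*
 * Note: cpu_demand and the effective guarantee/limit/share values below are
 * kept on the 0..CKRM_SHARE_MAX (1 << 7 = 128) fixed-point scale;
 * CKRM_SHARE_MAX is what the root class starts from and the cap applied to
 * aggregated demand.
 */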

extern struct ckrm_cpu_class *ckrm_get_cpu_class(struct ckrm_core_class *core);

void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat)
{
	int i;
	struct ckrm_cpu_class_local_stat *local_stat;
	unsigned long long now = sched_clock();

	stat->stat_lock = SPIN_LOCK_UNLOCKED;
	stat->total_ns = 0;
	stat->cpu_demand = 0;

	for (i = 0; i < NR_CPUS; i++) {
		local_stat = &stat->local_stats[i];
		local_stat->run = 0;
		local_stat->total = 0;
		local_stat->last_sleep = now;
		local_stat->cpu_demand = 0;
	}

	stat->effective_guarantee = 0;
	stat->effective_limit = 0;
	stat->glut = 0;
	stat->effective_share = 100;
	stat->self_effective_share = 100;
}

/**********************************************/
/*          cpu demand                        */
/**********************************************/

/*
 * How CPU demand is calculated:
 * consider the class local runqueue (clr) first
 * at any time, a clr can be in one of the following three states
 * -- run: a task belonging to this class is running on this cpu
 * -- wait: at least one of its tasks is runnable, but the class is not running
 * -- sleep: none of the tasks of this class is runnable
 *
 * cpu_demand(t1,t2) = r(t1,t2)/(r(t1,t2)+s(t1,t2))
 *
 * the cpu_demand of a class =
 *    sum of the cpu_demand of all its class local runqueues
 */
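
/*
 * For example (illustrative numbers only): a class local runqueue that over
 * some window ran for 250ms and slept for 750ms has
 * cpu_demand = 250/(250+750) = 0.25 of that cpu, i.e. 32 on the
 * 0..CKRM_SHARE_MAX (128) fixed-point scale used below.
 */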

/*
 * Updating cpu demand on a state change:
 *
 * the local demand should be updated whenever the state of a local queue
 * changes
 * -- on deschedule: report how long the class has run
 * -- on enqueue: report how long the class has slept
 *
 * to deal with excessively long run/sleep states
 * -- whenever ckrm_cpu_monitor is called, check whether the class is in the
 *    sleep state; if so, update its sleep record (see cpu_demand_check_sleep)
 */
#define CKRM_CPU_DEMAND_RUN 0
#define CKRM_CPU_DEMAND_SLEEP 1
//how often should we recalculate the cpu demand, in ns
#define CPU_DEMAND_CAL_THRESHOLD (1000000000LL)
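
/*
 * Arithmetic note (explanatory addition): update_local_cpu_demand()
 * accumulates run and total time; once total reaches
 * CPU_DEMAND_CAL_THRESHOLD (1s) it folds the sample into cpu_demand.
 * total is pre-shifted by CKRM_SHARE_ACCURACY (and clamped to fit the
 * 32-bit divisor of do_div()), so run/(total >> 7) approximates
 * (run << 7)/total, i.e. the run fraction scaled to 0..CKRM_SHARE_MAX;
 * the result is then averaged 50/50 into the previous cpu_demand.
 */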
static inline void update_local_cpu_demand(struct ckrm_cpu_class_local_stat *local_stat, int state, unsigned long long len)
{
	local_stat->total += len;
	if (state == CKRM_CPU_DEMAND_RUN)
		local_stat->run += len;

	if (local_stat->total >= CPU_DEMAND_CAL_THRESHOLD) {
		local_stat->total >>= CKRM_SHARE_ACCURACY;
		if (local_stat->total > 0xFFFFFFFF)
			local_stat->total = 0xFFFFFFFF;

		do_div(local_stat->run, (unsigned long)local_stat->total);
		local_stat->cpu_demand += local_stat->run;
		local_stat->cpu_demand >>= 1;
		local_stat->total = 0;
		local_stat->run = 0;
	}
}

static inline void cpu_demand_update_run(struct ckrm_cpu_class_local_stat *local_stat, unsigned long long len)
{
	update_local_cpu_demand(local_stat, CKRM_CPU_DEMAND_RUN, len);
}

static inline void cpu_demand_update_sleep(struct ckrm_cpu_class_local_stat *local_stat, unsigned long long len)
{
	update_local_cpu_demand(local_stat, CKRM_CPU_DEMAND_SLEEP, len);
}

#define CPU_DEMAND_ENQUEUE 0
#define CPU_DEMAND_DEQUEUE 1
#define CPU_DEMAND_DESCHEDULE 2

/**
 * cpu_demand_event - a cpu_demand event occurred
 * @event: one of the following three events:
 *   CPU_DEMAND_ENQUEUE: local class enqueue
 *   CPU_DEMAND_DEQUEUE: local class dequeue
 *   CPU_DEMAND_DESCHEDULE: a task belonging to this local class is descheduled
 * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has run
 */
void cpu_demand_event(struct ckrm_cpu_class_local_stat *local_stat, int event, unsigned long long len)
{
	switch (event) {
	case CPU_DEMAND_ENQUEUE:
		len = sched_clock() - local_stat->last_sleep;
		local_stat->last_sleep = 0;
		cpu_demand_update_sleep(local_stat, len);
		break;
	case CPU_DEMAND_DEQUEUE:
		local_stat->last_sleep = sched_clock();
		break;
	case CPU_DEMAND_DESCHEDULE:
		cpu_demand_update_run(local_stat, len);
		break;
	default:
		BUG();
	}
}
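
/*
 * Illustrative sketch only (nothing in this file calls these): how a
 * scheduler hook could feed cpu_demand_event().  The local stat lookup
 * (&cls->stat.local_stats[cpu]) follows ckrm_cpu_stat_init() above; the
 * hook names themselves are hypothetical.
 */
static inline void example_report_enqueue(struct ckrm_cpu_class *cls, int cpu)
{
	/* the class local runqueue became runnable: close the sleep period */
	cpu_demand_event(&cls->stat.local_stats[cpu], CPU_DEMAND_ENQUEUE, 0);
}

static inline void example_report_dequeue(struct ckrm_cpu_class *cls, int cpu)
{
	/* the class local runqueue went empty: start the sleep clock */
	cpu_demand_event(&cls->stat.local_stats[cpu], CPU_DEMAND_DEQUEUE, 0);
}

static inline void example_report_deschedule(struct ckrm_cpu_class *cls, int cpu,
					     unsigned long long ran_ns)
{
	/* a task of this class came off the cpu after running for ran_ns */
	cpu_demand_event(&cls->stat.local_stats[cpu], CPU_DEMAND_DESCHEDULE, ran_ns);
}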

/**
 * cpu_demand_check_sleep - account outstanding sleep time
 *
 * check the class local queue on this cpu: if it is currently in the sleep
 * state (last_sleep is set), account the time slept since last_sleep and
 * restart the sleep period from now
 */
static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu)
{
	struct ckrm_cpu_class_local_stat *local_stat = &stat->local_stats[cpu];
	unsigned long long sleep, now;
	if (local_stat->last_sleep) {
		now = sched_clock();
		sleep = now - local_stat->last_sleep;
		local_stat->last_sleep = now;
		cpu_demand_update_sleep(local_stat, sleep);
	}
}

/**
 * get_self_cpu_demand - get cpu demand of the class itself (excluding children)
 *
 * self_cpu_demand = sum(cpu demand of all local queues)
 */
static unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat *stat)
{
	int cpu_demand = 0;
	int i;

	for_each_online_cpu(i) {
		cpu_demand_check_sleep(stat, i);
		cpu_demand += stat->local_stats[i].cpu_demand;
	}

	if (cpu_demand > CKRM_SHARE_MAX)
		cpu_demand = CKRM_SHARE_MAX;
	return cpu_demand;
}

/*
 * update the aggregated cpu demand of each class in the hierarchy
 * assumes root_core->parent == NULL
 */
static void update_cpu_demand(struct ckrm_core_class *root_core)
{
	struct ckrm_core_class *cur_core, *child_core;
	struct ckrm_cpu_class *cls;

	cur_core = root_core;
	child_core = NULL;
	/*
	 * iterate the tree
	 * update cpu_demand of each node
	 */
      repeat:
	if (!cur_core)
		return;

	cls = ckrm_get_cpu_class(cur_core);
	if (!child_core)	//first visit: start from the self demand
		cls->stat.cpu_demand = get_self_cpu_demand(&cls->stat);
	else {			//coming back up: add the child's demand
		cls->stat.cpu_demand +=
		    ckrm_get_cpu_class(child_core)->stat.cpu_demand;
		if (cls->stat.cpu_demand > CKRM_SHARE_MAX)
			cls->stat.cpu_demand = CKRM_SHARE_MAX;
	}

	//next child
	child_core = ckrm_get_next_child(cur_core, child_core);
	if (child_core) {
		//go down
		cur_core = child_core;
		child_core = NULL;
		goto repeat;
	} else {		//no more child, go back
		child_core = cur_core;
		cur_core = child_core->hnode.parent;
	}
	goto repeat;
}
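
/*
 * Note on the traversal above: update_cpu_demand(), and the other tree
 * walkers below (update_effective_guarantee_limit(), alloc_surplus()), use
 * the same goto-based depth-first walk: child_core == NULL means we are
 * going down into cur_core, a non-NULL child_core means we have just come
 * back up from that child, and ckrm_get_next_child() supplies the next
 * sibling to visit.
 */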

/**********************************************/
/*          effective guarantee & limit       */
/**********************************************/
static inline void set_effective_share(struct ckrm_cpu_class_stat *stat,
				       int new_share)
{
	if (!new_share)
		new_share = 1;
	stat->effective_share = new_share;
}

static inline void set_self_effective_share(struct ckrm_cpu_class_stat *stat,
					    int new_share)
{
	if (!new_share)
		new_share = 1;
	stat->self_effective_share = new_share;
}

static inline void update_child_effective(struct ckrm_core_class *parent)
{
	struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
	struct ckrm_core_class *child_core = ckrm_get_next_child(parent, NULL);

	while (child_core) {
		struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);

		c_cls->stat.effective_guarantee =
		    p_cls->stat.effective_guarantee *
		    c_cls->shares.my_guarantee / p_cls->shares.total_guarantee;
		c_cls->stat.effective_limit =
		    p_cls->stat.effective_guarantee * c_cls->shares.my_limit /
		    p_cls->shares.total_guarantee;

		child_core = ckrm_get_next_child(parent, child_core);
	}
}
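
/*
 * Worked example (illustrative numbers only): if the parent's
 * effective_guarantee is 128 (CKRM_SHARE_MAX) and its total_guarantee is
 * 100, a child with my_guarantee = 30 and my_limit = 50 gets
 * effective_guarantee = 128 * 30 / 100 = 38 and
 * effective_limit = 128 * 50 / 100 = 64 (integer arithmetic).
 */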

/*
 * update effective guarantee and effective limit
 * -- effective_guarantee = parent's effective_guarantee * my_guarantee / parent's total_guarantee
 * -- effective_limit = parent's effective_guarantee * my_limit / parent's total_guarantee
 * should be called only when the class structure has changed
 */
static void update_effective_guarantee_limit(struct ckrm_core_class *root_core)
{
	struct ckrm_core_class *cur_core, *child_core = NULL;
	struct ckrm_cpu_class *cls;

	cur_core = root_core;
	cls = ckrm_get_cpu_class(cur_core);
	cls->stat.effective_guarantee = CKRM_SHARE_MAX;
	cls->stat.effective_limit = cls->stat.effective_guarantee;

      repeat:
	//check exit
	if (!cur_core)
		return;

	//visit this node
	update_child_effective(cur_core);
	//next child
	child_core = ckrm_get_next_child(cur_core, child_core);
	if (child_core) {
		//go down
		cur_core = child_core;
		child_core = NULL;
		goto repeat;
	} else {		//no more child, go back
		child_core = cur_core;
		cur_core = child_core->hnode.parent;
	}
	goto repeat;
}

/**********************************************/
/*          surplus allocation                */
/**********************************************/

/*
 * surplus = effective_guarantee - cpu_demand
 * if surplus < 0, surplus = 0
 */
static inline int get_node_surplus(struct ckrm_cpu_class *cls)
{
	int surplus = cls->stat.effective_guarantee - cls->stat.cpu_demand;

	if (surplus < 0)
		surplus = 0;

	return surplus;
}

/*
 * consume a portion of the parent's surplus
 * returns how much was consumed
 * sets the glut flag when the class cannot take any more
 */
static inline int node_surplus_consume(int old_surplus,
				       struct ckrm_core_class *child_core,
				       struct ckrm_cpu_class *p_cls)
{
	int consumed = 0;
	int inc_limit;

	struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core);

	if (c_cls->stat.glut)
		goto out;

	//check demand
	if (c_cls->stat.effective_share >= c_cls->stat.cpu_demand) {
		c_cls->stat.glut = 1;
		goto out;
	}

	consumed =
	    old_surplus * c_cls->shares.my_guarantee /
	    p_cls->shares.total_guarantee;

	//check limit
	inc_limit = c_cls->stat.effective_limit - c_cls->stat.effective_share;
	if (inc_limit <= consumed) {
		c_cls->stat.glut = 1;
		consumed = inc_limit;
	}

	c_cls->stat.effective_share += consumed;
      out:
	return consumed;
}
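
/*
 * Worked example (illustrative numbers only): with old_surplus = 20, a
 * child holding my_guarantee = 30 out of the parent's total_guarantee = 100
 * is offered 20 * 30 / 100 = 6 extra share units; if its effective_limit
 * leaves room for only 4 more, it takes 4 and its glut flag is set so later
 * rounds skip it.
 */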

/*
 * re-allocate the shares for all the children under this node
 * tasks:
 *  1. get the total surplus
 *  2. allocate the surplus
 *  3. set the effective_share of each node
 */
static void alloc_surplus_node(struct ckrm_core_class *parent)
{
	int total_surplus = 0, old_surplus = 0;
	struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent);
	struct ckrm_core_class *child_core = NULL;
	int self_share;

	/*
	 * calculate surplus
	 * total_surplus = sum(child_surplus)
	 * reset glut flag
	 * initialize effective_share
	 */
	do {
		child_core = ckrm_get_next_child(parent, child_core);
		if (child_core) {
			struct ckrm_cpu_class *c_cls =
			    ckrm_get_cpu_class(child_core);
			ckrm_stat_t *stat = &c_cls->stat;

			total_surplus += get_node_surplus(c_cls);
			stat->glut = 0;
			set_effective_share(stat, stat->effective_guarantee);
		}
	} while (child_core);

	/* distribute the surplus */
	child_core = NULL;
	do {
		if (!child_core)	//remember the surplus at the start of this round
			old_surplus = total_surplus;

		child_core = ckrm_get_next_child(parent, child_core);
		if (child_core) {
			total_surplus -=
			    node_surplus_consume(old_surplus, child_core,
						 p_cls);
		}
		//start a new round if something was allocated in the last round
	} while (child_core || (total_surplus != old_surplus));

	//any remaining surplus goes to the default class
	self_share = p_cls->stat.effective_share *
	    p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee;
	self_share += total_surplus;

	set_self_effective_share(&p_cls->stat, self_share);
}

/**
 * alloc_surplus - reallocate unused shares
 *
 * a class's unused share should be allocated to its siblings
 */
static void alloc_surplus(struct ckrm_core_class *root_core)
{
	struct ckrm_core_class *cur_core, *child_core = NULL;
	struct ckrm_cpu_class *cls;

	cur_core = root_core;
	cls = ckrm_get_cpu_class(cur_core);
	cls->stat.glut = 0;
	set_effective_share(&cls->stat, cls->stat.effective_guarantee);
      repeat:
	//check exit
	if (!cur_core)
		return;

	//visit this node
	alloc_surplus_node(cur_core);
	//next child
	child_core = ckrm_get_next_child(cur_core, child_core);
	if (child_core) {
		//go down
		cur_core = child_core;
		child_core = NULL;
		goto repeat;
	} else {		//no more child, go back
		child_core = cur_core;
		cur_core = child_core->hnode.parent;
	}
	goto repeat;
}

/**
 * ckrm_cpu_monitor - adjust relative shares of the classes based on their progress
 *
 * this function is called every CPU_MONITOR_INTERVAL
 * it computes the cpu demand of each class
 * and re-allocates the unused shares to other classes
 */
void ckrm_cpu_monitor(void)
{
	struct ckrm_core_class *root_core = default_cpu_class->core;
	if (!root_core)
		return;

	update_effective_guarantee_limit(root_core);
	update_cpu_demand(root_core);
	alloc_surplus(root_core);
}

/*****************************************************/
/*            Supporting Functions                   */
/*****************************************************/
static pid_t cpu_monitor_pid = -1;
static int thread_exit = 0;

static int ckrm_cpu_monitord(void *nothing)
{
	wait_queue_head_t wait;

	init_waitqueue_head(&wait);

	daemonize("ckrm_cpu_ctrld");
	for (;;) {
		/* sleep for some time before the next try */
		interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL);
		ckrm_cpu_monitor();
		if (thread_exit) {
			break;
		}
	}
	cpu_monitor_pid = -1;
	thread_exit = 2;
	printk(KERN_INFO "cpu_monitord exit\n");
	return 0;
}

void ckrm_start_monitor(void)
{
	cpu_monitor_pid = kernel_thread(ckrm_cpu_monitord, NULL, CLONE_KERNEL);
	if (cpu_monitor_pid < 0) {
		printk(KERN_ERR "ckrm_cpu_monitord failed to start\n");
	}
}

void ckrm_kill_monitor(void)
{
	wait_queue_head_t wait;
	int interval = HZ;
	init_waitqueue_head(&wait);

	printk(KERN_INFO "killing process %d\n", cpu_monitor_pid);
	if (cpu_monitor_pid > 0) {
		thread_exit = 1;
		while (thread_exit != 2) {
			interruptible_sleep_on_timeout(&wait, interval);
		}
	}
}

int ckrm_cpu_monitor_init(void)
{
	ckrm_start_monitor();
	return 0;
}

void ckrm_cpu_monitor_exit(void)
{
	ckrm_kill_monitor();
}

module_init(ckrm_cpu_monitor_init);
module_exit(ckrm_cpu_monitor_exit);

MODULE_AUTHOR("Haoqiang Zheng <hzheng@cs.columbia.edu>");
MODULE_DESCRIPTION("Hierarchical CKRM CPU Resource Monitor");
MODULE_LICENSE("GPL");