This stack check implementation leverages the compiler's profiling (gcc -p)
[linux-2.6.git] / kernel / ckrm / ckrm_listenaq.c
1 /* ckrm_socketaq.c - accept queue resource controller
2  *
3  * Copyright (C) Vivek Kashyap,      IBM Corp. 2004
4  * 
5  * Latest version, more details at http://ckrm.sf.net
6  * 
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  */
13
14 /* Changes
15  * Initial version
16  */
17
18 /* Code Description: TBD
19  *
20  */
21
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/slab.h>
25 #include <asm/errno.h>
26 #include <linux/list.h>
27 #include <linux/spinlock.h>
28 #include <linux/ckrm.h>
29 #include <linux/ckrm_rc.h>
30 #include <net/tcp.h>
31
32 #include <linux/ckrm_net.h>
33
/*
 * hnode_2_core - map a hierarchy node pointer back to the
 * struct ckrm_core_class that embeds it (via its 'hnode' member).
 * A NULL node yields NULL.
 */
#define hnode_2_core(ptr) \
                (!(ptr) ? NULL : container_of(ptr, struct ckrm_core_class, hnode))
36
37
/*
 * Deepest hierarchy level used by this controller:
 *   0 => /rcfs
 *   1 => socket_aq
 *   2 => socket_aq/listen_class
 *   3 => socket_aq/listen_class/accept_queues
 *   4 => Not allowed
 */
#define CKRM_SAQ_MAX_DEPTH      3
43
44 typedef struct ckrm_laq_res {
45         spinlock_t              reslock;
46         atomic_t                refcnt;
47         struct ckrm_shares      shares;
48         struct ckrm_core_class *core;
49         struct ckrm_core_class *pcore;
50         int                     my_depth;
51         int                     my_id;
52 } ckrm_laq_res_t;
53
/* Resource id handed out at registration; -1 while not registered. */
static int my_resid = -1;

/* Defined outside this file. */
extern struct ckrm_core_class *rcfs_create_under_netroot(char *, int, int);
extern struct ckrm_core_class *rcfs_make_core(struct dentry *,
                                              struct ckrm_core_class *);
59
60 void
61 laq_res_hold(struct ckrm_laq_res *res)
62 {
63         atomic_inc(&res->refcnt);
64         return;
65 }
66
67 void
68 laq_res_put(struct ckrm_laq_res *res)
69 {
70         if (atomic_dec_and_test(&res->refcnt))
71                 kfree(res);
72         return;
73 }
74
75 /* Initialize rescls values
76  */
77 static void
78 laq_res_initcls(void *my_res)
79 {
80         ckrm_laq_res_t *res = my_res;
81
82         res->shares.my_guarantee     = CKRM_SHARE_DONTCARE;
83         res->shares.my_limit         = CKRM_SHARE_DONTCARE;
84         res->shares.total_guarantee  = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
85         res->shares.max_limit        = CKRM_SHARE_DFLT_MAX_LIMIT;
86         res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
87         res->shares.cur_max_limit    = 0;
88 }
89
/*
 * atoi - convert the leading run of decimal digits in @s to an int.
 *
 * Parsing stops at the first character that is not '0'..'9', so a string
 * with no leading digit (including the empty string) yields 0.  Previously
 * every character up to the terminator was folded into the result, so a
 * name such as "7x" produced a meaningless value that was later used as an
 * accept queue index.
 *
 * No overflow detection is done; callers pass short class names.
 */
static int 
atoi(char *s)
{
        int k = 0;

        while (*s >= '0' && *s <= '9')
                k = (k * 10) + (*s++ - '0');

        return k;
}
98
99 static char *
100 laq_get_name(struct ckrm_core_class *c)
101 {
102         char *p = (char *)c->name;
103
104         while(*p)
105                 p++;
106         while( *p != '/' && p != c->name)
107                 p--;
108
109         return ++p;
110 }
111
112 static void *
113 laq_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
114 {
115         ckrm_laq_res_t *res, *pres;
116         int pdepth;
117
118         if (parent)
119                 pres = ckrm_get_res_class(parent, my_resid, ckrm_laq_res_t);
120         else
121                 pres = NULL;
122
123         if (core == core->classtype->default_class)    
124                 pdepth = 1;
125         else {
126                 if (!parent)
127                         return NULL;
128                 pdepth = 1 + pres->my_depth;
129         }
130
131         res = kmalloc(sizeof(ckrm_laq_res_t), GFP_ATOMIC);
132         if (res) {
133                 memset(res, 0, sizeof(res));
134                 spin_lock_init(&res->reslock);
135                 laq_res_hold(res);
136                 res->my_depth  = pdepth;
137                 if (pdepth == 2)        // listen class
138                         res->my_id = 0;
139                 else if (pdepth == 3)
140                         res->my_id = atoi(laq_get_name(core));
141                 res->core = core;
142                 res->pcore = parent;
143
144                 // rescls in place, now initialize contents other than 
145                 // hierarchy pointers
146                 laq_res_initcls(res); // acts as initialising value
147         }
148
149         return res;
150 }
151
152 static void
153 laq_res_free(void *my_res)
154 {
155         ckrm_laq_res_t *res = (ckrm_laq_res_t *)my_res;
156         ckrm_laq_res_t *parent;
157
158         if (!res) 
159                 return;
160
161         if (res->my_depth != 3) {
162                 kfree(res);
163                 return;
164         }
165
166         parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
167         if (!parent)    // Should never happen
168                 return;
169
170         spin_lock(&parent->reslock);
171         spin_lock(&res->reslock);
172
173         // return child's guarantee to parent node
174         // Limits have no meaning for accept queue control
175         child_guarantee_changed(&parent->shares, res->shares.my_guarantee, 0);
176
177         spin_unlock(&res->reslock);
178         laq_res_put(res);       
179         spin_unlock(&parent->reslock);
180         return;
181 }
182
183 /**************************************************************************
184  *                      SHARES                                          ***
185  **************************************************************************/
186
187 void
188 laq_set_aq_values(ckrm_laq_res_t *my_res, ckrm_laq_res_t *parent, int updatep)
189 {
190
191         struct ckrm_net_struct *ns;
192         struct ckrm_core_class *core = parent->core;
193         struct tcp_opt *tp;
194         
195         if (my_res->my_depth < 2) 
196                 return;
197         
198         // XXX Instead of holding a  class_lock introduce a rw
199         // lock to be write locked by listen callbacks and read locked here.
200         // - VK
201         class_lock(core);
202         list_for_each_entry(ns, &core->objlist,ckrm_link) { 
203                 tp = tcp_sk(ns->ns_sk);
204                 if (updatep)
205                         tp->acceptq[0].aq_ratio =
206                                parent->shares.total_guarantee/
207                                 parent->shares.unused_guarantee;               
208
209                 tp->acceptq[my_res->my_id].aq_ratio =
210                        my_res->shares.total_guarantee/
211                         parent->shares.my_guarantee;           
212         }
213         class_unlock(core);
214         return;
215 }
216
217 static int
218 laq_set_share_values(void *my_res, struct ckrm_shares *shares)
219 {
220         ckrm_laq_res_t *res = my_res;
221         ckrm_laq_res_t *parent, *child;
222         struct ckrm_hnode *chnode; 
223         int rc = 0;
224
225         if (!res) 
226                 return -EINVAL;
227
228         if (!res->pcore) { 
229                 // something is badly wrong
230                 printk(KERN_ERR "socketaq internal inconsistency\n");
231                 return -EBADF;
232         }
233
234         parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
235         if (!parent)    // socket_class does not have a share interface
236                 return -EINVAL;
237
238         // Ensure that we ignore limit values
239         shares->my_limit = shares->max_limit = CKRM_SHARE_UNCHANGED;
240
241         switch (res->my_depth) {
242
243         case 0: printk(KERN_ERR "socketaq bad entry\n");
244                 rc = -EBADF;
245                 break;
246
247         case 1: // can't be written to. this is internal default.
248                 // return -EINVAL
249                 rc = -EINVAL;
250                 break;
251
252         case 2: // nothing to inherit
253                 if (!shares->total_guarantee) {
254                         rc = -EINVAL;
255                         break;
256                 }
257
258                 ckrm_lock_hier(res->pcore);
259                 spin_lock(&res->reslock);
260                 rc = set_shares(shares, &res->shares, NULL);
261                 if (!rc) {
262                         list_for_each_entry(chnode,
263                                         &res->core->hnode.children,siblings){
264                                 child=hnode_2_core(chnode)->res_class[my_resid];
265                                 laq_set_aq_values(child,res,(child->my_id==1));
266                         }
267                 }
268                 spin_unlock(&res->reslock);
269                 ckrm_unlock_hier(res->pcore);
270                 break;
271
272         case 3: // accept queue itself. Check against parent.
273                 ckrm_lock_hier(parent->pcore);
274                 spin_lock(&parent->reslock);
275                 rc = set_shares(shares, &res->shares, &parent->shares);
276                 if (!rc) {
277                         laq_set_aq_values(res,parent,1);
278                 }
279                 spin_unlock(&parent->reslock);
280                 ckrm_unlock_hier(parent->pcore);
281                 break;
282         }
283
284         return rc;
285 }
286
287 static int
288 laq_get_share_values(void *my_res, struct ckrm_shares *shares)
289 {
290         ckrm_laq_res_t *res = my_res;
291
292         if (!res) 
293                 return -EINVAL;
294         *shares = res->shares;
295         return 0;
296 }
297
298 /**************************************************************************
299  *                      STATS                                           ***
300  **************************************************************************/
301
302 void
303 laq_print_aq_stats(struct seq_file *sfile, struct tcp_acceptq_info *taq, int i)
304 {
305         seq_printf(sfile, "Class %d connections:\n\taccepted: %u\n\t"
306                           "queued: %u\n\twait_time: %lu\n\t",
307                           i, taq->acceptq_count, taq->acceptq_qcount,
308                           taq->acceptq_wait_time);
309
310         if (i)
311                 return;
312
313         for (i = 1; i < NUM_ACCEPT_QUEUES; i++) {
314                 taq[0].acceptq_wait_time += taq[i].acceptq_wait_time;
315                 taq[0].acceptq_qcount += taq[i].acceptq_qcount;
316                 taq[0].acceptq_count += taq[i].acceptq_count;
317         }
318
319         seq_printf(sfile, "Totals :\n\taccepted: %u\n\t"
320                           "queued: %u\n\twait_time: %lu\n",
321                            taq->acceptq_count, taq->acceptq_qcount,
322                           taq->acceptq_wait_time);
323
324         return;
325 }
326
327 void
328 laq_get_aq_stats(ckrm_laq_res_t *pres, ckrm_laq_res_t *mres, 
329                                         struct tcp_acceptq_info *taq)
330 {
331         struct ckrm_net_struct *ns;
332         struct ckrm_core_class *core = pres->core;
333         struct tcp_opt *tp;
334         int a = mres->my_id;
335         int z;
336
337         if (a == 0)
338                 z = NUM_ACCEPT_QUEUES;
339         else
340                 z = a+1;
341
342         // XXX Instead of holding a  class_lock introduce a rw
343         // lock to be write locked by listen callbacks and read locked here.
344         // - VK
345         class_lock(pres->core);
346         list_for_each_entry(ns, &core->objlist,ckrm_link) { 
347                 tp = tcp_sk(ns->ns_sk);
348                 for (; a< z; a++) {
349                         taq->acceptq_wait_time += tp->acceptq[a].aq_wait_time;
350                         taq->acceptq_qcount += tp->acceptq[a].aq_qcount;
351                         taq->acceptq_count += tp->acceptq[a].aq_count;
352                         taq++;
353                 }
354         }
355         class_unlock(pres->core);
356 }
357
358
359 static int  
360 laq_get_stats(void *my_res, struct seq_file *sfile)
361 {
362         ckrm_laq_res_t *res = my_res;
363         ckrm_laq_res_t *parent;
364         struct tcp_acceptq_info taq[NUM_ACCEPT_QUEUES];
365         int rc = 0;
366
367         if (!res) 
368                 return -EINVAL;
369         
370         if (!res->pcore) { 
371                 // something is badly wrong
372                 printk(KERN_ERR "socketaq internal inconsistency\n");
373                 return -EBADF;
374         }
375
376         parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t);
377         if (!parent) {  // socket_class does not have a stat interface
378                 printk(KERN_ERR "socketaq internal fs inconsistency\n");
379                 return -EINVAL;
380         }
381
382         memset(taq, 0, sizeof(struct tcp_acceptq_info) * NUM_ACCEPT_QUEUES);
383
384         switch (res->my_depth) {
385
386         default:
387         case 0: printk(KERN_ERR "socket class bad entry\n");
388                 rc = -EBADF;
389                 break;
390
391         case 1: // can't be read from. this is internal default.
392                 // return -EINVAL
393                 rc = -EINVAL;
394                 break;
395
396         case 2: // return the default and total
397                 ckrm_lock_hier(res->core);      // block any deletes
398                 laq_get_aq_stats(res, res, &taq[0]);
399                 laq_print_aq_stats(sfile, &taq[0], 0);
400                 ckrm_unlock_hier(res->core);    // block any deletes
401                 break;
402
403         case 3: 
404                 ckrm_lock_hier(parent->core);   // block any deletes
405                 laq_get_aq_stats(parent, res, &taq[res->my_id]);
406                 laq_print_aq_stats(sfile, &taq[res->my_id], res->my_id);
407                 ckrm_unlock_hier(parent->core); // block any deletes
408                 break;
409         }
410
411         return rc;
412 }
413
414 /*
415  * The network connection is reclassified to this class. Update its shares.
416  * The socket lock is held. 
417  */
418 static void
419 laq_change_resclass(void *n, void *old, void *r)
420 {
421         struct ckrm_net_struct *ns = (struct ckrm_net_struct *)n;
422         struct ckrm_laq_res *res = (struct ckrm_laq_res *)r;
423         struct ckrm_hnode  *chnode = NULL;
424
425
426         if (res->my_depth != 2) 
427                 return; 
428
429         // a change to my_depth == 3 ie. the accept classes cannot happen.
430         // there is no target file
431         if (res->my_depth == 2) { // it is one of the socket classes
432                 struct ckrm_laq_res *reschild;
433                 struct sock *sk = ns->ns_sk; 
434                 struct tcp_opt *tp = tcp_sk(sk);
435
436                 // share rule: hold parent resource lock. then self.
437                 // However, since my_depth == 1 is a generic class it is not
438                 // needed here. Self lock is enough.
439                 spin_lock(&res->reslock);
440                 tp->acceptq[0].aq_ratio = res->shares.total_guarantee/
441                                 res->shares.unused_guarantee;
442                 list_for_each_entry(chnode,&res->core->hnode.children,siblings){
443                         reschild = hnode_2_core(chnode)->res_class[my_resid];
444
445                         spin_lock(&reschild->reslock);
446                         tp->acceptq[reschild->my_id].aq_ratio=
447                                 reschild->shares.total_guarantee/
448                                         res->shares.my_guarantee;
449                         spin_unlock(&reschild->reslock);
450                 }
451                 spin_unlock(&res->reslock);
452         }
453         
454         return;
455 }
456
457 struct ckrm_res_ctlr laq_rcbs = {
458         .res_name          = "laq",
459         .resid             = -1 , // dynamically assigned
460         .res_alloc         = laq_res_alloc,
461         .res_free          = laq_res_free,
462         .set_share_values  = laq_set_share_values,
463         .get_share_values  = laq_get_share_values,
464         .get_stats         = laq_get_stats,
465         .change_resclass   = laq_change_resclass,
466         //      .res_initcls       = laq_res_initcls,         // LAQ_HUBERTUS: no need for this !!
467 };
468
469 int __init
470 init_ckrm_laq_res(void)
471 {
472         struct ckrm_classtype *clstype;
473         int resid;
474
475         clstype = ckrm_find_classtype_by_name("socket_class");
476         if (clstype == NULL) {
477                 printk(KERN_INFO " Unknown ckrm classtype<socket_class>");
478                 return -ENOENT;
479         }
480
481         if (my_resid == -1) {
482                 resid = ckrm_register_res_ctlr(clstype,&laq_rcbs);
483                 if (resid >= 0)
484                         my_resid = resid;
485                 printk("........init_ckrm_listen_aq_res -> %d\n",my_resid);
486         }
487         return 0;
488
489 }       
490
491 void __exit
492 exit_ckrm_laq_res(void)
493 {
494         ckrm_unregister_res_ctlr(&laq_rcbs);
495         my_resid = -1;
496 }
497
498
499 module_init(init_ckrm_laq_res)
500 module_exit(exit_ckrm_laq_res)
501
502 MODULE_LICENSE("GPL");
503