This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / kernel / ckrm / ckrm_sockc.c
1 /* ckrm_sock.c - Class-based Kernel Resource Management (CKRM)
2  *
3  * Copyright (C) Hubertus Franke, IBM Corp. 2003,2004
4  *           (C) Shailabh Nagar,  IBM Corp. 2003
5  *           (C) Chandra Seetharaman,  IBM Corp. 2003
6  *           (C) Vivek Kashyap, IBM Corp. 2004
7  * 
8  * 
9  * Provides kernel API of CKRM for in-kernel,per-resource controllers 
10  * (one each for cpu, memory, io, network) and callbacks for 
11  * classification modules.
12  *
13  * Latest version, more details at http://ckrm.sf.net
14  * 
15  * This program is free software; you can redistribute it and/or modify
16  * it under the terms of the GNU General Public License as published by
17  * the Free Software Foundation; either version 2 of the License, or
18  * (at your option) any later version.
19  *
20  */
21
/* Changes
 *
 * 28 Aug 2003
 *        Created.
 * 06 Nov 2003
 *        Made modifications to suit the new RBCE module.
 * 10 Nov 2003
 *        Fixed a bug in fork and exit callbacks. Added callbacks_active and
 *        surrounding logic. Added task parameter for all CE callbacks.
 * 23 Mar 2004
 *        Moved to reference-counted class objects and correct locking.
 * 12 Apr 2004
 *        Adapted to the emerging classtype interface.
 */
36
37 #include <linux/config.h>
38 #include <linux/init.h>
39 #include <linux/linkage.h>
40 #include <linux/kernel.h>
41 #include <linux/errno.h>
42 #include <asm/uaccess.h>
43 #include <linux/mm.h>
44 #include <asm/errno.h>
45 #include <linux/string.h>
46 #include <linux/list.h>
47 #include <linux/spinlock.h>
48 #include <linux/module.h>
49 #include <linux/ckrm_rc.h>
50 #include <linux/parser.h>
51 #include <net/tcp.h>
52
53 #include <linux/ckrm_net.h>
54
/*
 * Socket class object. Its only member is the embedded generic CKRM core
 * class.
 */
struct ckrm_sock_class {
        struct ckrm_core_class core;
};
58
/*
 * Statically allocated default socket class. It is initialized at boot by
 * ckrm_meta_init_sockclass() and is what CT_sockclass.default_class points to.
 */
static struct ckrm_sock_class sockclass_dflt_class = {
};

/* Name of the socket classtype; also used as the default class's name. */
#define SOCKET_CLASS_TYPE_NAME  "socketclass"

/* Name given to the default class when it is initialized or "freed". */
const char *dflt_sockclass_name = SOCKET_CLASS_TYPE_NAME;
65
/*
 * Forward declarations of the classtype operations defined later in this
 * file; they are needed by the CT_sockclass initializer below.
 */
static struct ckrm_core_class *sock_alloc_class(struct ckrm_core_class *parent,
                                                const char *name);
static int sock_free_class(struct ckrm_core_class *core);

static int sock_forced_reclassify(ckrm_core_class_t * target,
                                  const char *resname);
static int sock_show_members(struct ckrm_core_class *core,
                             struct seq_file *seq);
static void sock_add_resctrl(struct ckrm_core_class *core, int resid);
/* Not a classtype operation: helper used by sock_free_class(). */
static void sock_reclassify_class(struct ckrm_sock_class *cls);
76
/*
 * Classtype descriptor for socket classes. It names the type, points at the
 * static default class, and wires up the per-type operations: the ones
 * implemented in this file plus the generic ckrm_class_* defaults.
 * It is registered by ckrm_meta_init_sockclass().
 */
struct ckrm_classtype CT_sockclass = {
        .mfidx = 1,
        .name = SOCKET_CLASS_TYPE_NAME,
        .typeID = CKRM_CLASSTYPE_SOCKET_CLASS,
        .maxdepth = 3,
        .resid_reserved = 0,
        .max_res_ctlrs = CKRM_MAX_RES_CTLRS,
        .max_resid = 0,
        .bit_res_ctlrs = 0L,
        .res_ctlrs_lock = SPIN_LOCK_UNLOCKED,
        .classes = LIST_HEAD_INIT(CT_sockclass.classes),

        .default_class = &sockclass_dflt_class.core,

        // operations implemented in this file
        .alloc = &sock_alloc_class,
        .free = &sock_free_class,
        .show_members = &sock_show_members,
        .forced_reclassify = &sock_forced_reclassify,

        // operations taken from the generic ckrm_class_* defaults
        .show_shares = &ckrm_class_show_shares,
        .show_stats = &ckrm_class_show_stats,
        .show_config = &ckrm_class_show_config,
        .set_config = &ckrm_class_set_config,
        .set_shares = &ckrm_class_set_shares,
        .reset_stats = &ckrm_class_reset_stats,

        // mandatory private version .. no default available
        .add_resctrl = &sock_add_resctrl,
};
108
109 /* helper functions */
110
111 void ckrm_ns_hold(struct ckrm_net_struct *ns)
112 {
113         atomic_inc(&ns->ns_refcnt);
114         return;
115 }
116
117 void ckrm_ns_put(struct ckrm_net_struct *ns)
118 {
119         if (atomic_dec_and_test(&ns->ns_refcnt))
120                 kfree(ns);
121         return;
122 }
123
124 /*
125  * Change the class of a netstruct 
126  *
127  * Change the task's task class  to "newcls" if the task's current 
128  * class (task->taskclass) is same as given "oldcls", if it is non-NULL.
129  *
130  */
131
132 static void
133 sock_set_class(struct ckrm_net_struct *ns, struct ckrm_sock_class *newcls,
134                struct ckrm_sock_class *oldcls, enum ckrm_event event)
135 {
136         int i;
137         struct ckrm_res_ctlr *rcbs;
138         struct ckrm_classtype *clstype;
139         void *old_res_class, *new_res_class;
140
141         if ((newcls == oldcls) || (newcls == NULL)) {
142                 ns->core = (void *)oldcls;
143                 return;
144         }
145
146         class_lock(class_core(newcls));
147         ns->core = newcls;
148         list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
149         class_unlock(class_core(newcls));
150
151         clstype = class_isa(newcls);
152         for (i = 0; i < clstype->max_resid; i++) {
153                 atomic_inc(&clstype->nr_resusers[i]);
154                 old_res_class =
155                     oldcls ? class_core(oldcls)->res_class[i] : NULL;
156                 new_res_class =
157                     newcls ? class_core(newcls)->res_class[i] : NULL;
158                 rcbs = clstype->res_ctlrs[i];
159                 if (rcbs && rcbs->change_resclass
160                     && (old_res_class != new_res_class))
161                         (*rcbs->change_resclass) (ns, old_res_class,
162                                                   new_res_class);
163                 atomic_dec(&clstype->nr_resusers[i]);
164         }
165         return;
166 }
167
168 static void sock_add_resctrl(struct ckrm_core_class *core, int resid)
169 {
170         struct ckrm_net_struct *ns;
171         struct ckrm_res_ctlr *rcbs;
172
173         if ((resid < 0) || (resid >= CKRM_MAX_RES_CTLRS)
174             || ((rcbs = core->classtype->res_ctlrs[resid]) == NULL))
175                 return;
176
177         class_lock(core);
178         list_for_each_entry(ns, &core->objlist, ckrm_link) {
179                 if (rcbs->change_resclass)
180                         (*rcbs->change_resclass) (ns, NULL,
181                                                   core->res_class[resid]);
182         }
183         class_unlock(core);
184 }
185
186 /**************************************************************************
187  *                   Functions called from classification points          *
188  **************************************************************************/
189
190 static void cb_sockclass_listen_start(struct sock *sk)
191 {
192         struct ckrm_net_struct *ns = NULL;
193         struct ckrm_sock_class *newcls = NULL;
194         struct ckrm_res_ctlr *rcbs;
195         struct ckrm_classtype *clstype;
196         int i = 0;
197
198         // XXX - TBD ipv6
199         if (sk->sk_family == AF_INET6)
200                 return;
201
202         // to store the socket address
203         ns = (struct ckrm_net_struct *)
204             kmalloc(sizeof(struct ckrm_net_struct), GFP_ATOMIC);
205         if (!ns)
206                 return;
207
208         memset(ns, 0, sizeof(*ns));
209         INIT_LIST_HEAD(&ns->ckrm_link);
210         ckrm_ns_hold(ns);
211
212         ns->ns_family = sk->sk_family;
213         if (ns->ns_family == AF_INET6)  // IPv6 not supported yet.
214                 return;
215
216         ns->ns_daddrv4 = inet_sk(sk)->rcv_saddr;
217         ns->ns_dport = inet_sk(sk)->num;
218
219         ns->ns_pid = current->pid;
220         ns->ns_tgid = current->tgid;
221         ns->ns_tsk = current;
222         ce_protect(&CT_sockclass);
223         CE_CLASSIFY_RET(newcls, &CT_sockclass, CKRM_EVENT_LISTEN_START, ns,
224                         current);
225         ce_release(&CT_sockclass);
226
227         if (newcls == NULL) {
228                 newcls = &sockclass_dflt_class;
229                 ckrm_core_grab(class_core(newcls));
230         }
231
232         class_lock(class_core(newcls));
233         list_add(&ns->ckrm_link, &class_core(newcls)->objlist);
234         ns->core = newcls;
235         class_unlock(class_core(newcls));
236
237         // the socket is already locked
238         // take a reference on socket on our behalf
239         sock_hold(sk);
240         sk->sk_ns = (void *)ns;
241         ns->ns_sk = sk;
242
243         // modify its shares
244         clstype = class_isa(newcls);
245         for (i = 0; i < clstype->max_resid; i++) {
246                 atomic_inc(&clstype->nr_resusers[i]);
247                 rcbs = clstype->res_ctlrs[i];
248                 if (rcbs && rcbs->change_resclass) {
249                         (*rcbs->change_resclass) ((void *)ns,
250                                                   NULL,
251                                                   class_core(newcls)->
252                                                   res_class[i]);
253                 }
254                 atomic_dec(&clstype->nr_resusers[i]);
255         }
256         return;
257 }
258
259 static void cb_sockclass_listen_stop(struct sock *sk)
260 {
261         struct ckrm_net_struct *ns = NULL;
262         struct ckrm_sock_class *newcls = NULL;
263
264         // XXX - TBD ipv6
265         if (sk->sk_family == AF_INET6)
266                 return;
267
268         ns = (struct ckrm_net_struct *)sk->sk_ns;
269         if (!ns)     // listen_start called before socket_aq was loaded
270                 return;
271
272         newcls = ns->core;
273         if (newcls) {
274                 class_lock(class_core(newcls));
275                 list_del(&ns->ckrm_link);
276                 INIT_LIST_HEAD(&ns->ckrm_link);
277                 class_unlock(class_core(newcls));
278                 ckrm_core_drop(class_core(newcls));
279         }
280         // the socket is already locked
281         sk->sk_ns = NULL;
282         sock_put(sk);
283
284         // Should be the last count and free it
285         ckrm_ns_put(ns);
286         return;
287 }
288
/*
 * Event-to-callback table for the socket classtype, registered through
 * ckrm_register_event_set() in ckrm_meta_init_sockclass(). The {-1} entry
 * terminates the table.
 */
static struct ckrm_event_spec sock_events_callbacks[] = {
        CKRM_EVENT_SPEC(LISTEN_START, cb_sockclass_listen_start),
        CKRM_EVENT_SPEC(LISTEN_STOP, cb_sockclass_listen_stop),
        {-1}
};
294
295 /**************************************************************************
296  *                  Class Object Creation / Destruction
297  **************************************************************************/
298
299 static struct ckrm_core_class *sock_alloc_class(struct ckrm_core_class *parent,
300                                                 const char *name)
301 {
302         struct ckrm_sock_class *sockcls;
303         sockcls = kmalloc(sizeof(struct ckrm_sock_class), GFP_KERNEL);
304         if (sockcls == NULL)
305                 return NULL;
306         memset(sockcls, 0, sizeof(struct ckrm_sock_class));
307
308         ckrm_init_core_class(&CT_sockclass, class_core(sockcls), parent, name);
309
310         ce_protect(&CT_sockclass);
311         if (CT_sockclass.ce_cb_active && CT_sockclass.ce_callbacks.class_add)
312                 (*CT_sockclass.ce_callbacks.class_add) (name, sockcls,
313                                                         CT_sockclass.typeID);
314         ce_release(&CT_sockclass);
315
316         return class_core(sockcls);
317 }
318
319 static int sock_free_class(struct ckrm_core_class *core)
320 {
321         struct ckrm_sock_class *sockcls;
322
323         if (!ckrm_is_core_valid(core)) {
324                 // Invalid core
325                 return (-EINVAL);
326         }
327         if (core == core->classtype->default_class) {
328                 // reset the name tag
329                 core->name = dflt_sockclass_name;
330                 return 0;
331         }
332
333         sockcls = class_type(struct ckrm_sock_class, core);
334
335         ce_protect(&CT_sockclass);
336
337         if (CT_sockclass.ce_cb_active && CT_sockclass.ce_callbacks.class_delete)
338                 (*CT_sockclass.ce_callbacks.class_delete) (core->name, sockcls,
339                                                            CT_sockclass.typeID);
340
341         sock_reclassify_class(sockcls);
342
343         ce_release(&CT_sockclass);
344
345         ckrm_release_core_class(core);  
346         // Hubertus .... could just drop the class .. error message
347
348         return 0;
349 }
350
351 static int sock_show_members(struct ckrm_core_class *core, struct seq_file *seq)
352 {
353         struct list_head *lh;
354         struct ckrm_net_struct *ns = NULL;
355
356         class_lock(core);
357         list_for_each(lh, &core->objlist) {
358                 ns = container_of(lh, struct ckrm_net_struct, ckrm_link);
359                 seq_printf(seq, "%d.%d.%d.%d\\%d\n",
360                            NIPQUAD(ns->ns_daddrv4), ns->ns_dport);
361         }
362         class_unlock(core);
363
364         return 0;
365 }
366
367 static int
368 sock_forced_reclassify_ns(struct ckrm_net_struct *tns,
369                           struct ckrm_core_class *core)
370 {
371         struct ckrm_net_struct *ns = NULL;
372         struct sock *sk = NULL;
373         struct ckrm_sock_class *oldcls, *newcls;
374         int rc = -EINVAL;
375
376         if (!ckrm_is_core_valid(core)) {
377                 return rc;
378         }
379
380         newcls = class_type(struct ckrm_sock_class, core);
381         // lookup the listening sockets
382         // returns with a reference count set on socket
383         if (tns->ns_family == AF_INET6)
384                 return -EOPNOTSUPP;
385
386         sk = tcp_v4_lookup_listener(tns->ns_daddrv4, tns->ns_dport, 0);
387         if (!sk) {
388                 printk(KERN_INFO "No such listener 0x%x:%d\n",
389                        tns->ns_daddrv4, tns->ns_dport);
390                 return rc;
391         }
392         lock_sock(sk);
393         if (!sk->sk_ns) {
394                 goto out;
395         }
396         ns = sk->sk_ns;
397         ckrm_ns_hold(ns);
398         if (!capable(CAP_NET_ADMIN) && (ns->ns_tsk->user != current->user)) {
399                 ckrm_ns_put(ns);
400                 rc = -EPERM;
401                 goto out;
402         }
403
404         oldcls = ns->core;
405         if ((oldcls == NULL) || (oldcls == newcls)) {
406                 ckrm_ns_put(ns);
407                 goto out;
408         }
409         // remove the net_struct from the current class
410         class_lock(class_core(oldcls));
411         list_del(&ns->ckrm_link);
412         INIT_LIST_HEAD(&ns->ckrm_link);
413         ns->core = NULL;
414         class_unlock(class_core(oldcls));
415
416         sock_set_class(ns, newcls, oldcls, CKRM_EVENT_MANUAL);
417         ckrm_ns_put(ns);
418         rc = 0;
419       out:
420         release_sock(sk);
421         sock_put(sk);
422
423         return rc;
424
425 }
426
/* Token ids for the option strings parsed by sock_forced_reclassify(). */
enum sock_target_token_t {
        IPV4, IPV6, SOCKC_TARGET_ERR
};

/*
 * match_token() patterns for the reclassify target options. The entry with
 * a NULL pattern ends the table.
 */
static match_table_t sock_target_tokens = {
        {IPV4, "ipv4=%s"},
        {IPV6, "ipv6=%s"},
        {SOCKC_TARGET_ERR, NULL},
};
436
/*
 * Parse dot-separated decimal fields from "s" up to the delimiter "c" or
 * the end of the string, whichever comes first.
 *
 * Each '.' shifts the accumulated value left by 8 bits and ORs in the field
 * just read; the last field is folded in the same way on exit. A plain
 * number without dots is therefore returned unchanged. No validation is
 * done: characters are assumed to be decimal digits or '.'.
 *
 * The result is stored in *v. Returns a pointer to the character at which
 * parsing stopped (the delimiter or the terminating NUL).
 */
char *v4toi(char *s, char c, __u32 * v)
{
        unsigned int acc = 0;
        unsigned int field = 0;

        for (; *s != '\0' && *s != c; s++) {
                if (*s != '.') {
                        field = field * 10 + *s - '0';
                        continue;
                }
                acc = (acc << 8) | field;
                field = 0;
        }

        *v = (acc << 8) | field;

        return s;
}
456
457 static int
458 sock_forced_reclassify(struct ckrm_core_class *target, const char *options)
459 {
460         char *p, *p2;
461         struct ckrm_net_struct ns;
462         __u32 v4addr, tmp;
463
464         if (!options)
465                 return -EINVAL;
466
467         if (target == NULL) {
468                 unsigned long id = simple_strtol(options,NULL,0);
469                 if (!capable(CAP_NET_ADMIN))
470                         return -EPERM;
471                 if (id != 0) 
472                         return -EINVAL;
473                 printk("sock_class: reclassify all not net implemented\n");
474                 return 0;
475         }
476
477         while ((p = strsep((char **)&options, ",")) != NULL) {
478                 substring_t args[MAX_OPT_ARGS];
479                 int token;
480
481                 if (!*p)
482                         continue;
483                 token = match_token(p, sock_target_tokens, args);
484                 switch (token) {
485
486                 case IPV4:
487
488                         p2 = p;
489                         while (*p2 && (*p2 != '='))
490                                 ++p2;
491                         p2++;
492                         p2 = v4toi(p2, '\\', &(v4addr));
493                         ns.ns_daddrv4 = htonl(v4addr);
494                         ns.ns_family = AF_INET;
495                         p2 = v4toi(++p2, ':', &tmp);
496                         ns.ns_dport = (__u16) tmp;
497                         if (*p2)
498                                 p2 = v4toi(++p2, '\0', &ns.ns_pid);
499                         sock_forced_reclassify_ns(&ns, target);
500                         break;
501
502                 case IPV6:
503                         printk(KERN_INFO "rcfs: IPV6 not supported yet\n");
504                         return -ENOSYS;
505                 default:
506                         return -EINVAL;
507                 }
508         }
509         return -EINVAL;
510 }
511
512 /*
513  * Listen_aq reclassification.
514  */
515 static void sock_reclassify_class(struct ckrm_sock_class *cls)
516 {
517         struct ckrm_net_struct *ns, *tns;
518         struct ckrm_core_class *core = class_core(cls);
519         LIST_HEAD(local_list);
520
521         if (!cls)
522                 return;
523
524         if (!ckrm_validate_and_grab_core(core))
525                 return;
526
527         class_lock(core);
528         // we have the core refcnt
529         if (list_empty(&core->objlist)) {
530                 class_unlock(core);
531                 ckrm_core_drop(core);
532                 return;
533         }
534
535         INIT_LIST_HEAD(&local_list);
536         list_splice_init(&core->objlist, &local_list);
537         class_unlock(core);
538         ckrm_core_drop(core);
539
540         list_for_each_entry_safe(ns, tns, &local_list, ckrm_link) {
541                 ckrm_ns_hold(ns);
542                 list_del(&ns->ckrm_link);
543                 if (ns->ns_sk) {
544                         lock_sock(ns->ns_sk);
545                         sock_set_class(ns, &sockclass_dflt_class, NULL,
546                                        CKRM_EVENT_MANUAL);
547                         release_sock(ns->ns_sk);
548                 }
549                 ckrm_ns_put(ns);
550         }
551         return;
552 }
553
554 void __init ckrm_meta_init_sockclass(void)
555 {
556         printk("...... Initializing ClassType<%s> ........\n",
557                CT_sockclass.name);
558         // intialize the default class
559         ckrm_init_core_class(&CT_sockclass, class_core(&sockclass_dflt_class),
560                              NULL, dflt_sockclass_name);
561
562         // register classtype and initialize default task class
563         ckrm_register_classtype(&CT_sockclass);
564         ckrm_register_event_set(sock_events_callbacks);
565
566         // note registeration of all resource controllers will be done 
567         // later dynamically as these are specified as modules
568 }
569
570 #if 1
571
572 /*****************************************************************************
573  * Debugging Network Classes:  Utility functions
574  *****************************************************************************/
575
576 #endif