Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / net / sched / act_police.c
1 /*
2  * net/sched/police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
21 #include <linux/mm.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/module.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
32 #include <net/sock.h>
33 #include <net/act_api.h>
34
/*
 * Rate-table lookups: index the policer's rate (R_tab) or peak-rate (P_tab)
 * table with the length L shifted right by that table's cell_log.
 */
#define L2T(p, L)	((p)->R_tab->data[(L) >> (p)->R_tab->rate.cell_log])
#define L2T_P(p, L)	((p)->P_tab->data[(L) >> (p)->P_tab->rate.cell_log])

/* Fetch the policer stored in a tc_action's private pointer. */
#define PRIV(a)		((struct tcf_police *)(a)->priv)
38
39 /* use generic hash table */
40 #define MY_TAB_SIZE     16
41 #define MY_TAB_MASK     15
42 static u32 idx_gen;
43 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
44 /* Policer hash table lock */
45 static DEFINE_RWLOCK(police_lock);
46
47 /* Each policer is serialized by its individual spinlock */
48
49 static __inline__ unsigned tcf_police_hash(u32 index)
50 {
51         return index&0xF;
52 }
53
54 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
55 {
56         struct tcf_police *p;
57
58         read_lock(&police_lock);
59         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
60                 if (p->index == index)
61                         break;
62         }
63         read_unlock(&police_lock);
64         return p;
65 }
66
67 #ifdef CONFIG_NET_CLS_ACT
68 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
69                               int type, struct tc_action *a)
70 {
71         struct tcf_police *p;
72         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
73         struct rtattr *r;
74
75         read_lock(&police_lock);
76
77         s_i = cb->args[0];
78
79         for (i = 0; i < MY_TAB_SIZE; i++) {
80                 p = tcf_police_ht[tcf_police_hash(i)];
81
82                 for (; p; p = p->next) {
83                         index++;
84                         if (index < s_i)
85                                 continue;
86                         a->priv = p;
87                         a->order = index;
88                         r = (struct rtattr*) skb->tail;
89                         RTA_PUT(skb, a->order, 0, NULL);
90                         if (type == RTM_DELACTION)
91                                 err = tcf_action_dump_1(skb, a, 0, 1);
92                         else
93                                 err = tcf_action_dump_1(skb, a, 0, 0);
94                         if (err < 0) {
95                                 index--;
96                                 skb_trim(skb, (u8*)r - skb->data);
97                                 goto done;
98                         }
99                         r->rta_len = skb->tail - (u8*)r;
100                         n_i++;
101                 }
102         }
103 done:
104         read_unlock(&police_lock);
105         if (n_i)
106                 cb->args[0] += n_i;
107         return n_i;
108
109 rtattr_failure:
110         skb_trim(skb, (u8*)r - skb->data);
111         goto done;
112 }
113
114 static inline int
115 tcf_act_police_hash_search(struct tc_action *a, u32 index)
116 {
117         struct tcf_police *p = tcf_police_lookup(index);
118
119         if (p != NULL) {
120                 a->priv = p;
121                 return 1;
122         } else {
123                 return 0;
124         }
125 }
126 #endif
127
128 static inline u32 tcf_police_new_index(void)
129 {
130         do {
131                 if (++idx_gen == 0)
132                         idx_gen = 1;
133         } while (tcf_police_lookup(idx_gen));
134
135         return idx_gen;
136 }
137
138 void tcf_police_destroy(struct tcf_police *p)
139 {
140         unsigned h = tcf_police_hash(p->index);
141         struct tcf_police **p1p;
142         
143         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
144                 if (*p1p == p) {
145                         write_lock_bh(&police_lock);
146                         *p1p = p->next;
147                         write_unlock_bh(&police_lock);
148 #ifdef CONFIG_NET_ESTIMATOR
149                         gen_kill_estimator(&p->bstats, &p->rate_est);
150 #endif
151                         if (p->R_tab)
152                                 qdisc_put_rtab(p->R_tab);
153                         if (p->P_tab)
154                                 qdisc_put_rtab(p->P_tab);
155                         kfree(p);
156                         return;
157                 }
158         }
159         BUG_TRAP(0);
160 }
161
162 #ifdef CONFIG_NET_CLS_ACT
163 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
164                                  struct tc_action *a, int ovr, int bind)
165 {
166         unsigned h;
167         int ret = 0, err;
168         struct rtattr *tb[TCA_POLICE_MAX];
169         struct tc_police *parm;
170         struct tcf_police *p;
171         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
172
173         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
174                 return -EINVAL;
175
176         if (tb[TCA_POLICE_TBF-1] == NULL ||
177             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
178                 return -EINVAL;
179         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
180
181         if (tb[TCA_POLICE_RESULT-1] != NULL &&
182             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
183                 return -EINVAL;
184         if (tb[TCA_POLICE_RESULT-1] != NULL &&
185             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
186                 return -EINVAL;
187
188         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
189                 a->priv = p;
190                 if (bind) {
191                         p->bindcnt += 1;
192                         p->refcnt += 1;
193                 }
194                 if (ovr)
195                         goto override;
196                 return ret;
197         }
198
199         p = kzalloc(sizeof(*p), GFP_KERNEL);
200         if (p == NULL)
201                 return -ENOMEM;
202
203         ret = ACT_P_CREATED;
204         p->refcnt = 1;
205         spin_lock_init(&p->lock);
206         p->stats_lock = &p->lock;
207         if (bind)
208                 p->bindcnt = 1;
209 override:
210         if (parm->rate.rate) {
211                 err = -ENOMEM;
212                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
213                 if (R_tab == NULL)
214                         goto failure;
215                 if (parm->peakrate.rate) {
216                         P_tab = qdisc_get_rtab(&parm->peakrate,
217                                                tb[TCA_POLICE_PEAKRATE-1]);
218                         if (p->P_tab == NULL) {
219                                 qdisc_put_rtab(R_tab);
220                                 goto failure;
221                         }
222                 }
223         }
224         /* No failure allowed after this point */
225         spin_lock_bh(&p->lock);
226         if (R_tab != NULL) {
227                 qdisc_put_rtab(p->R_tab);
228                 p->R_tab = R_tab;
229         }
230         if (P_tab != NULL) {
231                 qdisc_put_rtab(p->P_tab);
232                 p->P_tab = P_tab;
233         }
234
235         if (tb[TCA_POLICE_RESULT-1])
236                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
237         p->toks = p->burst = parm->burst;
238         p->mtu = parm->mtu;
239         if (p->mtu == 0) {
240                 p->mtu = ~0;
241                 if (p->R_tab)
242                         p->mtu = 255<<p->R_tab->rate.cell_log;
243         }
244         if (p->P_tab)
245                 p->ptoks = L2T_P(p, p->mtu);
246         p->action = parm->action;
247
248 #ifdef CONFIG_NET_ESTIMATOR
249         if (tb[TCA_POLICE_AVRATE-1])
250                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
251         if (est)
252                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
253 #endif
254
255         spin_unlock_bh(&p->lock);
256         if (ret != ACT_P_CREATED)
257                 return ret;
258
259         PSCHED_GET_TIME(p->t_c);
260         p->index = parm->index ? : tcf_police_new_index();
261         h = tcf_police_hash(p->index);
262         write_lock_bh(&police_lock);
263         p->next = tcf_police_ht[h];
264         tcf_police_ht[h] = p;
265         write_unlock_bh(&police_lock);
266
267         a->priv = p;
268         return ret;
269
270 failure:
271         if (ret == ACT_P_CREATED)
272                 kfree(p);
273         return err;
274 }
275
276 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
277 {
278         struct tcf_police *p = PRIV(a);
279
280         if (p != NULL)
281                 return tcf_police_release(p, bind);
282         return 0;
283 }
284
285 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
286                           struct tcf_result *res)
287 {
288         psched_time_t now;
289         struct tcf_police *p = PRIV(a);
290         long toks;
291         long ptoks = 0;
292
293         spin_lock(&p->lock);
294
295         p->bstats.bytes += skb->len;
296         p->bstats.packets++;
297
298 #ifdef CONFIG_NET_ESTIMATOR
299         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
300                 p->qstats.overlimits++;
301                 spin_unlock(&p->lock);
302                 return p->action;
303         }
304 #endif
305
306         if (skb->len <= p->mtu) {
307                 if (p->R_tab == NULL) {
308                         spin_unlock(&p->lock);
309                         return p->result;
310                 }
311
312                 PSCHED_GET_TIME(now);
313
314                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
315
316                 if (p->P_tab) {
317                         ptoks = toks + p->ptoks;
318                         if (ptoks > (long)L2T_P(p, p->mtu))
319                                 ptoks = (long)L2T_P(p, p->mtu);
320                         ptoks -= L2T_P(p, skb->len);
321                 }
322                 toks += p->toks;
323                 if (toks > (long)p->burst)
324                         toks = p->burst;
325                 toks -= L2T(p, skb->len);
326
327                 if ((toks|ptoks) >= 0) {
328                         p->t_c = now;
329                         p->toks = toks;
330                         p->ptoks = ptoks;
331                         spin_unlock(&p->lock);
332                         return p->result;
333                 }
334         }
335
336         p->qstats.overlimits++;
337         spin_unlock(&p->lock);
338         return p->action;
339 }
340
341 static int
342 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
343 {
344         unsigned char    *b = skb->tail;
345         struct tc_police opt;
346         struct tcf_police *p = PRIV(a);
347
348         opt.index = p->index;
349         opt.action = p->action;
350         opt.mtu = p->mtu;
351         opt.burst = p->burst;
352         opt.refcnt = p->refcnt - ref;
353         opt.bindcnt = p->bindcnt - bind;
354         if (p->R_tab)
355                 opt.rate = p->R_tab->rate;
356         else
357                 memset(&opt.rate, 0, sizeof(opt.rate));
358         if (p->P_tab)
359                 opt.peakrate = p->P_tab->rate;
360         else
361                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
362         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
363         if (p->result)
364                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
365 #ifdef CONFIG_NET_ESTIMATOR
366         if (p->ewma_rate)
367                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
368 #endif
369         return skb->len;
370
371 rtattr_failure:
372         skb_trim(skb, b - skb->data);
373         return -1;
374 }
375
376 MODULE_AUTHOR("Alexey Kuznetsov");
377 MODULE_DESCRIPTION("Policing actions");
378 MODULE_LICENSE("GPL");
379
380 static struct tc_action_ops act_police_ops = {
381         .kind           =       "police",
382         .type           =       TCA_ID_POLICE,
383         .capab          =       TCA_CAP_NONE,
384         .owner          =       THIS_MODULE,
385         .act            =       tcf_act_police,
386         .dump           =       tcf_act_police_dump,
387         .cleanup        =       tcf_act_police_cleanup,
388         .lookup         =       tcf_act_police_hash_search,
389         .init           =       tcf_act_police_locate,
390         .walk           =       tcf_act_police_walker
391 };
392
393 static int __init
394 police_init_module(void)
395 {
396         return tcf_register_action(&act_police_ops);
397 }
398
399 static void __exit
400 police_cleanup_module(void)
401 {
402         tcf_unregister_action(&act_police_ops);
403 }
404
405 module_init(police_init_module);
406 module_exit(police_cleanup_module);
407
408 #else /* CONFIG_NET_CLS_ACT */
409
410 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
411 {
412         unsigned h;
413         struct tcf_police *p;
414         struct rtattr *tb[TCA_POLICE_MAX];
415         struct tc_police *parm;
416
417         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
418                 return NULL;
419
420         if (tb[TCA_POLICE_TBF-1] == NULL ||
421             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
422                 return NULL;
423
424         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
425
426         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
427                 p->refcnt++;
428                 return p;
429         }
430
431         p = kzalloc(sizeof(*p), GFP_KERNEL);
432         if (p == NULL)
433                 return NULL;
434
435         p->refcnt = 1;
436         spin_lock_init(&p->lock);
437         p->stats_lock = &p->lock;
438         if (parm->rate.rate) {
439                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
440                 if (p->R_tab == NULL)
441                         goto failure;
442                 if (parm->peakrate.rate) {
443                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
444                                                   tb[TCA_POLICE_PEAKRATE-1]);
445                         if (p->P_tab == NULL)
446                                 goto failure;
447                 }
448         }
449         if (tb[TCA_POLICE_RESULT-1]) {
450                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
451                         goto failure;
452                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
453         }
454 #ifdef CONFIG_NET_ESTIMATOR
455         if (tb[TCA_POLICE_AVRATE-1]) {
456                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
457                         goto failure;
458                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
459         }
460 #endif
461         p->toks = p->burst = parm->burst;
462         p->mtu = parm->mtu;
463         if (p->mtu == 0) {
464                 p->mtu = ~0;
465                 if (p->R_tab)
466                         p->mtu = 255<<p->R_tab->rate.cell_log;
467         }
468         if (p->P_tab)
469                 p->ptoks = L2T_P(p, p->mtu);
470         PSCHED_GET_TIME(p->t_c);
471         p->index = parm->index ? : tcf_police_new_index();
472         p->action = parm->action;
473 #ifdef CONFIG_NET_ESTIMATOR
474         if (est)
475                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
476 #endif
477         h = tcf_police_hash(p->index);
478         write_lock_bh(&police_lock);
479         p->next = tcf_police_ht[h];
480         tcf_police_ht[h] = p;
481         write_unlock_bh(&police_lock);
482         return p;
483
484 failure:
485         if (p->R_tab)
486                 qdisc_put_rtab(p->R_tab);
487         kfree(p);
488         return NULL;
489 }
490
491 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
492 {
493         psched_time_t now;
494         long toks;
495         long ptoks = 0;
496
497         spin_lock(&p->lock);
498
499         p->bstats.bytes += skb->len;
500         p->bstats.packets++;
501
502 #ifdef CONFIG_NET_ESTIMATOR
503         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
504                 p->qstats.overlimits++;
505                 spin_unlock(&p->lock);
506                 return p->action;
507         }
508 #endif
509
510         if (skb->len <= p->mtu) {
511                 if (p->R_tab == NULL) {
512                         spin_unlock(&p->lock);
513                         return p->result;
514                 }
515
516                 PSCHED_GET_TIME(now);
517
518                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
519
520                 if (p->P_tab) {
521                         ptoks = toks + p->ptoks;
522                         if (ptoks > (long)L2T_P(p, p->mtu))
523                                 ptoks = (long)L2T_P(p, p->mtu);
524                         ptoks -= L2T_P(p, skb->len);
525                 }
526                 toks += p->toks;
527                 if (toks > (long)p->burst)
528                         toks = p->burst;
529                 toks -= L2T(p, skb->len);
530
531                 if ((toks|ptoks) >= 0) {
532                         p->t_c = now;
533                         p->toks = toks;
534                         p->ptoks = ptoks;
535                         spin_unlock(&p->lock);
536                         return p->result;
537                 }
538         }
539
540         p->qstats.overlimits++;
541         spin_unlock(&p->lock);
542         return p->action;
543 }
544 EXPORT_SYMBOL(tcf_police);
545
546 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
547 {
548         unsigned char    *b = skb->tail;
549         struct tc_police opt;
550
551         opt.index = p->index;
552         opt.action = p->action;
553         opt.mtu = p->mtu;
554         opt.burst = p->burst;
555         if (p->R_tab)
556                 opt.rate = p->R_tab->rate;
557         else
558                 memset(&opt.rate, 0, sizeof(opt.rate));
559         if (p->P_tab)
560                 opt.peakrate = p->P_tab->rate;
561         else
562                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
563         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
564         if (p->result)
565                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
566 #ifdef CONFIG_NET_ESTIMATOR
567         if (p->ewma_rate)
568                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
569 #endif
570         return skb->len;
571
572 rtattr_failure:
573         skb_trim(skb, b - skb->data);
574         return -1;
575 }
576
577 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
578 {
579         struct gnet_dump d;
580         
581         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
582                         TCA_XSTATS, p->stats_lock, &d) < 0)
583                 goto errout;
584         
585         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
586 #ifdef CONFIG_NET_ESTIMATOR
587             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
588 #endif
589             gnet_stats_copy_queue(&d, &p->qstats) < 0)
590                 goto errout;
591
592         if (gnet_stats_finish_copy(&d) < 0)
593                 goto errout;
594
595         return 0;
596
597 errout:
598         return -1;
599 }
600
601 #endif /* CONFIG_NET_CLS_ACT */