linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / net / sched / act_police.c
1 /*
2  * net/sched/police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <net/sock.h>
34 #include <net/act_api.h>
35
/*
 * Rate-table lookups for policer p and packet length L: the length is
 * shifted right by the table's cell_log and the result indexes data[].
 * L2T consults the rate table (R_tab), L2T_P the peak-rate table (P_tab).
 * Neither macro checks that the table pointer is non-NULL.
 */
#define L2T(p, L) \
        ((p)->R_tab->data[(L) >> (p)->R_tab->rate.cell_log])
#define L2T_P(p, L) \
        ((p)->P_tab->data[(L) >> (p)->P_tab->rate.cell_log])
/* Policer stored in the private pointer of tc_action a. */
#define PRIV(a) ((struct tcf_police *) (a)->priv)
39
40 /* use generic hash table */
41 #define MY_TAB_SIZE     16
42 #define MY_TAB_MASK     15
43 static u32 idx_gen;
44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
45 /* Policer hash table lock */
46 static DEFINE_RWLOCK(police_lock);
47
48 /* old policer structure from before tc actions */
49 struct tc_police_compat
50 {
51         u32                     index;
52         int                     action;
53         u32                     limit;
54         u32                     burst;
55         u32                     mtu;
56         struct tc_ratespec      rate;
57         struct tc_ratespec      peakrate;
58 };
59
60 /* Each policer is serialized by its individual spinlock */
61
62 static __inline__ unsigned tcf_police_hash(u32 index)
63 {
64         return index&0xF;
65 }
66
67 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
68 {
69         struct tcf_police *p;
70
71         read_lock(&police_lock);
72         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
73                 if (p->index == index)
74                         break;
75         }
76         read_unlock(&police_lock);
77         return p;
78 }
79
80 #ifdef CONFIG_NET_CLS_ACT
81 static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
82                               int type, struct tc_action *a)
83 {
84         struct tcf_police *p;
85         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
86         struct rtattr *r;
87
88         read_lock(&police_lock);
89
90         s_i = cb->args[0];
91
92         for (i = 0; i < MY_TAB_SIZE; i++) {
93                 p = tcf_police_ht[tcf_police_hash(i)];
94
95                 for (; p; p = p->next) {
96                         index++;
97                         if (index < s_i)
98                                 continue;
99                         a->priv = p;
100                         a->order = index;
101                         r = (struct rtattr*) skb->tail;
102                         RTA_PUT(skb, a->order, 0, NULL);
103                         if (type == RTM_DELACTION)
104                                 err = tcf_action_dump_1(skb, a, 0, 1);
105                         else
106                                 err = tcf_action_dump_1(skb, a, 0, 0);
107                         if (err < 0) {
108                                 index--;
109                                 skb_trim(skb, (u8*)r - skb->data);
110                                 goto done;
111                         }
112                         r->rta_len = skb->tail - (u8*)r;
113                         n_i++;
114                 }
115         }
116 done:
117         read_unlock(&police_lock);
118         if (n_i)
119                 cb->args[0] += n_i;
120         return n_i;
121
122 rtattr_failure:
123         skb_trim(skb, (u8*)r - skb->data);
124         goto done;
125 }
126
127 static inline int
128 tcf_hash_search(struct tc_action *a, u32 index)
129 {
130         struct tcf_police *p = tcf_police_lookup(index);
131
132         if (p != NULL) {
133                 a->priv = p;
134                 return 1;
135         } else {
136                 return 0;
137         }
138 }
139 #endif
140
141 static inline u32 tcf_police_new_index(void)
142 {
143         do {
144                 if (++idx_gen == 0)
145                         idx_gen = 1;
146         } while (tcf_police_lookup(idx_gen));
147
148         return idx_gen;
149 }
150
151 void tcf_police_destroy(struct tcf_police *p)
152 {
153         unsigned h = tcf_police_hash(p->index);
154         struct tcf_police **p1p;
155         
156         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
157                 if (*p1p == p) {
158                         write_lock_bh(&police_lock);
159                         *p1p = p->next;
160                         write_unlock_bh(&police_lock);
161 #ifdef CONFIG_NET_ESTIMATOR
162                         gen_kill_estimator(&p->bstats, &p->rate_est);
163 #endif
164                         if (p->R_tab)
165                                 qdisc_put_rtab(p->R_tab);
166                         if (p->P_tab)
167                                 qdisc_put_rtab(p->P_tab);
168                         kfree(p);
169                         return;
170                 }
171         }
172         BUG_TRAP(0);
173 }
174
175 #ifdef CONFIG_NET_CLS_ACT
176 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
177                                  struct tc_action *a, int ovr, int bind)
178 {
179         unsigned h;
180         int ret = 0, err;
181         struct rtattr *tb[TCA_POLICE_MAX];
182         struct tc_police *parm;
183         struct tcf_police *p;
184         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
185         int size;
186
187         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
188                 return -EINVAL;
189
190         if (tb[TCA_POLICE_TBF-1] == NULL)
191                 return -EINVAL;
192         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
193         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
194                 return -EINVAL;
195         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
196
197         if (tb[TCA_POLICE_RESULT-1] != NULL &&
198             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
199                 return -EINVAL;
200         if (tb[TCA_POLICE_RESULT-1] != NULL &&
201             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
202                 return -EINVAL;
203
204         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
205                 a->priv = p;
206                 if (bind) {
207                         p->bindcnt += 1;
208                         p->refcnt += 1;
209                 }
210                 if (ovr)
211                         goto override;
212                 return ret;
213         }
214
215         p = kmalloc(sizeof(*p), GFP_KERNEL);
216         if (p == NULL)
217                 return -ENOMEM;
218         memset(p, 0, sizeof(*p));
219
220         ret = ACT_P_CREATED;
221         p->refcnt = 1;
222         spin_lock_init(&p->lock);
223         p->stats_lock = &p->lock;
224         if (bind)
225                 p->bindcnt = 1;
226 override:
227         if (parm->rate.rate) {
228                 err = -ENOMEM;
229                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
230                 if (R_tab == NULL)
231                         goto failure;
232                 if (parm->peakrate.rate) {
233                         P_tab = qdisc_get_rtab(&parm->peakrate,
234                                                tb[TCA_POLICE_PEAKRATE-1]);
235                         if (p->P_tab == NULL) {
236                                 qdisc_put_rtab(R_tab);
237                                 goto failure;
238                         }
239                 }
240         }
241         /* No failure allowed after this point */
242         spin_lock_bh(&p->lock);
243         if (R_tab != NULL) {
244                 qdisc_put_rtab(p->R_tab);
245                 p->R_tab = R_tab;
246         }
247         if (P_tab != NULL) {
248                 qdisc_put_rtab(p->P_tab);
249                 p->P_tab = P_tab;
250         }
251
252         if (tb[TCA_POLICE_RESULT-1])
253                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
254         p->toks = p->burst = parm->burst;
255         p->mtu = parm->mtu;
256         if (p->mtu == 0) {
257                 p->mtu = ~0;
258                 if (p->R_tab)
259                         p->mtu = 255<<p->R_tab->rate.cell_log;
260         }
261         if (p->P_tab)
262                 p->ptoks = L2T_P(p, p->mtu);
263         p->action = parm->action;
264
265 #ifdef CONFIG_NET_ESTIMATOR
266         if (tb[TCA_POLICE_AVRATE-1])
267                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
268         if (est)
269                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
270 #endif
271
272         spin_unlock_bh(&p->lock);
273         if (ret != ACT_P_CREATED)
274                 return ret;
275
276         PSCHED_GET_TIME(p->t_c);
277         p->index = parm->index ? : tcf_police_new_index();
278         h = tcf_police_hash(p->index);
279         write_lock_bh(&police_lock);
280         p->next = tcf_police_ht[h];
281         tcf_police_ht[h] = p;
282         write_unlock_bh(&police_lock);
283
284         a->priv = p;
285         return ret;
286
287 failure:
288         if (ret == ACT_P_CREATED)
289                 kfree(p);
290         return err;
291 }
292
293 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
294 {
295         struct tcf_police *p = PRIV(a);
296
297         if (p != NULL)
298                 return tcf_police_release(p, bind);
299         return 0;
300 }
301
302 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
303                           struct tcf_result *res)
304 {
305         psched_time_t now;
306         struct tcf_police *p = PRIV(a);
307         long toks;
308         long ptoks = 0;
309
310         spin_lock(&p->lock);
311
312         p->bstats.bytes += skb->len;
313         p->bstats.packets++;
314
315 #ifdef CONFIG_NET_ESTIMATOR
316         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
317                 p->qstats.overlimits++;
318                 spin_unlock(&p->lock);
319                 return p->action;
320         }
321 #endif
322
323         if (skb->len <= p->mtu) {
324                 if (p->R_tab == NULL) {
325                         spin_unlock(&p->lock);
326                         return p->result;
327                 }
328
329                 PSCHED_GET_TIME(now);
330
331                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
332
333                 if (p->P_tab) {
334                         ptoks = toks + p->ptoks;
335                         if (ptoks > (long)L2T_P(p, p->mtu))
336                                 ptoks = (long)L2T_P(p, p->mtu);
337                         ptoks -= L2T_P(p, skb->len);
338                 }
339                 toks += p->toks;
340                 if (toks > (long)p->burst)
341                         toks = p->burst;
342                 toks -= L2T(p, skb->len);
343
344                 if ((toks|ptoks) >= 0) {
345                         p->t_c = now;
346                         p->toks = toks;
347                         p->ptoks = ptoks;
348                         spin_unlock(&p->lock);
349                         return p->result;
350                 }
351         }
352
353         p->qstats.overlimits++;
354         spin_unlock(&p->lock);
355         return p->action;
356 }
357
358 static int
359 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
360 {
361         unsigned char    *b = skb->tail;
362         struct tc_police opt;
363         struct tcf_police *p = PRIV(a);
364
365         opt.index = p->index;
366         opt.action = p->action;
367         opt.mtu = p->mtu;
368         opt.burst = p->burst;
369         opt.refcnt = p->refcnt - ref;
370         opt.bindcnt = p->bindcnt - bind;
371         if (p->R_tab)
372                 opt.rate = p->R_tab->rate;
373         else
374                 memset(&opt.rate, 0, sizeof(opt.rate));
375         if (p->P_tab)
376                 opt.peakrate = p->P_tab->rate;
377         else
378                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
379         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
380         if (p->result)
381                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
382 #ifdef CONFIG_NET_ESTIMATOR
383         if (p->ewma_rate)
384                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
385 #endif
386         return skb->len;
387
388 rtattr_failure:
389         skb_trim(skb, b - skb->data);
390         return -1;
391 }
392
393 MODULE_AUTHOR("Alexey Kuznetsov");
394 MODULE_DESCRIPTION("Policing actions");
395 MODULE_LICENSE("GPL");
396
397 static struct tc_action_ops act_police_ops = {
398         .kind           =       "police",
399         .type           =       TCA_ID_POLICE,
400         .capab          =       TCA_CAP_NONE,
401         .owner          =       THIS_MODULE,
402         .act            =       tcf_act_police,
403         .dump           =       tcf_act_police_dump,
404         .cleanup        =       tcf_act_police_cleanup,
405         .lookup         =       tcf_hash_search,
406         .init           =       tcf_act_police_locate,
407         .walk           =       tcf_generic_walker
408 };
409
410 static int __init
411 police_init_module(void)
412 {
413         return tcf_register_action(&act_police_ops);
414 }
415
416 static void __exit
417 police_cleanup_module(void)
418 {
419         tcf_unregister_action(&act_police_ops);
420 }
421
422 module_init(police_init_module);
423 module_exit(police_cleanup_module);
424
425 #else /* CONFIG_NET_CLS_ACT */
426
427 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
428 {
429         unsigned h;
430         struct tcf_police *p;
431         struct rtattr *tb[TCA_POLICE_MAX];
432         struct tc_police *parm;
433         int size;
434
435         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
436                 return NULL;
437
438         if (tb[TCA_POLICE_TBF-1] == NULL)
439                 return NULL;
440         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
441         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
442                 return NULL;
443
444         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
445
446         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
447                 p->refcnt++;
448                 return p;
449         }
450
451         p = kmalloc(sizeof(*p), GFP_KERNEL);
452         if (p == NULL)
453                 return NULL;
454
455         memset(p, 0, sizeof(*p));
456         p->refcnt = 1;
457         spin_lock_init(&p->lock);
458         p->stats_lock = &p->lock;
459         if (parm->rate.rate) {
460                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
461                 if (p->R_tab == NULL)
462                         goto failure;
463                 if (parm->peakrate.rate) {
464                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
465                                                   tb[TCA_POLICE_PEAKRATE-1]);
466                         if (p->P_tab == NULL)
467                                 goto failure;
468                 }
469         }
470         if (tb[TCA_POLICE_RESULT-1]) {
471                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
472                         goto failure;
473                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
474         }
475 #ifdef CONFIG_NET_ESTIMATOR
476         if (tb[TCA_POLICE_AVRATE-1]) {
477                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
478                         goto failure;
479                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
480         }
481 #endif
482         p->toks = p->burst = parm->burst;
483         p->mtu = parm->mtu;
484         if (p->mtu == 0) {
485                 p->mtu = ~0;
486                 if (p->R_tab)
487                         p->mtu = 255<<p->R_tab->rate.cell_log;
488         }
489         if (p->P_tab)
490                 p->ptoks = L2T_P(p, p->mtu);
491         PSCHED_GET_TIME(p->t_c);
492         p->index = parm->index ? : tcf_police_new_index();
493         p->action = parm->action;
494 #ifdef CONFIG_NET_ESTIMATOR
495         if (est)
496                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
497 #endif
498         h = tcf_police_hash(p->index);
499         write_lock_bh(&police_lock);
500         p->next = tcf_police_ht[h];
501         tcf_police_ht[h] = p;
502         write_unlock_bh(&police_lock);
503         return p;
504
505 failure:
506         if (p->R_tab)
507                 qdisc_put_rtab(p->R_tab);
508         kfree(p);
509         return NULL;
510 }
511
512 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
513 {
514         psched_time_t now;
515         long toks;
516         long ptoks = 0;
517
518         spin_lock(&p->lock);
519
520         p->bstats.bytes += skb->len;
521         p->bstats.packets++;
522
523 #ifdef CONFIG_NET_ESTIMATOR
524         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
525                 p->qstats.overlimits++;
526                 spin_unlock(&p->lock);
527                 return p->action;
528         }
529 #endif
530
531         if (skb->len <= p->mtu) {
532                 if (p->R_tab == NULL) {
533                         spin_unlock(&p->lock);
534                         return p->result;
535                 }
536
537                 PSCHED_GET_TIME(now);
538
539                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
540
541                 if (p->P_tab) {
542                         ptoks = toks + p->ptoks;
543                         if (ptoks > (long)L2T_P(p, p->mtu))
544                                 ptoks = (long)L2T_P(p, p->mtu);
545                         ptoks -= L2T_P(p, skb->len);
546                 }
547                 toks += p->toks;
548                 if (toks > (long)p->burst)
549                         toks = p->burst;
550                 toks -= L2T(p, skb->len);
551
552                 if ((toks|ptoks) >= 0) {
553                         p->t_c = now;
554                         p->toks = toks;
555                         p->ptoks = ptoks;
556                         spin_unlock(&p->lock);
557                         return p->result;
558                 }
559         }
560
561         p->qstats.overlimits++;
562         spin_unlock(&p->lock);
563         return p->action;
564 }
565 EXPORT_SYMBOL(tcf_police);
566
567 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
568 {
569         unsigned char    *b = skb->tail;
570         struct tc_police opt;
571
572         opt.index = p->index;
573         opt.action = p->action;
574         opt.mtu = p->mtu;
575         opt.burst = p->burst;
576         if (p->R_tab)
577                 opt.rate = p->R_tab->rate;
578         else
579                 memset(&opt.rate, 0, sizeof(opt.rate));
580         if (p->P_tab)
581                 opt.peakrate = p->P_tab->rate;
582         else
583                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
584         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
585         if (p->result)
586                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
587 #ifdef CONFIG_NET_ESTIMATOR
588         if (p->ewma_rate)
589                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
590 #endif
591         return skb->len;
592
593 rtattr_failure:
594         skb_trim(skb, b - skb->data);
595         return -1;
596 }
597
598 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
599 {
600         struct gnet_dump d;
601         
602         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
603                         TCA_XSTATS, p->stats_lock, &d) < 0)
604                 goto errout;
605         
606         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
607 #ifdef CONFIG_NET_ESTIMATOR
608             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
609 #endif
610             gnet_stats_copy_queue(&d, &p->qstats) < 0)
611                 goto errout;
612
613         if (gnet_stats_finish_copy(&d) < 0)
614                 goto errout;
615
616         return 0;
617
618 errout:
619         return -1;
620 }
621
622 #endif /* CONFIG_NET_CLS_ACT */