/*
 * net/sched/police.c	Input police filter.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		J Hadi Salim (action changes)
 */
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
34 #include <net/act_api.h>
/* Rate-table lookups: ticks needed to send a packet of length L through
 * the committed-rate table (R_tab) or peak-rate table (P_tab).  The
 * length is scaled down by the table's cell_log before indexing. */
36 #define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
37 #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
/* Recover the policer private state hung off a generic tc_action. */
38 #define PRIV(a) ((struct tcf_police *) (a)->priv)
40 /* use generic hash table */
/* NOTE(review): MY_TAB_MASK must stay equal to MY_TAB_SIZE - 1
 * (power-of-two table); the two constants are coupled. */
41 #define MY_TAB_SIZE 16
42 #define MY_TAB_MASK 15
/* Global chained hash of all policers, keyed by policer index. */
44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
45 /* Policer hash table lock */
/* Readers: lookup/walk.  Writers: insert (locate) and unlink (destroy). */
46 static DEFINE_RWLOCK(police_lock);
48 /* old policer structure from before tc actions */
/* Binary-compatibility layout: userspace built before the tc-action API
 * sends this shorter TCA_POLICE_TBF payload; locate() accepts either
 * sizeof(struct tc_police) or sizeof(struct tc_police_compat).
 * NOTE(review): leading scalar fields of this struct are elided in this
 * extract; only the two rate specs are visible. */
49 struct tc_police_compat
56 struct tc_ratespec rate;
57 struct tc_ratespec peakrate;
60 /* Each policer is serialized by its individual spinlock */
/* Map a policer index to a hash bucket.  Body elided in this extract;
 * presumably returns index & MY_TAB_MASK — TODO confirm. */
62 static __inline__ unsigned tcf_police_hash(u32 index)
/* Find a policer by its index, walking its hash chain under the read
 * lock.  Returns the entry on a hit (hit-path return and the local
 * declaration of p are elided in this extract), NULL otherwise. */
67 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
71 read_lock(&police_lock);
72 for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
73 if (p->index == index)
76 read_unlock(&police_lock);
80 #ifdef CONFIG_NET_CLS_ACT
/* Netlink dump/flush walker over every policer in the hash table.
 * For each entry it opens a nested rtattr (keyed by a->order), emits
 * the action via tcf_action_dump_1 (binding flag 1 for RTM_DELACTION,
 * 0 otherwise), and patches rta_len afterwards.  On a dump error the
 * partial attribute is trimmed back off the skb.  Several lines
 * (skip/count bookkeeping against cb->args, error exits, returns) are
 * elided in this extract. */
81 static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
82 int type, struct tc_action *a)
85 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
88 read_lock(&police_lock);
92 for (i = 0; i < MY_TAB_SIZE; i++) {
/* NOTE(review): hashing the bucket index i is redundant — i already
 * ranges over the table; tcf_police_ht[i] would be equivalent if the
 * hash is i & MY_TAB_MASK (harmless either way). */
93 p = tcf_police_ht[tcf_police_hash(i)];
95 for (; p; p = p->next) {
101 r = (struct rtattr*) skb->tail;
102 RTA_PUT(skb, a->order, 0, NULL);
103 if (type == RTM_DELACTION)
104 err = tcf_action_dump_1(skb, a, 0, 1);
106 err = tcf_action_dump_1(skb, a, 0, 0);
/* Undo the partially written attribute on failure. */
109 skb_trim(skb, (u8*)r - skb->data);
/* Close the nested attribute now that its payload length is known. */
112 r->rta_len = skb->tail - (u8*)r;
117 read_unlock(&police_lock);
/* rtattr_failure path: drop everything written by RTA_PUT. */
123 skb_trim(skb, (u8*)r - skb->data);
/* .lookup hook of act_police_ops: resolve index -> policer and attach
 * it to a->priv on success.  Return type, attach and return statements
 * are elided in this extract. */
128 tcf_hash_search(struct tc_action *a, u32 index)
130 struct tcf_police *p = tcf_police_lookup(index);
/* Allocate a fresh, currently unused policer index by advancing a
 * generator (idx_gen, declared outside this extract) until the lookup
 * misses.  Wrap-around handling is elided in this extract. */
141 static inline u32 tcf_police_new_index(void)
146 } while (tcf_police_lookup(idx_gen));
/* Tear down a policer: unlink it from its hash chain under the write
 * lock, kill its rate estimator (if estimators are compiled in) and
 * release both rate tables.  The chain-match test, unlink assignment
 * and final kfree are elided in this extract.
 * NOTE(review): qdisc_put_rtab appears to be called unconditionally
 * here, but the guarding NULL checks may simply be elided — confirm
 * against the full source before assuming a bug. */
151 void tcf_police_destroy(struct tcf_police *p)
153 unsigned h = tcf_police_hash(p->index);
154 struct tcf_police **p1p;
156 for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
158 write_lock_bh(&police_lock);
160 write_unlock_bh(&police_lock);
161 #ifdef CONFIG_NET_ESTIMATOR
162 gen_kill_estimator(&p->bstats, &p->rate_est);
165 qdisc_put_rtab(p->R_tab);
167 qdisc_put_rtab(p->P_tab);
175 #ifdef CONFIG_NET_CLS_ACT
/* .init hook: create a new policer, or rebind/update an existing one
 * addressed by parm->index, from the nested TCA_POLICE_* attributes.
 * On success the policer is linked into the hash table and ret
 * (ACT_P_CREATED or 0) is returned; many interior lines (error
 * returns, existing-entry handling, kfree on failure) are elided in
 * this extract. */
176 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
177 struct tc_action *a, int ovr, int bind)
181 struct rtattr *tb[TCA_POLICE_MAX];
182 struct tc_police *parm;
183 struct tcf_police *p;
184 struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
187 if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
190 if (tb[TCA_POLICE_TBF-1] == NULL)
/* Accept both the modern and the pre-action compat TBF payload size. */
192 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
193 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
195 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
197 if (tb[TCA_POLICE_RESULT-1] != NULL &&
198 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
/* NOTE(review): the next two lines duplicate the TCA_POLICE_RESULT
 * check above verbatim; upstream later changed this second check to
 * validate TCA_POLICE_AVRATE instead — almost certainly the intent,
 * since AVRATE is read unvalidated further down. */
200 if (tb[TCA_POLICE_RESULT-1] != NULL &&
201 RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
/* Existing policer: the rebind/override path is elided here. */
204 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
215 p = kmalloc(sizeof(*p), GFP_KERNEL);
218 memset(p, 0, sizeof(*p));
222 spin_lock_init(&p->lock);
223 p->stats_lock = &p->lock;
/* Fetch the (optional) committed and peak rate tables into locals so
 * failure before commit cannot corrupt an existing policer. */
227 if (parm->rate.rate) {
229 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
232 if (parm->peakrate.rate) {
233 P_tab = qdisc_get_rtab(&parm->peakrate,
234 tb[TCA_POLICE_PEAKRATE-1]);
/* NOTE(review): the freshly fetched table went into the LOCAL P_tab,
 * yet this tests p->P_tab (zeroed above on the create path) — the
 * check looks like it should read `if (P_tab == NULL)`; confirm
 * against the full source/upstream history. */
235 if (p->P_tab == NULL) {
236 qdisc_put_rtab(R_tab);
241 /* No failure allowed after this point */
242 spin_lock_bh(&p->lock);
/* Swap in the new tables, dropping references to any old ones
 * (the intervening assignments are elided in this extract). */
244 qdisc_put_rtab(p->R_tab);
248 qdisc_put_rtab(p->P_tab);
252 if (tb[TCA_POLICE_RESULT-1])
253 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
/* Start with a full committed-rate bucket. */
254 p->toks = p->burst = parm->burst;
/* Fallback MTU when none was supplied: largest length representable
 * in one rate-table cell. */
259 p->mtu = 255<<p->R_tab->rate.cell_log;
262 p->ptoks = L2T_P(p, p->mtu);
263 p->action = parm->action;
265 #ifdef CONFIG_NET_ESTIMATOR
266 if (tb[TCA_POLICE_AVRATE-1])
267 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
269 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
272 spin_unlock_bh(&p->lock);
/* Updating an existing entry: nothing left to do. */
273 if (ret != ACT_P_CREATED)
/* Newly created: stamp the clock, pick an index if the user gave
 * none, and publish the policer in the hash table. */
276 PSCHED_GET_TIME(p->t_c);
277 p->index = parm->index ? : tcf_police_new_index();
278 h = tcf_police_hash(p->index);
279 write_lock_bh(&police_lock);
280 p->next = tcf_police_ht[h];
281 tcf_police_ht[h] = p;
282 write_unlock_bh(&police_lock);
288 if (ret == ACT_P_CREATED)
/* .cleanup hook: drop one reference (and possibly the bind count) on
 * the policer behind this action.  A guard between the declaration and
 * the release call is elided in this extract — presumably a NULL test
 * on p with a `return 0` fallback; confirm against the full source. */
293 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
295 struct tcf_police *p = PRIV(a);
298 return tcf_police_release(p, bind);
/* .act hook — the per-packet token-bucket decision, run under p->lock
 * (the lock acquisition, local declarations of now/toks/ptoks, and the
 * return statements are elided in this extract).  Conforming packets
 * consume tokens and pass; oversize or out-of-tokens packets count an
 * overlimit and take p->action. */
302 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
303 struct tcf_result *res)
306 struct tcf_police *p = PRIV(a);
312 p->bstats.bytes += skb->len;
315 #ifdef CONFIG_NET_ESTIMATOR
/* Optional EWMA rate cap: estimated bps at/over the limit is an
 * immediate overlimit regardless of bucket state. */
316 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
317 p->qstats.overlimits++;
318 spin_unlock(&p->lock);
/* Packets larger than the policer MTU skip straight to overlimit. */
323 if (skb->len <= p->mtu) {
/* No committed-rate table: everything within MTU conforms. */
324 if (p->R_tab == NULL) {
325 spin_unlock(&p->lock);
/* Credit tokens for elapsed time, capped at one burst. */
329 PSCHED_GET_TIME(now);
331 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
/* Peak-rate bucket, capped at the time to send one MTU at peak. */
334 ptoks = toks + p->ptoks;
335 if (ptoks > (long)L2T_P(p, p->mtu))
336 ptoks = (long)L2T_P(p, p->mtu);
337 ptoks -= L2T_P(p, skb->len);
340 if (toks > (long)p->burst)
342 toks -= L2T(p, skb->len);
/* Both buckets non-negative => packet conforms; commit the new
 * bucket state (assignments elided here) and pass. */
344 if ((toks|ptoks) >= 0) {
348 spin_unlock(&p->lock);
353 p->qstats.overlimits++;
354 spin_unlock(&p->lock);
/* .dump hook: serialize the policer configuration back to userspace as
 * TCA_POLICE_* attributes.  Return type, mtu/capab assignments, the
 * NULL-table conditionals and the return/rtattr_failure label are
 * elided in this extract. */
359 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
/* Remember the tail so a mid-dump failure can trim everything back. */
361 unsigned char *b = skb->tail;
362 struct tc_police opt;
363 struct tcf_police *p = PRIV(a);
365 opt.index = p->index;
366 opt.action = p->action;
368 opt.burst = p->burst;
/* Report counts net of the caller's own reference/binding. */
369 opt.refcnt = p->refcnt - ref;
370 opt.bindcnt = p->bindcnt - bind;
/* Rate specs are copied when a table exists, zeroed otherwise (the
 * guarding if/else lines are elided in this extract). */
372 opt.rate = p->R_tab->rate;
374 memset(&opt.rate, 0, sizeof(opt.rate));
376 opt.peakrate = p->P_tab->rate;
378 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
379 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
381 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
382 #ifdef CONFIG_NET_ESTIMATOR
384 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
/* rtattr_failure path: undo every attribute written above. */
389 skb_trim(skb, b - skb->data);
393 MODULE_AUTHOR("Alexey Kuznetsov");
394 MODULE_DESCRIPTION("Policing actions");
395 MODULE_LICENSE("GPL");
/* tc action ops table registered with the action API; wires the
 * policer's act/dump/cleanup/lookup/init/walk callbacks defined above.
 * (The .kind field and closing brace are elided in this extract.) */
397 static struct tc_action_ops act_police_ops = {
399 .type = TCA_ID_POLICE,
400 .capab = TCA_CAP_NONE,
401 .owner = THIS_MODULE,
402 .act = tcf_act_police,
403 .dump = tcf_act_police_dump,
404 .cleanup = tcf_act_police_cleanup,
405 .lookup = tcf_hash_search,
406 .init = tcf_act_police_locate,
407 .walk = tcf_generic_walker
/* Module entry: register the police action ops with the tc action API
 * (the `static int __init` specifiers are elided in this extract). */
411 police_init_module(void)
413 return tcf_register_action(&act_police_ops);
/* Module exit: unregister the ops (``static void __exit`` elided). */
417 police_cleanup_module(void)
419 tcf_unregister_action(&act_police_ops);
422 module_init(police_init_module);
423 module_exit(police_cleanup_module);
425 #else /* CONFIG_NET_CLS_ACT */
/* Non-CLS_ACT variant of policer creation, called directly by
 * classifiers: parse the nested TCA_POLICE_* attributes and return a
 * new (or existing, refcounted) policer, or NULL on error.  Error
 * gotos, refcount bumps and several assignments are elided in this
 * extract.  Unlike tcf_act_police_locate above, tables are fetched
 * straight into p->R_tab/p->P_tab since p is always freshly allocated
 * here. */
427 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
430 struct tcf_police *p;
431 struct rtattr *tb[TCA_POLICE_MAX];
432 struct tc_police *parm;
435 if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
438 if (tb[TCA_POLICE_TBF-1] == NULL)
/* Accept both modern and pre-action compat TBF payload sizes. */
440 size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
441 if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
444 parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
/* Reuse an existing policer with this index (refcount path elided). */
446 if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
451 p = kmalloc(sizeof(*p), GFP_KERNEL);
455 memset(p, 0, sizeof(*p));
457 spin_lock_init(&p->lock);
458 p->stats_lock = &p->lock;
459 if (parm->rate.rate) {
460 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
461 if (p->R_tab == NULL)
463 if (parm->peakrate.rate) {
464 p->P_tab = qdisc_get_rtab(&parm->peakrate,
465 tb[TCA_POLICE_PEAKRATE-1]);
466 if (p->P_tab == NULL)
/* This variant validates RESULT/AVRATE payload sizes inline — the
 * check tcf_act_police_locate was missing for AVRATE. */
470 if (tb[TCA_POLICE_RESULT-1]) {
471 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
473 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
475 #ifdef CONFIG_NET_ESTIMATOR
476 if (tb[TCA_POLICE_AVRATE-1]) {
477 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
479 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
/* Start with a full committed-rate bucket. */
482 p->toks = p->burst = parm->burst;
/* Fallback MTU: largest length one rate-table cell can represent. */
487 p->mtu = 255<<p->R_tab->rate.cell_log;
490 p->ptoks = L2T_P(p, p->mtu);
491 PSCHED_GET_TIME(p->t_c);
492 p->index = parm->index ? : tcf_police_new_index();
493 p->action = parm->action;
494 #ifdef CONFIG_NET_ESTIMATOR
496 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
/* Publish the new policer in the hash table. */
498 h = tcf_police_hash(p->index);
499 write_lock_bh(&police_lock);
500 p->next = tcf_police_ht[h];
501 tcf_police_ht[h] = p;
502 write_unlock_bh(&police_lock);
/* Error path (label elided): release the table(s) and free p. */
507 qdisc_put_rtab(p->R_tab);
/* Non-CLS_ACT per-packet policer — structurally identical to
 * tcf_act_police above but called with the policer directly instead of
 * a tc_action.  Lock acquisition, local declarations, bucket-state
 * commits and return statements are elided in this extract. */
512 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
520 p->bstats.bytes += skb->len;
523 #ifdef CONFIG_NET_ESTIMATOR
/* EWMA cap: estimated bps at/over limit is an immediate overlimit. */
524 if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
525 p->qstats.overlimits++;
526 spin_unlock(&p->lock);
531 if (skb->len <= p->mtu) {
/* No committed-rate table: everything within MTU conforms. */
532 if (p->R_tab == NULL) {
533 spin_unlock(&p->lock);
/* Credit tokens for elapsed time, capped at one burst. */
537 PSCHED_GET_TIME(now);
539 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
/* Peak-rate bucket, capped at time-to-send one MTU at peak rate. */
542 ptoks = toks + p->ptoks;
543 if (ptoks > (long)L2T_P(p, p->mtu))
544 ptoks = (long)L2T_P(p, p->mtu);
545 ptoks -= L2T_P(p, skb->len);
548 if (toks > (long)p->burst)
550 toks -= L2T(p, skb->len);
/* Both buckets non-negative => conforming packet. */
552 if ((toks|ptoks) >= 0) {
556 spin_unlock(&p->lock);
561 p->qstats.overlimits++;
562 spin_unlock(&p->lock);
565 EXPORT_SYMBOL(tcf_police);
/* Serialize a policer's configuration as TCA_POLICE_* attributes for
 * the non-CLS_ACT classifiers.  Mirrors tcf_act_police_dump minus the
 * refcnt/bindcnt reporting; mtu/capab assignments, NULL-table
 * conditionals and the return/rtattr_failure label are elided. */
567 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
/* Remember the tail so a mid-dump failure can trim everything back. */
569 unsigned char *b = skb->tail;
570 struct tc_police opt;
572 opt.index = p->index;
573 opt.action = p->action;
575 opt.burst = p->burst;
/* Rate specs copied when a table exists, zeroed otherwise (guarding
 * if/else lines elided in this extract). */
577 opt.rate = p->R_tab->rate;
579 memset(&opt.rate, 0, sizeof(opt.rate));
581 opt.peakrate = p->P_tab->rate;
583 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
584 RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
586 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
587 #ifdef CONFIG_NET_ESTIMATOR
589 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
/* rtattr_failure path: undo every attribute written above. */
594 skb_trim(skb, b - skb->data);
/* Dump basic/rate-estimator/queue statistics for a policer via the
 * gnet_stats compat helper, under the policer's stats lock.  The local
 * gnet_dump declaration, error returns and success return are elided
 * in this extract. */
598 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
602 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
603 TCA_XSTATS, p->stats_lock, &d) < 0)
606 if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
607 #ifdef CONFIG_NET_ESTIMATOR
608 gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
610 gnet_stats_copy_queue(&d, &p->qstats) < 0)
613 if (gnet_stats_finish_copy(&d) < 0)
622 #endif /* CONFIG_NET_CLS_ACT */