Merge to Fedora kernel-2.6.18-1.2257_FC5 patched with stable patch-2.6.18.5-vs2.0...
[linux-2.6.git] / net / sched / act_police.c
/*
 * net/sched/act_police.c       Input police filter.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              J Hadi Salim (action changes)
 */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
21 #include <linux/mm.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/netdevice.h>
28 #include <linux/skbuff.h>
29 #include <linux/module.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/init.h>
32 #include <net/sock.h>
33 #include <net/act_api.h>
34
/*
 * Rate-table lookups.  Both macros index the table's data[] array with the
 * length shifted right by that table's rate.cell_log: L2T() uses the
 * policer's R_tab, L2T_P() uses its P_tab.
 */
#define L2T(pol, len) \
        ((pol)->R_tab->data[(len) >> (pol)->R_tab->rate.cell_log])
#define L2T_P(pol, len) \
        ((pol)->P_tab->data[(len) >> (pol)->P_tab->rate.cell_log])

/* Policer stored in an action's priv pointer. */
#define PRIV(act) ((struct tcf_police *) (act)->priv)
38
39 /* use generic hash table */
40 #define MY_TAB_SIZE     16
41 #define MY_TAB_MASK     15
42 static u32 idx_gen;
43 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
44 /* Policer hash table lock */
45 static DEFINE_RWLOCK(police_lock);
46
47 /* old policer structure from before tc actions */
48 struct tc_police_compat
49 {
50         u32                     index;
51         int                     action;
52         u32                     limit;
53         u32                     burst;
54         u32                     mtu;
55         struct tc_ratespec      rate;
56         struct tc_ratespec      peakrate;
57 };
58
59 /* Each policer is serialized by its individual spinlock */
60
61 static __inline__ unsigned tcf_police_hash(u32 index)
62 {
63         return index&0xF;
64 }
65
66 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
67 {
68         struct tcf_police *p;
69
70         read_lock(&police_lock);
71         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
72                 if (p->index == index)
73                         break;
74         }
75         read_unlock(&police_lock);
76         return p;
77 }
78
79 #ifdef CONFIG_NET_CLS_ACT
80 static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
81                               int type, struct tc_action *a)
82 {
83         struct tcf_police *p;
84         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
85         struct rtattr *r;
86
87         read_lock(&police_lock);
88
89         s_i = cb->args[0];
90
91         for (i = 0; i < MY_TAB_SIZE; i++) {
92                 p = tcf_police_ht[tcf_police_hash(i)];
93
94                 for (; p; p = p->next) {
95                         index++;
96                         if (index < s_i)
97                                 continue;
98                         a->priv = p;
99                         a->order = index;
100                         r = (struct rtattr*) skb->tail;
101                         RTA_PUT(skb, a->order, 0, NULL);
102                         if (type == RTM_DELACTION)
103                                 err = tcf_action_dump_1(skb, a, 0, 1);
104                         else
105                                 err = tcf_action_dump_1(skb, a, 0, 0);
106                         if (err < 0) {
107                                 index--;
108                                 skb_trim(skb, (u8*)r - skb->data);
109                                 goto done;
110                         }
111                         r->rta_len = skb->tail - (u8*)r;
112                         n_i++;
113                 }
114         }
115 done:
116         read_unlock(&police_lock);
117         if (n_i)
118                 cb->args[0] += n_i;
119         return n_i;
120
121 rtattr_failure:
122         skb_trim(skb, (u8*)r - skb->data);
123         goto done;
124 }
125
126 static inline int
127 tcf_act_police_hash_search(struct tc_action *a, u32 index)
128 {
129         struct tcf_police *p = tcf_police_lookup(index);
130
131         if (p != NULL) {
132                 a->priv = p;
133                 return 1;
134         } else {
135                 return 0;
136         }
137 }
138 #endif
139
140 static inline u32 tcf_police_new_index(void)
141 {
142         do {
143                 if (++idx_gen == 0)
144                         idx_gen = 1;
145         } while (tcf_police_lookup(idx_gen));
146
147         return idx_gen;
148 }
149
150 void tcf_police_destroy(struct tcf_police *p)
151 {
152         unsigned h = tcf_police_hash(p->index);
153         struct tcf_police **p1p;
154         
155         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
156                 if (*p1p == p) {
157                         write_lock_bh(&police_lock);
158                         *p1p = p->next;
159                         write_unlock_bh(&police_lock);
160 #ifdef CONFIG_NET_ESTIMATOR
161                         gen_kill_estimator(&p->bstats, &p->rate_est);
162 #endif
163                         if (p->R_tab)
164                                 qdisc_put_rtab(p->R_tab);
165                         if (p->P_tab)
166                                 qdisc_put_rtab(p->P_tab);
167                         kfree(p);
168                         return;
169                 }
170         }
171         BUG_TRAP(0);
172 }
173
174 #ifdef CONFIG_NET_CLS_ACT
175 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
176                                  struct tc_action *a, int ovr, int bind)
177 {
178         unsigned h;
179         int ret = 0, err;
180         struct rtattr *tb[TCA_POLICE_MAX];
181         struct tc_police *parm;
182         struct tcf_police *p;
183         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
184         int size;
185
186         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
187                 return -EINVAL;
188
189         if (tb[TCA_POLICE_TBF-1] == NULL)
190                 return -EINVAL;
191         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
192         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
193                 return -EINVAL;
194         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
195
196         if (tb[TCA_POLICE_RESULT-1] != NULL &&
197             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
198                 return -EINVAL;
199         if (tb[TCA_POLICE_RESULT-1] != NULL &&
200             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
201                 return -EINVAL;
202
203         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
204                 a->priv = p;
205                 if (bind) {
206                         p->bindcnt += 1;
207                         p->refcnt += 1;
208                 }
209                 if (ovr)
210                         goto override;
211                 return ret;
212         }
213
214         p = kzalloc(sizeof(*p), GFP_KERNEL);
215         if (p == NULL)
216                 return -ENOMEM;
217
218         ret = ACT_P_CREATED;
219         p->refcnt = 1;
220         spin_lock_init(&p->lock);
221         p->stats_lock = &p->lock;
222         if (bind)
223                 p->bindcnt = 1;
224 override:
225         if (parm->rate.rate) {
226                 err = -ENOMEM;
227                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
228                 if (R_tab == NULL)
229                         goto failure;
230                 if (parm->peakrate.rate) {
231                         P_tab = qdisc_get_rtab(&parm->peakrate,
232                                                tb[TCA_POLICE_PEAKRATE-1]);
233                         if (p->P_tab == NULL) {
234                                 qdisc_put_rtab(R_tab);
235                                 goto failure;
236                         }
237                 }
238         }
239         /* No failure allowed after this point */
240         spin_lock_bh(&p->lock);
241         if (R_tab != NULL) {
242                 qdisc_put_rtab(p->R_tab);
243                 p->R_tab = R_tab;
244         }
245         if (P_tab != NULL) {
246                 qdisc_put_rtab(p->P_tab);
247                 p->P_tab = P_tab;
248         }
249
250         if (tb[TCA_POLICE_RESULT-1])
251                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
252         p->toks = p->burst = parm->burst;
253         p->mtu = parm->mtu;
254         if (p->mtu == 0) {
255                 p->mtu = ~0;
256                 if (p->R_tab)
257                         p->mtu = 255<<p->R_tab->rate.cell_log;
258         }
259         if (p->P_tab)
260                 p->ptoks = L2T_P(p, p->mtu);
261         p->action = parm->action;
262
263 #ifdef CONFIG_NET_ESTIMATOR
264         if (tb[TCA_POLICE_AVRATE-1])
265                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
266         if (est)
267                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
268 #endif
269
270         spin_unlock_bh(&p->lock);
271         if (ret != ACT_P_CREATED)
272                 return ret;
273
274         PSCHED_GET_TIME(p->t_c);
275         p->index = parm->index ? : tcf_police_new_index();
276         h = tcf_police_hash(p->index);
277         write_lock_bh(&police_lock);
278         p->next = tcf_police_ht[h];
279         tcf_police_ht[h] = p;
280         write_unlock_bh(&police_lock);
281
282         a->priv = p;
283         return ret;
284
285 failure:
286         if (ret == ACT_P_CREATED)
287                 kfree(p);
288         return err;
289 }
290
291 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
292 {
293         struct tcf_police *p = PRIV(a);
294
295         if (p != NULL)
296                 return tcf_police_release(p, bind);
297         return 0;
298 }
299
300 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
301                           struct tcf_result *res)
302 {
303         psched_time_t now;
304         struct tcf_police *p = PRIV(a);
305         long toks;
306         long ptoks = 0;
307
308         spin_lock(&p->lock);
309
310         p->bstats.bytes += skb->len;
311         p->bstats.packets++;
312
313 #ifdef CONFIG_NET_ESTIMATOR
314         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
315                 p->qstats.overlimits++;
316                 spin_unlock(&p->lock);
317                 return p->action;
318         }
319 #endif
320
321         if (skb->len <= p->mtu) {
322                 if (p->R_tab == NULL) {
323                         spin_unlock(&p->lock);
324                         return p->result;
325                 }
326
327                 PSCHED_GET_TIME(now);
328
329                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
330
331                 if (p->P_tab) {
332                         ptoks = toks + p->ptoks;
333                         if (ptoks > (long)L2T_P(p, p->mtu))
334                                 ptoks = (long)L2T_P(p, p->mtu);
335                         ptoks -= L2T_P(p, skb->len);
336                 }
337                 toks += p->toks;
338                 if (toks > (long)p->burst)
339                         toks = p->burst;
340                 toks -= L2T(p, skb->len);
341
342                 if ((toks|ptoks) >= 0) {
343                         p->t_c = now;
344                         p->toks = toks;
345                         p->ptoks = ptoks;
346                         spin_unlock(&p->lock);
347                         return p->result;
348                 }
349         }
350
351         p->qstats.overlimits++;
352         spin_unlock(&p->lock);
353         return p->action;
354 }
355
356 static int
357 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
358 {
359         unsigned char    *b = skb->tail;
360         struct tc_police opt;
361         struct tcf_police *p = PRIV(a);
362
363         opt.index = p->index;
364         opt.action = p->action;
365         opt.mtu = p->mtu;
366         opt.burst = p->burst;
367         opt.refcnt = p->refcnt - ref;
368         opt.bindcnt = p->bindcnt - bind;
369         if (p->R_tab)
370                 opt.rate = p->R_tab->rate;
371         else
372                 memset(&opt.rate, 0, sizeof(opt.rate));
373         if (p->P_tab)
374                 opt.peakrate = p->P_tab->rate;
375         else
376                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
377         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
378         if (p->result)
379                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
380 #ifdef CONFIG_NET_ESTIMATOR
381         if (p->ewma_rate)
382                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
383 #endif
384         return skb->len;
385
386 rtattr_failure:
387         skb_trim(skb, b - skb->data);
388         return -1;
389 }
390
391 MODULE_AUTHOR("Alexey Kuznetsov");
392 MODULE_DESCRIPTION("Policing actions");
393 MODULE_LICENSE("GPL");
394
395 static struct tc_action_ops act_police_ops = {
396         .kind           =       "police",
397         .type           =       TCA_ID_POLICE,
398         .capab          =       TCA_CAP_NONE,
399         .owner          =       THIS_MODULE,
400         .act            =       tcf_act_police,
401         .dump           =       tcf_act_police_dump,
402         .cleanup        =       tcf_act_police_cleanup,
403         .lookup         =       tcf_act_police_hash_search,
404         .init           =       tcf_act_police_locate,
405         .walk           =       tcf_act_police_walker
406 };
407
408 static int __init
409 police_init_module(void)
410 {
411         return tcf_register_action(&act_police_ops);
412 }
413
414 static void __exit
415 police_cleanup_module(void)
416 {
417         tcf_unregister_action(&act_police_ops);
418 }
419
420 module_init(police_init_module);
421 module_exit(police_cleanup_module);
422
423 #else /* CONFIG_NET_CLS_ACT */
424
425 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
426 {
427         unsigned h;
428         struct tcf_police *p;
429         struct rtattr *tb[TCA_POLICE_MAX];
430         struct tc_police *parm;
431         int size;
432
433         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
434                 return NULL;
435
436         if (tb[TCA_POLICE_TBF-1] == NULL)
437                 return NULL;
438         size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]);
439         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
440                 return NULL;
441
442         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
443
444         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
445                 p->refcnt++;
446                 return p;
447         }
448
449         p = kzalloc(sizeof(*p), GFP_KERNEL);
450         if (p == NULL)
451                 return NULL;
452
453         p->refcnt = 1;
454         spin_lock_init(&p->lock);
455         p->stats_lock = &p->lock;
456         if (parm->rate.rate) {
457                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
458                 if (p->R_tab == NULL)
459                         goto failure;
460                 if (parm->peakrate.rate) {
461                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
462                                                   tb[TCA_POLICE_PEAKRATE-1]);
463                         if (p->P_tab == NULL)
464                                 goto failure;
465                 }
466         }
467         if (tb[TCA_POLICE_RESULT-1]) {
468                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
469                         goto failure;
470                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
471         }
472 #ifdef CONFIG_NET_ESTIMATOR
473         if (tb[TCA_POLICE_AVRATE-1]) {
474                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
475                         goto failure;
476                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
477         }
478 #endif
479         p->toks = p->burst = parm->burst;
480         p->mtu = parm->mtu;
481         if (p->mtu == 0) {
482                 p->mtu = ~0;
483                 if (p->R_tab)
484                         p->mtu = 255<<p->R_tab->rate.cell_log;
485         }
486         if (p->P_tab)
487                 p->ptoks = L2T_P(p, p->mtu);
488         PSCHED_GET_TIME(p->t_c);
489         p->index = parm->index ? : tcf_police_new_index();
490         p->action = parm->action;
491 #ifdef CONFIG_NET_ESTIMATOR
492         if (est)
493                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
494 #endif
495         h = tcf_police_hash(p->index);
496         write_lock_bh(&police_lock);
497         p->next = tcf_police_ht[h];
498         tcf_police_ht[h] = p;
499         write_unlock_bh(&police_lock);
500         return p;
501
502 failure:
503         if (p->R_tab)
504                 qdisc_put_rtab(p->R_tab);
505         kfree(p);
506         return NULL;
507 }
508
509 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
510 {
511         psched_time_t now;
512         long toks;
513         long ptoks = 0;
514
515         spin_lock(&p->lock);
516
517         p->bstats.bytes += skb->len;
518         p->bstats.packets++;
519
520 #ifdef CONFIG_NET_ESTIMATOR
521         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
522                 p->qstats.overlimits++;
523                 spin_unlock(&p->lock);
524                 return p->action;
525         }
526 #endif
527
528         if (skb->len <= p->mtu) {
529                 if (p->R_tab == NULL) {
530                         spin_unlock(&p->lock);
531                         return p->result;
532                 }
533
534                 PSCHED_GET_TIME(now);
535
536                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
537
538                 if (p->P_tab) {
539                         ptoks = toks + p->ptoks;
540                         if (ptoks > (long)L2T_P(p, p->mtu))
541                                 ptoks = (long)L2T_P(p, p->mtu);
542                         ptoks -= L2T_P(p, skb->len);
543                 }
544                 toks += p->toks;
545                 if (toks > (long)p->burst)
546                         toks = p->burst;
547                 toks -= L2T(p, skb->len);
548
549                 if ((toks|ptoks) >= 0) {
550                         p->t_c = now;
551                         p->toks = toks;
552                         p->ptoks = ptoks;
553                         spin_unlock(&p->lock);
554                         return p->result;
555                 }
556         }
557
558         p->qstats.overlimits++;
559         spin_unlock(&p->lock);
560         return p->action;
561 }
562 EXPORT_SYMBOL(tcf_police);
563
564 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
565 {
566         unsigned char    *b = skb->tail;
567         struct tc_police opt;
568
569         opt.index = p->index;
570         opt.action = p->action;
571         opt.mtu = p->mtu;
572         opt.burst = p->burst;
573         if (p->R_tab)
574                 opt.rate = p->R_tab->rate;
575         else
576                 memset(&opt.rate, 0, sizeof(opt.rate));
577         if (p->P_tab)
578                 opt.peakrate = p->P_tab->rate;
579         else
580                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
581         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
582         if (p->result)
583                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
584 #ifdef CONFIG_NET_ESTIMATOR
585         if (p->ewma_rate)
586                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
587 #endif
588         return skb->len;
589
590 rtattr_failure:
591         skb_trim(skb, b - skb->data);
592         return -1;
593 }
594
595 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
596 {
597         struct gnet_dump d;
598         
599         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
600                         TCA_XSTATS, p->stats_lock, &d) < 0)
601                 goto errout;
602         
603         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
604 #ifdef CONFIG_NET_ESTIMATOR
605             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
606 #endif
607             gnet_stats_copy_queue(&d, &p->qstats) < 0)
608                 goto errout;
609
610         if (gnet_stats_finish_copy(&d) < 0)
611                 goto errout;
612
613         return 0;
614
615 errout:
616         return -1;
617 }
618
619 #endif /* CONFIG_NET_CLS_ACT */