/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>

#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is
   able to enqueue packets and to dequeue them (when the device is
   ready to send something) in the order and at the times determined
   by the algorithm hidden inside it.

   Qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to make some sanity checks
   and do the part of the work that is common to all qdiscs, and to
   provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time. The queue is
   really empty only if q->q.qlen == 0. For complicated disciplines
   with multiple queues, q->q is not the real packet queue, but
   q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   one of:

   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by police.
     Expected action: back off or report an error to real-time apps.

   ---requeue

   requeues a packet that was dequeued earlier. It is used by
   non-standard or just buggy devices, which can defer output even
   when dev->tbusy=0.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears
   all timers and counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime
   of the qdisc.

   ---change

   changes qdisc parameters.
 */
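/*
 * Illustrative sketch, not part of the original file: how a caller is
 * expected to act on the dequeue contract described above. The function
 * name is hypothetical; it uses only struct Qdisc fields that the
 * surrounding code already relies on.
 */
static inline struct sk_buff *example_poll_qdisc(struct Qdisc *q)
{
    struct sk_buff *skb = q->dequeue(q);

    if (skb == NULL && q->q.qlen > 0) {
        /* NULL does not mean "empty": the discipline simply does
         * not want to transmit right now. q->q.qlen is the only
         * authoritative emptiness test. */
    }
    return skb;
}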
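/*
 * Illustrative sketch, not part of the original file: the minimal shape of
 * a discipline implementing the contract above, using the built-in queue
 * q->q (whose qlen the skb helpers keep valid). All "example_" names are
 * hypothetical; register_qdisc() below substitutes noop defaults for any
 * of enqueue/dequeue/requeue left NULL.
 */
static int example_enqueue(struct sk_buff *skb, struct Qdisc *q)
{
    __skb_queue_tail(&q->q, skb);
    return 0;			/* success; otherwise a NET_XMIT_* code */
}

static struct sk_buff *example_dequeue(struct Qdisc *q)
{
    return __skb_dequeue(&q->q);	/* NULL when nothing to send */
}

static struct Qdisc_ops example_qdisc_ops = {
    .id		= "example",
    .priv_size	= 0,
    .enqueue	= example_enqueue,
    .dequeue	= example_dequeue,
    .owner	= THIS_MODULE,
};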
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
    struct Qdisc_ops *q, **qp;
    int rc = -EEXIST;

    write_lock(&qdisc_mod_lock);
    for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
        if (!strcmp(qops->id, q->id))
            goto out;

    if (qops->enqueue == NULL)
        qops->enqueue = noop_qdisc_ops.enqueue;
    if (qops->requeue == NULL)
        qops->requeue = noop_qdisc_ops.requeue;
    if (qops->dequeue == NULL)
        qops->dequeue = noop_qdisc_ops.dequeue;

    qops->next = NULL;
    *qp = qops;
    rc = 0;
out:
    write_unlock(&qdisc_mod_lock);
    return rc;
}
int unregister_qdisc(struct Qdisc_ops *qops)
{
    struct Qdisc_ops *q, **qp;
    int err = -ENOENT;

    write_lock(&qdisc_mod_lock);
    for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
        if (q == qops)
            break;
    if (q) {
        *qp = q->next;
        q->next = NULL;
        err = 0;
    }
    write_unlock(&qdisc_mod_lock);
    return err;
}
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
    struct Qdisc *q;

    list_for_each_entry(q, &dev->qdisc_list, list) {
        if (q->handle == handle)
            return q;
    }
    return NULL;
}
struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
    unsigned long cl;
    struct Qdisc *leaf;
    struct Qdisc_class_ops *cops = p->ops->cl_ops;

    if (cops == NULL)
        return NULL;
    cl = cops->get(p, classid);

    if (cl == 0)
        return NULL;
    leaf = cops->leaf(p, cl);
    cops->put(p, cl);
    return leaf;
}
/* Find queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
    struct Qdisc_ops *q = NULL;

    if (kind) {
        read_lock(&qdisc_mod_lock);
        for (q = qdisc_base; q; q = q->next) {
            if (rtattr_strcmp(kind, q->id) == 0)
                break;
        }
        read_unlock(&qdisc_mod_lock);
    }
    return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
    struct qdisc_rate_table *rtab;

    for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
        if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
            rtab->refcnt++;
            return rtab;
        }
    }

    if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
        return NULL;

    rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
    if (rtab) {
        rtab->rate = *r;
        rtab->refcnt = 1;
        memcpy(rtab->data, RTA_DATA(tab), 1024);
        rtab->next = qdisc_rtab_list;
        qdisc_rtab_list = rtab;
    }
    return rtab;
}
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
    struct qdisc_rate_table *rtab, **rtabp;

    if (!tab || --tab->refcnt)
        return;

    for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
        if (rtab == tab) {
            *rtabp = rtab->next;
            kfree(rtab);
            return;
        }
    }
}
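/*
 * Illustrative sketch, not part of the original file: the usual get/put
 * pairing for the rate-table cache above, as seen from a qdisc module
 * ("example_" names are hypothetical). qdisc_get_rtab() either takes a
 * reference on a cached table or allocates a new one; qdisc_put_rtab()
 * drops the reference and frees the table on the last put.
 */
static struct qdisc_rate_table *example_rtab;

static int example_parse_rate(struct tc_ratespec *r, struct rtattr *tab)
{
    example_rtab = qdisc_get_rtab(r, tab);	/* takes a reference */
    return example_rtab ? 0 : -EINVAL;
}

static void example_release_rate(void)
{
    qdisc_put_rtab(example_rtab);		/* drops the reference */
    example_rtab = NULL;
}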
/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
{
    int i = 0x10000;
    static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

    do {
        autohandle += TC_H_MAKE(0x10000U, 0);
        if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
            autohandle = TC_H_MAKE(0x80000000U, 0);
    } while (qdisc_lookup(dev, autohandle) && --i > 0);

    return i > 0 ? autohandle : 0;
}
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
    struct Qdisc *oqdisc;

    if (dev->flags & IFF_UP)
        dev_deactivate(dev);

    qdisc_lock_tree(dev);
    if (qdisc && qdisc->flags & TCQ_F_INGRES) {
        oqdisc = dev->qdisc_ingress;
        /* Prune old scheduler */
        if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
            qdisc_reset(oqdisc);
            dev->qdisc_ingress = NULL;
        } else {
            dev->qdisc_ingress = qdisc;
        }
    } else {
        oqdisc = dev->qdisc_sleeping;

        /* Prune old scheduler */
        if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
            qdisc_reset(oqdisc);

        /* ... and graft new one */
        if (qdisc == NULL)
            qdisc = &noop_qdisc;
        dev->qdisc_sleeping = qdisc;
        dev->qdisc = &noop_qdisc;
    }

    qdisc_unlock_tree(dev);

    if (dev->flags & IFF_UP)
        dev_activate(dev);

    return oqdisc;
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   The old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
		struct Qdisc *new, struct Qdisc **old)
{
    int err = 0;
    struct Qdisc *q = *old;

    if (parent == NULL) {
        if (q && q->flags & TCQ_F_INGRES) {
            *old = dev_graft_qdisc(dev, q);
        } else {
            *old = dev_graft_qdisc(dev, new);
        }
    } else {
        struct Qdisc_class_ops *cops = parent->ops->cl_ops;

        err = -EINVAL;

        if (cops) {
            unsigned long cl = cops->get(parent, classid);
            if (cl) {
                err = cops->graft(parent, cl, new, old);
                cops->put(parent, cl);
            }
        }
    }
    return err;
}
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
    int err;
    struct rtattr *kind = tca[TCA_KIND-1];
    struct Qdisc *sch = NULL;
    struct Qdisc_ops *ops;
    int size;

    ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
    if (ops == NULL && tca[TCA_KIND-1] != NULL) {
        if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
            request_module("sch_%s", (char*)RTA_DATA(kind));
            ops = qdisc_lookup_ops(kind);
        }
    }
#endif

    err = -EINVAL;
    if (ops == NULL)
        goto err_out;

    size = sizeof(*sch) + ops->priv_size;

    sch = kmalloc(size, GFP_KERNEL);
    err = -ENOBUFS;
    if (!sch)
        goto err_out;

    /* Grrr... Resolve race condition with module unload */

    err = -EINVAL;
    if (ops != qdisc_lookup_ops(kind))
        goto err_out;

    memset(sch, 0, size);

    INIT_LIST_HEAD(&sch->list);
    skb_queue_head_init(&sch->q);

    if (handle == TC_H_INGRESS)
        sch->flags |= TCQ_F_INGRES;

    sch->ops = ops;
    sch->enqueue = ops->enqueue;
    sch->dequeue = ops->dequeue;
    sch->dev = dev;
    atomic_set(&sch->refcnt, 1);
    sch->stats_lock = &dev->queue_lock;
    if (handle == 0) {
        handle = qdisc_alloc_handle(dev);
        err = -ENOMEM;
        if (handle == 0)
            goto err_out;
    }

    if (handle == TC_H_INGRESS)
        sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);
    else
        sch->handle = handle;

    if (!try_module_get(ops->owner))
        goto err_out;

    /* enqueue is accessed locklessly - make sure it's visible
     * before we set a netdevice's qdisc pointer to sch */
    smp_wmb();

    if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
        qdisc_lock_tree(dev);
        list_add_tail(&sch->list, &dev->qdisc_list);
        qdisc_unlock_tree(dev);

#ifdef CONFIG_NET_ESTIMATOR
        if (tca[TCA_RATE-1])
            qdisc_new_estimator(&sch->stats, sch->stats_lock,
                                tca[TCA_RATE-1]);
#endif
        return sch;
    }
    module_put(ops->owner);

err_out:
    *errp = err;
    if (sch)
        kfree(sch);
    return NULL;
}
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
    if (tca[TCA_OPTIONS-1]) {
        int err;

        if (sch->ops->change == NULL)
            return -EINVAL;
        err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
        if (err)
            return err;
    }
#ifdef CONFIG_NET_ESTIMATOR
    if (tca[TCA_RATE-1]) {
        qdisc_kill_estimator(&sch->stats);
        qdisc_new_estimator(&sch->stats, sch->stats_lock,
                            tca[TCA_RATE-1]);
    }
#endif
    return 0;
}
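/*
 * The walker below guards qdisc_graft(): check_loop() visits every class
 * reachable from q and fails with -ELOOP if it reaches the prospective
 * parent p again (or if the hierarchy is deeper than 7 levels), i.e. if
 * grafting q under p would create a cycle.
 */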
struct check_loop_arg
{
    struct qdisc_walker	w;
    struct Qdisc	*p;
    int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
    struct check_loop_arg arg;

    if (q->ops->cl_ops == NULL)
        return 0;

    arg.w.stop = arg.w.skip = arg.w.count = 0;
    arg.w.fn = check_loop_fn;
    arg.depth = depth;
    arg.p = p;
    q->ops->cl_ops->walk(q, &arg.w);
    return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
    struct Qdisc *leaf;
    struct Qdisc_class_ops *cops = q->ops->cl_ops;
    struct check_loop_arg *arg = (struct check_loop_arg *)w;

    leaf = cops->leaf(q, cl);
    if (leaf) {
        if (leaf == arg->p || arg->depth > 7)
            return -ELOOP;
        return check_loop(leaf, arg->p, arg->depth + 1);
    }
    return 0;
}
/*
   Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
    struct tcmsg *tcm = NLMSG_DATA(n);
    struct rtattr **tca = arg;
    struct net_device *dev;
    u32 clid = tcm->tcm_parent;
    struct Qdisc *q = NULL;
    struct Qdisc *p = NULL;
    int err;

    if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
        return -ENODEV;

    if (clid) {
        if (clid != TC_H_ROOT) {
            if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                    return -ENOENT;
                q = qdisc_leaf(p, clid);
            } else { /* ingress */
                q = dev->qdisc_ingress;
            }
        } else {
            q = dev->qdisc_sleeping;
        }
        if (!q)
            return -ENOENT;

        if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
            return -EINVAL;
    } else {
        if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
            return -ENOENT;
    }

    if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
        return -EINVAL;

    if (n->nlmsg_type == RTM_DELQDISC) {
        if (!clid)
            return -EINVAL;
        if (q->handle == 0)
            return -ENOENT;
        if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
            return err;
        if (q) {
            qdisc_notify(skb, n, clid, q, NULL);
            spin_lock_bh(&dev->queue_lock);
            qdisc_destroy(q);
            spin_unlock_bh(&dev->queue_lock);
        }
    } else {
        qdisc_notify(skb, n, clid, NULL, q);
    }
    return 0;
}
/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
    struct tcmsg *tcm = NLMSG_DATA(n);
    struct rtattr **tca = arg;
    struct net_device *dev;
    u32 clid = tcm->tcm_parent;
    struct Qdisc *q = NULL;
    struct Qdisc *p = NULL;
    int err;

    if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
        return -ENODEV;

    if (clid) {
        if (clid != TC_H_ROOT) {
            if (clid != TC_H_INGRESS) {
                if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                    return -ENOENT;
                q = qdisc_leaf(p, clid);
            } else { /* ingress */
                q = dev->qdisc_ingress;
            }
        } else {
            q = dev->qdisc_sleeping;
        }

        /* It may be the default qdisc; ignore it. */
        if (q && q->handle == 0)
            q = NULL;

        if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
            if (tcm->tcm_handle) {
                if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
                    return -EEXIST;
                if (TC_H_MIN(tcm->tcm_handle))
                    return -EINVAL;
                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                    goto create_n_graft;
                if (n->nlmsg_flags&NLM_F_EXCL)
                    return -EEXIST;
                if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
                    return -EINVAL;
                if (q == p ||
                    (p && check_loop(q, p, 0)))
                    return -ELOOP;
                atomic_inc(&q->refcnt);
                goto graft;
            } else {
                if (q == NULL)
                    goto create_n_graft;

                /* This magic test requires explanation.
                 *
                 *   We know that some child q is already
                 *   attached to this parent and have a choice:
                 *   either to change it or to create/graft a new one.
                 *
                 *   1. We are allowed to create/graft only
                 *   if both CREATE and REPLACE flags are set.
                 *
                 *   2. If EXCL is set, the requestor meant
                 *   that the qdisc tcm_handle is not expected
                 *   to exist, so we choose create/graft too.
                 *
                 *   3. The last case is when no flags are set.
                 *   Alas, it is sort of a hole in the API; we
                 *   cannot decide what to do unambiguously.
                 *   For now we select create/graft if the
                 *   user gave a KIND that does not match the
                 *   existing one.
                 */
                if ((n->nlmsg_flags&NLM_F_CREATE) &&
                    (n->nlmsg_flags&NLM_F_REPLACE) &&
                    ((n->nlmsg_flags&NLM_F_EXCL) ||
                     (tca[TCA_KIND-1] &&
                      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
                    goto create_n_graft;
            }
        }
    } else {
        if (!tcm->tcm_handle)
            return -EINVAL;
        q = qdisc_lookup(dev, tcm->tcm_handle);
    }

    /* Change qdisc parameters */
    if (q == NULL)
        return -ENOENT;
    if (n->nlmsg_flags&NLM_F_EXCL)
        return -EEXIST;
    if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
        return -EINVAL;
    err = qdisc_change(q, tca);
    if (err == 0)
        qdisc_notify(skb, n, clid, NULL, q);
    return err;

create_n_graft:
    if (!(n->nlmsg_flags&NLM_F_CREATE))
        return -ENOENT;
    if (clid == TC_H_INGRESS)
        q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
    else
        q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
    if (q == NULL)
        return err;

graft:
    if (1) {
        struct Qdisc *old_q = NULL;

        err = qdisc_graft(dev, p, clid, q, &old_q);
        if (err) {
            if (q) {
                spin_lock_bh(&dev->queue_lock);
                qdisc_destroy(q);
                spin_unlock_bh(&dev->queue_lock);
            }
            return err;
        }
        qdisc_notify(skb, n, clid, old_q, q);
        if (old_q) {
            spin_lock_bh(&dev->queue_lock);
            qdisc_destroy(old_q);
            spin_unlock_bh(&dev->queue_lock);
        }
    }
    return 0;
}
int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st, spinlock_t *lock)
{
    spin_lock_bh(lock);
    RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), st);
    spin_unlock_bh(lock);
    return 0;

rtattr_failure:
    spin_unlock_bh(lock);
    return -1;
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
{
    struct tcmsg *tcm;
    struct nlmsghdr *nlh;
    unsigned char *b = skb->tail;

    nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
    nlh->nlmsg_flags = flags;
    tcm = NLMSG_DATA(nlh);
    tcm->tcm_family = AF_UNSPEC;
    tcm->tcm_ifindex = q->dev->ifindex;
    tcm->tcm_parent = clid;
    tcm->tcm_handle = q->handle;
    tcm->tcm_info = atomic_read(&q->refcnt);
    RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
    if (q->ops->dump && q->ops->dump(q, skb) < 0)
        goto rtattr_failure;
    q->stats.qlen = q->q.qlen;
    if (qdisc_copy_stats(skb, &q->stats, q->stats_lock))
        goto rtattr_failure;
    nlh->nlmsg_len = skb->tail - b;
    return skb->len;

nlmsg_failure:
rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
    struct sk_buff *skb;
    u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

    skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
    if (!skb)
        return -ENOBUFS;

    if (old && old->handle) {
        if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
            goto err_out;
    }
    if (new) {
        if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
            goto err_out;
    }

    if (skb->len)
        return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
    kfree_skb(skb);
    return -EINVAL;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
    int idx, q_idx;
    int s_idx, s_q_idx;
    struct net_device *dev;
    struct Qdisc *q;

    s_idx = cb->args[0];
    s_q_idx = q_idx = cb->args[1];
    read_lock(&dev_base_lock);
    for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
        if (idx < s_idx)
            continue;
        if (idx > s_idx)
            s_q_idx = 0;
        read_lock_bh(&qdisc_tree_lock);
        q_idx = 0;
        list_for_each_entry(q, &dev->qdisc_list, list) {
            if (q_idx < s_q_idx) {
                q_idx++;
                continue;
            }
            if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
                              cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
                read_unlock_bh(&qdisc_tree_lock);
                goto done;
            }
            q_idx++;
        }
        read_unlock_bh(&qdisc_tree_lock);
    }

done:
    read_unlock(&dev_base_lock);

    cb->args[0] = idx;
    cb->args[1] = q_idx;

    return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/


static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
    struct tcmsg *tcm = NLMSG_DATA(n);
    struct rtattr **tca = arg;
    struct net_device *dev;
    struct Qdisc *q = NULL;
    struct Qdisc_class_ops *cops;
    unsigned long cl = 0;
    unsigned long new_cl;
    u32 pid = tcm->tcm_parent;
    u32 clid = tcm->tcm_handle;
    u32 qid = TC_H_MAJ(clid);
    int err;

    if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
        return -ENODEV;

    /*
       parent == TC_H_UNSPEC - unspecified parent.
       parent == TC_H_ROOT   - class is root, which has no parent.
       parent == X:0	 - parent is root class.
       parent == X:Y	 - parent is a node in hierarchy.
       parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

       handle == 0:0	 - generate handle from kernel pool.
       handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
       handle == X:Y	 - class is exactly X:Y.
       handle == X:0	 - root class.

       (For example, if the qdisc is 1:0, handle 0:5 names class 1:5.)
     */

    /* Step 1. Determine qdisc handle X:0 */

    if (pid != TC_H_ROOT) {
        u32 qid1 = TC_H_MAJ(pid);

        if (qid && qid1) {
            /* If both majors are known, they must be identical. */
            if (qid != qid1)
                return -EINVAL;
        } else if (qid1) {
            qid = qid1;
        } else if (qid == 0)
            qid = dev->qdisc_sleeping->handle;

        /* Now qid is a genuine qdisc handle consistent with
           both parent and child.

           TC_H_MAJ(pid) may still be unspecified; complete it now.
         */
        if (pid)
            pid = TC_H_MAKE(qid, pid);
    } else {
        if (qid == 0)
            qid = dev->qdisc_sleeping->handle;
    }

    /* OK. Locate qdisc */
    if ((q = qdisc_lookup(dev, qid)) == NULL)
        return -ENOENT;

    /* And check that it supports classes */
    cops = q->ops->cl_ops;
    if (cops == NULL)
        return -EINVAL;

    /* Now try to get class */
    if (clid == 0) {
        if (pid == TC_H_ROOT)
            clid = qid;
    } else
        clid = TC_H_MAKE(qid, clid);

    if (clid)
        cl = cops->get(q, clid);

    if (cl == 0) {
        err = -ENOENT;
        if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
            goto out;
    } else {
        switch (n->nlmsg_type) {
        case RTM_NEWTCLASS:
            err = -EEXIST;
            if (n->nlmsg_flags&NLM_F_EXCL)
                goto out;
            break;
        case RTM_DELTCLASS:
            err = cops->delete(q, cl);
            if (err == 0)
                tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
            goto out;
        case RTM_GETTCLASS:
            err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
            goto out;
        default:
            err = -EINVAL;
            goto out;
        }
    }

    new_cl = cl;
    err = cops->change(q, clid, pid, tca, &new_cl);
    if (err == 0)
        tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
    if (cl)
        cops->put(q, cl);

    return err;
}
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, unsigned flags, int event)
{
    struct tcmsg *tcm;
    struct nlmsghdr *nlh;
    unsigned char *b = skb->tail;

    nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
    nlh->nlmsg_flags = flags;
    tcm = NLMSG_DATA(nlh);
    tcm->tcm_family = AF_UNSPEC;
    tcm->tcm_ifindex = q->dev->ifindex;
    tcm->tcm_parent = q->handle;
    tcm->tcm_handle = q->handle;
    tcm->tcm_info = 0;
    RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
    if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
        goto rtattr_failure;
    nlh->nlmsg_len = skb->tail - b;
    return skb->len;

nlmsg_failure:
rtattr_failure:
    skb_trim(skb, b - skb->data);
    return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
    struct sk_buff *skb;
    u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

    skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
    if (!skb)
        return -ENOBUFS;

    if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
        kfree_skb(skb);
        return -EINVAL;
    }

    return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
{
    struct qdisc_walker w;
    struct sk_buff *skb;
    struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
    struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

    return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
                          a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
    int t;
    int s_t;
    struct net_device *dev;
    struct Qdisc *q;
    struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
    struct qdisc_dump_args arg;

    if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
        return 0;
    if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
        return 0;

    s_t = cb->args[0];
    t = 0;

    read_lock_bh(&qdisc_tree_lock);
    list_for_each_entry(q, &dev->qdisc_list, list) {
        if (t < s_t || !q->ops->cl_ops ||
            (tcm->tcm_parent &&
             TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
            t++;
            continue;
        }
        if (t > s_t)
            memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
        arg.w.fn = qdisc_class_dump;
        arg.skb = skb;
        arg.cb = cb;
        arg.w.stop = 0;
        arg.w.skip = cb->args[1];
        arg.w.count = 0;
        q->ops->cl_ops->walk(q, &arg.w);
        cb->args[1] = arg.w.count;
        if (arg.w.stop)
            break;
        t++;
    }
    read_unlock_bh(&qdisc_tree_lock);

    cb->args[0] = t;

    dev_put(dev);
    return skb->len;
}
int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
    seq_printf(seq, "%08x %08x %08x %08x\n",
               psched_tick_per_us, psched_us_per_tick,
               1000000, HZ);

    return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
    return single_open(file, psched_show, PDE(inode)->data);
}

static struct file_operations psched_fops = {
    .owner   = THIS_MODULE,
    .open    = psched_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};
#endif
#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
{
    int delta;

    if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
        return bound;
    delta = delta_sec * 1000000;
    if (delta > bound)
        delta = bound;
    return delta;
}
EXPORT_SYMBOL(psched_tod_diff);
#endif
#ifdef CONFIG_NET_SCH_CLK_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
EXPORT_SYMBOL(psched_clock_per_hz);
EXPORT_SYMBOL(psched_clock_scale);

psched_time_t psched_time_base;
cycles_t psched_time_mark;
EXPORT_SYMBOL(psched_time_mark);
EXPORT_SYMBOL(psched_time_base);

/*
 * Periodically adjust psched_time_base to avoid overflow
 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
 */
static void psched_tick(unsigned long);
static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);

static void psched_tick(unsigned long dummy)
{
    if (sizeof(cycles_t) == sizeof(u32)) {
        psched_time_t dummy_stamp;
        PSCHED_GET_TIME(dummy_stamp);
        psched_timer.expires = jiffies + 1*HZ;
        add_timer(&psched_timer);
    }
}
int __init psched_calibrate_clock(void)
{
    psched_time_t stamp, stamp1;
    struct timeval tv, tv1;
    psched_tdiff_t delay;
    long rdelay;
    unsigned long stop;

    psched_tick(0);
    stop = jiffies + HZ/10;
    PSCHED_GET_TIME(stamp);
    do_gettimeofday(&tv);
    while (time_before(jiffies, stop)) {
        barrier();
    }
    PSCHED_GET_TIME(stamp1);
    do_gettimeofday(&tv1);

    delay = PSCHED_TDIFF(stamp1, stamp);
    rdelay = tv1.tv_usec - tv.tv_usec;
    rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
    if (rdelay > delay)
        return -1;
    delay /= rdelay;
    psched_tick_per_us = delay;
    while ((delay >>= 1) != 0)
        psched_clock_scale++;
    psched_us_per_tick = 1 << psched_clock_scale;
    psched_clock_per_hz = (psched_tick_per_us * (1000000/HZ)) >> psched_clock_scale;
    return 0;
}
#endif
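/*
 * Worked example (illustrative, not from the original file): assume a
 * 1 GHz CPU clock source and HZ=1000. Over the ~100 ms window, delay is
 * about 10^8 ticks and rdelay about 10^5 us, so delay/rdelay gives
 * psched_tick_per_us = 1000. The shift loop then yields
 * psched_clock_scale = 9 (floor(log2(1000))), so
 * psched_us_per_tick = 1<<9 = 512 and
 * psched_clock_per_hz = (1000*(1000000/HZ))>>9 = 1953.
 */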
static int __init pktsched_init(void)
{
    struct rtnetlink_link *link_p;

#ifdef CONFIG_NET_SCH_CLK_CPU
    if (psched_calibrate_clock() < 0)
        return -1;
#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
    psched_tick_per_us = HZ<<PSCHED_JSCALE;
    psched_us_per_tick = 1000000;
#endif

    link_p = rtnetlink_links[PF_UNSPEC];

    /* Set up rtnetlink links. This is done here to avoid
       exporting a large number of public symbols.
     */

    if (link_p) {
        link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
        link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
        link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
        link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
        link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
    }

    register_qdisc(&pfifo_qdisc_ops);
    register_qdisc(&bfifo_qdisc_ops);
    proc_net_fops_create("psched", 0, &psched_fops);

    return 0;
}

subsys_initcall(pktsched_init);
EXPORT_SYMBOL(qdisc_copy_stats);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);