/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>

#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box which is
   able to enqueue packets and to dequeue them (when the device is ready
   to send something), in an order and at times determined by the
   algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to perform some sanity checks and the
   part of the work common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty, only that the discipline
   does not want to send anything at this time.  The queue is really
   empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:

   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another
			  one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by the policer.
     Expected action: back off or report an error to real-time apps.

   Auxiliary routines:

   ---requeue

   requeues a packet that was dequeued earlier. It is used for
   non-standard or just buggy devices, which can defer output even
   when dev->tbusy == 0.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers and counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime
   of the qdisc.

   ---change

   changes qdisc parameters.
 */
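
/*
 * A minimal sketch of how a caller consumes the contract above (the real
 * consumer is qdisc_restart() in net/sched/sch_generic.c; xmit_one() is a
 * hypothetical transmit helper, not a kernel function):
 *
 *	struct sk_buff *skb;
 *
 *	while ((skb = q->dequeue(q)) != NULL)
 *		xmit_one(skb);
 *
 *	if (q->q.qlen)
 *		;	// NULL did not mean "empty": the qdisc is holding
 *			// packets back (e.g. shaping) and will release
 *			// them later
 */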
/* Protects list of registered TC modules. It is pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;
/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
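
/*
 * Typical use of the pair above, sketched after the pattern of the fifo
 * schedulers (the ops table is abbreviated; a real one also fills in
 * priv_size, init, reset and friends; all "my_" names are hypothetical):
 *
 *	static struct Qdisc_ops my_qdisc_ops = {
 *		.id	 = "myqdisc",
 *		.enqueue = my_enqueue,
 *		.dequeue = my_dequeue,
 *		.owner	 = THIS_MODULE,
 *	};
 *
 *	static int __init my_module_init(void)
 *	{
 *		return register_qdisc(&my_qdisc_ops);
 *	}
 *
 *	static void __exit my_module_exit(void)
 *	{
 *		unregister_qdisc(&my_qdisc_ops);
 *	}
 */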
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);
	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}
/* Find queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (rtattr_strcmp(kind, q->id) == 0)
				break;
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, RTA_DATA(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
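
/*
 * The 1024-byte payload demanded by qdisc_get_rtab() is 256 u32 slots:
 * entry (len >> cell_log) caches the time needed to transmit a packet of
 * that size at the configured rate.  The shaping qdiscs index it with an
 * "L2T" (length-to-time) macro along these lines (a sketch, not a shared
 * definition -- each scheduler carries its own copy):
 *
 *	#define L2T(rtab, len)	((rtab)->data[(len) >> (rtab)->rate.cell_log])
 *
 * iproute2 precomputes the 256 entries in user space and ships them in a
 * rate-table attribute (e.g. TCA_TBF_RTAB for tbf), which arrives here
 * as "tab".
 */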
/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
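
/*
 * Worked example: the first call hands out 8001:0 (autohandle starts at
 * 8000:0 and TC_H_MAKE(0x10000U, 0) bumps the major number by one), then
 * 8002:0, 8003:0, ... wrapping back to 8000:0 when it would reach ffff:0,
 * the major of the reserved TC_H_ROOT value.  The 0x10000 iteration bound
 * guarantees termination even if every major in the kernel-managed half
 * of the space is already taken.
 */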
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	qdisc_lock_tree(dev);
	if (qdisc && qdisc->flags & TCQ_F_INGRESS) {
		oqdisc = dev->qdisc_ingress;
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev->qdisc_ingress = NULL;
		} else {	/* new */
			dev->qdisc_ingress = qdisc;
		}
	} else {
		oqdisc = dev->qdisc_sleeping;

		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev->qdisc_sleeping = qdisc;
		dev->qdisc = &noop_qdisc;
	}
	qdisc_unlock_tree(dev);

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   The old qdisc is not destroyed but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
		struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;
	struct Qdisc *q = *old;

	if (parent == NULL) {
		if (q && q->flags & TCQ_F_INGRESS)
			*old = dev_graft_qdisc(dev, q);
		else
			*old = dev_graft_qdisc(dev, new);
	} else {
		struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;
		if (cops) {
			unsigned long cl = cops->get(parent, classid);

			if (cl) {
				err = cops->graft(parent, cl, new, old);
				cops->put(parent, cl);
			}
		}
	}
	return err;
}
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
	int err;
	struct rtattr *kind = tca[TCA_KIND-1];
	void *p = NULL;
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	int size;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && tca[TCA_KIND-1] != NULL) {
		if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
			request_module("sch_%s", (char *)RTA_DATA(kind));
			ops = qdisc_lookup_ops(kind);
		}
	}
#endif

	err = -EINVAL;
	if (ops == NULL)
		goto err_out;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
	size += ops->priv_size + QDISC_ALIGN_CONST;

	p = kmalloc(size, GFP_KERNEL);
	err = -ENOBUFS;
	if (!p)
		goto err_out;
	memset(p, 0, size);
	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
			       & ~QDISC_ALIGN_CONST);
	sch->padded = (char *)sch - (char *)p;

	/* Grrr... Resolve race condition with module unload */
	err = -EINVAL;
	if (ops != qdisc_lookup_ops(kind))
		goto err_out;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);

	if (handle == TC_H_INGRESS)
		sch->flags |= TCQ_F_INGRESS;

	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev = dev;
	atomic_set(&sch->refcnt, 1);
	sch->stats_lock = &dev->queue_lock;
	if (handle == 0) {
		handle = qdisc_alloc_handle(dev);
		err = -ENOMEM;
		if (handle == 0)
			goto err_out;
	}

	if (handle == TC_H_INGRESS)
		sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);
	else
		sch->handle = handle;

	err = -EBUSY;
	if (!try_module_get(ops->owner))
		goto err_out;

	/* enqueue is accessed locklessly - make sure it's visible
	 * before we set a netdevice's qdisc pointer to sch */
	smp_wmb();
	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
		qdisc_lock_tree(dev);
		list_add_tail(&sch->list, &dev->qdisc_list);
		qdisc_unlock_tree(dev);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1])
			qdisc_new_estimator(&sch->stats, sch->stats_lock,
					    tca[TCA_RATE-1]);
#endif
		return sch;
	}
	module_put(ops->owner);

err_out:
	*errp = err;
	if (p)
		kfree(p);
	return NULL;
}
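
/*
 * Worked example of the alignment arithmetic in qdisc_create(), assuming
 * QDISC_ALIGN_CONST == 31 (32-byte alignment) and, hypothetically,
 * sizeof(*sch) == 0x94:
 *
 *	size  = (0x94 + 31) & ~31;	// 0xa0: header rounded up to 32
 *	size += ops->priv_size + 31;	// slack so sch itself can be aligned
 *	sch   = ((unsigned long)p + 31) & ~31;	// first 32-byte boundary
 *	sch->padded = (char *)sch - (char *)p;	// offset remembered...
 *
 * ...so that qdisc_destroy() can subtract ->padded again and kfree() the
 * pointer kmalloc() actually returned.
 */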
static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
	if (tca[TCA_OPTIONS-1]) {
		int err;

		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
		if (err)
			return err;
	}
#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1]) {
		qdisc_kill_estimator(&sch->stats);
		qdisc_new_estimator(&sch->stats, sch->stats_lock,
				    tca[TCA_RATE-1]);
	}
#endif
	return 0;
}
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
/*
   Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(q);
			spin_unlock_bh(&dev->queue_lock);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
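
/*
 * For orientation: with stock iproute2, "tc qdisc del dev eth0 root"
 * arrives here as RTM_DELQDISC with tcm_parent == TC_H_ROOT (0xFFFFFFFFU)
 * and tcm_handle == 0, so the qdisc is located via clid and grafted away
 * with new == NULL; a non-dump RTM_GETQDISC request only echoes the qdisc
 * back through qdisc_notify().
 */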
/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a
				 *   choice: either to change it or to
				 *   create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requestor wanted
				 *   to say that the qdisc tcm_handle is not
				 *   expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are
				 *   set.  Alas, it is sort of a hole in the
				 *   API; we cannot decide what to do
				 *   unambiguously.  For now we select
				 *   create/graft if the user gave a KIND
				 *   that does not match the existing one.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL)
		return err;

graft:
	if (1) {
		struct Qdisc *old_q = NULL;

		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q) {
				spin_lock_bh(&dev->queue_lock);
				qdisc_destroy(q);
				spin_unlock_bh(&dev->queue_lock);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q) {
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(old_q);
			spin_unlock_bh(&dev->queue_lock);
		}
	}
	return 0;
}
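
/*
 * How the netlink flags tested above map onto the tc(8) verbs, as stock
 * iproute2 builds its requests (a sketch for orientation):
 *
 *	tc qdisc add ...	RTM_NEWQDISC, NLM_F_CREATE|NLM_F_EXCL
 *	tc qdisc change ...	RTM_NEWQDISC, no flags
 *	tc qdisc replace ...	RTM_NEWQDISC, NLM_F_CREATE|NLM_F_REPLACE
 *	tc qdisc del ...	RTM_DELQDISC
 *
 * Hence "add" of an existing qdisc fails with EEXIST, "change" of a
 * missing one with ENOENT, and "replace" falls through to create_n_graft.
 */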
int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st, spinlock_t *lock)
{
	spin_lock_bh(lock);
	RTA_PUT(skb, TCA_STATS, sizeof(struct tc_stats), st);
	spin_unlock_bh(lock);
	return 0;

rtattr_failure:
	spin_unlock_bh(lock);
	return -1;
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto rtattr_failure;
	q->stats.qlen = q->q.qlen;
	if (qdisc_copy_stats(skb, &q->stats, q->stats_lock))
		goto rtattr_failure;
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
		if (idx < s_idx)
			continue;
		if (idx > s_idx)
			s_q_idx = 0;
		read_lock_bh(&qdisc_tree_lock);
		q_idx = 0;
		list_for_each_entry(q, &dev->qdisc_list, list) {
			if (q_idx < s_q_idx) {
				q_idx++;
				continue;
			}
			if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
				read_unlock_bh(&qdisc_tree_lock);
				goto done;
			}
			q_idx++;
		}
		read_unlock_bh(&qdisc_tree_lock);
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is genuine qdisc handle consistent
		   both with parent and child.

		   TC_H_MAJ(pid) still may be unspecified, complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate qdisc */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);
	return err;
}
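
/*
 * Worked example of the handle algebra above.  A request like
 * "tc class add dev eth0 parent 1: classid 1:10" (iproute2 encoding,
 * minors in hex) arrives with
 *
 *	tcm_parent = 0x00010000;	// "1:"   i.e. 1:0
 *	tcm_handle = 0x00010010;	// "1:10"
 *
 * so qid == TC_H_MAJ(tcm_handle) == 0x00010000 agrees with qid1, and
 * Step 1 leaves qid == 1:0, pid == 1:0, clid == 1:10.  The TC_H_*
 * helpers are plain mask operations from <linux/rtnetlink.h>:
 *
 *	TC_H_MAJ(h)		((h) & 0xFFFF0000U)
 *	TC_H_MIN(h)		((h) & 0x0000FFFFU)
 *	TC_H_MAKE(maj, min)	(((maj) & 0xFFFF0000U) | ((min) & 0x0000FFFFU))
 */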
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, unsigned flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
	nlh->nlmsg_flags = flags;
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
	if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
		goto rtattr_failure;
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
{
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	read_lock_bh(&qdisc_tree_lock);
	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (t < s_t || !q->ops->cl_ops ||
		    (tcm->tcm_parent &&
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		}
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
		t++;
	}
	read_unlock_bh(&qdisc_tree_lock);

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   psched_tick_per_us, psched_us_per_tick,
		   1000000, HZ);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static struct file_operations psched_fops = {
	.owner	 = THIS_MODULE,
	.open	 = psched_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif
#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
{
	int delta;

	if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
		return bound;
	delta = delta_sec * 1000000;
	if (delta > bound)
		delta = bound;
	return delta;
}
EXPORT_SYMBOL(psched_tod_diff);
#endif
#ifdef CONFIG_NET_SCH_CLK_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
EXPORT_SYMBOL(psched_clock_per_hz);
EXPORT_SYMBOL(psched_clock_scale);

psched_time_t psched_time_base;
cycles_t psched_time_mark;
EXPORT_SYMBOL(psched_time_mark);
EXPORT_SYMBOL(psched_time_base);

/*
 * Periodically adjust psched_time_base to avoid overflow
 * with 32-bit get_cycles(). Safe up to 4GHz CPU.
 */
static void psched_tick(unsigned long);
static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);

static void psched_tick(unsigned long dummy)
{
	if (sizeof(cycles_t) == sizeof(u32)) {
		psched_time_t dummy_stamp;

		PSCHED_GET_TIME(dummy_stamp);
		psched_timer.expires = jiffies + 1*HZ;
		add_timer(&psched_timer);
	}
}
int __init psched_calibrate_clock(void)
{
	psched_time_t stamp, stamp1;
	struct timeval tv, tv1;
	psched_tdiff_t delay;
	long rdelay;
	unsigned long stop;

	psched_tick(0);
	stop = jiffies + HZ/10;
	PSCHED_GET_TIME(stamp);
	do_gettimeofday(&tv);
	while (time_before(jiffies, stop)) {
		barrier();
	}
	PSCHED_GET_TIME(stamp1);
	do_gettimeofday(&tv1);

	delay = PSCHED_TDIFF(stamp1, stamp);
	rdelay = tv1.tv_usec - tv.tv_usec;
	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
	if (rdelay > delay)
		return -1;
	delay /= rdelay;
	psched_tick_per_us = delay;
	while ((delay >>= 1) != 0)
		psched_clock_scale++;
	psched_us_per_tick = 1<<psched_clock_scale;
	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
	return 0;
}
#endif
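
/*
 * Worked example of the calibration above, assuming a 1 GHz TSC and
 * HZ == 1000: over the ~100 ms busy-wait, delay ~= 10^8 cycles and
 * rdelay ~= 10^5 us, so
 *
 *	delay / rdelay	     == 1000		cycles per microsecond
 *	psched_clock_scale   == 9		(position of 1000's top bit)
 *	psched_us_per_tick   == 1 << 9 == 512
 *	psched_clock_per_hz  == (1000 * (1000000 / 1000)) >> 9 == 1953
 *
 * after which PSCHED_GET_TIME() deltas convert to microseconds with
 * shifts instead of divisions.
 */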
static int __init pktsched_init(void)
{
	struct rtnetlink_link *link_p;

#ifdef CONFIG_NET_SCH_CLK_CPU
	if (psched_calibrate_clock() < 0)
		return -1;
#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
	psched_tick_per_us = HZ<<PSCHED_JSCALE;
	psched_us_per_tick = 1000000;
#endif

	link_p = rtnetlink_links[PF_UNSPEC];

	/* Set up rtnetlink links. It is done here to avoid
	   exporting a large number of public symbols.
	 */

	if (link_p) {
		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
	}

	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create("psched", 0, &psched_fops);

	return 0;
}

subsys_initcall(pktsched_init);
EXPORT_SYMBOL(qdisc_copy_stats);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);