/*
 * net/sched/sch_api.c  Packet scheduler API.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>

#include <net/pkt_sched.h>

#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
                        struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                         struct Qdisc *q, unsigned long cl, int event);

/*

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   In general, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to send
   something) in an order and at times determined by the algorithm hidden
   inside it.

   Qdiscs fall into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes" using
     "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues) attached to
   them, and so on.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form that is more
   intelligible to the kernel, to perform sanity checks and the part of the
   work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All of the real intelligent work is done inside the qdisc modules.
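
   For orientation (an illustrative sketch, not a quotation of the code
   below): handles are 32-bit values split into a 16-bit major and a 16-bit
   minor number, usually written "major:minor".  A root qdisc might be "1:"
   (0x00010000), its classes "1:1" and "1:2", and a child qdisc attached to
   class "1:1" could be "10:" (0x000A0000); classes always carry the major
   number of the qdisc that owns them.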

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send.  It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this moment.  The queue is
   really empty only if q->q.qlen == 0.  For complicated disciplines with
   multiple queues, q->q is not the real packet queue, but q->q.qlen must
   be valid nevertheless.
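
   A minimal sketch of such a dequeue, for a discipline that uses q->q
   directly as its packet queue (the function name is made up for
   illustration; __skb_dequeue() returns NULL on an empty list and keeps
   q->q.qlen consistent):

        static struct sk_buff *example_dequeue(struct Qdisc *sch)
        {
                return __skb_dequeue(&sch->q);
        }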

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns one of:

   NET_XMIT_DROP    - this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN      - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.
   NET_XMIT_POLICED - dropped by the policer.
     Expected action: back off or report an error to real-time applications.
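
   A minimal sketch of an enqueue that returns these codes (the function
   name and EXAMPLE_LIMIT are made up for illustration and are not real
   kernel symbols):

        static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        {
                if (sch->q.qlen >= EXAMPLE_LIMIT) {
                        sch->stats.drops++;
                        kfree_skb(skb);
                        return NET_XMIT_DROP;
                }
                __skb_queue_tail(&sch->q, skb);
                sch->stats.bytes += skb->len;
                sch->stats.packets++;
                return 0;
        }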

   ---requeue

   requeues a once-dequeued packet.  It is used by non-standard or just
   buggy devices, which can defer output even when dev->tbusy == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the
   qdisc.

   ---change

   changes qdisc parameters.
 */
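
/*
   Putting the pieces above together: a qdisc module typically fills in a
   struct Qdisc_ops and hands it to register_qdisc().  A sketch, reusing
   the made-up example_* names from the comment above (example_init and
   friends are hypothetical):

        static struct Qdisc_ops example_qdisc_ops = {
                .id             = "example",
                .priv_size      = 0,
                .enqueue        = example_enqueue,
                .dequeue        = example_dequeue,
                .init           = example_init,
                .reset          = example_reset,
                .destroy        = example_destroy,
                .change         = example_change,
                .owner          = THIS_MODULE,
        };
 */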

/* Protects the list of registered TC modules.  It is a pure SMP lock. */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;


/************************************************
 *      Queueing disciplines manipulation.      *
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                if (!strcmp(qops->id, q->id))

        if (qops->enqueue == NULL)
                qops->enqueue = noop_qdisc_ops.enqueue;
        if (qops->requeue == NULL)
                qops->requeue = noop_qdisc_ops.requeue;
        if (qops->dequeue == NULL)
                qops->dequeue = noop_qdisc_ops.dequeue;

        write_unlock(&qdisc_mod_lock);

int unregister_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;

        write_lock(&qdisc_mod_lock);
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)

        write_unlock(&qdisc_mod_lock);
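
/*
   A qdisc module would typically call the two functions above from its
   module init/exit hooks.  A sketch, reusing the made-up example_qdisc_ops
   from the comment further up:

        static int __init example_module_init(void)
        {
                return register_qdisc(&example_qdisc_ops);
        }

        static void __exit example_module_exit(void)
        {
                unregister_qdisc(&example_qdisc_ops);
        }

        module_init(example_module_init);
        module_exit(example_module_exit);
 */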

/* We know the handle.  Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
        for (q = dev->qdisc_list; q; q = q->next) {
                if (q->handle == handle)

struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
        struct Qdisc_class_ops *cops = p->ops->cl_ops;

        cl = cops->get(p, classid);

        leaf = cops->leaf(p, cl);

/* Find queueing discipline by name */

struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
{
        struct Qdisc_ops *q = NULL;

        read_lock(&qdisc_mod_lock);
        for (q = qdisc_base; q; q = q->next) {
                if (rtattr_strcmp(kind, q->id) == 0)

        read_unlock(&qdisc_mod_lock);

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
{
        struct qdisc_rate_table *rtab;

        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
                if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {

        if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)

        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);

        memcpy(rtab->data, RTA_DATA(tab), 1024);
        rtab->next = qdisc_rtab_list;
        qdisc_rtab_list = rtab;

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
        struct qdisc_rate_table *rtab, **rtabp;

        if (!tab || --tab->refcnt)

        for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
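
/*
   The usual pattern (a sketch, not code from this file) is for a
   rate-limiting qdisc to look its table up in the init/change handler and
   to drop the reference again in destroy.  Here "qopt", "tb" and the
   attribute index TCA_EXAMPLE_RTAB are hypothetical:

        struct qdisc_rate_table *rtab;

        rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_EXAMPLE_RTAB - 1]);
        if (rtab == NULL)
                return -EINVAL;

        qdisc_put_rtab(rtab);
 */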

/* Allocate a unique handle from the space managed by the kernel */

u32 qdisc_alloc_handle(struct net_device *dev)
{
        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

        do {
                autohandle += TC_H_MAKE(0x10000U, 0);
                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
                        autohandle = TC_H_MAKE(0x80000000U, 0);
        } while (qdisc_lookup(dev, autohandle) && --i > 0);

        return i > 0 ? autohandle : 0;
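
/*
   For reference, the TC_H_* macros treat a handle as major:minor packed
   into a single u32.  A few worked examples:

        TC_H_MAKE(0x00010000U, 0x1)     is 0x00010001, i.e. "1:1"
        TC_H_MAJ(0x00010001U)           is 0x00010000
        TC_H_MIN(0x00010001U)           is 0x00000001
        TC_H_MAKE(0x80000000U, 0)       is "8000:", the start of the
                                        kernel-managed space used above
 */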

/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
        struct Qdisc *oqdisc;

        if (dev->flags & IFF_UP)

        write_lock(&qdisc_tree_lock);
        spin_lock_bh(&dev->queue_lock);
        if (qdisc && qdisc->flags&TCQ_F_INGRES) {
                oqdisc = dev->qdisc_ingress;
                /* Prune old scheduler */
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
                        dev->qdisc_ingress = NULL;

                dev->qdisc_ingress = qdisc;

                oqdisc = dev->qdisc_sleeping;

                /* Prune old scheduler */
                if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)

                /* ... and graft new one */

                dev->qdisc_sleeping = qdisc;
                dev->qdisc = &noop_qdisc;

        spin_unlock_bh(&dev->queue_lock);
        write_unlock(&qdisc_tree_lock);

        if (dev->flags & IFF_UP)

/* Graft qdisc "new" to class "classid" of qdisc "parent", or to the
   device when there is no parent.

   The old qdisc is not destroyed, but returned in *old.
 */

int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
                struct Qdisc *new, struct Qdisc **old)
{
        struct Qdisc *q = *old;

        if (parent == NULL) {
                if (q && q->flags&TCQ_F_INGRES) {
                        *old = dev_graft_qdisc(dev, q);

                        *old = dev_graft_qdisc(dev, new);

                struct Qdisc_class_ops *cops = parent->ops->cl_ops;

                unsigned long cl = cops->get(parent, classid);

                err = cops->graft(parent, cl, new, old);
                cops->put(parent, cl);

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
{
        struct rtattr *kind = tca[TCA_KIND-1];
        struct Qdisc *sch = NULL;
        struct Qdisc_ops *ops;

        ops = qdisc_lookup_ops(kind);

        if (ops == NULL && tca[TCA_KIND-1] != NULL) {
                if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
                        request_module("sch_%s", (char *)RTA_DATA(kind));
                        ops = qdisc_lookup_ops(kind);

        size = sizeof(*sch) + ops->priv_size;

        sch = kmalloc(size, GFP_KERNEL);

        /* Grrr... Resolve race condition with module unload */

        if (ops != qdisc_lookup_ops(kind))

        memset(sch, 0, size);

        skb_queue_head_init(&sch->q);

        if (handle == TC_H_INGRESS)
                sch->flags |= TCQ_F_INGRES;

        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;

        atomic_set(&sch->refcnt, 1);
        sch->stats.lock = &dev->queue_lock;

        handle = qdisc_alloc_handle(dev);

        if (handle == TC_H_INGRESS)
                sch->handle = TC_H_MAKE(TC_H_INGRESS, 0);

                sch->handle = handle;

        if (!try_module_get(ops->owner))

        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
                write_lock(&qdisc_tree_lock);
                sch->next = dev->qdisc_list;
                dev->qdisc_list = sch;
                write_unlock(&qdisc_tree_lock);
#ifdef CONFIG_NET_ESTIMATOR
                qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);

        module_put(ops->owner);

static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
{
        if (tca[TCA_OPTIONS-1]) {

                if (sch->ops->change == NULL)

                err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);

#ifdef CONFIG_NET_ESTIMATOR
        if (tca[TCA_RATE-1]) {
                qdisc_kill_estimator(&sch->stats);
                qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);

struct check_loop_arg
{
        struct qdisc_walker     w;

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
        struct check_loop_arg   arg;

        if (q->ops->cl_ops == NULL)

        arg.w.stop = arg.w.skip = arg.w.count = 0;
        arg.w.fn = check_loop_fn;

        q->ops->cl_ops->walk(q, &arg.w);
        return arg.w.stop ? -ELOOP : 0;

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
        struct Qdisc_class_ops *cops = q->ops->cl_ops;
        struct check_loop_arg *arg = (struct check_loop_arg *)w;

        leaf = cops->leaf(q, cl);

        if (leaf == arg->p || arg->depth > 7)
        return check_loop(leaf, arg->p, arg->depth + 1);

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        u32 clid = tcm->tcm_parent;
        struct Qdisc *q = NULL;
        struct Qdisc *p = NULL;

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)

        if (clid != TC_H_ROOT) {
                if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
                        if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                        q = qdisc_leaf(p, clid);
                } else { /* ingress */
                        q = dev->qdisc_ingress;

                q = dev->qdisc_sleeping;

        if (tcm->tcm_handle && q->handle != tcm->tcm_handle)

                if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)

        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))

        if (n->nlmsg_type == RTM_DELQDISC) {

                if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)

                qdisc_notify(skb, n, clid, q, NULL);
                spin_lock_bh(&dev->queue_lock);

                spin_unlock_bh(&dev->queue_lock);

                qdisc_notify(skb, n, clid, NULL, q);

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        u32 clid = tcm->tcm_parent;
        struct Qdisc *q = NULL;
        struct Qdisc *p = NULL;

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)

        if (clid != TC_H_ROOT) {
                if (clid != TC_H_INGRESS) {
                        if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
                        q = qdisc_leaf(p, clid);
                } else { /* ingress */
                        q = dev->qdisc_ingress;

                q = dev->qdisc_sleeping;

                /* It may be the default qdisc; ignore it. */
                if (q && q->handle == 0)

        if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                if (tcm->tcm_handle) {
                        if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
                        if (TC_H_MIN(tcm->tcm_handle))
                        if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
                        if (n->nlmsg_flags&NLM_F_EXCL)
                        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))

                            (p && check_loop(q, p, 0)))
                        atomic_inc(&q->refcnt);

                        /* This magic test requires explanation.
                         *
                         *   We know that some child q is already
                         *   attached to this parent and have a choice:
                         *   either to change it or to create/graft a new one.
                         *
                         *   1. We are allowed to create/graft only
                         *   if the CREATE and REPLACE flags are set.
                         *
                         *   2. If EXCL is set, the requestor wanted to say
                         *   that the qdisc tcm_handle is not expected
                         *   to exist, so we choose create/graft too.
                         *
                         *   3. The last case is when no flags are set.
                         *   Alas, it is a sort of hole in the API; we
                         *   cannot decide what to do unambiguously.
                         *   For now we select create/graft if the
                         *   user gave a KIND which does not match the
                         *   existing one.
                         */
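
                        /* For orientation (based on how the user-space tc
                         * utility behaves, not on anything in this file):
                         * "tc qdisc add" sends NLM_F_CREATE|NLM_F_EXCL,
                         * "tc qdisc change" sends neither flag, and
                         * "tc qdisc replace" sends NLM_F_CREATE|NLM_F_REPLACE.
                         */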

                        if ((n->nlmsg_flags&NLM_F_CREATE) &&
                            (n->nlmsg_flags&NLM_F_REPLACE) &&
                            ((n->nlmsg_flags&NLM_F_EXCL) ||
                             rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))

        if (!tcm->tcm_handle)

        q = qdisc_lookup(dev, tcm->tcm_handle);

        /* Change qdisc parameters */

        if (n->nlmsg_flags&NLM_F_EXCL)

        if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))

        err = qdisc_change(q, tca);
                qdisc_notify(skb, n, clid, NULL, q);

        if (!(n->nlmsg_flags&NLM_F_CREATE))

        if (clid == TC_H_INGRESS)
                q = qdisc_create(dev, tcm->tcm_parent, tca, &err);

                q = qdisc_create(dev, tcm->tcm_handle, tca, &err);

        struct Qdisc *old_q = NULL;
        err = qdisc_graft(dev, p, clid, q, &old_q);

        spin_lock_bh(&dev->queue_lock);

        spin_unlock_bh(&dev->queue_lock);

        qdisc_notify(skb, n, clid, old_q, q);

        spin_lock_bh(&dev->queue_lock);
        qdisc_destroy(old_q);
        spin_unlock_bh(&dev->queue_lock);

int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
{
        spin_lock_bh(st->lock);
        /* Dump every tc_stats field that precedes the lock pointer, which
           is the last member of the structure. */
        RTA_PUT(skb, TCA_STATS, (char *)&st->lock - (char *)st, st);
        spin_unlock_bh(st->lock);

        spin_unlock_bh(st->lock);

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                         u32 pid, u32 seq, unsigned flags, int event)
{
        struct nlmsghdr *nlh;
        unsigned char *b = skb->tail;

        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
        nlh->nlmsg_flags = flags;
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = atomic_read(&q->refcnt);
        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
        q->stats.qlen = q->q.qlen;
        if (qdisc_copy_stats(skb, &q->stats))
        nlh->nlmsg_len = skb->tail - b;

        skb_trim(skb, b - skb->data);

static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                        u32 clid, struct Qdisc *old, struct Qdisc *new)
{
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

        if (old && old->handle) {
                if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)

        if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)

        return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net_device *dev;

        s_q_idx = q_idx = cb->args[1];
        read_lock(&dev_base_lock);
        for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {

                read_lock(&qdisc_tree_lock);
                for (q = dev->qdisc_list, q_idx = 0; q;
                     q = q->next, q_idx++) {

                        if (tc_fill_qdisc(skb, q, 0, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
                                read_unlock(&qdisc_tree_lock);

                read_unlock(&qdisc_tree_lock);

        read_unlock(&dev_base_lock);



/************************************************
 *      Traffic classes manipulation.           *
 ************************************************/



static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct tcmsg *tcm = NLMSG_DATA(n);
        struct rtattr **tca = arg;
        struct net_device *dev;
        struct Qdisc *q = NULL;
        struct Qdisc_class_ops *cops;
        unsigned long cl = 0;
        unsigned long new_cl;
        u32 pid = tcm->tcm_parent;
        u32 clid = tcm->tcm_handle;
        u32 qid = TC_H_MAJ(clid);

        if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)

        /*
           parent == TC_H_UNSPEC - unspecified parent.
           parent == TC_H_ROOT   - class is root, which has no parent.
           parent == X:0         - parent is root class.
           parent == X:Y         - parent is a node in the hierarchy.
           parent == 0:Y         - parent is X:Y, where X:0 is the qdisc.

           handle == 0:0         - generate a handle from the kernel pool.
           handle == 0:Y         - class is X:Y, where X:0 is the qdisc.
           handle == X:Y         - the handle is fully specified.
           handle == X:0         - root class.
         */
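
        /* For orientation, an illustrative example of how user space
         * typically fills these fields in: a request like
         * "tc class add dev eth0 parent 1:0 classid 1:1 ..." arrives with
         * tcm_parent == 0x00010000 ("1:0") and tcm_handle == 0x00010001
         * ("1:1"), so qid below resolves to the "1:" qdisc.
         */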

        /* Step 1. Determine qdisc handle X:0 */

        if (pid != TC_H_ROOT) {
                u32 qid1 = TC_H_MAJ(pid);

                        /* If both majors are known, they must be identical. */

                        qid = dev->qdisc_sleeping->handle;

                /* Now qid is a genuine qdisc handle, consistent with both
                   parent and child.

                   TC_H_MAJ(pid) may still be unspecified; complete it now.
                 */

                        pid = TC_H_MAKE(qid, pid);

                        qid = dev->qdisc_sleeping->handle;

        /* OK. Locate the qdisc. */
        if ((q = qdisc_lookup(dev, qid)) == NULL)

        /* And check that it supports classes. */
        cops = q->ops->cl_ops;

        /* Now try to get the class. */

        if (pid == TC_H_ROOT)

        clid = TC_H_MAKE(qid, clid);

        cl = cops->get(q, clid);

                if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))

        switch (n->nlmsg_type) {

                if (n->nlmsg_flags&NLM_F_EXCL)

                err = cops->delete(q, cl);

                        tclass_notify(skb, n, q, cl, RTM_DELTCLASS);

                err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);

        err = cops->change(q, clid, pid, tca, &new_cl);

                tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
                          unsigned long cl,
                          u32 pid, u32 seq, unsigned flags, int event)
{
        struct nlmsghdr *nlh;
        unsigned char *b = skb->tail;

        nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
        nlh->nlmsg_flags = flags;
        tcm = NLMSG_DATA(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
        tcm->tcm_parent = q->handle;
        tcm->tcm_handle = q->handle;

        RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
        if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
        nlh->nlmsg_len = skb->tail - b;

        skb_trim(skb, b - skb->data);

static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
                         struct Qdisc *q, unsigned long cl, int event)
{
        u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

        if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {

        return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

struct qdisc_dump_args
{
        struct qdisc_walker     w;
        struct sk_buff          *skb;
        struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
        struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

        return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net_device *dev;

        struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
        struct qdisc_dump_args arg;

        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
        if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)

        read_lock(&qdisc_tree_lock);
        for (q = dev->qdisc_list, t = 0; q; q = q->next, t++) {
                if (t < s_t) continue;
                if (!q->ops->cl_ops) continue;
                if (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)

                memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
                arg.w.fn = qdisc_class_dump;

                arg.w.skip = cb->args[1];

                q->ops->cl_ops->walk(q, &arg.w);
                cb->args[1] = arg.w.count;

        read_unlock(&qdisc_tree_lock);

int psched_us_per_tick = 1;
int psched_tick_per_us = 1;

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
        seq_printf(seq, "%08x %08x %08x %08x\n",
                   psched_tick_per_us, psched_us_per_tick,

static int psched_open(struct inode *inode, struct file *file)
{
        return single_open(file, psched_show, PDE(inode)->data);

static struct file_operations psched_fops = {
        .owner          = THIS_MODULE,
        .open           = psched_open,
        .llseek         = seq_lseek,
        .release        = single_release,

#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
int psched_tod_diff(int delta_sec, int bound)
{
        if (bound <= 1000000 || delta_sec > (0x7FFFFFFF/1000000)-1)
        delta = delta_sec * 1000000;

psched_time_t psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;

#ifdef PSCHED_WATCHER
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

static struct timer_list psched_timer = TIMER_INITIALIZER(psched_tick, 0, 0);

static void psched_tick(unsigned long dummy)
{
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
        psched_time_t dummy_stamp;
        PSCHED_GET_TIME(dummy_stamp);
        /* This is OK for CPUs up to 4GHz */
        psched_timer.expires = jiffies + 1*HZ;

        unsigned long now = jiffies;
        psched_time_base += ((u64)(now - psched_time_mark)) << PSCHED_JSCALE;
        psched_time_mark = now;
        psched_timer.expires = now + 60*60*HZ;

        add_timer(&psched_timer);

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
int __init psched_calibrate_clock(void)
{
        psched_time_t stamp, stamp1;
        struct timeval tv, tv1;
        psched_tdiff_t delay;

#ifdef PSCHED_WATCHER

        stop = jiffies + HZ/10;
        PSCHED_GET_TIME(stamp);
        do_gettimeofday(&tv);
        while (time_before(jiffies, stop)) {

        PSCHED_GET_TIME(stamp1);
        do_gettimeofday(&tv1);

        delay = PSCHED_TDIFF(stamp1, stamp);
        rdelay = tv1.tv_usec - tv.tv_usec;
        rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;

        psched_tick_per_us = delay;
        while ((delay >>= 1) != 0)
                psched_clock_scale++;
        psched_us_per_tick = 1<<psched_clock_scale;
        psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
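
/*
   A rough worked example (assuming the elided lines between the time
   samples and the assignment normalize "delay" to clock ticks per
   microsecond): on a 1GHz CPU clock, delay ends up around 1000, so
   psched_clock_scale becomes 9, psched_us_per_tick = 1 << 9 = 512, and
   with HZ == 100, psched_clock_per_hz = (1000 * 10000) >> 9 ~= 19531.
 */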

static int __init pktsched_init(void)
{
        struct rtnetlink_link *link_p;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
        if (psched_calibrate_clock() < 0)
#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
        psched_tick_per_us = HZ<<PSCHED_JSCALE;
        psched_us_per_tick = 1000000;
#ifdef PSCHED_WATCHER

        link_p = rtnetlink_links[PF_UNSPEC];

        /* Set up the rtnetlink links.  It is done here to avoid exporting
           a large number of public symbols.
         */

        link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
        link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
        link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
        link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
        link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
        link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;

        register_qdisc(&pfifo_qdisc_ops);
        register_qdisc(&bfifo_qdisc_ops);
        proc_net_fops_create("psched", 0, &psched_fops);

subsys_initcall(pktsched_init);

EXPORT_SYMBOL(qdisc_copy_stats);
EXPORT_SYMBOL(qdisc_get_rtab);
EXPORT_SYMBOL(qdisc_put_rtab);
EXPORT_SYMBOL(register_qdisc);
EXPORT_SYMBOL(unregister_qdisc);