vserver 1.9.5.x5
[linux-2.6.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <asm/uaccess.h>
13 #include <asm/system.h>
14 #include <linux/bitops.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/socket.h>
21 #include <linux/sockios.h>
22 #include <linux/in.h>
23 #include <linux/errno.h>
24 #include <linux/interrupt.h>
25 #include <linux/if_ether.h>
26 #include <linux/inet.h>
27 #include <linux/netdevice.h>
28 #include <linux/etherdevice.h>
29 #include <linux/notifier.h>
30 #include <linux/init.h>
31 #include <net/ip.h>
32 #include <net/route.h>
33 #include <linux/skbuff.h>
34 #include <linux/moduleparam.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38 /*
39    How to set it up.
40    -----------------
41
42    After loading this module you will find a new device teqlN
43    and new qdisc with the same name. To join a slave to the equalizer
44    you should just set this qdisc on a device f.e.
45
46    # tc qdisc add dev eth0 root teql0
47    # tc qdisc add dev eth1 root teql0
48
49    That's all. Full PnP 8)
50
51    Applicability.
52    --------------
53
54    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
55       signal and generate EOI events. If you want to equalize virtual devices
56       like tunnels, use a normal eql device.
57    2. This device puts no limitations on physical slave characteristics
58       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
59       Certainly, a large difference in link speeds will make the resulting
60       equalized link unusable, because of huge packet reordering.
61       I estimate an upper useful difference as ~10 times.
62    3. If the slave requires address resolution, only protocols using
63       neighbour cache (IPv4/IPv6) will work over the equalized link.
64       Other protocols are still allowed to use the slave device directly,
65       which will not break load balancing, though native slave
66       traffic will have the highest priority.  */
67
68 struct teql_master
69 {
70         struct Qdisc_ops qops;
71         struct net_device *dev;
72         struct Qdisc *slaves;
73         struct list_head master_list;
74         struct net_device_stats stats;
75 };
76
77 struct teql_sched_data
78 {
79         struct Qdisc *next;
80         struct teql_master *m;
81         struct neighbour *ncache;
82         struct sk_buff_head q;
83 };
84
/* Successor of slave qdisc q in its master's circular slave list. */
#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

/*
 * Link-type flags that the master device derives from its slaves.
 * IFF_MULTICAST has to be part of the mask: teql_master_open() and
 * teql_qdisc_init() both clear it per slave and then apply FMASK, which
 * only has an effect when the bit is actually in the mask.
 */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST)
88
89 /* "teql*" qdisc routines */
90
91 static int
92 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
93 {
94         struct net_device *dev = sch->dev;
95         struct teql_sched_data *q = qdisc_priv(sch);
96
97         __skb_queue_tail(&q->q, skb);
98         if (q->q.qlen <= dev->tx_queue_len) {
99                 sch->bstats.bytes += skb->len;
100                 sch->bstats.packets++;
101                 return 0;
102         }
103
104         __skb_unlink(skb, &q->q);
105         kfree_skb(skb);
106         sch->qstats.drops++;
107         return NET_XMIT_DROP;
108 }
109
110 static int
111 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct teql_sched_data *q = qdisc_priv(sch);
114
115         __skb_queue_head(&q->q, skb);
116         sch->qstats.requeues++;
117         return 0;
118 }
119
120 static struct sk_buff *
121 teql_dequeue(struct Qdisc* sch)
122 {
123         struct teql_sched_data *dat = qdisc_priv(sch);
124         struct sk_buff *skb;
125
126         skb = __skb_dequeue(&dat->q);
127         if (skb == NULL) {
128                 struct net_device *m = dat->m->dev->qdisc->dev;
129                 if (m) {
130                         dat->m->slaves = sch;
131                         netif_wake_queue(m);
132                 }
133         }
134         sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
135         return skb;
136 }
137
138 static __inline__ void
139 teql_neigh_release(struct neighbour *n)
140 {
141         if (n)
142                 neigh_release(n);
143 }
144
145 static void
146 teql_reset(struct Qdisc* sch)
147 {
148         struct teql_sched_data *dat = qdisc_priv(sch);
149
150         skb_queue_purge(&dat->q);
151         sch->q.qlen = 0;
152         teql_neigh_release(xchg(&dat->ncache, NULL));
153 }
154
155 static void
156 teql_destroy(struct Qdisc* sch)
157 {
158         struct Qdisc *q, *prev;
159         struct teql_sched_data *dat = qdisc_priv(sch);
160         struct teql_master *master = dat->m;
161
162         if ((prev = master->slaves) != NULL) {
163                 do {
164                         q = NEXT_SLAVE(prev);
165                         if (q == sch) {
166                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
167                                 if (q == master->slaves) {
168                                         master->slaves = NEXT_SLAVE(q);
169                                         if (q == master->slaves) {
170                                                 master->slaves = NULL;
171                                                 spin_lock_bh(&master->dev->queue_lock);
172                                                 qdisc_reset(master->dev->qdisc);
173                                                 spin_unlock_bh(&master->dev->queue_lock);
174                                         }
175                                 }
176                                 skb_queue_purge(&dat->q);
177                                 teql_neigh_release(xchg(&dat->ncache, NULL));
178                                 break;
179                         }
180                                 
181                 } while ((prev = q) != master->slaves);
182         }
183 }
184
185 static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
186 {
187         struct net_device *dev = sch->dev;
188         struct teql_master *m = (struct teql_master*)sch->ops;
189         struct teql_sched_data *q = qdisc_priv(sch);
190
191         if (dev->hard_header_len > m->dev->hard_header_len)
192                 return -EINVAL;
193
194         if (m->dev == dev)
195                 return -ELOOP;
196
197         q->m = m;
198
199         skb_queue_head_init(&q->q);
200
201         if (m->slaves) {
202                 if (m->dev->flags & IFF_UP) {
203                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
204                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
205                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
206                             || dev->mtu < m->dev->mtu)
207                                 return -EINVAL;
208                 } else {
209                         if (!(dev->flags&IFF_POINTOPOINT))
210                                 m->dev->flags &= ~IFF_POINTOPOINT;
211                         if (!(dev->flags&IFF_BROADCAST))
212                                 m->dev->flags &= ~IFF_BROADCAST;
213                         if (!(dev->flags&IFF_MULTICAST))
214                                 m->dev->flags &= ~IFF_MULTICAST;
215                         if (dev->mtu < m->dev->mtu)
216                                 m->dev->mtu = dev->mtu;
217                 }
218                 q->next = NEXT_SLAVE(m->slaves);
219                 NEXT_SLAVE(m->slaves) = sch;
220         } else {
221                 q->next = sch;
222                 m->slaves = sch;
223                 m->dev->mtu = dev->mtu;
224                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
225         }
226         return 0;
227 }
228
229 /* "teql*" netdevice routines */
230
231 static int
232 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
233 {
234         struct teql_sched_data *q = qdisc_priv(dev->qdisc);
235         struct neighbour *mn = skb->dst->neighbour;
236         struct neighbour *n = q->ncache;
237
238         if (mn->tbl == NULL)
239                 return -EINVAL;
240         if (n && n->tbl == mn->tbl &&
241             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
242                 atomic_inc(&n->refcnt);
243         } else {
244                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
245                 if (IS_ERR(n))
246                         return PTR_ERR(n);
247         }
248         if (neigh_event_send(n, skb_res) == 0) {
249                 int err;
250                 read_lock(&n->lock);
251                 err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
252                 read_unlock(&n->lock);
253                 if (err < 0) {
254                         neigh_release(n);
255                         return -EINVAL;
256                 }
257                 teql_neigh_release(xchg(&q->ncache, n));
258                 return 0;
259         }
260         neigh_release(n);
261         return (skb_res == NULL) ? -EAGAIN : 1;
262 }
263
264 static __inline__ int
265 teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
266 {
267         if (dev->hard_header == NULL ||
268             skb->dst == NULL ||
269             skb->dst->neighbour == NULL)
270                 return 0;
271         return __teql_resolve(skb, skb_res, dev);
272 }
273
274 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
275 {
276         struct teql_master *master = (void*)dev->priv;
277         struct Qdisc *start, *q;
278         int busy;
279         int nores;
280         int len = skb->len;
281         struct sk_buff *skb_res = NULL;
282
283         start = master->slaves;
284
285 restart:
286         nores = 0;
287         busy = 0;
288
289         if ((q = start) == NULL)
290                 goto drop;
291
292         do {
293                 struct net_device *slave = q->dev;
294                 
295                 if (slave->qdisc_sleeping != q)
296                         continue;
297                 if (netif_queue_stopped(slave) || ! netif_running(slave)) {
298                         busy = 1;
299                         continue;
300                 }
301
302                 switch (teql_resolve(skb, skb_res, slave)) {
303                 case 0:
304                         if (spin_trylock(&slave->xmit_lock)) {
305                                 slave->xmit_lock_owner = smp_processor_id();
306                                 if (!netif_queue_stopped(slave) &&
307                                     slave->hard_start_xmit(skb, slave) == 0) {
308                                         slave->xmit_lock_owner = -1;
309                                         spin_unlock(&slave->xmit_lock);
310                                         master->slaves = NEXT_SLAVE(q);
311                                         netif_wake_queue(dev);
312                                         master->stats.tx_packets++;
313                                         master->stats.tx_bytes += len;
314                                         return 0;
315                                 }
316                                 slave->xmit_lock_owner = -1;
317                                 spin_unlock(&slave->xmit_lock);
318                         }
319                         if (netif_queue_stopped(dev))
320                                 busy = 1;
321                         break;
322                 case 1:
323                         master->slaves = NEXT_SLAVE(q);
324                         return 0;
325                 default:
326                         nores = 1;
327                         break;
328                 }
329                 __skb_pull(skb, skb->nh.raw - skb->data);
330         } while ((q = NEXT_SLAVE(q)) != start);
331
332         if (nores && skb_res == NULL) {
333                 skb_res = skb;
334                 goto restart;
335         }
336
337         if (busy) {
338                 netif_stop_queue(dev);
339                 return 1;
340         }
341         master->stats.tx_errors++;
342
343 drop:
344         master->stats.tx_dropped++;
345         dev_kfree_skb(skb);
346         return 0;
347 }
348
349 static int teql_master_open(struct net_device *dev)
350 {
351         struct Qdisc * q;
352         struct teql_master *m = (void*)dev->priv;
353         int mtu = 0xFFFE;
354         unsigned flags = IFF_NOARP|IFF_MULTICAST;
355
356         if (m->slaves == NULL)
357                 return -EUNATCH;
358
359         flags = FMASK;
360
361         q = m->slaves;
362         do {
363                 struct net_device *slave = q->dev;
364
365                 if (slave == NULL)
366                         return -EUNATCH;
367
368                 if (slave->mtu < mtu)
369                         mtu = slave->mtu;
370                 if (slave->hard_header_len > LL_MAX_HEADER)
371                         return -EINVAL;
372
373                 /* If all the slaves are BROADCAST, master is BROADCAST
374                    If all the slaves are PtP, master is PtP
375                    Otherwise, master is NBMA.
376                  */
377                 if (!(slave->flags&IFF_POINTOPOINT))
378                         flags &= ~IFF_POINTOPOINT;
379                 if (!(slave->flags&IFF_BROADCAST))
380                         flags &= ~IFF_BROADCAST;
381                 if (!(slave->flags&IFF_MULTICAST))
382                         flags &= ~IFF_MULTICAST;
383         } while ((q = NEXT_SLAVE(q)) != m->slaves);
384
385         m->dev->mtu = mtu;
386         m->dev->flags = (m->dev->flags&~FMASK) | flags;
387         netif_start_queue(m->dev);
388         return 0;
389 }
390
/* stop method of the master device: stops its transmit queue, returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
396
397 static struct net_device_stats *teql_master_stats(struct net_device *dev)
398 {
399         struct teql_master *m = (void*)dev->priv;
400         return &m->stats;
401 }
402
403 static int teql_master_mtu(struct net_device *dev, int new_mtu)
404 {
405         struct teql_master *m = (void*)dev->priv;
406         struct Qdisc *q;
407
408         if (new_mtu < 68)
409                 return -EINVAL;
410
411         q = m->slaves;
412         if (q) {
413                 do {
414                         if (new_mtu > q->dev->mtu)
415                                 return -EINVAL;
416                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
417         }
418
419         dev->mtu = new_mtu;
420         return 0;
421 }
422
423 static __init void teql_master_setup(struct net_device *dev)
424 {
425         struct teql_master *master = dev->priv;
426         struct Qdisc_ops *ops = &master->qops;
427
428         master->dev     = dev;
429         ops->priv_size  = sizeof(struct teql_sched_data);
430         
431         ops->enqueue    =       teql_enqueue;
432         ops->dequeue    =       teql_dequeue;
433         ops->requeue    =       teql_requeue;
434         ops->init       =       teql_qdisc_init;
435         ops->reset      =       teql_reset;
436         ops->destroy    =       teql_destroy;
437         ops->owner      =       THIS_MODULE;
438
439         dev->open               = teql_master_open;
440         dev->hard_start_xmit    = teql_master_xmit;
441         dev->stop               = teql_master_close;
442         dev->get_stats          = teql_master_stats;
443         dev->change_mtu         = teql_master_mtu;
444         dev->type               = ARPHRD_VOID;
445         dev->mtu                = 1500;
446         dev->tx_queue_len       = 100;
447         dev->flags              = IFF_NOARP;
448         dev->hard_header_len    = LL_MAX_HEADER;
449         SET_MODULE_OWNER(dev);
450 }
451
452 static LIST_HEAD(master_dev_list);
453 static int max_equalizers = 1;
454 module_param(max_equalizers, int, 0);
455 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
456
457 static int __init teql_init(void)
458 {
459         int i;
460         int err = -ENODEV;
461
462         for (i = 0; i < max_equalizers; i++) {
463                 struct net_device *dev;
464                 struct teql_master *master;
465
466                 dev = alloc_netdev(sizeof(struct teql_master),
467                                   "teql%d", teql_master_setup);
468                 if (!dev) {
469                         err = -ENOMEM;
470                         break;
471                 }
472
473                 if ((err = register_netdev(dev))) {
474                         free_netdev(dev);
475                         break;
476                 }
477
478                 master = dev->priv;
479
480                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
481                 err = register_qdisc(&master->qops);
482
483                 if (err) {
484                         unregister_netdev(dev);
485                         free_netdev(dev);
486                         break;
487                 }
488
489                 list_add_tail(&master->master_list, &master_dev_list);
490         }
491         return i ? 0 : err;
492 }
493
494 static void __exit teql_exit(void) 
495 {
496         struct teql_master *master, *nxt;
497
498         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
499
500                 list_del(&master->master_list);
501
502                 unregister_qdisc(&master->qops);
503                 unregister_netdev(master->dev);
504                 free_netdev(master->dev);
505         }
506 }
507
508 module_init(teql_init);
509 module_exit(teql_exit);
510
511 MODULE_LICENSE("GPL");