/* net/core/neighbour.c -- from linux-2.6.6
 * (ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2)
 */
/*
 *      Generic address resolution entity
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;

/*
   Neighbour hash table buckets are protected by the rwlock tbl->lock.

   - All scans of and updates to the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will deadlock if the backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock first.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   entry fields:
    - the timer
    - the resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simple and not to call back
   into the neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
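
/*
 * Illustrative sketch (not from the original source) of the pattern the
 * rules above imply for code that needs to do real work on an entry:
 *
 *	read_lock_bh(&tbl->lock);
 *	n = <scan the hash bucket for the entry>;
 *	if (n)
 *		neigh_hold(n);
 *	read_unlock_bh(&tbl->lock);
 *	if (n) {
 *		<do the non-trivial work outside the table lock>
 *		neigh_release(n);
 *	}
 */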

static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;

static int neigh_blackhole(struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}

/*
 * Returns a value uniformly distributed over the interval
 * (1/2)*base ... (3/2)*base.  This corresponds to the default IPv6
 * settings and is not overridable, because it is a genuinely
 * reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        return (net_random() % base) + (base >> 1);
}
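
/*
 * Worked example (not from the original source): with base = 30 * HZ the
 * expression (net_random() % base) + (base >> 1) yields a value in
 * [15 * HZ, 45 * HZ), i.e. a reachable time of roughly 15 to 45 seconds.
 */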


static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;

        for (i = 0; i <= NEIGH_HASHMASK; i++) {
                struct neighbour *n, **np;

                np = &tbl->hash_buckets[i];
                write_lock_bh(&tbl->lock);
                while ((n = *np) != NULL) {
                        /* A neighbour record may be discarded if:
                           - nobody refers to it.
                           - it is not permanent.
                           - (NEW and probably wrong)
                             INCOMPLETE entries are kept for at least
                             n->parms->retrans_time; otherwise we could
                             flood the network with resolution requests.
                             It is not clear which is worse: table overflow
                             or flooding.
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT) &&
                            (n->nud_state != NUD_INCOMPLETE ||
                             jiffies - n->used > n->parms->retrans_time)) {
                                *np     = n->next;
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
                write_unlock_bh(&tbl->lock);
        }

        tbl->last_flush = jiffies;
        return shrunk;
}

static int neigh_del_timer(struct neighbour *n)
{
        if ((n->nud_state & NUD_IN_TIMER) &&
            del_timer(&n->timer)) {
                neigh_release(n);
                return 1;
        }
        return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(list)) != NULL) {
                dev_put(skb->dev);
                kfree_skb(skb);
        }
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        int i;

        write_lock_bh(&tbl->lock);

        for (i=0; i <= NEIGH_HASHMASK; i++) {
                struct neighbour *n, **np;

                np = &tbl->hash_buckets[i];
                while ((n = *np) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        *np = n->next;
                        write_lock_bh(&n->lock);
                        n->dead = 1;
                        neigh_del_timer(n);
                        write_unlock_bh(&n->lock);
                        neigh_release(n);
                }
        }

        write_unlock_bh(&tbl->lock);
}

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        int i;

        write_lock_bh(&tbl->lock);

        for (i = 0; i <= NEIGH_HASHMASK; i++) {
                struct neighbour *n, **np = &tbl->hash_buckets[i];

                while ((n = *np) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        *np = n->next;
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation:
                                   we must destroy the neighbour entry,
                                   but someone still uses it.

                                   Destruction will be delayed until
                                   the last user releases it, but
                                   we must kill the timers etc. and move
                                   it to a safe state.
                                 */
                                n->parms = &tbl->parms;
                                skb_queue_purge(&n->arp_queue);
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_release(n);
                }
        }

        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);

        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}

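/*
 * Rough summary of the thresholds used by neigh_alloc() below: once the
 * table grows past gc_thresh2 a forced GC is attempted at most once every
 * 5 seconds; past gc_thresh3 one is attempted unconditionally, and the
 * allocation fails if the forced GC frees nothing and the table is still
 * above gc_thresh3.
 */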
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;

        if (tbl->entries > tbl->gc_thresh3 ||
            (tbl->entries > tbl->gc_thresh2 &&
             now - tbl->last_flush > 5 * HZ)) {
                if (!neigh_forced_gc(tbl) &&
                    tbl->entries > tbl->gc_thresh3)
                        goto out;
        }

        n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
        if (!n)
                goto out;

        memset(n, 0, tbl->entry_size);

        skb_queue_head_init(&n->arp_queue);
        n->lock           = RW_LOCK_UNLOCKED;
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        n->parms          = &tbl->parms;
        init_timer(&n->timer);
        n->timer.function = neigh_timer_handler;
        n->timer.data     = (unsigned long)n;
        tbl->stats.allocs++;
        neigh_glbl_allocs++;
        tbl->entries++;
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;
out:
        return n;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val = tbl->hash(pkey, dev);

        read_lock_bh(&tbl->lock);
        for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
                        break;
                }
        }
        read_unlock_bh(&tbl->lock);
        return n;
}

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl);

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

        hash_val = tbl->hash(pkey, dev);

        write_lock_bh(&tbl->lock);
        for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        neigh_hold(n1);
                        write_unlock_bh(&tbl->lock);
                        rc = n1;
                        goto out_neigh_release;
                }
        }

        n->next = tbl->hash_buckets[hash_val];
        tbl->hash_buckets[hash_val] = n;
        n->dead = 0;
        neigh_hold(n);
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
out:
        return rc;
out_neigh_release:
        neigh_release(n);
        goto out;
}
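
/*
 * Note for callers: neigh_create() never returns NULL.  On failure it
 * returns an ERR_PTR() value (e.g. -ENOBUFS when allocation fails), so
 * the result should be checked with IS_ERR() rather than against NULL.
 */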

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        int key_len = tbl->key_len;
        u32 hash_val = *(u32 *)(pkey + key_len - 4);

        hash_val ^= (hash_val >> 16);
        hash_val ^= hash_val >> 8;
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;

        read_lock_bh(&tbl->lock);

        for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
                if (!memcmp(n->key, pkey, key_len) &&
                    (n->dev == dev || !n->dev)) {
                        read_unlock_bh(&tbl->lock);
                        goto out;
                }
        }
        read_unlock_bh(&tbl->lock);
        n = NULL;
        if (!creat)
                goto out;

        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        memcpy(n->key, pkey, key_len);
        n->dev = dev;

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                kfree(n);
                n = NULL;
                goto out;
        }

        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}


int pneigh_delete(struct neigh_table *tbl, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        int key_len = tbl->key_len;
        u32 hash_val = *(u32 *)(pkey + key_len - 4);

        hash_val ^= (hash_val >> 16);
        hash_val ^= hash_val >> 8;
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;

        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
                        write_lock_bh(&tbl->lock);
                        *np = n->next;
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        kfree(n);
                        return 0;
                }
        }
        return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
                                kfree(n);
                                continue;
                        }
                        np = &n->next;
                }
        }
        return -ENOENT;
}


/*
 *      Destroy a neighbour entry.  The entry must already have been removed
 *      from its table (neigh->dead set) and must have no live references.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct hh_cache *hh;

        if (!neigh->dead) {
                printk(KERN_WARNING
                       "Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        if (neigh_del_timer(neigh))
                printk(KERN_WARNING "Impossible event.\n");

        while ((hh = neigh->hh) != NULL) {
                neigh->hh = hh->hh_next;
                hh->hh_next = NULL;
                write_lock_bh(&hh->hh_lock);
                hh->hh_output = neigh_blackhole;
                write_unlock_bh(&hh->hh_lock);
                if (atomic_dec_and_test(&hh->hh_refcnt))
                        kfree(hh);
        }

        if (neigh->ops && neigh->ops->destructor)
                (neigh->ops->destructor)(neigh);

        skb_queue_purge(&neigh->arp_queue);

        dev_put(neigh->dev);

        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

        neigh_glbl_allocs--;
        neigh->tbl->entries--;
        kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        struct hh_cache *hh;

        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

        neigh->output = neigh->ops->output;

        for (hh = neigh->hh; hh; hh = hh->hh_next)
                hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
        struct hh_cache *hh;

        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

        neigh->output = neigh->ops->connected_output;

        for (hh = neigh->hh; hh; hh = hh->hh_next)
                hh->hh_output = neigh->ops->hh_output;
}

/*
   Transitions NUD_STALE <-> NUD_REACHABLE do not occur while the fast
   path is in use: there are no timers associated with these states, and
   there is no time to check the state when sending.
   neigh_periodic_timer periodically checks neigh->confirmed and moves
   NUD_REACHABLE -> NUD_STALE.

   If a routine wants to know the TRUE state of an entry, it calls
   neigh_sync before checking the state.

   Called with write_locked neigh.
 */

static void neigh_sync(struct neighbour *n)
{
        unsigned long now = jiffies;
        u8 state = n->nud_state;

        if (state & (NUD_NOARP | NUD_PERMANENT))
                return;
        if (state & NUD_REACHABLE) {
                if (now - n->confirmed > n->parms->reachable_time) {
                        n->nud_state = NUD_STALE;
                        neigh_suspect(n);
                }
        } else if (state & NUD_VALID) {
                if (now - n->confirmed < n->parms->reachable_time) {
                        neigh_del_timer(n);
                        n->nud_state = NUD_REACHABLE;
                        neigh_connect(n);
                }
        }
}

static void neigh_periodic_timer(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        unsigned long now = jiffies;
        int i;


        write_lock(&tbl->lock);

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (now - tbl->last_rand > 300 * HZ) {
                struct neigh_parms *p;
                tbl->last_rand = now;
                for (p = &tbl->parms; p; p = p->next)
                        p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
        }

        for (i = 0; i <= NEIGH_HASHMASK; i++) {
                struct neighbour *n, **np;

                np = &tbl->hash_buckets[i];
                while ((n = *np) != NULL) {
                        unsigned state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        if ((long)(n->used - n->confirmed) < 0)
                                n->used = n->confirmed;

                        if (atomic_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             now - n->used > n->parms->gc_staletime)) {
                                *np = n->next;
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_release(n);
                                continue;
                        }

                        if (n->nud_state & NUD_REACHABLE &&
                            now - n->confirmed > n->parms->reachable_time) {
                                n->nud_state = NUD_STALE;
                                neigh_suspect(n);
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
        }

        mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
        write_unlock(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
        struct neigh_parms *p = n->parms;
        return (n->nud_state & NUD_PROBE ?
                p->ucast_probes :
                p->ucast_probes + p->app_probes + p->mcast_probes);
}


/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now = jiffies;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;

        if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
                printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
                goto out;
        }

        if ((state & NUD_VALID) &&
            now - neigh->confirmed < neigh->parms->reachable_time) {
                neigh->nud_state = NUD_REACHABLE;
                NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                neigh_connect(neigh);
                goto out;
        }
        if (state == NUD_DELAY) {
                NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                neigh->nud_state = NUD_PROBE;
                atomic_set(&neigh->probes, 0);
        }

        if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                struct sk_buff *skb;

                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh->tbl->stats.res_failed++;
                NEIGH_PRINTK2("neigh %p is failed.\n", neigh);

                /* This is a very delicate place. report_unreachable is a
                   very complicated routine.  In particular, it can hit
                   the same neighbour entry!

                   So we try to be careful and avoid a dead loop. --ANK
                 */
                while (neigh->nud_state == NUD_FAILED &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        write_unlock(&neigh->lock);
                        neigh->ops->error_report(neigh, skb);
                        write_lock(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
                goto out;
        }

        neigh->timer.expires = now + neigh->parms->retrans_time;
        add_timer(&neigh->timer);
        write_unlock(&neigh->lock);

        neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
        atomic_inc(&neigh->probes);
        return;

out:
        write_unlock(&neigh->lock);
#ifdef CONFIG_ARPD
        if (notify && neigh->parms->app_probes)
                neigh_app_notify(neigh);
#endif
        neigh_release(neigh);
}

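/*
 * In short, __neigh_event_send() drives the resolution state machine when
 * the entry is not already CONNECTED/DELAY/PROBE: NONE/FAILED becomes
 * INCOMPLETE and a solicit is sent (or FAILED if no probes are configured),
 * INCOMPLETE queues the skb (bounded by parms->queue_len), and STALE moves
 * to DELAY.  It returns 0 if the caller may transmit immediately, 1 if the
 * packet was queued or dropped pending resolution.
 */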
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh_hold(neigh);
                        neigh->timer.expires = jiffies +
                                               neigh->parms->retrans_time;
                        add_timer(&neigh->timer);
                        write_unlock_bh(&neigh->lock);
                        neigh->ops->solicit(neigh, skb);
                        atomic_inc(&neigh->probes);
                        write_lock_bh(&neigh->lock);
                } else {
                        neigh->nud_state = NUD_FAILED;
                        write_unlock_bh(&neigh->lock);

                        if (skb)
                                kfree_skb(skb);
                        return 1;
                }
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        if (skb_queue_len(&neigh->arp_queue) >=
                            neigh->parms->queue_len) {
                                struct sk_buff *buff;
                                buff = neigh->arp_queue.next;
                                __skb_unlink(buff, &neigh->arp_queue);
                                kfree_skb(buff);
                        }
                        __skb_queue_tail(&neigh->arp_queue, skb);
                }
                rc = 1;
        } else if (neigh->nud_state == NUD_STALE) {
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh_hold(neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
                add_timer(&neigh->timer);
                rc = 0;
        }
out_unlock_bh:
        write_unlock_bh(&neigh->lock);
        return rc;
}

static __inline__ void neigh_update_hhs(struct neighbour *neigh)
{
        struct hh_cache *hh;
        void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
                neigh->dev->header_cache_update;

        if (update) {
                for (hh = neigh->hh; hh; hh = hh->hh_next) {
                        write_lock_bh(&hh->hh_lock);
                        update(hh, neigh->dev, neigh->ha);
                        write_unlock_bh(&hh->hh_lock);
                }
        }
}


/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- override == 1 allows overriding an existing lladdr if it differs.
   -- arp == 0 means that the change is administrative.

   The caller MUST hold a reference count on the entry.
 */
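
/*
 * Return values (as implemented below): 0 on success or when nothing needs
 * to change, -EPERM when an ARP-originated update (arp != 0) hits a NOARP
 * or PERMANENT entry, and -EINVAL when no lladdr is supplied and the entry
 * has no valid cached address to fall back on.
 */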

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 int override, int arp)
{
        u8 old;
        int err;
#ifdef CONFIG_ARPD
        int notify = 0;
#endif
        struct net_device *dev;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        if (arp && (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
#ifdef CONFIG_ARPD
                notify = old & NUD_VALID;
#endif
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if (old & NUD_VALID) {
                        if (!memcmp(lladdr, neigh->ha, dev->addr_len))
                                lladdr = neigh->ha;
                        else if (!override)
                                goto out;
                }
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        neigh_sync(neigh);
        old = neigh->nud_state;
        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If the entry was valid and the address has not changed,
           do not change the entry state if the new one is STALE.
         */
        err = 0;
        if ((old & NUD_VALID) && lladdr == neigh->ha &&
            (new == old || (new == NUD_STALE && (old & NUD_CONNECTED))))
                goto out;

        neigh_del_timer(neigh);
        neigh->nud_state = new;
        if (lladdr != neigh->ha) {
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
#ifdef CONFIG_ARPD
                notify = 1;
#endif
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct neighbour *n1 = neigh;
                        write_unlock_bh(&neigh->lock);
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (skb->dst && skb->dst->neighbour)
                                n1 = skb->dst->neighbour;
                        n1->output(skb);
                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
        }
out:
        write_unlock_bh(&neigh->lock);
#ifdef CONFIG_ARPD
        if (notify && neigh->parms->app_probes)
                neigh_app_notify(neigh);
#endif
        return err;
}

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
                                 u8 *lladdr, void *saddr,
                                 struct net_device *dev)
{
        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
                                                 lladdr || !dev->addr_len);
        if (neigh)
                neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
        return neigh;
}

static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                          u16 protocol)
{
        struct hh_cache *hh;
        struct net_device *dev = dst->dev;

        for (hh = n->hh; hh; hh = hh->hh_next)
                if (hh->hh_type == protocol)
                        break;

        if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
                memset(hh, 0, sizeof(struct hh_cache));
                hh->hh_lock = RW_LOCK_UNLOCKED;
                hh->hh_type = protocol;
                atomic_set(&hh->hh_refcnt, 0);
                hh->hh_next = NULL;
                if (dev->hard_header_cache(n, hh)) {
                        kfree(hh);
                        hh = NULL;
                } else {
                        atomic_inc(&hh->hh_refcnt);
                        hh->hh_next = n->hh;
                        n->hh       = hh;
                        if (n->nud_state & NUD_CONNECTED)
                                hh->hh_output = n->ops->hh_output;
                        else
                                hh->hh_output = n->ops->output;
                }
        }
        if (hh) {
                atomic_inc(&hh->hh_refcnt);
                dst->hh = hh;
        }
}

/* This function can be used in contexts where only the old dev_queue_xmit
   worked, e.g. if you want to override the normal output path (eql, shaper)
   but resolution has not been done yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;

        __skb_pull(skb, skb->nh.raw - skb->data);

        if (dev->hard_header &&
            dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
                             skb->len) < 0 &&
            dev->rebuild_header(skb))
                return 0;

        return dev_queue_xmit(skb);
}

/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct neighbour *neigh;
        int rc = 0;

        if (!dst || !(neigh = dst->neighbour))
                goto discard;

        __skb_pull(skb, skb->nh.raw - skb->data);

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                if (dev->hard_header_cache && !dst->hh) {
                        write_lock_bh(&neigh->lock);
                        if (!dst->hh)
                                neigh_hh_init(neigh, dst, dst->ops->protocol);
                        err = dev->hard_header(skb, dev, ntohs(skb->protocol),
                                               neigh->ha, NULL, skb->len);
                        write_unlock_bh(&neigh->lock);
                } else {
                        read_lock_bh(&neigh->lock);
                        err = dev->hard_header(skb, dev, ntohs(skb->protocol),
                                               neigh->ha, NULL, skb->len);
                        read_unlock_bh(&neigh->lock);
                }
                if (err >= 0)
                        rc = neigh->ops->queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                      dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}

/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
        int err;
        struct dst_entry *dst = skb->dst;
        struct neighbour *neigh = dst->neighbour;
        struct net_device *dev = neigh->dev;

        __skb_pull(skb, skb->nh.raw - skb->data);

        read_lock_bh(&neigh->lock);
        err = dev->hard_header(skb, dev, ntohs(skb->protocol),
                               neigh->ha, NULL, skb->len);
        read_unlock_bh(&neigh->lock);
        if (err >= 0)
                err = neigh->ops->queue_xmit(skb);
        else {
                err = -EINVAL;
                kfree_skb(skb);
        }
        return err;
}

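/*
 * Note on the proxy queue: pneigh_enqueue() below reuses the skb timestamp
 * to carry scheduling information.  stamp.tv_sec is set to LOCALLY_ENQUEUED
 * as a marker and stamp.tv_usec holds the absolute jiffies value at which
 * the packet should be processed; neigh_proxy_process() compares that value
 * against the current jiffies to decide whether an entry is due.
 */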
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;
        unsigned long now = jiffies;
        struct sk_buff *skb;

        spin_lock(&tbl->proxy_queue.lock);

        skb = tbl->proxy_queue.next;

        while (skb != (struct sk_buff *)&tbl->proxy_queue) {
                struct sk_buff *back = skb;
                long tdif = back->stamp.tv_usec - now;

                skb = skb->next;
                if (tdif <= 0) {
                        struct net_device *dev = back->dev;
                        __skb_unlink(back, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev))
                                tbl->proxy_redo(back);
                        else
                                kfree_skb(back);

                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
                    struct sk_buff *skb)
{
        unsigned long now = jiffies;
        long sched_next = net_random() % p->proxy_delay;

        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
                kfree_skb(skb);
                return;
        }
        skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
        skb->stamp.tv_usec = now + sched_next;

        spin_lock(&tbl->proxy_queue.lock);
        if (del_timer(&tbl->proxy_timer)) {
                long tval = tbl->proxy_timer.expires - now;
                if (tval < sched_next)
                        sched_next = tval;
        }
        dst_release(skb->dst);
        skb->dst = NULL;
        dev_hold(skb->dev);
        __skb_queue_tail(&tbl->proxy_queue, skb);
        mod_timer(&tbl->proxy_timer, now + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}


struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);

        if (p) {
                memcpy(p, &tbl->parms, sizeof(*p));
                p->tbl            = tbl;
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
                if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }
                p->sysctl_table = NULL;
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        write_unlock_bh(&tbl->lock);
                        kfree(parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
}

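/*
 * neigh_table_init() sets the initial randomized reachable_time, creates
 * the entry slab cache (panicking if that fails), starts the periodic GC
 * and proxy timers, and links the table onto the global neigh_tables list.
 * It is typically called once at init time by a protocol such as ARP.
 */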
void neigh_table_init(struct neigh_table *tbl)
{
        unsigned long now = jiffies;

        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        if (!tbl->kmem_cachep)
                tbl->kmem_cachep = kmem_cache_create(tbl->id,
                                                     tbl->entry_size,
                                                     0, SLAB_HWCACHE_ALIGN,
                                                     NULL, NULL);

        if (!tbl->kmem_cachep)
                panic("cannot create neighbour cache");

        tbl->lock              = RW_LOCK_UNLOCKED;
        init_timer(&tbl->gc_timer);
        tbl->gc_timer.data     = (unsigned long)tbl;
        tbl->gc_timer.function = neigh_periodic_timer;
        tbl->gc_timer.expires  = now + tbl->gc_interval +
                                 tbl->parms.reachable_time;
        add_timer(&tbl->gc_timer);

        init_timer(&tbl->proxy_timer);
        tbl->proxy_timer.data     = (unsigned long)tbl;
        tbl->proxy_timer.function = neigh_proxy_process;
        skb_queue_head_init(&tbl->proxy_queue);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
        write_lock(&neigh_tbl_lock);
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);
}

int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        del_timer_sync(&tbl->gc_timer);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        if (tbl->entries)
                printk(KERN_CRIT "neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);
        return 0;
}

int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct ndmsg *ndm = NLMSG_DATA(nlh);
        struct rtattr **nda = arg;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -ENODEV;

        if (ndm->ndm_ifindex &&
            (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
                goto out;

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *n;

                if (tbl->family != ndm->ndm_family)
                        continue;
                read_unlock(&neigh_tbl_lock);

                err = -EINVAL;
                if (!nda[NDA_DST - 1] ||
                    nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
                        goto out_dev_put;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl,
                                            RTA_DATA(nda[NDA_DST - 1]), dev);
                        goto out_dev_put;
                }

                if (!dev)
                        goto out;

                n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
                if (n) {
                        err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
                        neigh_release(n);
                }
                goto out_dev_put;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EADDRNOTAVAIL;
out_dev_put:
        if (dev)
                dev_put(dev);
out:
        return err;
}

int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct ndmsg *ndm = NLMSG_DATA(nlh);
        struct rtattr **nda = arg;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -ENODEV;

        if (ndm->ndm_ifindex &&
            (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
                goto out;

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                int override = 1;
                struct neighbour *n;

                if (tbl->family != ndm->ndm_family)
                        continue;
                read_unlock(&neigh_tbl_lock);

                err = -EINVAL;
                if (!nda[NDA_DST - 1] ||
                    nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
                        goto out_dev_put;
                if (ndm->ndm_flags & NTF_PROXY) {
                        err = -ENOBUFS;
                        if (pneigh_lookup(tbl,
                                          RTA_DATA(nda[NDA_DST - 1]), dev, 1))
                                err = 0;
                        goto out_dev_put;
                }
                err = -EINVAL;
                if (!dev)
                        goto out;
                if (nda[NDA_LLADDR - 1] &&
                    nda[NDA_LLADDR - 1]->rta_len != RTA_LENGTH(dev->addr_len))
                        goto out_dev_put;
                err = 0;
                n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
                if (n) {
                        if (nlh->nlmsg_flags & NLM_F_EXCL)
                                err = -EEXIST;
                        override = nlh->nlmsg_flags & NLM_F_REPLACE;
                } else if (!(nlh->nlmsg_flags & NLM_F_CREATE))
                        err = -ENOENT;
                else {
                        n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST - 1]),
                                                 dev);
                        if (IS_ERR(n)) {
                                err = PTR_ERR(n);
                                n = NULL;
                        }
                }
                if (!err) {
                        err = neigh_update(n, nda[NDA_LLADDR - 1] ?
                                                RTA_DATA(nda[NDA_LLADDR - 1]) :
                                                NULL,
                                           ndm->ndm_state,
                                           override, 0);
                }
                if (n)
                        neigh_release(n);
                goto out_dev_put;
        }

        read_unlock(&neigh_tbl_lock);
        err = -EADDRNOTAVAIL;
out_dev_put:
        if (dev)
                dev_put(dev);
out:
        return err;
}


static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
                           u32 pid, u32 seq, int event)
{
        unsigned long now = jiffies;
        unsigned char *b = skb->tail;
        struct nda_cacheinfo ci;
        int locked = 0;
        struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
                                         sizeof(struct ndmsg));
        struct ndmsg *ndm = NLMSG_DATA(nlh);

        ndm->ndm_family  = n->ops->family;
        ndm->ndm_flags   = n->flags;
        ndm->ndm_type    = n->type;
        ndm->ndm_ifindex = n->dev->ifindex;
        RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
        read_lock_bh(&n->lock);
        locked           = 1;
        ndm->ndm_state   = n->nud_state;
        if (n->nud_state & NUD_VALID)
                RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
        ci.ndm_used      = now - n->used;
        ci.ndm_confirmed = now - n->confirmed;
        ci.ndm_updated   = now - n->updated;
        ci.ndm_refcnt    = atomic_read(&n->refcnt) - 1;
        read_unlock_bh(&n->lock);
        locked           = 0;
        RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
        nlh->nlmsg_len   = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        if (locked)
                read_unlock_bh(&n->lock);
        skb_trim(skb, b - skb->data);
        return -1;
}


static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];

        for (h = 0; h <= NEIGH_HASHMASK; h++) {
                if (h < s_h)
                        continue;
                if (h > s_h)
                        s_idx = 0;
                read_lock_bh(&tbl->lock);
                for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
                        if (idx < s_idx)
                                continue;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH) <= 0) {
                                read_unlock_bh(&tbl->lock);
                                rc = -1;
                                goto out;
                        }
                }
                read_unlock_bh(&tbl->lock);
        }
        rc = skb->len;
out:
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}

int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;

        read_lock(&neigh_tbl_lock);
        family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
        s_t = cb->args[0];

        for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
                if (t < s_t || (family && tbl->family != family))
                        continue;
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (neigh_dump_table(tbl, skb, cb) < 0)
                        break;
        }
        read_unlock(&neigh_tbl_lock);

        cb->args[0] = t;
        return skb->len;
}

#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
        struct nlmsghdr  *nlh;
        int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
        struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);

        if (!skb)
                return;

        if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
                kfree_skb(skb);
                return;
        }
        nlh                        = (struct nlmsghdr *)skb->data;
        nlh->nlmsg_flags           = NLM_F_REQUEST;
        NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
        netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}

static void neigh_app_notify(struct neighbour *n)
{
        struct nlmsghdr *nlh;
        int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
        struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);

        if (!skb)
                return;

        if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
                kfree_skb(skb);
                return;
        }
        nlh                        = (struct nlmsghdr *)skb->data;
        NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
        netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}

#endif /* CONFIG_ARPD */

#ifdef CONFIG_SYSCTL

static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        ctl_table               neigh_vars[17];
        ctl_table               neigh_dev[2];
        ctl_table               neigh_neigh_dir[2];
        ctl_table               neigh_proto_dir[2];
        ctl_table               neigh_root_dir[2];
} neigh_sysctl_template = {
        .neigh_vars = {
                {
                        .ctl_name       = NET_NEIGH_MCAST_SOLICIT,
                        .procname       = "mcast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_UCAST_SOLICIT,
                        .procname       = "ucast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_APP_SOLICIT,
                        .procname       = "app_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_RETRANS_TIME,
                        .procname       = "retrans_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_userhz_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_REACHABLE_TIME,
                        .procname       = "base_reachable_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_jiffies,
                        .strategy       = &sysctl_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_DELAY_PROBE_TIME,
                        .procname       = "delay_first_probe_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_jiffies,
                        .strategy       = &sysctl_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_GC_STALE_TIME,
                        .procname       = "gc_stale_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_jiffies,
                        .strategy       = &sysctl_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_UNRES_QLEN,
                        .procname       = "unres_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_PROXY_QLEN,
                        .procname       = "proxy_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_ANYCAST_DELAY,
                        .procname       = "anycast_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_userhz_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_PROXY_DELAY,
                        .procname       = "proxy_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_userhz_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_LOCKTIME,
                        .procname       = "locktime",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_userhz_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_GC_INTERVAL,
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec_jiffies,
                        .strategy       = &sysctl_jiffies,
                },
                {
                        .ctl_name       = NET_NEIGH_GC_THRESH1,
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
                {
                        .ctl_name       = NET_NEIGH_GC_THRESH2,
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
1597                         .mode           = 0644,
1598                         .proc_handler   = &proc_dointvec,
1599                 },
1600                 {
1601                         .ctl_name       = NET_NEIGH_GC_THRESH3,
1602                         .procname       = "gc_thresh3",
1603                         .maxlen         = sizeof(int),
1604                         .mode           = 0644,
1605                         .proc_handler   = &proc_dointvec,
1606                 },
1607         },
1608         .neigh_dev = {
1609                 {
1610                         .ctl_name       = NET_PROTO_CONF_DEFAULT,
1611                         .procname       = "default",
1612                         .mode           = 0555,
1613                 },
1614         },
1615         .neigh_neigh_dir = {
1616                 {
1617                         .procname       = "neigh",
1618                         .mode           = 0555,
1619                 },
1620         },
1621         .neigh_proto_dir = {
1622                 {
1623                         .mode           = 0555,
1624                 },
1625         },
1626         .neigh_root_dir = {
1627                 {
1628                         .ctl_name       = CTL_NET,
1629                         .procname       = "net",
1630                         .mode           = 0555,
1631                 },
1632         },
1633 };
1634
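/*
 * Copy the template, point each leaf's .data at the matching field of *p,
 * and register the directory chain net/<p_name>/neigh/<dev->name or
 * "default">/.  A caller-supplied handler, if any, replaces
 * proc_dointvec_userhz_jiffies for the retrans_time entry, with the
 * device passed in extra1.  On success the new table is remembered in
 * p->sysctl_table so neigh_sysctl_unregister() can tear it down.
 */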
1635 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
1636                           int p_id, int pdev_id, char *p_name, 
1637                           proc_handler *handler)
1638 {
1639         struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
1640         const char *dev_name_source = NULL;
1641         char *dev_name = NULL;
1642         int err = 0;
1643
1644         if (!t)
1645                 return -ENOBUFS;
1646         memcpy(t, &neigh_sysctl_template, sizeof(*t));
1647         t->neigh_vars[0].data  = &p->mcast_probes;
1648         t->neigh_vars[1].data  = &p->ucast_probes;
1649         t->neigh_vars[2].data  = &p->app_probes;
1650         t->neigh_vars[3].data  = &p->retrans_time;
1651         if (handler) {
1652                 t->neigh_vars[3].proc_handler = handler;
1653                 t->neigh_vars[3].extra1 = dev;
1654         }
1655         t->neigh_vars[4].data  = &p->base_reachable_time;
1656         t->neigh_vars[5].data  = &p->delay_probe_time;
1657         t->neigh_vars[6].data  = &p->gc_staletime;
1658         t->neigh_vars[7].data  = &p->queue_len;
1659         t->neigh_vars[8].data  = &p->proxy_qlen;
1660         t->neigh_vars[9].data  = &p->anycast_delay;
1661         t->neigh_vars[10].data = &p->proxy_delay;
1662         t->neigh_vars[11].data = &p->locktime;
1663
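        /*
         * A real device gets a directory named after dev->name (with its
         * ifindex as ctl_name) and loses the per-table knobs: zeroing
         * neigh_vars[12] terminates the array before gc_interval and the
         * gc_thresh entries.  The "default" directory keeps them; their
         * .data pointers reach just past *p, relying on struct neigh_table
         * keeping gc_interval and gc_thresh1..3 immediately behind parms.
         */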
1664         dev_name_source = t->neigh_dev[0].procname;
1665         if (dev) {
1666                 dev_name_source = dev->name;
1667                 t->neigh_dev[0].ctl_name = dev->ifindex;
1668                 memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
1669         } else {
1670                 t->neigh_vars[12].data = (int *)(p + 1);
1671                 t->neigh_vars[13].data = (int *)(p + 1) + 1;
1672                 t->neigh_vars[14].data = (int *)(p + 1) + 2;
1673                 t->neigh_vars[15].data = (int *)(p + 1) + 3;
1674         }
1675
1676         dev_name = net_sysctl_strdup(dev_name_source);
1677         if (!dev_name) {
1678                 err = -ENOBUFS;
1679                 goto free;
1680         }
1681
1682         t->neigh_dev[0].procname = dev_name;
1683
1684         t->neigh_neigh_dir[0].ctl_name = pdev_id;
1685
1686         t->neigh_proto_dir[0].procname = p_name;
1687         t->neigh_proto_dir[0].ctl_name = p_id;
1688
1689         t->neigh_dev[0].child          = t->neigh_vars;
1690         t->neigh_neigh_dir[0].child    = t->neigh_dev;
1691         t->neigh_proto_dir[0].child    = t->neigh_neigh_dir;
1692         t->neigh_root_dir[0].child     = t->neigh_proto_dir;
1693
1694         t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
1695         if (!t->sysctl_header) {
1696                 err = -ENOBUFS;
1697                 goto free_procname;
1698         }
1699         p->sysctl_table = t;
1700         return 0;
1701
1702         /* error path */
1703  free_procname:
1704         kfree(dev_name);
1705  free:
1706         kfree(t);
1707
1708         return err;
1709 }
1710
1711 void neigh_sysctl_unregister(struct neigh_parms *p)
1712 {
1713         if (p->sysctl_table) {
1714                 struct neigh_sysctl_table *t = p->sysctl_table;
1715                 p->sysctl_table = NULL;
1716                 unregister_sysctl_table(t->sysctl_header);
1717                 kfree(t->neigh_dev[0].procname);
1718                 kfree(t);
1719         }
1720 }
1721
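/*
 * Illustrative usage (a sketch, not part of this file): a protocol
 * normally registers the default parms of its neigh_table once at init
 * time, roughly the way ARP does it:
 *
 *	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
 *			      NET_IPV4_NEIGH, "ipv4", NULL);
 *
 * Per-device parms obtained from neigh_parms_alloc() are registered with
 * dev != NULL and released again through neigh_sysctl_unregister().
 */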
1722 #endif  /* CONFIG_SYSCTL */
1723
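/*
 * The generic neighbour cache is driven by protocol code (ARP, IPv6
 * neighbour discovery, DECnet), parts of which may be built as modules,
 * hence the exports below.
 */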
1724 EXPORT_SYMBOL(__neigh_event_send);
1725 EXPORT_SYMBOL(neigh_add);
1726 EXPORT_SYMBOL(neigh_changeaddr);
1727 EXPORT_SYMBOL(neigh_compat_output);
1728 EXPORT_SYMBOL(neigh_connected_output);
1729 EXPORT_SYMBOL(neigh_create);
1730 EXPORT_SYMBOL(neigh_delete);
1731 EXPORT_SYMBOL(neigh_destroy);
1732 EXPORT_SYMBOL(neigh_dump_info);
1733 EXPORT_SYMBOL(neigh_event_ns);
1734 EXPORT_SYMBOL(neigh_ifdown);
1735 EXPORT_SYMBOL(neigh_lookup);
1736 EXPORT_SYMBOL(neigh_parms_alloc);
1737 EXPORT_SYMBOL(neigh_parms_release);
1738 EXPORT_SYMBOL(neigh_rand_reach_time);
1739 EXPORT_SYMBOL(neigh_resolve_output);
1740 EXPORT_SYMBOL(neigh_table_clear);
1741 EXPORT_SYMBOL(neigh_table_init);
1742 EXPORT_SYMBOL(neigh_update);
1743 EXPORT_SYMBOL(neigh_update_hhs);
1744 EXPORT_SYMBOL(pneigh_enqueue);
1745 EXPORT_SYMBOL(pneigh_lookup);
1746
1747 #ifdef CONFIG_ARPD
1748 EXPORT_SYMBOL(neigh_app_ns);
1749 #endif
1750 #ifdef CONFIG_SYSCTL
1751 EXPORT_SYMBOL(neigh_sysctl_register);
1752 EXPORT_SYMBOL(neigh_sysctl_unregister);
1753 #endif