2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/sched.h>
23 #include <linux/netdevice.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/neighbour.h>
31 #include <linux/rtnetlink.h>
35 #define NEIGH_PRINTK(x...) printk(x)
36 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
37 #define NEIGH_PRINTK0 NEIGH_PRINTK
38 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
39 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
43 #define NEIGH_PRINTK1 NEIGH_PRINTK
47 #define NEIGH_PRINTK2 NEIGH_PRINTK
50 static void neigh_timer_handler(unsigned long arg);
52 static void neigh_app_notify(struct neighbour *n);
54 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
55 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
57 static int neigh_glbl_allocs;
58 static struct neigh_table *neigh_tables;
61 Neighbour hash table buckets are protected with rwlock tbl->lock.
63 - All the scans/updates to hash buckets MUST be made under this lock.
64 - NOTHING clever should be made under this lock: no callbacks
65 to protocol backends, no attempts to send something to network.
66 It will result in deadlocks, if backend/driver wants to use neighbour
68 - If the entry requires some non-trivial actions, increase
69 its reference count and release table lock.
71 Neighbour entries are protected:
72 - with reference count.
73 - with rwlock neigh->lock
75 Reference count prevents destruction.
77 neigh->lock mainly serializes ll address data and its validity state.
78 However, the same lock is used to protect other entry fields:
82 Again, nothing clever shall be made under neigh->lock,
83 the most complicated procedure, which we allow is dev->hard_header.
84 It is supposed, that dev->hard_header is simplistic and does
85 not make callbacks to neighbour tables.
87 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
88 list of neighbour tables. This list is used only in process context,
91 static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
93 static int neigh_blackhole(struct sk_buff *skb)
100 * It is random distribution in the interval (1/2)*base...(3/2)*base.
101 * It corresponds to default IPv6 settings and is not overridable,
102 * because it is a really reasonable choice.
105 unsigned long neigh_rand_reach_time(unsigned long base)
107 return (net_random() % base) + (base >> 1);
/*
 * neigh_forced_gc() - synchronous, forced garbage collection of @tbl.
 * Walks every hash bucket under the table write lock and reclaims
 * entries that are unreferenced, non-permanent and (unless INCOMPLETE)
 * idle.  NOTE(review): interior lines of this function are missing
 * from this excerpt (the embedded line numbers jump), so the unlink
 * and return-value logic is not visible here.
 */
111 static int neigh_forced_gc(struct neigh_table *tbl)
116 for (i = 0; i <= NEIGH_HASHMASK; i++) {
117 struct neighbour *n, **np;
119 np = &tbl->hash_buckets[i];
/* Bucket scan runs under the table write lock with BHs disabled. */
120 write_lock_bh(&tbl->lock);
121 while ((n = *np) != NULL) {
122 /* Neighbour record may be discarded if:
123 - nobody refers to it.
124 - it is not permanent
125 - (NEW and probably wrong)
126 INCOMPLETE entries are kept at least for
127 n->parms->retrans_time, otherwise we could
128 flood network with resolution requests.
129 It is not clear, what is better table overflow
132 write_lock(&n->lock);
/* refcnt == 1 means the hash table itself holds the only reference. */
133 if (atomic_read(&n->refcnt) == 1 &&
134 !(n->nud_state & NUD_PERMANENT) &&
135 (n->nud_state != NUD_INCOMPLETE ||
136 jiffies - n->used > n->parms->retrans_time)) {
140 write_unlock(&n->lock);
144 write_unlock(&n->lock);
147 write_unlock_bh(&tbl->lock);
/* Record the flush time so neigh_alloc() can rate-limit forced GC. */
150 tbl->last_flush = jiffies;
154 static int neigh_del_timer(struct neighbour *n)
156 if ((n->nud_state & NUD_IN_TIMER) &&
157 del_timer(&n->timer)) {
164 static void pneigh_queue_purge(struct sk_buff_head *list)
168 while ((skb = skb_dequeue(list)) != NULL) {
/*
 * neigh_changeaddr() - walk all hash buckets of @tbl and act on the
 * entries belonging to @dev (a NULL @dev matches every entry).  The
 * action taken under each entry's lock is in lines omitted from this
 * excerpt - TODO confirm against the full source.
 */
174 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
178 write_lock_bh(&tbl->lock);
180 for (i=0; i <= NEIGH_HASHMASK; i++) {
181 struct neighbour *n, **np;
183 np = &tbl->hash_buckets[i];
184 while ((n = *np) != NULL) {
/* Skip entries that belong to a different device. */
185 if (dev && n->dev != dev) {
190 write_lock_bh(&n->lock);
193 write_unlock_bh(&n->lock);
198 write_unlock_bh(&tbl->lock);
/*
 * neigh_ifdown() - purge or neutralize all neighbour entries for @dev
 * when the device goes down (NULL @dev matches every entry).  Entries
 * still referenced elsewhere cannot be freed immediately, so they are
 * detached from the device parms, their pending queues purged, and
 * their output hook pointed at neigh_blackhole until the last user
 * releases them.
 */
201 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
205 write_lock_bh(&tbl->lock);
207 for (i = 0; i <= NEIGH_HASHMASK; i++) {
208 struct neighbour *n, **np = &tbl->hash_buckets[i];
210 while ((n = *np) != NULL) {
211 if (dev && n->dev != dev) {
216 write_lock(&n->lock);
220 if (atomic_read(&n->refcnt) != 1) {
221 /* The most unpleasant situation.
222 We must destroy neighbour entry,
223 but someone still uses it.
225 The destroy will be delayed until
226 the last user releases us, but
227 we must kill timers etc. and move
/* Fall back to the table-wide default parms; the per-device ones die. */
230 n->parms = &tbl->parms;
231 skb_queue_purge(&n->arp_queue);
/* Any further transmission through this stray entry is dropped. */
232 n->output = neigh_blackhole;
233 if (n->nud_state & NUD_VALID)
234 n->nud_state = NUD_NOARP;
236 n->nud_state = NUD_NONE;
237 NEIGH_PRINTK2("neigh %p is stray.\n", n);
239 write_unlock(&n->lock);
/* Proxy entries for the device are removed as well. */
244 pneigh_ifdown(tbl, dev);
245 write_unlock_bh(&tbl->lock);
247 del_timer_sync(&tbl->proxy_timer);
248 pneigh_queue_purge(&tbl->proxy_queue);
/*
 * neigh_alloc() - allocate and minimally initialize a neighbour entry
 * for @tbl.  Applies the gc_thresh2/gc_thresh3 pressure policy first:
 * above thresh3 (or above thresh2 with the last flush older than 5 s)
 * a forced GC must make room, otherwise the allocation is refused.
 */
252 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
254 struct neighbour *n = NULL;
255 unsigned long now = jiffies;
257 if (tbl->entries > tbl->gc_thresh3 ||
258 (tbl->entries > tbl->gc_thresh2 &&
259 now - tbl->last_flush > 5 * HZ)) {
260 if (!neigh_forced_gc(tbl) &&
261 tbl->entries > tbl->gc_thresh3)
/* Atomic allocation: this path may run from softirq context. */
265 n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
269 memset(n, 0, tbl->entry_size);
271 skb_queue_head_init(&n->arp_queue);
272 n->lock = RW_LOCK_UNLOCKED;
273 n->updated = n->used = now;
/* Fresh entry: no NUD state and a black-hole output until resolved. */
274 n->nud_state = NUD_NONE;
275 n->output = neigh_blackhole;
276 n->parms = &tbl->parms;
277 init_timer(&n->timer);
278 n->timer.function = neigh_timer_handler;
279 n->timer.data = (unsigned long)n;
/* The single initial reference belongs to the hash table. */
284 atomic_set(&n->refcnt, 1);
/*
 * neigh_lookup() - find the entry for (@pkey, @dev) in @tbl under the
 * table read lock.  The reference grab on a hit is in lines omitted
 * from this excerpt - presumably neigh_hold(); verify in full source.
 */
290 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
291 struct net_device *dev)
294 int key_len = tbl->key_len;
295 u32 hash_val = tbl->hash(pkey, dev);
297 read_lock_bh(&tbl->lock);
298 for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
/* A match requires both the device and the full protocol key. */
299 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
304 read_unlock_bh(&tbl->lock);
/*
 * neigh_create() - allocate, construct and hash a new entry for
 * (@pkey, @dev) in @tbl.  Returns ERR_PTR(-ENOBUFS) when allocation
 * fails.  The insertion pass re-checks the bucket for a concurrent
 * insert of the same key and keeps the existing entry if one raced us.
 */
308 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
309 struct net_device *dev)
312 int key_len = tbl->key_len;
314 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
317 rc = ERR_PTR(-ENOBUFS);
321 memcpy(n->primary_key, pkey, key_len);
325 /* Protocol specific setup. */
326 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
328 goto out_neigh_release;
331 /* Device specific setup. */
332 if (n->parms->neigh_setup &&
333 (error = n->parms->neigh_setup(n)) < 0) {
335 goto out_neigh_release;
/* Backdate 'confirmed' so the first real confirmation is meaningful. */
338 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
340 hash_val = tbl->hash(pkey, dev);
342 write_lock_bh(&tbl->lock);
/* Somebody may have raced us and inserted the same key already. */
343 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
344 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
346 write_unlock_bh(&tbl->lock);
348 goto out_neigh_release;
/* Link the new entry at the head of its bucket chain. */
352 n->next = tbl->hash_buckets[hash_val];
353 tbl->hash_buckets[hash_val] = n;
356 write_unlock_bh(&tbl->lock);
357 NEIGH_PRINTK2("neigh %p is created.\n", n);
/*
 * pneigh_lookup() - look up a proxy-neighbour entry; when @creat is
 * non-zero and nothing matches, allocate and insert one (GFP_KERNEL,
 * so creation is only valid from process context).  The hash folds
 * the last 4 bytes of the key down to PNEIGH_HASHMASK.
 */
366 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
367 struct net_device *dev, int creat)
369 struct pneigh_entry *n;
370 int key_len = tbl->key_len;
371 u32 hash_val = *(u32 *)(pkey + key_len - 4);
373 hash_val ^= (hash_val >> 16);
374 hash_val ^= hash_val >> 8;
375 hash_val ^= hash_val >> 4;
376 hash_val &= PNEIGH_HASHMASK;
378 read_lock_bh(&tbl->lock);
380 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
/* A NULL n->dev acts as a wildcard matching any device. */
381 if (!memcmp(n->key, pkey, key_len) &&
382 (n->dev == dev || !n->dev)) {
383 read_unlock_bh(&tbl->lock);
387 read_unlock_bh(&tbl->lock);
/* Entry plus its variable-length key in a single allocation. */
392 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
396 memcpy(n->key, pkey, key_len);
399 if (tbl->pconstructor && tbl->pconstructor(n)) {
405 write_lock_bh(&tbl->lock);
406 n->next = tbl->phash_buckets[hash_val];
407 tbl->phash_buckets[hash_val] = n;
408 write_unlock_bh(&tbl->lock);
414 int pneigh_delete(struct neigh_table *tbl, const void *pkey,
415 struct net_device *dev)
417 struct pneigh_entry *n, **np;
418 int key_len = tbl->key_len;
419 u32 hash_val = *(u32 *)(pkey + key_len - 4);
421 hash_val ^= (hash_val >> 16);
422 hash_val ^= hash_val >> 8;
423 hash_val ^= hash_val >> 4;
424 hash_val &= PNEIGH_HASHMASK;
426 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
428 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
429 write_lock_bh(&tbl->lock);
431 write_unlock_bh(&tbl->lock);
432 if (tbl->pdestructor)
441 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
443 struct pneigh_entry *n, **np;
446 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
447 np = &tbl->phash_buckets[h];
448 while ((n = *np) != NULL) {
449 if (!dev || n->dev == dev) {
451 if (tbl->pdestructor)
464 * neighbour must already be out of the table;
/*
 * neigh_destroy() - final teardown once the last reference is gone.
 * Detaches and neutralizes cached hardware headers, runs the protocol
 * destructor, purges the pending-packet queue and frees the entry.
 */
467 void neigh_destroy(struct neighbour *neigh)
473 "Destroying alive neighbour %p\n", neigh);
/* A still-armed timer here would mean a missing reference. */
478 if (neigh_del_timer(neigh))
479 printk(KERN_WARNING "Impossible event.\n");
481 while ((hh = neigh->hh) != NULL) {
482 neigh->hh = hh->hh_next;
/* Redirect the cached header's output to a black hole before release. */
484 write_lock_bh(&hh->hh_lock);
485 hh->hh_output = neigh_blackhole;
486 write_unlock_bh(&hh->hh_lock);
487 if (atomic_dec_and_test(&hh->hh_refcnt))
491 if (neigh->ops && neigh->ops->destructor)
492 (neigh->ops->destructor)(neigh);
494 skb_queue_purge(&neigh->arp_queue);
498 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
501 neigh->tbl->entries--;
502 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
505 /* Neighbour state is suspicious;
508 Called with write_locked neigh.
510 static void neigh_suspect(struct neighbour *neigh)
514 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
516 neigh->output = neigh->ops->output;
518 for (hh = neigh->hh; hh; hh = hh->hh_next)
519 hh->hh_output = neigh->ops->output;
522 /* Neighbour state is OK;
525 Called with write_locked neigh.
527 static void neigh_connect(struct neighbour *neigh)
531 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
533 neigh->output = neigh->ops->connected_output;
535 for (hh = neigh->hh; hh; hh = hh->hh_next)
536 hh->hh_output = neigh->ops->hh_output;
540 Transitions NUD_STALE <-> NUD_REACHABLE do not occur
541 when fast path is built: we have no timers associated with
542 these states, we do not have time to check state when sending.
543 neigh_periodic_timer periodically checks the neigh->confirmed
544 time and moves NUD_REACHABLE -> NUD_STALE.
546 If a routine wants to know TRUE entry state, it calls
547 neigh_sync before checking state.
549 Called with write_locked neigh.
/*
 * neigh_sync() - bring nud_state in line with the confirmation age.
 * REACHABLE entries whose confirmation has expired drop to STALE;
 * other VALID entries with a fresh confirmation are promoted back to
 * REACHABLE.  NOARP/PERMANENT entries are left untouched.
 * Caller holds neigh->lock for writing (per the comment above).
 */
552 static void neigh_sync(struct neighbour *n)
554 unsigned long now = jiffies;
555 u8 state = n->nud_state;
557 if (state & (NUD_NOARP | NUD_PERMANENT))
559 if (state & NUD_REACHABLE) {
560 if (now - n->confirmed > n->parms->reachable_time) {
561 n->nud_state = NUD_STALE;
564 } else if (state & NUD_VALID) {
565 if (now - n->confirmed < n->parms->reachable_time) {
567 n->nud_state = NUD_REACHABLE;
/*
 * neigh_periodic_timer() - periodic GC and state aging for the
 * neigh_table passed in @arg.  Re-randomizes reachable_time for every
 * parms block roughly every 300 s, reclaims unreferenced stale/failed
 * entries, demotes aged REACHABLE entries to STALE, and re-arms itself
 * at gc_interval.
 */
573 static void neigh_periodic_timer(unsigned long arg)
575 struct neigh_table *tbl = (struct neigh_table *)arg;
576 unsigned long now = jiffies;
580 write_lock(&tbl->lock);
583 * periodically recompute ReachableTime from random function
586 if (now - tbl->last_rand > 300 * HZ) {
587 struct neigh_parms *p;
588 tbl->last_rand = now;
589 for (p = &tbl->parms; p; p = p->next)
591 neigh_rand_reach_time(p->base_reachable_time);
594 for (i = 0; i <= NEIGH_HASHMASK; i++) {
595 struct neighbour *n, **np;
597 np = &tbl->hash_buckets[i];
598 while ((n = *np) != NULL) {
601 write_lock(&n->lock);
603 state = n->nud_state;
/* Entries with a running timer, or permanent ones, are left alone. */
604 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
605 write_unlock(&n->lock);
/* Do not let 'used' lag behind 'confirmed'. */
609 if ((long)(n->used - n->confirmed) < 0)
610 n->used = n->confirmed;
/* Only the table's own reference remains: candidate for reclaim. */
612 if (atomic_read(&n->refcnt) == 1 &&
613 (state == NUD_FAILED ||
614 now - n->used > n->parms->gc_staletime)) {
617 write_unlock(&n->lock);
/* Age out REACHABLE entries whose confirmation has expired. */
622 if (n->nud_state & NUD_REACHABLE &&
623 now - n->confirmed > n->parms->reachable_time) {
624 n->nud_state = NUD_STALE;
627 write_unlock(&n->lock);
634 mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
635 write_unlock(&tbl->lock);
/*
 * neigh_max_probes() - total number of probes allowed before an entry
 * is declared FAILED.  In PROBE state only the unicast/app budget
 * applies; otherwise the multicast budget is added on top.
 */
638 static __inline__ int neigh_max_probes(struct neighbour *n)
640 struct neigh_parms *p = n->parms;
641 return (n->nud_state & NUD_PROBE ?
643 p->ucast_probes + p->app_probes + p->mcast_probes);
647 /* Called when a timer expires for a neighbour entry. */
/*
 * neigh_timer_handler() - drives the NUD state machine for one entry:
 * confirms liveness (back to REACHABLE), escalates DELAY -> PROBE,
 * declares FAILED once neigh_max_probes() is exhausted (reporting the
 * error for each queued skb), or re-arms the timer and sends another
 * solicitation.
 */
649 static void neigh_timer_handler(unsigned long arg)
651 unsigned long now = jiffies;
652 struct neighbour *neigh = (struct neighbour *)arg;
656 write_lock(&neigh->lock);
658 state = neigh->nud_state;
660 if (!(state & NUD_IN_TIMER)) {
662 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
/* A recent confirmation proves reachability; promote and stop probing. */
667 if ((state & NUD_VALID) &&
668 now - neigh->confirmed < neigh->parms->reachable_time) {
669 neigh->nud_state = NUD_REACHABLE;
670 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
671 neigh_connect(neigh);
674 if (state == NUD_DELAY) {
675 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
676 neigh->nud_state = NUD_PROBE;
677 atomic_set(&neigh->probes, 0);
/* Probe budget exhausted: give up on this entry. */
680 if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
683 neigh->nud_state = NUD_FAILED;
685 neigh->tbl->stats.res_failed++;
686 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
688 /* This is a very delicate place. report_unreachable is a very
689 complicated routine. Particularly, it can hit the same neighbour
691 entry! So we try to be accurate and avoid a dead loop. --ANK
693 while (neigh->nud_state == NUD_FAILED &&
694 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
/* Drop the lock around the callback; it may re-enter this entry. */
695 write_unlock(&neigh->lock);
696 neigh->ops->error_report(neigh, skb);
697 write_lock(&neigh->lock);
699 skb_queue_purge(&neigh->arp_queue);
703 neigh->timer.expires = now + neigh->parms->retrans_time;
704 add_timer(&neigh->timer);
705 write_unlock(&neigh->lock);
/* Solicit outside the lock, peeking at the oldest queued packet. */
707 neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
708 atomic_inc(&neigh->probes);
712 write_unlock(&neigh->lock);
714 if (notify && neigh->parms->app_probes)
715 neigh_app_notify(neigh);
/* Balances the reference taken when the timer was armed. */
717 neigh_release(neigh);
/*
 * __neigh_event_send() - slow path of neigh_event_send(): start
 * resolution for an unresolved entry and/or queue @skb until the
 * link-layer address is known.  STALE entries move to DELAY with the
 * delayed-probe timer armed; INCOMPLETE entries queue the skb subject
 * to the parms->queue_len cap (the oldest packet is dropped when full).
 */
720 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
724 write_lock_bh(&neigh->lock);
/* Already resolved or already being verified: nothing to start. */
727 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
730 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
731 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
732 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
733 neigh->nud_state = NUD_INCOMPLETE;
735 neigh->timer.expires = jiffies +
736 neigh->parms->retrans_time;
737 add_timer(&neigh->timer);
/* First solicitation goes out with the entry lock dropped. */
738 write_unlock_bh(&neigh->lock);
739 neigh->ops->solicit(neigh, skb);
740 atomic_inc(&neigh->probes);
741 write_lock_bh(&neigh->lock);
/* No probing configured at all: resolution cannot succeed. */
743 neigh->nud_state = NUD_FAILED;
744 write_unlock_bh(&neigh->lock);
752 if (neigh->nud_state == NUD_INCOMPLETE) {
754 if (skb_queue_len(&neigh->arp_queue) >=
755 neigh->parms->queue_len) {
/* Queue full: sacrifice the oldest pending packet. */
756 struct sk_buff *buff;
757 buff = neigh->arp_queue.next;
758 __skb_unlink(buff, &neigh->arp_queue);
761 __skb_queue_tail(&neigh->arp_queue, skb);
764 } else if (neigh->nud_state == NUD_STALE) {
765 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
767 neigh->nud_state = NUD_DELAY;
768 neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
769 add_timer(&neigh->timer);
773 write_unlock_bh(&neigh->lock);
777 static __inline__ void neigh_update_hhs(struct neighbour *neigh)
780 void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
781 neigh->dev->header_cache_update;
784 for (hh = neigh->hh; hh; hh = hh->hh_next) {
785 write_lock_bh(&hh->hh_lock);
786 update(hh, neigh->dev, neigh->ha);
787 write_unlock_bh(&hh->hh_lock);
794 /* Generic update routine.
795 -- lladdr is new lladdr or NULL, if it is not supplied.
797 -- override == 1 allows to override existing lladdr, if it is different.
798 -- arp == 0 means that the change is administrative.
800 Caller MUST hold reference count on the entry.
/*
 * NOTE(review): interior lines of this function are missing from this
 * excerpt; the comments below describe only the code that is visible.
 */
803 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
804 int override, int arp)
811 struct net_device *dev;
813 write_lock_bh(&neigh->lock);
816 old = neigh->nud_state;
/* ARP-driven updates may not touch NOARP/PERMANENT entries. */
819 if (arp && (old & (NUD_NOARP | NUD_PERMANENT)))
822 if (!(new & NUD_VALID)) {
823 neigh_del_timer(neigh);
824 if (old & NUD_CONNECTED)
825 neigh_suspect(neigh);
826 neigh->nud_state = new;
829 notify = old & NUD_VALID;
834 /* Compare new lladdr with cached one */
835 if (!dev->addr_len) {
836 /* First case: device needs no address. */
839 /* The second case: if something is already cached
840 and a new address is proposed:
842 - if they are different, check override flag
844 if (old & NUD_VALID) {
845 if (!memcmp(lladdr, neigh->ha, dev->addr_len))
851 /* No address is supplied; if we know something,
852 use it, otherwise discard the request.
855 if (!(old & NUD_VALID))
861 old = neigh->nud_state;
862 if (new & NUD_CONNECTED)
863 neigh->confirmed = jiffies;
864 neigh->updated = jiffies;
866 /* If entry was valid and address is not changed,
867 do not change entry state, if new one is STALE.
870 if ((old & NUD_VALID) && lladdr == neigh->ha &&
871 (new == old || (new == NUD_STALE && (old & NUD_CONNECTED))))
874 neigh_del_timer(neigh);
875 neigh->nud_state = new;
876 if (lladdr != neigh->ha) {
877 memcpy(&neigh->ha, lladdr, dev->addr_len);
/* Propagate the new address into all cached hardware headers. */
878 neigh_update_hhs(neigh);
879 if (!(new & NUD_CONNECTED))
880 neigh->confirmed = jiffies -
881 (neigh->parms->base_reachable_time << 1);
888 if (new & NUD_CONNECTED)
889 neigh_connect(neigh);
891 neigh_suspect(neigh);
892 if (!(old & NUD_VALID)) {
895 /* Again: avoid dead loop if something went wrong */
/* Entry just became valid: flush packets queued during resolution. */
897 while (neigh->nud_state & NUD_VALID &&
898 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
899 struct neighbour *n1 = neigh;
900 write_unlock_bh(&neigh->lock);
901 /* On shaper/eql skb->dst->neighbour != neigh :( */
902 if (skb->dst && skb->dst->neighbour)
903 n1 = skb->dst->neighbour;
905 write_lock_bh(&neigh->lock);
907 skb_queue_purge(&neigh->arp_queue);
910 write_unlock_bh(&neigh->lock);
912 if (notify && neigh->parms->app_probes)
913 neigh_app_notify(neigh);
918 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
919 u8 *lladdr, void *saddr,
920 struct net_device *dev)
922 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
923 lladdr || !dev->addr_len);
925 neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
929 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
933 struct net_device *dev = dst->dev;
935 for (hh = n->hh; hh; hh = hh->hh_next)
936 if (hh->hh_type == protocol)
939 if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
940 memset(hh, 0, sizeof(struct hh_cache));
941 hh->hh_lock = RW_LOCK_UNLOCKED;
942 hh->hh_type = protocol;
943 atomic_set(&hh->hh_refcnt, 0);
945 if (dev->hard_header_cache(n, hh)) {
949 atomic_inc(&hh->hh_refcnt);
952 if (n->nud_state & NUD_CONNECTED)
953 hh->hh_output = n->ops->hh_output;
955 hh->hh_output = n->ops->output;
959 atomic_inc(&hh->hh_refcnt);
964 /* This function can be used in contexts, where only old dev_queue_xmit
965 worked, f.e. if you want to override normal output path (eql, shaper),
966 but resolution is not made yet.
969 int neigh_compat_output(struct sk_buff *skb)
971 struct net_device *dev = skb->dev;
973 __skb_pull(skb, skb->nh.raw - skb->data);
975 if (dev->hard_header &&
976 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
978 dev->rebuild_header(skb))
981 return dev_queue_xmit(skb);
984 /* Slow and careful. */
/*
 * neigh_resolve_output() - output path used while the link-layer
 * address may still need resolution.  When neigh_event_send() reports
 * the entry usable, builds the hardware header (creating the hh cache
 * on first use if the device supports it) and hands the skb to
 * ops->queue_xmit.
 */
986 int neigh_resolve_output(struct sk_buff *skb)
988 struct dst_entry *dst = skb->dst;
989 struct neighbour *neigh;
992 if (!dst || !(neigh = dst->neighbour))
995 __skb_pull(skb, skb->nh.raw - skb->data);
997 if (!neigh_event_send(neigh, skb)) {
999 struct net_device *dev = neigh->dev;
/* First packet on this dst: populate the hardware-header cache. */
1000 if (dev->hard_header_cache && !dst->hh) {
1001 write_lock_bh(&neigh->lock);
1003 neigh_hh_init(neigh, dst, dst->ops->protocol);
1004 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1005 neigh->ha, NULL, skb->len);
1006 write_unlock_bh(&neigh->lock);
/* No header cache involved: a read lock suffices to copy neigh->ha. */
1008 read_lock_bh(&neigh->lock);
1009 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1010 neigh->ha, NULL, skb->len);
1011 read_unlock_bh(&neigh->lock);
1014 rc = neigh->ops->queue_xmit(skb);
1021 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1022 dst, dst ? dst->neighbour : NULL);
1029 /* As fast as possible without hh cache */
1031 int neigh_connected_output(struct sk_buff *skb)
1034 struct dst_entry *dst = skb->dst;
1035 struct neighbour *neigh = dst->neighbour;
1036 struct net_device *dev = neigh->dev;
1038 __skb_pull(skb, skb->nh.raw - skb->data);
1040 read_lock_bh(&neigh->lock);
1041 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1042 neigh->ha, NULL, skb->len);
1043 read_unlock_bh(&neigh->lock);
1045 err = neigh->ops->queue_xmit(skb);
1053 static void neigh_proxy_process(unsigned long arg)
1055 struct neigh_table *tbl = (struct neigh_table *)arg;
1056 long sched_next = 0;
1057 unsigned long now = jiffies;
1058 struct sk_buff *skb;
1060 spin_lock(&tbl->proxy_queue.lock);
1062 skb = tbl->proxy_queue.next;
1064 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1065 struct sk_buff *back = skb;
1066 long tdif = back->stamp.tv_usec - now;
1070 struct net_device *dev = back->dev;
1071 __skb_unlink(back, &tbl->proxy_queue);
1072 if (tbl->proxy_redo && netif_running(dev))
1073 tbl->proxy_redo(back);
1078 } else if (!sched_next || tdif < sched_next)
1081 del_timer(&tbl->proxy_timer);
1083 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1084 spin_unlock(&tbl->proxy_queue.lock);
1087 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1088 struct sk_buff *skb)
1090 unsigned long now = jiffies;
1091 long sched_next = net_random() % p->proxy_delay;
1093 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1097 skb->stamp.tv_sec = LOCALLY_ENQUEUED;
1098 skb->stamp.tv_usec = now + sched_next;
1100 spin_lock(&tbl->proxy_queue.lock);
1101 if (del_timer(&tbl->proxy_timer)) {
1102 long tval = tbl->proxy_timer.expires - now;
1103 if (tval < sched_next)
1106 dst_release(skb->dst);
1109 __skb_queue_tail(&tbl->proxy_queue, skb);
1110 mod_timer(&tbl->proxy_timer, now + sched_next);
1111 spin_unlock(&tbl->proxy_queue.lock);
1115 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1116 struct neigh_table *tbl)
1118 struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);
1121 memcpy(p, &tbl->parms, sizeof(*p));
1124 neigh_rand_reach_time(p->base_reachable_time);
1125 if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
1129 p->sysctl_table = NULL;
1130 write_lock_bh(&tbl->lock);
1131 p->next = tbl->parms.next;
1132 tbl->parms.next = p;
1133 write_unlock_bh(&tbl->lock);
1138 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1140 struct neigh_parms **p;
1142 if (!parms || parms == &tbl->parms)
1144 write_lock_bh(&tbl->lock);
1145 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1148 write_unlock_bh(&tbl->lock);
1153 write_unlock_bh(&tbl->lock);
1154 NEIGH_PRINTK1("neigh_parms_release: not found\n");
/*
 * neigh_table_init() - one-time initialization of a protocol's
 * neighbour table: slab cache, table lock, periodic GC timer, proxy
 * timer/queue, and registration on the global neigh_tables list.
 */
1158 void neigh_table_init(struct neigh_table *tbl)
1160 unsigned long now = jiffies;
1162 tbl->parms.reachable_time =
1163 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1165 if (!tbl->kmem_cachep)
1166 tbl->kmem_cachep = kmem_cache_create(tbl->id,
1168 0, SLAB_HWCACHE_ALIGN,
1171 if (!tbl->kmem_cachep)
1172 panic("cannot create neighbour cache");
1174 tbl->lock = RW_LOCK_UNLOCKED;
1175 init_timer(&tbl->gc_timer);
1176 tbl->gc_timer.data = (unsigned long)tbl;
1177 tbl->gc_timer.function = neigh_periodic_timer;
1178 tbl->gc_timer.expires = now + tbl->gc_interval +
1179 tbl->parms.reachable_time;
1180 add_timer(&tbl->gc_timer);
1182 init_timer(&tbl->proxy_timer);
1183 tbl->proxy_timer.data = (unsigned long)tbl;
1184 tbl->proxy_timer.function = neigh_proxy_process;
/* Proxy timer stays idle until pneigh_enqueue() arms it. */
1185 skb_queue_head_init(&tbl->proxy_queue);
1187 tbl->last_flush = now;
/* Push last_rand into the future to delay the first re-randomization. */
1188 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1189 write_lock(&neigh_tbl_lock);
1190 tbl->next = neigh_tables;
1192 write_unlock(&neigh_tbl_lock);
1195 int neigh_table_clear(struct neigh_table *tbl)
1197 struct neigh_table **tp;
1199 /* It is not clean... Fix it to unload IPv6 module safely */
1200 del_timer_sync(&tbl->gc_timer);
1201 del_timer_sync(&tbl->proxy_timer);
1202 pneigh_queue_purge(&tbl->proxy_queue);
1203 neigh_ifdown(tbl, NULL);
1205 printk(KERN_CRIT "neighbour leakage\n");
1206 write_lock(&neigh_tbl_lock);
1207 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1213 write_unlock(&neigh_tbl_lock);
1217 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1219 struct ndmsg *ndm = NLMSG_DATA(nlh);
1220 struct rtattr **nda = arg;
1221 struct neigh_table *tbl;
1222 struct net_device *dev = NULL;
1225 if (ndm->ndm_ifindex &&
1226 (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
1229 read_lock(&neigh_tbl_lock);
1230 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1231 struct neighbour *n;
1233 if (tbl->family != ndm->ndm_family)
1235 read_unlock(&neigh_tbl_lock);
1238 if (!nda[NDA_DST - 1] ||
1239 nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
1242 if (ndm->ndm_flags & NTF_PROXY) {
1243 err = pneigh_delete(tbl,
1244 RTA_DATA(nda[NDA_DST - 1]), dev);
1251 n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
1253 err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
1258 read_unlock(&neigh_tbl_lock);
1259 err = -EADDRNOTAVAIL;
1267 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1269 struct ndmsg *ndm = NLMSG_DATA(nlh);
1270 struct rtattr **nda = arg;
1271 struct neigh_table *tbl;
1272 struct net_device *dev = NULL;
1275 if (ndm->ndm_ifindex &&
1276 (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
1279 read_lock(&neigh_tbl_lock);
1280 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1282 struct neighbour *n;
1284 if (tbl->family != ndm->ndm_family)
1286 read_unlock(&neigh_tbl_lock);
1289 if (!nda[NDA_DST - 1] ||
1290 nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
1292 if (ndm->ndm_flags & NTF_PROXY) {
1294 if (pneigh_lookup(tbl,
1295 RTA_DATA(nda[NDA_DST - 1]), dev, 1))
1302 if (nda[NDA_LLADDR - 1] &&
1303 nda[NDA_LLADDR - 1]->rta_len != RTA_LENGTH(dev->addr_len))
1306 n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
1308 if (nlh->nlmsg_flags & NLM_F_EXCL)
1310 override = nlh->nlmsg_flags & NLM_F_REPLACE;
1311 } else if (!(nlh->nlmsg_flags & NLM_F_CREATE))
1314 n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST - 1]),
1322 err = neigh_update(n, nda[NDA_LLADDR - 1] ?
1323 RTA_DATA(nda[NDA_LLADDR - 1]) :
1333 read_unlock(&neigh_tbl_lock);
1334 err = -EADDRNOTAVAIL;
1343 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
1344 u32 pid, u32 seq, int event)
1346 unsigned long now = jiffies;
1347 unsigned char *b = skb->tail;
1348 struct nda_cacheinfo ci;
1350 struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
1351 sizeof(struct ndmsg));
1352 struct ndmsg *ndm = NLMSG_DATA(nlh);
1354 ndm->ndm_family = n->ops->family;
1355 ndm->ndm_flags = n->flags;
1356 ndm->ndm_type = n->type;
1357 ndm->ndm_ifindex = n->dev->ifindex;
1358 RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
1359 read_lock_bh(&n->lock);
1361 ndm->ndm_state = n->nud_state;
1362 if (n->nud_state & NUD_VALID)
1363 RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
1364 ci.ndm_used = now - n->used;
1365 ci.ndm_confirmed = now - n->confirmed;
1366 ci.ndm_updated = now - n->updated;
1367 ci.ndm_refcnt = atomic_read(&n->refcnt) - 1;
1368 read_unlock_bh(&n->lock);
1370 RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1371 nlh->nlmsg_len = skb->tail - b;
1377 read_unlock_bh(&n->lock);
1378 skb_trim(skb, b - skb->data);
1383 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
1384 struct netlink_callback *cb)
1386 struct neighbour *n;
1387 int rc, h, s_h = cb->args[1];
1388 int idx, s_idx = idx = cb->args[2];
1390 for (h = 0; h <= NEIGH_HASHMASK; h++) {
1395 read_lock_bh(&tbl->lock);
1396 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
1399 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
1401 RTM_NEWNEIGH) <= 0) {
1402 read_unlock_bh(&tbl->lock);
1407 read_unlock_bh(&tbl->lock);
1416 int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1418 struct neigh_table *tbl;
1421 read_lock(&neigh_tbl_lock);
1422 family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
1425 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
1426 if (t < s_t || (family && tbl->family != family))
1429 memset(&cb->args[1], 0, sizeof(cb->args) -
1430 sizeof(cb->args[0]));
1431 if (neigh_dump_table(tbl, skb, cb) < 0)
1434 read_unlock(&neigh_tbl_lock);
1441 void neigh_app_ns(struct neighbour *n)
1443 struct nlmsghdr *nlh;
1444 int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
1445 struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
1450 if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
1454 nlh = (struct nlmsghdr *)skb->data;
1455 nlh->nlmsg_flags = NLM_F_REQUEST;
1456 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
1457 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
1460 static void neigh_app_notify(struct neighbour *n)
1462 struct nlmsghdr *nlh;
1463 int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
1464 struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
1469 if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
1473 nlh = (struct nlmsghdr *)skb->data;
1474 NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
1475 netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
1478 #endif /* CONFIG_ARPD */
1480 #ifdef CONFIG_SYSCTL
1482 static struct neigh_sysctl_table {
1483 struct ctl_table_header *sysctl_header;
1484 ctl_table neigh_vars[17];
1485 ctl_table neigh_dev[2];
1486 ctl_table neigh_neigh_dir[2];
1487 ctl_table neigh_proto_dir[2];
1488 ctl_table neigh_root_dir[2];
1489 } neigh_sysctl_template = {
1492 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
1493 .procname = "mcast_solicit",
1494 .maxlen = sizeof(int),
1496 .proc_handler = &proc_dointvec,
1499 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
1500 .procname = "ucast_solicit",
1501 .maxlen = sizeof(int),
1503 .proc_handler = &proc_dointvec,
1506 .ctl_name = NET_NEIGH_APP_SOLICIT,
1507 .procname = "app_solicit",
1508 .maxlen = sizeof(int),
1510 .proc_handler = &proc_dointvec,
1513 .ctl_name = NET_NEIGH_RETRANS_TIME,
1514 .procname = "retrans_time",
1515 .maxlen = sizeof(int),
1517 .proc_handler = &proc_dointvec_userhz_jiffies,
1520 .ctl_name = NET_NEIGH_REACHABLE_TIME,
1521 .procname = "base_reachable_time",
1522 .maxlen = sizeof(int),
1524 .proc_handler = &proc_dointvec_jiffies,
1525 .strategy = &sysctl_jiffies,
1528 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
1529 .procname = "delay_first_probe_time",
1530 .maxlen = sizeof(int),
1532 .proc_handler = &proc_dointvec_jiffies,
1533 .strategy = &sysctl_jiffies,
1536 .ctl_name = NET_NEIGH_GC_STALE_TIME,
1537 .procname = "gc_stale_time",
1538 .maxlen = sizeof(int),
1540 .proc_handler = &proc_dointvec_jiffies,
1541 .strategy = &sysctl_jiffies,
1544 .ctl_name = NET_NEIGH_UNRES_QLEN,
1545 .procname = "unres_qlen",
1546 .maxlen = sizeof(int),
1548 .proc_handler = &proc_dointvec,
1551 .ctl_name = NET_NEIGH_PROXY_QLEN,
1552 .procname = "proxy_qlen",
1553 .maxlen = sizeof(int),
1555 .proc_handler = &proc_dointvec,
1558 .ctl_name = NET_NEIGH_ANYCAST_DELAY,
1559 .procname = "anycast_delay",
1560 .maxlen = sizeof(int),
1562 .proc_handler = &proc_dointvec_userhz_jiffies,
1565 .ctl_name = NET_NEIGH_PROXY_DELAY,
1566 .procname = "proxy_delay",
1567 .maxlen = sizeof(int),
1569 .proc_handler = &proc_dointvec_userhz_jiffies,
1572 .ctl_name = NET_NEIGH_LOCKTIME,
1573 .procname = "locktime",
1574 .maxlen = sizeof(int),
1576 .proc_handler = &proc_dointvec_userhz_jiffies,
1579 .ctl_name = NET_NEIGH_GC_INTERVAL,
1580 .procname = "gc_interval",
1581 .maxlen = sizeof(int),
1583 .proc_handler = &proc_dointvec_jiffies,
1584 .strategy = &sysctl_jiffies,
1587 .ctl_name = NET_NEIGH_GC_THRESH1,
1588 .procname = "gc_thresh1",
1589 .maxlen = sizeof(int),
1591 .proc_handler = &proc_dointvec,
1594 .ctl_name = NET_NEIGH_GC_THRESH2,
1595 .procname = "gc_thresh2",
1596 .maxlen = sizeof(int),
1598 .proc_handler = &proc_dointvec,
1601 .ctl_name = NET_NEIGH_GC_THRESH3,
1602 .procname = "gc_thresh3",
1603 .maxlen = sizeof(int),
1605 .proc_handler = &proc_dointvec,
1610 .ctl_name = NET_PROTO_CONF_DEFAULT,
1611 .procname = "default",
1615 .neigh_neigh_dir = {
1617 .procname = "neigh",
1621 .neigh_proto_dir = {
1628 .ctl_name = CTL_NET,
1635 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
1636 int p_id, int pdev_id, char *p_name,
1637 proc_handler *handler)
1639 struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
1640 const char *dev_name_source = NULL;
1641 char *dev_name = NULL;
1646 memcpy(t, &neigh_sysctl_template, sizeof(*t));
1647 t->neigh_vars[0].data = &p->mcast_probes;
1648 t->neigh_vars[1].data = &p->ucast_probes;
1649 t->neigh_vars[2].data = &p->app_probes;
1650 t->neigh_vars[3].data = &p->retrans_time;
1652 t->neigh_vars[3].proc_handler = handler;
1653 t->neigh_vars[3].extra1 = dev;
1655 t->neigh_vars[4].data = &p->base_reachable_time;
1656 t->neigh_vars[5].data = &p->delay_probe_time;
1657 t->neigh_vars[6].data = &p->gc_staletime;
1658 t->neigh_vars[7].data = &p->queue_len;
1659 t->neigh_vars[8].data = &p->proxy_qlen;
1660 t->neigh_vars[9].data = &p->anycast_delay;
1661 t->neigh_vars[10].data = &p->proxy_delay;
1662 t->neigh_vars[11].data = &p->locktime;
1664 dev_name_source = t->neigh_dev[0].procname;
1666 dev_name_source = dev->name;
1667 t->neigh_dev[0].ctl_name = dev->ifindex;
1668 memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
1670 t->neigh_vars[12].data = (int *)(p + 1);
1671 t->neigh_vars[13].data = (int *)(p + 1) + 1;
1672 t->neigh_vars[14].data = (int *)(p + 1) + 2;
1673 t->neigh_vars[15].data = (int *)(p + 1) + 3;
1676 dev_name = net_sysctl_strdup(dev_name_source);
1682 t->neigh_dev[0].procname = dev_name;
1684 t->neigh_neigh_dir[0].ctl_name = pdev_id;
1686 t->neigh_proto_dir[0].procname = p_name;
1687 t->neigh_proto_dir[0].ctl_name = p_id;
1689 t->neigh_dev[0].child = t->neigh_vars;
1690 t->neigh_neigh_dir[0].child = t->neigh_dev;
1691 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
1692 t->neigh_root_dir[0].child = t->neigh_proto_dir;
1694 t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
1695 if (!t->sysctl_header) {
1699 p->sysctl_table = t;
1711 void neigh_sysctl_unregister(struct neigh_parms *p)
1713 if (p->sysctl_table) {
1714 struct neigh_sysctl_table *t = p->sysctl_table;
1715 p->sysctl_table = NULL;
1716 unregister_sysctl_table(t->sysctl_header);
1717 kfree(t->neigh_dev[0].procname);
1722 #endif /* CONFIG_SYSCTL */
1724 EXPORT_SYMBOL(__neigh_event_send);
1725 EXPORT_SYMBOL(neigh_add);
1726 EXPORT_SYMBOL(neigh_changeaddr);
1727 EXPORT_SYMBOL(neigh_compat_output);
1728 EXPORT_SYMBOL(neigh_connected_output);
1729 EXPORT_SYMBOL(neigh_create);
1730 EXPORT_SYMBOL(neigh_delete);
1731 EXPORT_SYMBOL(neigh_destroy);
1732 EXPORT_SYMBOL(neigh_dump_info);
1733 EXPORT_SYMBOL(neigh_event_ns);
1734 EXPORT_SYMBOL(neigh_ifdown);
1735 EXPORT_SYMBOL(neigh_lookup);
1736 EXPORT_SYMBOL(neigh_parms_alloc);
1737 EXPORT_SYMBOL(neigh_parms_release);
1738 EXPORT_SYMBOL(neigh_rand_reach_time);
1739 EXPORT_SYMBOL(neigh_resolve_output);
1740 EXPORT_SYMBOL(neigh_table_clear);
1741 EXPORT_SYMBOL(neigh_table_init);
1742 EXPORT_SYMBOL(neigh_update);
1743 EXPORT_SYMBOL(neigh_update_hhs);
1744 EXPORT_SYMBOL(pneigh_enqueue);
1745 EXPORT_SYMBOL(pneigh_lookup);
1748 EXPORT_SYMBOL(neigh_app_ns);
1750 #ifdef CONFIG_SYSCTL
1751 EXPORT_SYMBOL(neigh_sysctl_register);
1752 EXPORT_SYMBOL(neigh_sysctl_unregister);