fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / net / core / neighbour.c
index ee410cb..e7300b6 100644 (file)
  *
  *     Fixes:
  *     Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
+ *     Harald Welte            Add neighbour cache statistics like rtstat
  */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/sched.h>
 #include <linux/netdevice.h>
+#include <linux/proc_fs.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/sock.h>
+#include <net/netevent.h>
+#include <net/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/random.h>
+#include <linux/string.h>
 
 #define NEIGH_DEBUG 1
 
@@ -47,6 +52,8 @@
 #define NEIGH_PRINTK2 NEIGH_PRINTK
 #endif
 
+#define PNEIGH_HASHMASK                0xF
+
 static void neigh_timer_handler(unsigned long arg);
 #ifdef CONFIG_ARPD
 static void neigh_app_notify(struct neighbour *n);
@@ -54,8 +61,10 @@ static void neigh_app_notify(struct neighbour *n);
 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 
-static int neigh_glbl_allocs;
 static struct neigh_table *neigh_tables;
+#ifdef CONFIG_PROC_FS
+static struct file_operations neigh_stat_seq_fops;
+#endif
 
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -88,7 +97,7 @@ static struct neigh_table *neigh_tables;
    list of neighbour tables. This list is used only in process context,
  */
 
-static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(neigh_tbl_lock);
 
 static int neigh_blackhole(struct sk_buff *skb)
 {
@@ -104,7 +113,7 @@ static int neigh_blackhole(struct sk_buff *skb)
 
 unsigned long neigh_rand_reach_time(unsigned long base)
 {
-       return (net_random() % base) + (base >> 1);
+       /* Adds a net_random()-derived offset smaller than base to
+        * (base >> 1).  A zero base returns 0 directly, so the modulo
+        * is never evaluated with a zero divisor as it was before.
+        */
+       return (base ? (net_random() % base) + (base >> 1) : 0);
 }
 
 
@@ -113,27 +122,21 @@ static int neigh_forced_gc(struct neigh_table *tbl)
        int shrunk = 0;
        int i;
 
-       for (i = 0; i <= NEIGH_HASHMASK; i++) {
+       NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+       write_lock_bh(&tbl->lock);
+       for (i = 0; i <= tbl->hash_mask; i++) {
                struct neighbour *n, **np;
 
                np = &tbl->hash_buckets[i];
-               write_lock_bh(&tbl->lock);
                while ((n = *np) != NULL) {
                        /* Neighbour record may be discarded if:
-                          - nobody refers to it.
-                          - it is not permanent
-                          - (NEW and probably wrong)
-                            INCOMPLETE entries are kept at least for
-                            n->parms->retrans_time, otherwise we could
-                            flood network with resolution requests.
-                            It is not clear, what is better table overflow
-                            or flooding.
+                        * - nobody refers to it.
+                        * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
-                           !(n->nud_state & NUD_PERMANENT) &&
-                           (n->nud_state != NUD_INCOMPLETE ||
-                            jiffies - n->used > n->parms->retrans_time)) {
+                           !(n->nud_state & NUD_PERMANENT)) {
                                *np     = n->next;
                                n->dead = 1;
                                shrunk  = 1;
@@ -144,10 +147,12 @@ static int neigh_forced_gc(struct neigh_table *tbl)
                        write_unlock(&n->lock);
                        np = &n->next;
                }
-               write_unlock_bh(&tbl->lock);
        }
 
        tbl->last_flush = jiffies;
+
+       write_unlock_bh(&tbl->lock);
+
        return shrunk;
 }
 
@@ -171,40 +176,11 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
        }
 }
 
-void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
-{
-       int i;
-
-       write_lock_bh(&tbl->lock);
-
-       for (i=0; i <= NEIGH_HASHMASK; i++) {
-               struct neighbour *n, **np;
-
-               np = &tbl->hash_buckets[i];
-               while ((n = *np) != NULL) {
-                       if (dev && n->dev != dev) {
-                               np = &n->next;
-                               continue;
-                       }
-                       *np = n->next;
-                       write_lock_bh(&n->lock);
-                       n->dead = 1;
-                       neigh_del_timer(n);
-                       write_unlock_bh(&n->lock);
-                       neigh_release(n);
-               }
-       }
-
-        write_unlock_bh(&tbl->lock);
-}
-
-int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 {
        int i;
 
-       write_lock_bh(&tbl->lock);
-
-       for (i = 0; i <= NEIGH_HASHMASK; i++) {
+       for (i = 0; i <= tbl->hash_mask; i++) {
                struct neighbour *n, **np = &tbl->hash_buckets[i];
 
                while ((n = *np) != NULL) {
@@ -227,7 +203,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
-                               n->parms = &tbl->parms;
                                skb_queue_purge(&n->arp_queue);
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
@@ -240,7 +215,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
                        neigh_release(n);
                }
        }
+}
+
+/* Run neigh_flush_dev() over tbl for dev with tbl->lock write-held
+ * (bottom halves disabled).  Unlike neigh_ifdown(), this does not
+ * call pneigh_ifdown() afterwards.
+ */
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+       write_lock_bh(&tbl->lock);
+       neigh_flush_dev(tbl, dev);
+       write_unlock_bh(&tbl->lock);
+}
 
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+       write_lock_bh(&tbl->lock);
+       neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);
 
@@ -253,38 +240,103 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 {
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
+       int entries;
 
-       if (tbl->entries > tbl->gc_thresh3 ||
-           (tbl->entries > tbl->gc_thresh2 &&
-            now - tbl->last_flush > 5 * HZ)) {
+       entries = atomic_inc_return(&tbl->entries) - 1;
+       if (entries >= tbl->gc_thresh3 ||
+           (entries >= tbl->gc_thresh2 &&
+            time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
-                   tbl->entries > tbl->gc_thresh3)
-                       goto out;
+                   entries >= tbl->gc_thresh3)
+                       goto out_entries;
        }
 
-       n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
+       n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC);
        if (!n)
-               goto out;
+               goto out_entries;
 
        memset(n, 0, tbl->entry_size);
 
        skb_queue_head_init(&n->arp_queue);
-       n->lock           = RW_LOCK_UNLOCKED;
+       rwlock_init(&n->lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
-       n->parms          = &tbl->parms;
+       n->parms          = neigh_parms_clone(&tbl->parms);
        init_timer(&n->timer);
        n->timer.function = neigh_timer_handler;
        n->timer.data     = (unsigned long)n;
-       tbl->stats.allocs++;
-       neigh_glbl_allocs++;
-       tbl->entries++;
+
+       NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;
 out:
        return n;
+
+out_entries:
+       atomic_dec(&tbl->entries);
+       goto out;
+}
+
+/* Allocate a zero-filled array of 'entries' bucket head pointers.
+ *
+ * Arrays of at most PAGE_SIZE bytes come from kzalloc(); larger ones
+ * come from __get_free_pages() with __GFP_ZERO.  Both paths use
+ * GFP_ATOMIC.  The allocator's result is returned unchanged; the caller
+ * in neigh_hash_grow() treats a NULL return as failure.
+ *
+ * Release with neigh_hash_free() using the same 'entries' value, since
+ * that function repeats the size test below to pick the matching free
+ * routine.
+ */
+static struct neighbour **neigh_hash_alloc(unsigned int entries)
+{
+       unsigned long size = entries * sizeof(struct neighbour *);
+       struct neighbour **ret;
+
+       if (size <= PAGE_SIZE) {
+               ret = kzalloc(size, GFP_ATOMIC);
+       } else {
+               ret = (struct neighbour **)
+                     __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
+       }
+       return ret;
+}
+
+/* Free a bucket array obtained from neigh_hash_alloc().
+ *
+ * 'entries' must be the count the array was allocated with: the byte
+ * size derived from it selects kfree() versus free_pages(), mirroring
+ * the choice made at allocation time.
+ */
+static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+{
+       unsigned long size = entries * sizeof(struct neighbour *);
+
+       if (size <= PAGE_SIZE)
+               kfree(hash);
+       else
+               free_pages((unsigned long)hash, get_order(size));
+}
+
+/* Replace tbl->hash_buckets with a new array of 'new_entries' buckets
+ * and move every existing neighbour into it.
+ *
+ * 'new_entries' must be a power of two (enforced by BUG_ON) because
+ * new_entries - 1 is used as the bucket mask.  If the new array cannot
+ * be allocated the function returns with the table untouched; the
+ * hash_grows statistic has already been incremented by then.
+ *
+ * The visible caller, neigh_create(), invokes this with tbl->lock
+ * write-locked.
+ */
+static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+{
+       struct neighbour **new_hash, **old_hash;
+       unsigned int i, new_hash_mask, old_entries;
+
+       NEIGH_CACHE_STAT_INC(tbl, hash_grows);
+
+       BUG_ON(new_entries & (new_entries - 1));
+       new_hash = neigh_hash_alloc(new_entries);
+       if (!new_hash)
+               return;
+
+       old_entries = tbl->hash_mask + 1;
+       new_hash_mask = new_entries - 1;
+       old_hash = tbl->hash_buckets;
+
+       /* Re-seed hash_rnd before rehashing.
+        * NOTE(review): presumably tbl->hash() mixes in tbl->hash_rnd;
+        * the hash callbacks are not visible here - confirm.
+        */
+       get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+       for (i = 0; i < old_entries; i++) {
+               struct neighbour *n, *next;
+
+               for (n = old_hash[i]; n; n = next) {
+                       unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
+
+                       hash_val &= new_hash_mask;
+                       /* Save the old successor before relinking n. */
+                       next = n->next;
+
+                       /* Push n onto the head of its new chain. */
+                       n->next = new_hash[hash_val];
+                       new_hash[hash_val] = n;
+               }
+       }
+       tbl->hash_buckets = new_hash;
+       tbl->hash_mask = new_hash_mask;
+
+       neigh_hash_free(old_hash, old_entries);
+}
 
 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
@@ -293,11 +345,34 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val = tbl->hash(pkey, dev);
+       
+       NEIGH_CACHE_STAT_INC(tbl, lookups);
 
        read_lock_bh(&tbl->lock);
-       for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+       for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
+                       NEIGH_CACHE_STAT_INC(tbl, hits);
+                       break;
+               }
+       }
+       read_unlock_bh(&tbl->lock);
+       return n;
+}
+
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+{
+       struct neighbour *n;
+       int key_len = tbl->key_len;
+       u32 hash_val = tbl->hash(pkey, NULL);
+
+       NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+       read_lock_bh(&tbl->lock);
+       for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+               if (!memcmp(n->primary_key, pkey, key_len)) {
+                       neigh_hold(n);
+                       NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }
@@ -337,15 +412,23 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 
        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
-       hash_val = tbl->hash(pkey, dev);
-
        write_lock_bh(&tbl->lock);
+
+       if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
+               neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+
+       hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+
+       if (n->parms->dead) {
+               rc = ERR_PTR(-EINVAL);
+               goto out_tbl_unlock;
+       }
+
        for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        neigh_hold(n1);
-                       write_unlock_bh(&tbl->lock);
                        rc = n1;
-                       goto out_neigh_release;
+                       goto out_tbl_unlock;
                }
        }
 
@@ -358,6 +441,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
        rc = n;
 out:
        return rc;
+out_tbl_unlock:
+       write_unlock_bh(&tbl->lock);
 out_neigh_release:
        neigh_release(n);
        goto out;
@@ -395,8 +480,12 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
 
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
+       if (dev)
+               dev_hold(dev);
 
        if (tbl->pconstructor && tbl->pconstructor(n)) {
+               if (dev)
+                       dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
@@ -423,18 +512,21 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey,
        hash_val ^= hash_val >> 4;
        hash_val &= PNEIGH_HASHMASK;
 
+       write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
-                       write_lock_bh(&tbl->lock);
                        *np = n->next;
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
+                       if (n->dev)
+                               dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
        }
+       write_unlock_bh(&tbl->lock);
        return -ENOENT;
 }
 
@@ -450,6 +542,8 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
                                *np = n->next;
                                if (tbl->pdestructor)
                                        tbl->pdestructor(n);
+                               if (n->dev)
+                                       dev_put(n->dev);
                                kfree(n);
                                continue;
                        }
@@ -468,6 +562,8 @@ void neigh_destroy(struct neighbour *neigh)
 {
        struct hh_cache *hh;
 
+       NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
+
        if (!neigh->dead) {
                printk(KERN_WARNING
                       "Destroying alive neighbour %p\n", neigh);
@@ -481,24 +577,25 @@ void neigh_destroy(struct neighbour *neigh)
        while ((hh = neigh->hh) != NULL) {
                neigh->hh = hh->hh_next;
                hh->hh_next = NULL;
-               write_lock_bh(&hh->hh_lock);
+
+               write_seqlock_bh(&hh->hh_lock);
                hh->hh_output = neigh_blackhole;
-               write_unlock_bh(&hh->hh_lock);
+               write_sequnlock_bh(&hh->hh_lock);
                if (atomic_dec_and_test(&hh->hh_refcnt))
                        kfree(hh);
        }
 
-       if (neigh->ops && neigh->ops->destructor)
-               (neigh->ops->destructor)(neigh);
+       if (neigh->parms->neigh_destructor)
+               (neigh->parms->neigh_destructor)(neigh);
 
        skb_queue_purge(&neigh->arp_queue);
 
        dev_put(neigh->dev);
+       neigh_parms_put(neigh->parms);
 
        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
 
-       neigh_glbl_allocs--;
-       neigh->tbl->entries--;
+       atomic_dec(&neigh->tbl->entries);
        kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
 }
 
@@ -536,46 +633,13 @@ static void neigh_connect(struct neighbour *neigh)
                hh->hh_output = neigh->ops->hh_output;
 }
 
-/*
-   Transitions NUD_STALE <-> NUD_REACHABLE do not occur
-   when fast path is built: we have no timers associated with
-   these states, we do not have time to check state when sending.
-   neigh_periodic_timer check periodically neigh->confirmed
-   time and moves NUD_REACHABLE -> NUD_STALE.
-
-   If a routine wants to know TRUE entry state, it calls
-   neigh_sync before checking state.
-
-   Called with write_locked neigh.
- */
-
-static void neigh_sync(struct neighbour *n)
-{
-       unsigned long now = jiffies;
-       u8 state = n->nud_state;
-
-       if (state & (NUD_NOARP | NUD_PERMANENT))
-               return;
-       if (state & NUD_REACHABLE) {
-               if (now - n->confirmed > n->parms->reachable_time) {
-                       n->nud_state = NUD_STALE;
-                       neigh_suspect(n);
-               }
-       } else if (state & NUD_VALID) {
-               if (now - n->confirmed < n->parms->reachable_time) {
-                       neigh_del_timer(n);
-                       n->nud_state = NUD_REACHABLE;
-                       neigh_connect(n);
-               }
-       }
-}
-
 static void neigh_periodic_timer(unsigned long arg)
 {
        struct neigh_table *tbl = (struct neigh_table *)arg;
-       unsigned long now = jiffies;
-       int i;
+       struct neighbour *n, **np;
+       unsigned long expire, now = jiffies;
 
+       NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 
        write_lock(&tbl->lock);
 
@@ -583,7 +647,7 @@ static void neigh_periodic_timer(unsigned long arg)
         *      periodically recompute ReachableTime from random function
         */
 
-       if (now - tbl->last_rand > 300 * HZ) {
+       if (time_after(now, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = now;
                for (p = &tbl->parms; p; p = p->next)
@@ -591,47 +655,49 @@ static void neigh_periodic_timer(unsigned long arg)
                                neigh_rand_reach_time(p->base_reachable_time);
        }
 
-       for (i = 0; i <= NEIGH_HASHMASK; i++) {
-               struct neighbour *n, **np;
-
-               np = &tbl->hash_buckets[i];
-               while ((n = *np) != NULL) {
-                       unsigned state;
+       np = &tbl->hash_buckets[tbl->hash_chain_gc];
+       tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
 
-                       write_lock(&n->lock);
+       while ((n = *np) != NULL) {
+               unsigned int state;
 
-                       state = n->nud_state;
-                       if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
-                               write_unlock(&n->lock);
-                               goto next_elt;
-                       }
+               write_lock(&n->lock);
 
-                       if ((long)(n->used - n->confirmed) < 0)
-                               n->used = n->confirmed;
+               state = n->nud_state;
+               if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+                       write_unlock(&n->lock);
+                       goto next_elt;
+               }
 
-                       if (atomic_read(&n->refcnt) == 1 &&
-                           (state == NUD_FAILED ||
-                            now - n->used > n->parms->gc_staletime)) {
-                               *np = n->next;
-                               n->dead = 1;
-                               write_unlock(&n->lock);
-                               neigh_release(n);
-                               continue;
-                       }
+               if (time_before(n->used, n->confirmed))
+                       n->used = n->confirmed;
 
-                       if (n->nud_state & NUD_REACHABLE &&
-                           now - n->confirmed > n->parms->reachable_time) {
-                               n->nud_state = NUD_STALE;
-                               neigh_suspect(n);
-                       }
+               if (atomic_read(&n->refcnt) == 1 &&
+                   (state == NUD_FAILED ||
+                    time_after(now, n->used + n->parms->gc_staletime))) {
+                       *np = n->next;
+                       n->dead = 1;
                        write_unlock(&n->lock);
+                       neigh_release(n);
+                       continue;
+               }
+               write_unlock(&n->lock);
 
 next_elt:
-                       np = &n->next;
-               }
+               np = &n->next;
        }
 
-       mod_timer(&tbl->gc_timer, now + tbl->gc_interval);
+       /* Cycle through all hash buckets every base_reachable_time/2 ticks.
+        * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
+        * base_reachable_time.
+        */
+       expire = tbl->parms.base_reachable_time >> 1;
+       expire /= (tbl->hash_mask + 1);
+       if (!expire)
+               expire = 1;
+
+       mod_timer(&tbl->gc_timer, now + expire);
+
        write_unlock(&tbl->lock);
 }
 
@@ -643,12 +709,20 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
                p->ucast_probes + p->app_probes + p->mcast_probes);
 }
 
+/* Arm n->timer to expire at 'when' using mod_timer().
+ *
+ * A non-zero return from mod_timer() is reported as a bug ("double
+ * timer add"): the current nud_state is printed and the stack dumped.
+ * The timer is still (re)armed in that case, since mod_timer() has
+ * already run.
+ */
+static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
+{
+       if (unlikely(mod_timer(&n->timer, when))) {
+               printk("NEIGH: BUG, double timer add, state is %x\n",
+                      n->nud_state);
+               dump_stack();
+       }
+}
 
 /* Called when a timer expires for a neighbour entry. */
 
 static void neigh_timer_handler(unsigned long arg)
 {
-       unsigned long now = jiffies;
+       unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned state;
        int notify = 0;
@@ -656,6 +730,8 @@ static void neigh_timer_handler(unsigned long arg)
        write_lock(&neigh->lock);
 
        state = neigh->nud_state;
+       now = jiffies;
+       next = now + HZ;
 
        if (!(state & NUD_IN_TIMER)) {
 #ifndef CONFIG_SMP
@@ -664,25 +740,54 @@ static void neigh_timer_handler(unsigned long arg)
                goto out;
        }
 
-       if ((state & NUD_VALID) &&
-           now - neigh->confirmed < neigh->parms->reachable_time) {
-               neigh->nud_state = NUD_REACHABLE;
-               NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
-               neigh_connect(neigh);
-               goto out;
-       }
-       if (state == NUD_DELAY) {
-               NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
-               neigh->nud_state = NUD_PROBE;
-               atomic_set(&neigh->probes, 0);
+       if (state & NUD_REACHABLE) {
+               if (time_before_eq(now, 
+                                  neigh->confirmed + neigh->parms->reachable_time)) {
+                       NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+                       next = neigh->confirmed + neigh->parms->reachable_time;
+               } else if (time_before_eq(now,
+                                         neigh->used + neigh->parms->delay_probe_time)) {
+                       NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+                       neigh->nud_state = NUD_DELAY;
+                       neigh->updated = jiffies;
+                       neigh_suspect(neigh);
+                       next = now + neigh->parms->delay_probe_time;
+               } else {
+                       NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+                       neigh->nud_state = NUD_STALE;
+                       neigh->updated = jiffies;
+                       neigh_suspect(neigh);
+                       notify = 1;
+               }
+       } else if (state & NUD_DELAY) {
+               if (time_before_eq(now, 
+                                  neigh->confirmed + neigh->parms->delay_probe_time)) {
+                       NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+                       neigh->nud_state = NUD_REACHABLE;
+                       neigh->updated = jiffies;
+                       neigh_connect(neigh);
+                       notify = 1;
+                       next = neigh->confirmed + neigh->parms->reachable_time;
+               } else {
+                       NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+                       neigh->nud_state = NUD_PROBE;
+                       neigh->updated = jiffies;
+                       atomic_set(&neigh->probes, 0);
+                       next = now + neigh->parms->retrans_time;
+               }
+       } else {
+               /* NUD_PROBE|NUD_INCOMPLETE */
+               next = now + neigh->parms->retrans_time;
        }
 
-       if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
+       if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
+           atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                struct sk_buff *skb;
 
                neigh->nud_state = NUD_FAILED;
+               neigh->updated = jiffies;
                notify = 1;
-               neigh->tbl->stats.res_failed++;
+               NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
                NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
 
                /* It is very thin place. report_unreachable is very complicated
@@ -697,19 +802,31 @@ static void neigh_timer_handler(unsigned long arg)
                        write_lock(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
-               goto out;
        }
 
-       neigh->timer.expires = now + neigh->parms->retrans_time;
-       add_timer(&neigh->timer);
-       write_unlock(&neigh->lock);
-
-       neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
-       atomic_inc(&neigh->probes);
-       return;
-
+       if (neigh->nud_state & NUD_IN_TIMER) {
+               if (time_before(next, jiffies + HZ/2))
+                       next = jiffies + HZ/2;
+               if (!mod_timer(&neigh->timer, next))
+                       neigh_hold(neigh);
+       }
+       if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
+               struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+               /* keep skb alive even if arp_queue overflows */
+               if (skb)
+                       skb_get(skb);
+               write_unlock(&neigh->lock);
+               neigh->ops->solicit(neigh, skb);
+               atomic_inc(&neigh->probes);
+               if (skb)
+                       kfree_skb(skb);
+       } else {
 out:
-       write_unlock(&neigh->lock);
+               write_unlock(&neigh->lock);
+       }
+       if (notify)
+               call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
+
 #ifdef CONFIG_ARPD
        if (notify && neigh->parms->app_probes)
                neigh_app_notify(neigh);
@@ -720,6 +837,7 @@ out:
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
        int rc;
+       unsigned long now;
 
        write_lock_bh(&neigh->lock);
 
@@ -727,26 +845,31 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;
 
+       now = jiffies;
+       
        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
+                       neigh->updated = jiffies;
                        neigh_hold(neigh);
-                       neigh->timer.expires = jiffies +
-                                              neigh->parms->retrans_time;
-                       add_timer(&neigh->timer);
-                       write_unlock_bh(&neigh->lock);
-                       neigh->ops->solicit(neigh, skb);
-                       atomic_inc(&neigh->probes);
-                       write_lock_bh(&neigh->lock);
+                       neigh_add_timer(neigh, now + 1);
                } else {
                        neigh->nud_state = NUD_FAILED;
+                       neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);
 
                        if (skb)
                                kfree_skb(skb);
                        return 1;
                }
+       } else if (neigh->nud_state & NUD_STALE) {
+               NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+               neigh_hold(neigh);
+               neigh->nud_state = NUD_DELAY;
+               neigh->updated = jiffies;
+               neigh_add_timer(neigh,
+                               jiffies + neigh->parms->delay_probe_time);
        }
 
        if (neigh->nud_state == NUD_INCOMPLETE) {
@@ -761,20 +884,13 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
                        __skb_queue_tail(&neigh->arp_queue, skb);
                }
                rc = 1;
-       } else if (neigh->nud_state == NUD_STALE) {
-               NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
-               neigh_hold(neigh);
-               neigh->nud_state = NUD_DELAY;
-               neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
-               add_timer(&neigh->timer);
-               rc = 0;
        }
 out_unlock_bh:
        write_unlock_bh(&neigh->lock);
        return rc;
 }
 
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
 {
        struct hh_cache *hh;
        void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -782,9 +898,9 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh)
 
        if (update) {
                for (hh = neigh->hh; hh; hh = hh->hh_next) {
-                       write_lock_bh(&hh->hh_lock);
+                       write_seqlock_bh(&hh->hh_lock);
                        update(hh, neigh->dev, neigh->ha);
-                       write_unlock_bh(&hh->hh_lock);
+                       write_sequnlock_bh(&hh->hh_lock);
                }
        }
 }
@@ -794,21 +910,32 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh)
 /* Generic update routine.
    -- lladdr is new lladdr or NULL, if it is not supplied.
    -- new    is new state.
-   -- override == 1 allows to override existing lladdr, if it is different.
-   -- arp == 0 means that the change is administrative.
+   -- flags
+       NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
+                               if it is different.
+       NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
+                               lladdr instead of overriding it 
+                               if it is different.
+                               It also allows to retain current state
+                               if lladdr is unchanged.
+       NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
+
+       NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing 
+                               NTF_ROUTER flag.
+       NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
+                               a router.
 
    Caller MUST hold reference count on the entry.
  */
 
 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
-                int override, int arp)
+                u32 flags)
 {
        u8 old;
        int err;
-#ifdef CONFIG_ARPD
        int notify = 0;
-#endif
        struct net_device *dev;
+       int update_isrouter = 0;
 
        write_lock_bh(&neigh->lock);
 
@@ -816,7 +943,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
        old    = neigh->nud_state;
        err    = -EPERM;
 
-       if (arp && (old & (NUD_NOARP | NUD_PERMANENT)))
+       if (!(flags & NEIGH_UPDATE_F_ADMIN) && 
+           (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;
 
        if (!(new & NUD_VALID)) {
@@ -825,9 +953,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
-#ifdef CONFIG_ARPD
                notify = old & NUD_VALID;
-#endif
                goto out;
        }
 
@@ -841,12 +967,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                   - compare new & old
                   - if they are different, check override flag
                 */
-               if (old & NUD_VALID) {
-                       if (!memcmp(lladdr, neigh->ha, dev->addr_len))
-                               lladdr = neigh->ha;
-                       else if (!override)
-                               goto out;
-               }
+               if ((old & NUD_VALID) && 
+                   !memcmp(lladdr, neigh->ha, dev->addr_len))
+                       lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
@@ -857,8 +980,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                lladdr = neigh->ha;
        }
 
-       neigh_sync(neigh);
-       old = neigh->nud_state;
        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;
@@ -867,21 +988,44 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
           do not change entry state, if new one is STALE.
         */
        err = 0;
-       if ((old & NUD_VALID) && lladdr == neigh->ha &&
-           (new == old || (new == NUD_STALE && (old & NUD_CONNECTED))))
-               goto out;
+       update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+       if (old & NUD_VALID) {
+               if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
+                       update_isrouter = 0;
+                       if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
+                           (old & NUD_CONNECTED)) {
+                               lladdr = neigh->ha;
+                               new = NUD_STALE;
+                       } else
+                               goto out;
+               } else {
+                       if (lladdr == neigh->ha && new == NUD_STALE &&
+                           ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
+                            (old & NUD_CONNECTED))
+                           )
+                               new = old;
+               }
+       }
+
+       if (new != old) {
+               neigh_del_timer(neigh);
+               if (new & NUD_IN_TIMER) {
+                       neigh_hold(neigh);
+                       neigh_add_timer(neigh, (jiffies + 
+                                               ((new & NUD_REACHABLE) ? 
+                                                neigh->parms->reachable_time :
+                                                0)));
+               }
+               neigh->nud_state = new;
+       }
 
-       neigh_del_timer(neigh);
-       neigh->nud_state = new;
        if (lladdr != neigh->ha) {
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
-#ifdef CONFIG_ARPD
                notify = 1;
-#endif
        }
        if (new == old)
                goto out;
@@ -907,7 +1051,15 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                skb_queue_purge(&neigh->arp_queue);
        }
 out:
+       if (update_isrouter) {
+               neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
+                       (neigh->flags | NTF_ROUTER) :
+                       (neigh->flags & ~NTF_ROUTER);
+       }
        write_unlock_bh(&neigh->lock);
+
+       if (notify)
+               call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
 #ifdef CONFIG_ARPD
        if (notify && neigh->parms->app_probes)
                neigh_app_notify(neigh);
@@ -922,12 +1074,13 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
                                                 lladdr || !dev->addr_len);
        if (neigh)
-               neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
+               neigh_update(neigh, lladdr, NUD_STALE, 
+                            NEIGH_UPDATE_F_OVERRIDE);
        return neigh;
 }
 
 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
-                         u16 protocol)
+                         __be16 protocol)
 {
        struct hh_cache *hh;
        struct net_device *dev = dst->dev;
@@ -936,9 +1089,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
                if (hh->hh_type == protocol)
                        break;
 
-       if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
-               memset(hh, 0, sizeof(struct hh_cache));
-               hh->hh_lock = RW_LOCK_UNLOCKED;
+       if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+               seqlock_init(&hh->hh_lock);
                hh->hh_type = protocol;
                atomic_set(&hh->hh_refcnt, 0);
                hh->hh_next = NULL;
@@ -1063,7 +1215,7 @@ static void neigh_proxy_process(unsigned long arg)
 
        while (skb != (struct sk_buff *)&tbl->proxy_queue) {
                struct sk_buff *back = skb;
-               long tdif = back->stamp.tv_usec - now;
+               long tdif = NEIGH_CB(back)->sched_next - now;
 
                skb = skb->next;
                if (tdif <= 0) {
@@ -1088,26 +1240,26 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
                    struct sk_buff *skb)
 {
        unsigned long now = jiffies;
-       long sched_next = net_random() % p->proxy_delay;
+       unsigned long sched_next = now + (net_random() % p->proxy_delay);
 
        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
                kfree_skb(skb);
                return;
        }
-       skb->stamp.tv_sec  = LOCALLY_ENQUEUED;
-       skb->stamp.tv_usec = now + sched_next;
+
+       NEIGH_CB(skb)->sched_next = sched_next;
+       NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
 
        spin_lock(&tbl->proxy_queue.lock);
        if (del_timer(&tbl->proxy_timer)) {
-               long tval = tbl->proxy_timer.expires - now;
-               if (tval < sched_next)
-                       sched_next = tval;
+               if (time_before(tbl->proxy_timer.expires, sched_next))
+                       sched_next = tbl->proxy_timer.expires;
        }
        dst_release(skb->dst);
        skb->dst = NULL;
        dev_hold(skb->dev);
        __skb_queue_tail(&tbl->proxy_queue, skb);
-       mod_timer(&tbl->proxy_timer, now + sched_next);
+       mod_timer(&tbl->proxy_timer, sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
 }
 
@@ -1115,16 +1267,22 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
 {
-       struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL);
+       struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
 
        if (p) {
-               memcpy(p, &tbl->parms, sizeof(*p));
                p->tbl            = tbl;
+               atomic_set(&p->refcnt, 1);
+               INIT_RCU_HEAD(&p->rcu_head);
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
-               if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) {
-                       kfree(p);
-                       return NULL;
+               if (dev) {
+                       if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
+                               kfree(p);
+                               return NULL;
+                       }
+
+                       dev_hold(dev);
+                       p->dev = dev;
                }
                p->sysctl_table = NULL;
                write_lock_bh(&tbl->lock);
@@ -1135,6 +1293,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
        return p;
 }
 
+static void neigh_rcu_free_parms(struct rcu_head *head)
+{
+       struct neigh_parms *parms =
+               container_of(head, struct neigh_parms, rcu_head);
+
+       neigh_parms_put(parms);
+}
+
 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
 {
        struct neigh_parms **p;
@@ -1145,8 +1311,11 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
+                       parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
-                       kfree(parms);
+                       if (parms->dev)
+                               dev_put(parms->dev);
+                       call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
@@ -1154,29 +1323,54 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
 }
 
+void neigh_parms_destroy(struct neigh_parms *parms)
+{
+       kfree(parms);
+}
 
-void neigh_table_init(struct neigh_table *tbl)
+void neigh_table_init_no_netlink(struct neigh_table *tbl)
 {
        unsigned long now = jiffies;
+       unsigned long phsize;
 
+       atomic_set(&tbl->parms.refcnt, 1);
+       INIT_RCU_HEAD(&tbl->parms.rcu_head);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);
 
        if (!tbl->kmem_cachep)
-               tbl->kmem_cachep = kmem_cache_create(tbl->id,
-                                                    tbl->entry_size,
-                                                    0, SLAB_HWCACHE_ALIGN,
-                                                    NULL, NULL);
+               tbl->kmem_cachep =
+                       kmem_cache_create(tbl->id, tbl->entry_size, 0,
+                                         SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+                                         NULL, NULL);
+       tbl->stats = alloc_percpu(struct neigh_statistics);
+       if (!tbl->stats)
+               panic("cannot create neighbour cache statistics");
+       
+#ifdef CONFIG_PROC_FS
+       tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
+       if (!tbl->pde) 
+               panic("cannot create neighbour proc dir entry");
+       tbl->pde->proc_fops = &neigh_stat_seq_fops;
+       tbl->pde->data = tbl;
+#endif
 
-       if (!tbl->kmem_cachep)
-               panic("cannot create neighbour cache");
+       tbl->hash_mask = 1;
+       tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
 
-       tbl->lock              = RW_LOCK_UNLOCKED;
+       phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
+       tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
+
+       if (!tbl->hash_buckets || !tbl->phash_buckets)
+               panic("cannot allocate neighbour cache hashes");
+
+       get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
+       rwlock_init(&tbl->lock);
        init_timer(&tbl->gc_timer);
        tbl->gc_timer.data     = (unsigned long)tbl;
        tbl->gc_timer.function = neigh_periodic_timer;
-       tbl->gc_timer.expires  = now + tbl->gc_interval +
-                                tbl->parms.reachable_time;
+       tbl->gc_timer.expires  = now + 1;
        add_timer(&tbl->gc_timer);
 
        init_timer(&tbl->proxy_timer);
@@ -1186,10 +1380,27 @@ void neigh_table_init(struct neigh_table *tbl)
 
        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
+}
+
+void neigh_table_init(struct neigh_table *tbl)
+{
+       struct neigh_table *tmp;
+
+       neigh_table_init_no_netlink(tbl);
        write_lock(&neigh_tbl_lock);
+       for (tmp = neigh_tables; tmp; tmp = tmp->next) {
+               if (tmp->family == tbl->family)
+                       break;
+       }
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);
+
+       if (unlikely(tmp)) {
+               printk(KERN_ERR "NEIGH: Registering multiple tables for "
+                      "family %d\n", tbl->family);
+               dump_stack();
+       }
 }
 
 int neigh_table_clear(struct neigh_table *tbl)
@@ -1201,7 +1412,7 @@ int neigh_table_clear(struct neigh_table *tbl)
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
-       if (tbl->entries)
+       if (atomic_read(&tbl->entries))
                printk(KERN_CRIT "neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
@@ -1211,52 +1422,77 @@ int neigh_table_clear(struct neigh_table *tbl)
                }
        }
        write_unlock(&neigh_tbl_lock);
+
+       neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
+       tbl->hash_buckets = NULL;
+
+       kfree(tbl->phash_buckets);
+       tbl->phash_buckets = NULL;
+
+       free_percpu(tbl->stats);
+       tbl->stats = NULL;
+
        return 0;
 }
 
 int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-       struct ndmsg *ndm = NLMSG_DATA(nlh);
-       struct rtattr **nda = arg;
+       struct ndmsg *ndm;
+       struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
-       int err = -ENODEV;
+       int err = -EINVAL;
+
+       if (nlmsg_len(nlh) < sizeof(*ndm))
+               goto out;
 
-       if (ndm->ndm_ifindex &&
-           (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+       dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
+       if (dst_attr == NULL)
                goto out;
 
+       ndm = nlmsg_data(nlh);
+       if (ndm->ndm_ifindex) {
+               dev = dev_get_by_index(ndm->ndm_ifindex);
+               if (dev == NULL) {
+                       err = -ENODEV;
+                       goto out;
+               }
+       }
+
        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-               struct neighbour *n;
+               struct neighbour *neigh;
 
                if (tbl->family != ndm->ndm_family)
                        continue;
                read_unlock(&neigh_tbl_lock);
 
-               err = -EINVAL;
-               if (!nda[NDA_DST - 1] ||
-                   nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
+               if (nla_len(dst_attr) < tbl->key_len)
                        goto out_dev_put;
 
                if (ndm->ndm_flags & NTF_PROXY) {
-                       err = pneigh_delete(tbl,
-                                           RTA_DATA(nda[NDA_DST - 1]), dev);
+                       err = pneigh_delete(tbl, nla_data(dst_attr), dev);
                        goto out_dev_put;
                }
 
-               if (!dev)
-                       goto out;
+               if (dev == NULL)
+                       goto out_dev_put;
 
-               n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
-               if (n) {
-                       err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
-                       neigh_release(n);
+               neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
+               if (neigh == NULL) {
+                       err = -ENOENT;
+                       goto out_dev_put;
                }
+
+               err = neigh_update(neigh, NULL, NUD_FAILED,
+                                  NEIGH_UPDATE_F_OVERRIDE |
+                                  NEIGH_UPDATE_F_ADMIN);
+               neigh_release(neigh);
                goto out_dev_put;
        }
        read_unlock(&neigh_tbl_lock);
-       err = -EADDRNOTAVAIL;
+       err = -EAFNOSUPPORT;
+
 out_dev_put:
        if (dev)
                dev_put(dev);
@@ -1266,72 +1502,93 @@ out:
 
 int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
-       struct ndmsg *ndm = NLMSG_DATA(nlh);
-       struct rtattr **nda = arg;
+       struct ndmsg *ndm;
+       struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
-       int err = -ENODEV;
+       int err;
 
-       if (ndm->ndm_ifindex &&
-           (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
+       err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
+       if (err < 0)
                goto out;
 
-       read_lock(&neigh_tbl_lock);
-       for (tbl = neigh_tables; tbl; tbl = tbl->next) {
-               int override = 1;
-               struct neighbour *n;
+       err = -EINVAL;
+       if (tb[NDA_DST] == NULL)
+               goto out;
 
-               if (tbl->family != ndm->ndm_family)
-                       continue;
-               read_unlock(&neigh_tbl_lock);
+       ndm = nlmsg_data(nlh);
+       if (ndm->ndm_ifindex) {
+               dev = dev_get_by_index(ndm->ndm_ifindex);
+               if (dev == NULL) {
+                       err = -ENODEV;
+                       goto out;
+               }
 
-               err = -EINVAL;
-               if (!nda[NDA_DST - 1] ||
-                   nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len))
+               if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+                       goto out_dev_put;
+       }
+
+       read_lock(&neigh_tbl_lock);
+       for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+               int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
+               struct neighbour *neigh;
+               void *dst, *lladdr;
+
+               if (tbl->family != ndm->ndm_family)
+                       continue;
+               read_unlock(&neigh_tbl_lock);
+
+               if (nla_len(tb[NDA_DST]) < tbl->key_len)
                        goto out_dev_put;
+               dst = nla_data(tb[NDA_DST]);
+               lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+
                if (ndm->ndm_flags & NTF_PROXY) {
+                       struct pneigh_entry *pn;
+
                        err = -ENOBUFS;
-                       if (pneigh_lookup(tbl,
-                                         RTA_DATA(nda[NDA_DST - 1]), dev, 1))
+                       pn = pneigh_lookup(tbl, dst, dev, 1);
+                       if (pn) {
+                               pn->flags = ndm->ndm_flags;
                                err = 0;
+                       }
                        goto out_dev_put;
                }
-               err = -EINVAL;
-               if (!dev)
-                       goto out;
-               if (nda[NDA_LLADDR - 1] &&
-                   nda[NDA_LLADDR - 1]->rta_len != RTA_LENGTH(dev->addr_len))
+
+               if (dev == NULL)
                        goto out_dev_put;
-               err = 0;
-               n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev);
-               if (n) {
-                       if (nlh->nlmsg_flags & NLM_F_EXCL)
+
+               neigh = neigh_lookup(tbl, dst, dev);
+               if (neigh == NULL) {
+                       if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+                               err = -ENOENT;
+                               goto out_dev_put;
+                       }
+       
+                       neigh = __neigh_lookup_errno(tbl, dst, dev);
+                       if (IS_ERR(neigh)) {
+                               err = PTR_ERR(neigh);
+                               goto out_dev_put;
+                       }
+               } else {
+                       if (nlh->nlmsg_flags & NLM_F_EXCL) {
                                err = -EEXIST;
-                       override = nlh->nlmsg_flags & NLM_F_REPLACE;
-               } else if (!(nlh->nlmsg_flags & NLM_F_CREATE))
-                       err = -ENOENT;
-               else {
-                       n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST - 1]),
-                                                dev);
-                       if (IS_ERR(n)) {
-                               err = PTR_ERR(n);
-                               n = NULL;
+                               neigh_release(neigh);
+                               goto out_dev_put;
                        }
+
+                       if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
+                               flags &= ~NEIGH_UPDATE_F_OVERRIDE;
                }
-               if (!err) {
-                       err = neigh_update(n, nda[NDA_LLADDR - 1] ?
-                                               RTA_DATA(nda[NDA_LLADDR - 1]) :
-                                               NULL,
-                                          ndm->ndm_state,
-                                          override, 0);
-               }
-               if (n)
-                       neigh_release(n);
+
+               err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+               neigh_release(neigh);
                goto out_dev_put;
        }
 
        read_unlock(&neigh_tbl_lock);
-       err = -EADDRNOTAVAIL;
+       err = -EAFNOSUPPORT;
+
 out_dev_put:
        if (dev)
                dev_put(dev);
@@ -1339,44 +1596,398 @@ out:
        return err;
 }
 
+static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
+{
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, NDTA_PARMS);
+       if (nest == NULL)
+               return -ENOBUFS;
+
+       if (parms->dev)
+               NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
+
+       NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
+       NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+       NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
+       NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
+       NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
+       NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
+       NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
+       NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
+                     parms->base_reachable_time);
+       NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
+       NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
+       NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
+       NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
+       NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
+       NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
+
+       return nla_nest_end(skb, nest);
+
+nla_put_failure:
+       return nla_nest_cancel(skb, nest);
+}
+
+static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
+                             u32 pid, u32 seq, int type, int flags)
+{
+       struct nlmsghdr *nlh;
+       struct ndtmsg *ndtmsg;
+
+       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+       if (nlh == NULL)
+               return -ENOBUFS;
+
+       ndtmsg = nlmsg_data(nlh);
+
+       read_lock_bh(&tbl->lock);
+       ndtmsg->ndtm_family = tbl->family;
+       ndtmsg->ndtm_pad1   = 0;
+       ndtmsg->ndtm_pad2   = 0;
+
+       NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
+       NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
+       NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
+       NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
+       NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
+
+       {
+               unsigned long now = jiffies;
+               unsigned int flush_delta = now - tbl->last_flush;
+               unsigned int rand_delta = now - tbl->last_rand;
+
+               struct ndt_config ndc = {
+                       .ndtc_key_len           = tbl->key_len,
+                       .ndtc_entry_size        = tbl->entry_size,
+                       .ndtc_entries           = atomic_read(&tbl->entries),
+                       .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
+                       .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
+                       .ndtc_hash_rnd          = tbl->hash_rnd,
+                       .ndtc_hash_mask         = tbl->hash_mask,
+                       .ndtc_hash_chain_gc     = tbl->hash_chain_gc,
+                       .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
+               };
+
+               NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
+       }
+
+       {
+               int cpu;
+               struct ndt_stats ndst;
+
+               memset(&ndst, 0, sizeof(ndst));
+
+               for_each_possible_cpu(cpu) {
+                       struct neigh_statistics *st;
+
+                       st = per_cpu_ptr(tbl->stats, cpu);
+                       ndst.ndts_allocs                += st->allocs;
+                       ndst.ndts_destroys              += st->destroys;
+                       ndst.ndts_hash_grows            += st->hash_grows;
+                       ndst.ndts_res_failed            += st->res_failed;
+                       ndst.ndts_lookups               += st->lookups;
+                       ndst.ndts_hits                  += st->hits;
+                       ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
+                       ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
+                       ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
+                       ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
+               }
+
+               NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
+       }
+
+       BUG_ON(tbl->parms.dev);
+       if (neightbl_fill_parms(skb, &tbl->parms) < 0)
+               goto nla_put_failure;
+
+       read_unlock_bh(&tbl->lock);
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       read_unlock_bh(&tbl->lock);
+       return nlmsg_cancel(skb, nlh);
+}
+
+static int neightbl_fill_param_info(struct sk_buff *skb,
+                                   struct neigh_table *tbl,
+                                   struct neigh_parms *parms,
+                                   u32 pid, u32 seq, int type,
+                                   unsigned int flags)
+{
+       struct ndtmsg *ndtmsg;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
+       if (nlh == NULL)
+               return -ENOBUFS;
+
+       ndtmsg = nlmsg_data(nlh);
+
+       read_lock_bh(&tbl->lock);
+       ndtmsg->ndtm_family = tbl->family;
+       ndtmsg->ndtm_pad1   = 0;
+       ndtmsg->ndtm_pad2   = 0;
+
+       if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
+           neightbl_fill_parms(skb, parms) < 0)
+               goto errout;
+
+       read_unlock_bh(&tbl->lock);
+       return nlmsg_end(skb, nlh);
+errout:
+       read_unlock_bh(&tbl->lock);
+       return nlmsg_cancel(skb, nlh);
+}
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+                                                     int ifindex)
+{
+       struct neigh_parms *p;
+       
+       for (p = &tbl->parms; p; p = p->next)
+               if ((p->dev && p->dev->ifindex == ifindex) ||
+                   (!p->dev && !ifindex))
+                       return p;
+
+       return NULL;
+}
+
+static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
+       [NDTA_NAME]             = { .type = NLA_STRING },
+       [NDTA_THRESH1]          = { .type = NLA_U32 },
+       [NDTA_THRESH2]          = { .type = NLA_U32 },
+       [NDTA_THRESH3]          = { .type = NLA_U32 },
+       [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
+       [NDTA_PARMS]            = { .type = NLA_NESTED },
+};
+
+static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
+       [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
+       [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
+       [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
+       [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
+       [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
+       [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
+       [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
+       [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
+       [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
+       [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
+       [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
+       [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
+       [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
+};
+
+int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+{
+       struct neigh_table *tbl;
+       struct ndtmsg *ndtmsg;
+       struct nlattr *tb[NDTA_MAX+1];
+       int err;
+
+       err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
+                         nl_neightbl_policy);
+       if (err < 0)
+               goto errout;
+
+       if (tb[NDTA_NAME] == NULL) {
+               err = -EINVAL;
+               goto errout;
+       }
+
+       ndtmsg = nlmsg_data(nlh);
+       read_lock(&neigh_tbl_lock);
+       for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+               if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
+                       continue;
+
+               if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
+                       break;
+       }
+
+       if (tbl == NULL) {
+               err = -ENOENT;
+               goto errout_locked;
+       }
+
+       /* 
+        * We acquire tbl->lock to be nice to the periodic timers and
+        * make sure they always see a consistent set of values.
+        */
+       write_lock_bh(&tbl->lock);
+
+       if (tb[NDTA_PARMS]) {
+               struct nlattr *tbp[NDTPA_MAX+1];
+               struct neigh_parms *p;
+               int i, ifindex = 0;
+
+               err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
+                                      nl_ntbl_parm_policy);
+               if (err < 0)
+                       goto errout_tbl_lock;
+
+               if (tbp[NDTPA_IFINDEX])
+                       ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
+
+               p = lookup_neigh_params(tbl, ifindex);
+               if (p == NULL) {
+                       err = -ENOENT;
+                       goto errout_tbl_lock;
+               }
+
+               for (i = 1; i <= NDTPA_MAX; i++) {
+                       if (tbp[i] == NULL)
+                               continue;
+
+                       switch (i) {
+                       case NDTPA_QUEUE_LEN:
+                               p->queue_len = nla_get_u32(tbp[i]);
+                               break;
+                       case NDTPA_PROXY_QLEN:
+                               p->proxy_qlen = nla_get_u32(tbp[i]);
+                               break;
+                       case NDTPA_APP_PROBES:
+                               p->app_probes = nla_get_u32(tbp[i]);
+                               break;
+                       case NDTPA_UCAST_PROBES:
+                               p->ucast_probes = nla_get_u32(tbp[i]);
+                               break;
+                       case NDTPA_MCAST_PROBES:
+                               p->mcast_probes = nla_get_u32(tbp[i]);
+                               break;
+                       case NDTPA_BASE_REACHABLE_TIME:
+                               p->base_reachable_time = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_GC_STALETIME:
+                               p->gc_staletime = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_DELAY_PROBE_TIME:
+                               p->delay_probe_time = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_RETRANS_TIME:
+                               p->retrans_time = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_ANYCAST_DELAY:
+                               p->anycast_delay = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_PROXY_DELAY:
+                               p->proxy_delay = nla_get_msecs(tbp[i]);
+                               break;
+                       case NDTPA_LOCKTIME:
+                               p->locktime = nla_get_msecs(tbp[i]);
+                               break;
+                       }
+               }
+       }
+
+       if (tb[NDTA_THRESH1])
+               tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
+
+       if (tb[NDTA_THRESH2])
+               tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
+
+       if (tb[NDTA_THRESH3])
+               tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
+
+       if (tb[NDTA_GC_INTERVAL])
+               tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
+
+       err = 0;
+
+errout_tbl_lock:
+       write_unlock_bh(&tbl->lock);
+errout_locked:
+       read_unlock(&neigh_tbl_lock);
+errout:
+       return err;
+}
+/* Netlink dump of the neighbour tables and their parms; cb->args[0]/[1] hold the resume position. */
+int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       int family, tidx, nidx = 0;
+       int tbl_skip = cb->args[0];     /* index of the table to resume at */
+       int neigh_skip = cb->args[1];   /* parms index to resume at within that table */
+       struct neigh_table *tbl;
+
+       family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
+
+       read_lock(&neigh_tbl_lock);
+       for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
+               struct neigh_parms *p;
+
+               if (tidx < tbl_skip || (family && tbl->family != family))
+                       continue;       /* before the resume point, or family mismatch (0 matches all) */
+
+               if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+                                      cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
+                                      NLM_F_MULTI) <= 0)
+                       break;          /* table record was not added: stop here */
+
+               /* The walk starts at tbl->parms.next, so tbl->parms itself is not visited by this loop. */
+               for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+                       if (nidx < neigh_skip)
+                               continue;
+
+                       if (neightbl_fill_param_info(skb, tbl, p,
+                                                    NETLINK_CB(cb->skb).pid,
+                                                    cb->nlh->nlmsg_seq,
+                                                    RTM_NEWNEIGHTBL,
+                                                    NLM_F_MULTI) <= 0)
+                               goto out;
+               }
+
+               neigh_skip = 0;         /* the parms offset only applies to the first table resumed */
+       }
+out:
+       read_unlock(&neigh_tbl_lock);
+       cb->args[0] = tidx;             /* stored for the next invocation */
+       cb->args[1] = nidx;
+
+       return skb->len;
+}
 
-static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
-                          u32 pid, u32 seq, int event)
+static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
+                          u32 pid, u32 seq, int type, unsigned int flags)
 {
        unsigned long now = jiffies;
-       unsigned char *b = skb->tail;
        struct nda_cacheinfo ci;
-       int locked = 0;
-       struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event,
-                                        sizeof(struct ndmsg));
-       struct ndmsg *ndm = NLMSG_DATA(nlh);
-
-       ndm->ndm_family  = n->ops->family;
-       ndm->ndm_flags   = n->flags;
-       ndm->ndm_type    = n->type;
-       ndm->ndm_ifindex = n->dev->ifindex;
-       RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
-       read_lock_bh(&n->lock);
-       locked           = 1;
-       ndm->ndm_state   = n->nud_state;
-       if (n->nud_state & NUD_VALID)
-               RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
-       ci.ndm_used      = now - n->used;
-       ci.ndm_confirmed = now - n->confirmed;
-       ci.ndm_updated   = now - n->updated;
-       ci.ndm_refcnt    = atomic_read(&n->refcnt) - 1;
-       read_unlock_bh(&n->lock);
-       locked           = 0;
-       RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
-       nlh->nlmsg_len   = skb->tail - b;
-       return skb->len;
+       struct nlmsghdr *nlh;
+       struct ndmsg *ndm;
+       /* Build one netlink message of the given type in skb that describes neigh. */
+       nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+       if (nlh == NULL)
+               return -ENOBUFS;
+
+       ndm = nlmsg_data(nlh);
+       ndm->ndm_family  = neigh->ops->family;
+       ndm->ndm_pad1    = 0;   /* padding is zeroed explicitly */
+       ndm->ndm_pad2    = 0;
+       ndm->ndm_flags   = neigh->flags;
+       ndm->ndm_type    = neigh->type;
+       ndm->ndm_ifindex = neigh->dev->ifindex;
 
-nlmsg_failure:
-rtattr_failure:
-       if (locked)
-               read_unlock_bh(&n->lock);
-       skb_trim(skb, b - skb->data);
-       return -1;
+       NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
+       /* nud_state, ha and the timestamps below are read under neigh->lock. */
+       read_lock_bh(&neigh->lock);
+       ndm->ndm_state   = neigh->nud_state;
+       if ((neigh->nud_state & NUD_VALID) &&
+           nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
+               read_unlock_bh(&neigh->lock);   /* drop the lock before bailing out */
+               goto nla_put_failure;
+       }
+
+       ci.ndm_used      = now - neigh->used;           /* ages relative to the jiffies sample above */
+       ci.ndm_confirmed = now - neigh->confirmed;
+       ci.ndm_updated   = now - neigh->updated;
+       ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
+       read_unlock_bh(&neigh->lock);
+       /* NOTE(review): NLA_PUT*() presumably jump to nla_put_failure on error (macros not shown here). */
+       NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
+       NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
+
+       return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+       return nlmsg_cancel(skb, nlh);
 }
 
 
@@ -1387,25 +1998,26 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
 
-       for (h = 0; h <= NEIGH_HASHMASK; h++) {
+       read_lock_bh(&tbl->lock);
+       for (h = 0; h <= tbl->hash_mask; h++) {
                if (h < s_h)
                        continue;
                if (h > s_h)
                        s_idx = 0;
-               read_lock_bh(&tbl->lock);
                for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
                        if (idx < s_idx)
                                continue;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq,
-                                           RTM_NEWNEIGH) <= 0) {
+                                           RTM_NEWNEIGH,
+                                           NLM_F_MULTI) <= 0) {
                                read_unlock_bh(&tbl->lock);
                                rc = -1;
                                goto out;
                        }
                }
-               read_unlock_bh(&tbl->lock);
        }
+       read_unlock_bh(&tbl->lock);
        rc = skb->len;
 out:
        cb->args[1] = h;
@@ -1419,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
        int t, family, s_t;
 
        read_lock(&neigh_tbl_lock);
-       family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family;
+       family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
        s_t = cb->args[0];
 
        for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
@@ -1437,42 +2049,403 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
        return skb->len;
 }
 
-#ifdef CONFIG_ARPD
-void neigh_app_ns(struct neighbour *n)
+void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
 {
-       struct nlmsghdr  *nlh;
-       int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-       struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+       int chain;      /* hash bucket index */
 
-       if (!skb)
-               return;
+       read_lock_bh(&tbl->lock);       /* cb runs with tbl->lock read-held */
+       for (chain = 0; chain <= tbl->hash_mask; chain++) {
+               struct neighbour *n;
 
-       if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
-               kfree_skb(skb);
-               return;
+               for (n = tbl->hash_buckets[chain]; n; n = n->next)
+                       cb(n, cookie);  /* called once for every entry of every bucket */
        }
-       nlh                        = (struct nlmsghdr *)skb->data;
-       nlh->nlmsg_flags           = NLM_F_REQUEST;
-       NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+       read_unlock_bh(&tbl->lock);
 }
+EXPORT_SYMBOL(neigh_for_each);
 
-static void neigh_app_notify(struct neighbour *n)
+/* Unlink and release every entry cb() returns nonzero for. The tbl->lock must be held as a writer and BH disabled. */
+void __neigh_for_each_release(struct neigh_table *tbl,
+                             int (*cb)(struct neighbour *))
 {
-       struct nlmsghdr *nlh;
-       int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256);
-       struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC);
+       int chain;
 
-       if (!skb)
-               return;
+       for (chain = 0; chain <= tbl->hash_mask; chain++) {
+               struct neighbour *n, **np;
 
-       if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
-               kfree_skb(skb);
-               return;
+               np = &tbl->hash_buckets[chain]; /* np: the link that points at the current entry */
+               while ((n = *np) != NULL) {
+                       int release;
+
+                       write_lock(&n->lock);   /* cb runs with n->lock write-held */
+                       release = cb(n);
+                       if (release) {
+                               *np = n->next;  /* unlink n from its chain */
+                               n->dead = 1;
+                       } else
+                               np = &n->next;  /* keep n and advance */
+                       write_unlock(&n->lock);
+                       if (release)
+                               neigh_release(n);       /* done only after n->lock is dropped */
+               }
+       }
+}
+EXPORT_SYMBOL(__neigh_for_each_release);
+
+#ifdef CONFIG_PROC_FS
+/* Find the first neighbour entry that passes the iterator's filters; returns NULL if there is none. */
+static struct neighbour *neigh_get_first(struct seq_file *seq)
+{
+       struct neigh_seq_state *state = seq->private;
+       struct neigh_table *tbl = state->tbl;
+       struct neighbour *n = NULL;
+       int bucket;     /* scan always starts at bucket 0; final value is stored in state->bucket */
+
+       state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
+       for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
+               n = tbl->hash_buckets[bucket];
+
+               while (n) {
+                       if (state->neigh_sub_iter) {
+                               loff_t fakep = 0;
+                               void *v;
+
+                               v = state->neigh_sub_iter(state, n, &fakep);
+                               if (!v)
+                                       goto next;      /* sub-iterator returned NULL: skip n */
+                       }
+                       if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+                               break;          /* no NOARP filtering requested */
+                       if (n->nud_state & ~NUD_NOARP)
+                               break;          /* a state bit other than NUD_NOARP is set */
+               next:
+                       n = n->next;
+               }
+
+               if (n)
+                       break;
+       }
+       state->bucket = bucket;
+
+       return n;
+}
+/* Advance from n to the next accepted neighbour entry, moving to later buckets as needed; NULL when exhausted. */
+static struct neighbour *neigh_get_next(struct seq_file *seq,
+                                       struct neighbour *n,
+                                       loff_t *pos)
+{
+       struct neigh_seq_state *state = seq->private;
+       struct neigh_table *tbl = state->tbl;
+
+       if (state->neigh_sub_iter) {
+               void *v = state->neigh_sub_iter(state, n, pos);
+               if (v)
+                       return n;       /* sub-iterator returned non-NULL: stay on the same entry */
+       }
+       n = n->next;
+
+       while (1) {
+               while (n) {
+                       if (state->neigh_sub_iter) {
+                               void *v = state->neigh_sub_iter(state, n, pos);
+                               if (v)
+                                       return n;
+                               goto next;
+                       }
+                       if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
+                               break;
+
+                       if (n->nud_state & ~NUD_NOARP)
+                               break;
+               next:
+                       n = n->next;
+               }
+
+               if (n)
+                       break;
+
+               if (++state->bucket > tbl->hash_mask)
+                       break;          /* ran past the last bucket */
+
+               n = tbl->hash_buckets[state->bucket];
+       }
+
+       if (n && pos)
+               --(*pos);               /* one step consumed; callers may pass pos == NULL */
+       return n;
+}
+/* Step forward from the first neighbour entry until *pos reaches 0; NULL if the entries run out first. */
+static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+       struct neighbour *n = neigh_get_first(seq);
+
+       if (n) {
+               while (*pos) {
+                       n = neigh_get_next(seq, n, pos);        /* counts *pos down as it advances */
+                       if (!n)
+                               break;
+               }
+       }
+       return *pos ? NULL : n;
+}
+/* Return the head of the first non-empty proxy hash bucket, or NULL if all are empty. */
+static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+{
+       struct neigh_seq_state *state = seq->private;
+       struct neigh_table *tbl = state->tbl;
+       struct pneigh_entry *pn = NULL;
+       int bucket;     /* scan always starts at bucket 0; final value is stored in state->bucket */
+
+       state->flags |= NEIGH_SEQ_IS_PNEIGH;    /* iteration is now over proxy entries */
+       for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+               pn = tbl->phash_buckets[bucket];
+               if (pn)
+                       break;
+       }
+       state->bucket = bucket;
+
+       return pn;
+}
+/* Advance from pn to the next proxy entry, moving on to later buckets as needed; NULL when exhausted. */
+static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
+                                           struct pneigh_entry *pn,
+                                           loff_t *pos)
+{
+       struct neigh_seq_state *state = seq->private;
+       struct neigh_table *tbl = state->tbl;
+
+       pn = pn->next;
+       while (!pn) {
+               if (++state->bucket > PNEIGH_HASHMASK)
+                       break;          /* ran past the last bucket */
+               pn = tbl->phash_buckets[state->bucket];
+               if (pn)
+                       break;
+       }
+
+       if (pn && pos)
+               --(*pos);               /* one step consumed; callers may pass pos == NULL */
+
+       return pn;
+}
+/* Step forward from the first proxy entry until *pos reaches 0; NULL if the entries run out first. */
+static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
+{
+       struct pneigh_entry *pn = pneigh_get_first(seq);
+
+       if (pn) {
+               while (*pos) {
+                       pn = pneigh_get_next(seq, pn, pos);     /* decrements *pos per entry found */
+                       if (!pn)
+                               break;
+               }
+       }
+       return *pos ? NULL : pn;
+}
+/* Resolve position *pos among neighbour entries first, then among proxy entries unless NEIGH_SEQ_NEIGH_ONLY is set. */
+static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
+{
+       struct neigh_seq_state *state = seq->private;
+       void *rc;
+
+       rc = neigh_get_idx(seq, pos);
+       if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+               rc = pneigh_get_idx(seq, pos);  /* *pos now holds what the neighbour walk left over */
+
+       return rc;
+}
+/* Begin iterating tbl: reset the iterator state, take tbl->lock for reading and return the item at *pos. */
+void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+{
+       struct neigh_seq_state *state = seq->private;
+       loff_t pos_minus_one;
+
+       state->tbl = tbl;
+       state->bucket = 0;
+       state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);        /* caller cannot preset the pneigh flag */
+
+       read_lock_bh(&tbl->lock);       /* still held when this function returns */
+
+       pos_minus_one = *pos - 1;       /* position 0 is SEQ_START_TOKEN, entries start at 1 */
+       return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
+}
+EXPORT_SYMBOL(neigh_seq_start);
+/* Iterator step: move from v to the following neighbour or proxy entry and increment *pos. */
+void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct neigh_seq_state *state;
+       void *rc;
+
+       if (v == SEQ_START_TOKEN) {
+               rc = neigh_get_idx(seq, pos);   /* NOTE(review): yields the first entry only if *pos is 0 here - confirm */
+               goto out;
        }
-       nlh                        = (struct nlmsghdr *)skb->data;
-       NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
-       netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
+
+       state = seq->private;
+       if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
+               rc = neigh_get_next(seq, v, NULL);
+               if (rc)
+                       goto out;
+               if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+                       rc = pneigh_get_first(seq);     /* neighbour entries exhausted: go on with proxy entries */
+       } else {
+               BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
+               rc = pneigh_get_next(seq, v, NULL);
+       }
+out:
+       ++(*pos);
+       return rc;
+}
+EXPORT_SYMBOL(neigh_seq_next);
+/* End of iteration: release the read lock on the table recorded in the iterator state. */
+void neigh_seq_stop(struct seq_file *seq, void *v)
+{
+       struct neigh_seq_state *state = seq->private;
+       struct neigh_table *tbl = state->tbl;
+
+       read_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_seq_stop);
+
+/* statistics via seq_file */
+/* Position 0 yields the header token; position N >= 1 yields the stats of the first possible CPU with id >= N-1. */
+static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       struct proc_dir_entry *pde = seq->private;
+       struct neigh_table *tbl = pde->data;
+       int cpu;
+
+       if (*pos == 0)
+               return SEQ_START_TOKEN;
+       
+       for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+               if (!cpu_possible(cpu))
+                       continue;
+               *pos = cpu+1;           /* jump the position past any skipped CPU ids */
+               return per_cpu_ptr(tbl->stats, cpu);
+       }
+       return NULL;
+}
+/* Return the stats of the first possible CPU with id >= *pos and set *pos to that id + 1; NULL when none is left. */
+static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct proc_dir_entry *pde = seq->private;
+       struct neigh_table *tbl = pde->data;
+       int cpu;
+
+       for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+               if (!cpu_possible(cpu))
+                       continue;
+               *pos = cpu+1;
+               return per_cpu_ptr(tbl->stats, cpu);
+       }
+       return NULL;
+}
+/* Stop handler of the statistics iterator: there is nothing to do. */
+static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+/* Print the column header for the start token, otherwise one row of hexadecimal counters for the stats block v. */
+static int neigh_stat_seq_show(struct seq_file *seq, void *v)
+{
+       struct proc_dir_entry *pde = seq->private;
+       struct neigh_table *tbl = pde->data;
+       struct neigh_statistics *st = v;
+
+       if (v == SEQ_START_TOKEN) {
+               seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs\n");
+               return 0;
+       }
+
+       seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
+                       "%08lx %08lx  %08lx %08lx\n",
+                  atomic_read(&tbl->entries),  /* table-wide value, so identical on every row */
+
+                  st->allocs,
+                  st->destroys,
+                  st->hash_grows,
+
+                  st->lookups,
+                  st->hits,
+
+                  st->res_failed,
+
+                  st->rcv_probes_mcast,
+                  st->rcv_probes_ucast,
+
+                  st->periodic_gc_runs,
+                  st->forced_gc_runs
+                  );
+
+       return 0;
+}
+/* Iterator callbacks for the statistics file; installed by neigh_stat_seq_open() via seq_open(). */
+static struct seq_operations neigh_stat_seq_ops = {
+       .start  = neigh_stat_seq_start,
+       .next   = neigh_stat_seq_next,
+       .stop   = neigh_stat_seq_stop,
+       .show   = neigh_stat_seq_show,
+};
+/* Open handler: set up the seq_file and stash the proc entry so the iterator can reach pde->data. */
+static int neigh_stat_seq_open(struct inode *inode, struct file *file)
+{
+       int ret = seq_open(file, &neigh_stat_seq_ops);
+
+       if (!ret) {
+               struct seq_file *sf = file->private_data;
+               sf->private = PDE(inode);       /* start/next/show read the table from pde->data */
+       }
+       return ret;     /* 0 on success, otherwise the seq_open() error */
+}
+/* File operations of the statistics file: custom open, generic seq_file read/llseek/release. */
+static struct file_operations neigh_stat_seq_fops = {
+       .owner   = THIS_MODULE,
+       .open    = neigh_stat_seq_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_ARPD
+static inline size_t neigh_nlmsg_size(void)
+{      /* Buffer size requested by __neigh_notify(): ndmsg header plus one slot per attribute listed below. */
+       return NLMSG_ALIGN(sizeof(struct ndmsg))
+              + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+              + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+              + nla_total_size(sizeof(struct nda_cacheinfo)) /* NDA_CACHEINFO */
+              + nla_total_size(4); /* NDA_PROBES */
+}
+/* Build a neighbour message of the given type/flags for n and pass it to rtnl_notify() for RTNLGRP_NEIGH. */
+static void __neigh_notify(struct neighbour *n, int type, int flags)
+{
+       struct sk_buff *skb;
+       int err = -ENOBUFS;     /* reported if the allocation below fails */
+
+       skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
+       if (skb == NULL)
+               goto errout;
+
+       err = neigh_fill_info(skb, n, 0, 0, type, flags);
+       /* failure implies BUG in neigh_nlmsg_size() */
+       BUG_ON(err < 0);
+
+       err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+errout:
+       if (err < 0)
+               rtnl_set_sk_err(RTNLGRP_NEIGH, err);    /* reached for allocation and rtnl_notify() failures */
+}
+/* Emit an RTM_GETNEIGH message flagged NLM_F_REQUEST for n via __neigh_notify(). */
+void neigh_app_ns(struct neighbour *n)
+{
+       __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+}
+/* Emit an RTM_NEWNEIGH message (no extra flags) for n via __neigh_notify(). */
+static void neigh_app_notify(struct neighbour *n)
+{
+       __neigh_notify(n, RTM_NEWNEIGH, 0);
 }
 
 #endif /* CONFIG_ARPD */
@@ -1481,12 +2454,12 @@ static void neigh_app_notify(struct neighbour *n)
 
 static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
-       ctl_table               neigh_vars[17];
+       ctl_table               neigh_vars[__NET_NEIGH_MAX];
        ctl_table               neigh_dev[2];
        ctl_table               neigh_neigh_dir[2];
        ctl_table               neigh_proto_dir[2];
        ctl_table               neigh_root_dir[2];
-} neigh_sysctl_template = {
+} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                {
                        .ctl_name       = NET_NEIGH_MCAST_SOLICIT,
@@ -1604,6 +2577,22 @@ static struct neigh_sysctl_table {
                        .mode           = 0644,
                        .proc_handler   = &proc_dointvec,
                },
+               {
+                       .ctl_name       = NET_NEIGH_RETRANS_TIME_MS,
+                       .procname       = "retrans_time_ms",
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = &proc_dointvec_ms_jiffies,
+                       .strategy       = &sysctl_ms_jiffies,
+               },
+               {
+                       .ctl_name       = NET_NEIGH_REACHABLE_TIME_MS,
+                       .procname       = "base_reachable_time_ms",
+                       .maxlen         = sizeof(int),
+                       .mode           = 0644,
+                       .proc_handler   = &proc_dointvec_ms_jiffies,
+                       .strategy       = &sysctl_ms_jiffies,
+               },
        },
        .neigh_dev = {
                {
@@ -1634,24 +2623,20 @@ static struct neigh_sysctl_table {
 
 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          int p_id, int pdev_id, char *p_name, 
-                         proc_handler *handler)
+                         proc_handler *handler, ctl_handler *strategy)
 {
-       struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+       struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
+                                              sizeof(*t), GFP_KERNEL);
        const char *dev_name_source = NULL;
        char *dev_name = NULL;
        int err = 0;
 
        if (!t)
                return -ENOBUFS;
-       memcpy(t, &neigh_sysctl_template, sizeof(*t));
        t->neigh_vars[0].data  = &p->mcast_probes;
        t->neigh_vars[1].data  = &p->ucast_probes;
        t->neigh_vars[2].data  = &p->app_probes;
        t->neigh_vars[3].data  = &p->retrans_time;
-       if (handler) {
-               t->neigh_vars[3].proc_handler = handler;
-               t->neigh_vars[3].extra1 = dev;
-       }
        t->neigh_vars[4].data  = &p->base_reachable_time;
        t->neigh_vars[5].data  = &p->delay_probe_time;
        t->neigh_vars[6].data  = &p->gc_staletime;
@@ -1661,19 +2646,44 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
        t->neigh_vars[10].data = &p->proxy_delay;
        t->neigh_vars[11].data = &p->locktime;
 
-       dev_name_source = t->neigh_dev[0].procname;
        if (dev) {
                dev_name_source = dev->name;
                t->neigh_dev[0].ctl_name = dev->ifindex;
-               memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
+               t->neigh_vars[12].procname = NULL;
+               t->neigh_vars[13].procname = NULL;
+               t->neigh_vars[14].procname = NULL;
+               t->neigh_vars[15].procname = NULL;
        } else {
+               dev_name_source = t->neigh_dev[0].procname;
                t->neigh_vars[12].data = (int *)(p + 1);
                t->neigh_vars[13].data = (int *)(p + 1) + 1;
                t->neigh_vars[14].data = (int *)(p + 1) + 2;
                t->neigh_vars[15].data = (int *)(p + 1) + 3;
        }
 
-       dev_name = net_sysctl_strdup(dev_name_source);
+       t->neigh_vars[16].data  = &p->retrans_time;
+       t->neigh_vars[17].data  = &p->base_reachable_time;
+
+       if (handler || strategy) {
+               /* RetransTime */
+               t->neigh_vars[3].proc_handler = handler;
+               t->neigh_vars[3].strategy = strategy;
+               t->neigh_vars[3].extra1 = dev;
+               /* ReachableTime */
+               t->neigh_vars[4].proc_handler = handler;
+               t->neigh_vars[4].strategy = strategy;
+               t->neigh_vars[4].extra1 = dev;
+               /* RetransTime (in milliseconds)*/
+               t->neigh_vars[16].proc_handler = handler;
+               t->neigh_vars[16].strategy = strategy;
+               t->neigh_vars[16].extra1 = dev;
+               /* ReachableTime (in milliseconds) */
+               t->neigh_vars[17].proc_handler = handler;
+               t->neigh_vars[17].strategy = strategy;
+               t->neigh_vars[17].extra1 = dev;
+       }
+
+       dev_name = kstrdup(dev_name_source, GFP_KERNEL);
        if (!dev_name) {
                err = -ENOBUFS;
                goto free;
@@ -1722,7 +2732,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
 #endif /* CONFIG_SYSCTL */
 
 EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_add);
 EXPORT_SYMBOL(neigh_changeaddr);
 EXPORT_SYMBOL(neigh_compat_output);
 EXPORT_SYMBOL(neigh_connected_output);
@@ -1733,14 +2742,15 @@ EXPORT_SYMBOL(neigh_dump_info);
 EXPORT_SYMBOL(neigh_event_ns);
 EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
+EXPORT_SYMBOL(neigh_lookup_nodev);
 EXPORT_SYMBOL(neigh_parms_alloc);
 EXPORT_SYMBOL(neigh_parms_release);
 EXPORT_SYMBOL(neigh_rand_reach_time);
 EXPORT_SYMBOL(neigh_resolve_output);
 EXPORT_SYMBOL(neigh_table_clear);
 EXPORT_SYMBOL(neigh_table_init);
+EXPORT_SYMBOL(neigh_table_init_no_netlink);
 EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(neigh_update_hhs);
 EXPORT_SYMBOL(pneigh_enqueue);
 EXPORT_SYMBOL(pneigh_lookup);