X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=net%2Fcore%2Fneighbour.c;h=e7300b6b40795e5011fa26ad87f0ed2766823fe7;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=ee410cb7d87851e96abed4de367459910c908178;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ee410cb7d..e7300b6b4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -12,15 +12,16 @@ * * Fixes: * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. + * Harald Welte Add neighbour cache statistics like rtstat */ -#include #include #include #include #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -28,7 +29,11 @@ #include #include #include +#include +#include #include +#include +#include #define NEIGH_DEBUG 1 @@ -47,6 +52,8 @@ #define NEIGH_PRINTK2 NEIGH_PRINTK #endif +#define PNEIGH_HASHMASK 0xF + static void neigh_timer_handler(unsigned long arg); #ifdef CONFIG_ARPD static void neigh_app_notify(struct neighbour *n); @@ -54,8 +61,10 @@ static void neigh_app_notify(struct neighbour *n); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); -static int neigh_glbl_allocs; static struct neigh_table *neigh_tables; +#ifdef CONFIG_PROC_FS +static struct file_operations neigh_stat_seq_fops; +#endif /* Neighbour hash table buckets are protected with rwlock tbl->lock. @@ -88,7 +97,7 @@ static struct neigh_table *neigh_tables; list of neighbour tables. This list is used only in process context, */ -static rwlock_t neigh_tbl_lock = RW_LOCK_UNLOCKED; +static DEFINE_RWLOCK(neigh_tbl_lock); static int neigh_blackhole(struct sk_buff *skb) { @@ -104,7 +113,7 @@ static int neigh_blackhole(struct sk_buff *skb) unsigned long neigh_rand_reach_time(unsigned long base) { - return (net_random() % base) + (base >> 1); + return (base ? (net_random() % base) + (base >> 1) : 0); } @@ -113,27 +122,21 @@ static int neigh_forced_gc(struct neigh_table *tbl) int shrunk = 0; int i; - for (i = 0; i <= NEIGH_HASHMASK; i++) { + NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); + + write_lock_bh(&tbl->lock); + for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np; np = &tbl->hash_buckets[i]; - write_lock_bh(&tbl->lock); while ((n = *np) != NULL) { /* Neighbour record may be discarded if: - - nobody refers to it. - - it is not permanent - - (NEW and probably wrong) - INCOMPLETE entries are kept at least for - n->parms->retrans_time, otherwise we could - flood network with resolution requests. - It is not clear, what is better table overflow - or flooding. + * - nobody refers to it. 
+ * - it is not permanent */ write_lock(&n->lock); if (atomic_read(&n->refcnt) == 1 && - !(n->nud_state & NUD_PERMANENT) && - (n->nud_state != NUD_INCOMPLETE || - jiffies - n->used > n->parms->retrans_time)) { + !(n->nud_state & NUD_PERMANENT)) { *np = n->next; n->dead = 1; shrunk = 1; @@ -144,10 +147,12 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_unlock(&n->lock); np = &n->next; } - write_unlock_bh(&tbl->lock); } tbl->last_flush = jiffies; + + write_unlock_bh(&tbl->lock); + return shrunk; } @@ -171,40 +176,11 @@ static void pneigh_queue_purge(struct sk_buff_head *list) } } -void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) -{ - int i; - - write_lock_bh(&tbl->lock); - - for (i=0; i <= NEIGH_HASHMASK; i++) { - struct neighbour *n, **np; - - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - if (dev && n->dev != dev) { - np = &n->next; - continue; - } - *np = n->next; - write_lock_bh(&n->lock); - n->dead = 1; - neigh_del_timer(n); - write_unlock_bh(&n->lock); - neigh_release(n); - } - } - - write_unlock_bh(&tbl->lock); -} - -int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) { int i; - write_lock_bh(&tbl->lock); - - for (i = 0; i <= NEIGH_HASHMASK; i++) { + for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np = &tbl->hash_buckets[i]; while ((n = *np) != NULL) { @@ -227,7 +203,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - n->parms = &tbl->parms; skb_queue_purge(&n->arp_queue); n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -240,7 +215,19 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) neigh_release(n); } } +} + +void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); + write_unlock_bh(&tbl->lock); +} +int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +{ + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev); pneigh_ifdown(tbl, dev); write_unlock_bh(&tbl->lock); @@ -253,38 +240,103 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) { struct neighbour *n = NULL; unsigned long now = jiffies; + int entries; - if (tbl->entries > tbl->gc_thresh3 || - (tbl->entries > tbl->gc_thresh2 && - now - tbl->last_flush > 5 * HZ)) { + entries = atomic_inc_return(&tbl->entries) - 1; + if (entries >= tbl->gc_thresh3 || + (entries >= tbl->gc_thresh2 && + time_after(now, tbl->last_flush + 5 * HZ))) { if (!neigh_forced_gc(tbl) && - tbl->entries > tbl->gc_thresh3) - goto out; + entries >= tbl->gc_thresh3) + goto out_entries; } - n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC); if (!n) - goto out; + goto out_entries; memset(n, 0, tbl->entry_size); skb_queue_head_init(&n->arp_queue); - n->lock = RW_LOCK_UNLOCKED; + rwlock_init(&n->lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; - n->parms = &tbl->parms; + n->parms = neigh_parms_clone(&tbl->parms); init_timer(&n->timer); n->timer.function = neigh_timer_handler; n->timer.data = (unsigned long)n; - tbl->stats.allocs++; - neigh_glbl_allocs++; - tbl->entries++; + + NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; atomic_set(&n->refcnt, 1); n->dead = 1; out: return n; + +out_entries: + atomic_dec(&tbl->entries); + goto out; +} + +static struct neighbour **neigh_hash_alloc(unsigned int entries) +{ + 
unsigned long size = entries * sizeof(struct neighbour *); + struct neighbour **ret; + + if (size <= PAGE_SIZE) { + ret = kzalloc(size, GFP_ATOMIC); + } else { + ret = (struct neighbour **) + __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); + } + return ret; +} + +static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + + if (size <= PAGE_SIZE) + kfree(hash); + else + free_pages((unsigned long)hash, get_order(size)); +} + +static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +{ + struct neighbour **new_hash, **old_hash; + unsigned int i, new_hash_mask, old_entries; + + NEIGH_CACHE_STAT_INC(tbl, hash_grows); + + BUG_ON(new_entries & (new_entries - 1)); + new_hash = neigh_hash_alloc(new_entries); + if (!new_hash) + return; + + old_entries = tbl->hash_mask + 1; + new_hash_mask = new_entries - 1; + old_hash = tbl->hash_buckets; + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + for (i = 0; i < old_entries; i++) { + struct neighbour *n, *next; + + for (n = old_hash[i]; n; n = next) { + unsigned int hash_val = tbl->hash(n->primary_key, n->dev); + + hash_val &= new_hash_mask; + next = n->next; + + n->next = new_hash[hash_val]; + new_hash[hash_val] = n; + } + } + tbl->hash_buckets = new_hash; + tbl->hash_mask = new_hash_mask; + + neigh_hash_free(old_hash, old_entries); } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -293,11 +345,34 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct neighbour *n; int key_len = tbl->key_len; u32 hash_val = tbl->hash(pkey, dev); + + NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); - for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); + break; + } + } + read_unlock_bh(&tbl->lock); + return n; +} + +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +{ + struct neighbour *n; + int key_len = tbl->key_len; + u32 hash_val = tbl->hash(pkey, NULL); + + NEIGH_CACHE_STAT_INC(tbl, lookups); + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { + if (!memcmp(n->primary_key, pkey, key_len)) { + neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); break; } } @@ -337,15 +412,23 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n->confirmed = jiffies - (n->parms->base_reachable_time << 1); - hash_val = tbl->hash(pkey, dev); - write_lock_bh(&tbl->lock); + + if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) + neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + + hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); + goto out_tbl_unlock; + } + for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); - write_unlock_bh(&tbl->lock); rc = n1; - goto out_neigh_release; + goto out_tbl_unlock; } } @@ -358,6 +441,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, rc = n; out: return rc; +out_tbl_unlock: + write_unlock_bh(&tbl->lock); out_neigh_release: neigh_release(n); goto out; @@ -395,8 +480,12 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, memcpy(n->key, pkey, key_len); n->dev = dev; + if (dev) + 
dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { + if (dev) + dev_put(dev); kfree(n); n = NULL; goto out; @@ -423,18 +512,21 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; + write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { - write_lock_bh(&tbl->lock); *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); kfree(n); return 0; } } + write_unlock_bh(&tbl->lock); return -ENOENT; } @@ -450,6 +542,8 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) *np = n->next; if (tbl->pdestructor) tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); kfree(n); continue; } @@ -468,6 +562,8 @@ void neigh_destroy(struct neighbour *neigh) { struct hh_cache *hh; + NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); + if (!neigh->dead) { printk(KERN_WARNING "Destroying alive neighbour %p\n", neigh); @@ -481,24 +577,25 @@ void neigh_destroy(struct neighbour *neigh) while ((hh = neigh->hh) != NULL) { neigh->hh = hh->hh_next; hh->hh_next = NULL; - write_lock_bh(&hh->hh_lock); + + write_seqlock_bh(&hh->hh_lock); hh->hh_output = neigh_blackhole; - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); if (atomic_dec_and_test(&hh->hh_refcnt)) kfree(hh); } - if (neigh->ops && neigh->ops->destructor) - (neigh->ops->destructor)(neigh); + if (neigh->parms->neigh_destructor) + (neigh->parms->neigh_destructor)(neigh); skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); + neigh_parms_put(neigh->parms); NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); - neigh_glbl_allocs--; - neigh->tbl->entries--; + atomic_dec(&neigh->tbl->entries); kmem_cache_free(neigh->tbl->kmem_cachep, neigh); } @@ -536,46 +633,13 @@ static void neigh_connect(struct neighbour *neigh) hh->hh_output = neigh->ops->hh_output; } -/* - Transitions NUD_STALE <-> NUD_REACHABLE do not occur - when fast path is built: we have no timers associated with - these states, we do not have time to check state when sending. - neigh_periodic_timer check periodically neigh->confirmed - time and moves NUD_REACHABLE -> NUD_STALE. - - If a routine wants to know TRUE entry state, it calls - neigh_sync before checking state. - - Called with write_locked neigh. 
- */ - -static void neigh_sync(struct neighbour *n) -{ - unsigned long now = jiffies; - u8 state = n->nud_state; - - if (state & (NUD_NOARP | NUD_PERMANENT)) - return; - if (state & NUD_REACHABLE) { - if (now - n->confirmed > n->parms->reachable_time) { - n->nud_state = NUD_STALE; - neigh_suspect(n); - } - } else if (state & NUD_VALID) { - if (now - n->confirmed < n->parms->reachable_time) { - neigh_del_timer(n); - n->nud_state = NUD_REACHABLE; - neigh_connect(n); - } - } -} - static void neigh_periodic_timer(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table *)arg; - unsigned long now = jiffies; - int i; + struct neighbour *n, **np; + unsigned long expire, now = jiffies; + NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock(&tbl->lock); @@ -583,7 +647,7 @@ static void neigh_periodic_timer(unsigned long arg) * periodically recompute ReachableTime from random function */ - if (now - tbl->last_rand > 300 * HZ) { + if (time_after(now, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; tbl->last_rand = now; for (p = &tbl->parms; p; p = p->next) @@ -591,47 +655,49 @@ static void neigh_periodic_timer(unsigned long arg) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0; i <= NEIGH_HASHMASK; i++) { - struct neighbour *n, **np; - - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - unsigned state; + np = &tbl->hash_buckets[tbl->hash_chain_gc]; + tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); - write_lock(&n->lock); + while ((n = *np) != NULL) { + unsigned int state; - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + write_lock(&n->lock); - if ((long)(n->used - n->confirmed) < 0) - n->used = n->confirmed; + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - now - n->used > n->parms->gc_staletime)) { - *np = n->next; - n->dead = 1; - write_unlock(&n->lock); - neigh_release(n); - continue; - } + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (n->nud_state & NUD_REACHABLE && - now - n->confirmed > n->parms->reachable_time) { - n->nud_state = NUD_STALE; - neigh_suspect(n); - } + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(now, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; write_unlock(&n->lock); + neigh_release(n); + continue; + } + write_unlock(&n->lock); next_elt: - np = &n->next; - } + np = &n->next; } - mod_timer(&tbl->gc_timer, now + tbl->gc_interval); + /* Cycle through all hash buckets every base_reachable_time/2 ticks. + * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 + * base_reachable_time. + */ + expire = tbl->parms.base_reachable_time >> 1; + expire /= (tbl->hash_mask + 1); + if (!expire) + expire = 1; + + mod_timer(&tbl->gc_timer, now + expire); + write_unlock(&tbl->lock); } @@ -643,12 +709,20 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } +static inline void neigh_add_timer(struct neighbour *n, unsigned long when) +{ + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); + dump_stack(); + } +} /* Called when a timer expires for a neighbour entry. 
*/ static void neigh_timer_handler(unsigned long arg) { - unsigned long now = jiffies; + unsigned long now, next; struct neighbour *neigh = (struct neighbour *)arg; unsigned state; int notify = 0; @@ -656,6 +730,8 @@ static void neigh_timer_handler(unsigned long arg) write_lock(&neigh->lock); state = neigh->nud_state; + now = jiffies; + next = now + HZ; if (!(state & NUD_IN_TIMER)) { #ifndef CONFIG_SMP @@ -664,25 +740,54 @@ static void neigh_timer_handler(unsigned long arg) goto out; } - if ((state & NUD_VALID) && - now - neigh->confirmed < neigh->parms->reachable_time) { - neigh->nud_state = NUD_REACHABLE; - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); - neigh_connect(neigh); - goto out; - } - if (state == NUD_DELAY) { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, 0); + if (state & NUD_REACHABLE) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->reachable_time)) { + NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + next = neigh->confirmed + neigh->parms->reachable_time; + } else if (time_before_eq(now, + neigh->used + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->nud_state = NUD_DELAY; + neigh->updated = jiffies; + neigh_suspect(neigh); + next = now + neigh->parms->delay_probe_time; + } else { + NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh->nud_state = NUD_STALE; + neigh->updated = jiffies; + neigh_suspect(neigh); + notify = 1; + } + } else if (state & NUD_DELAY) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh->nud_state = NUD_REACHABLE; + neigh->updated = jiffies; + neigh_connect(neigh); + notify = 1; + next = neigh->confirmed + neigh->parms->reachable_time; + } else { + NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh->nud_state = NUD_PROBE; + neigh->updated = jiffies; + atomic_set(&neigh->probes, 0); + next = now + neigh->parms->retrans_time; + } + } else { + /* NUD_PROBE|NUD_INCOMPLETE */ + next = now + neigh->parms->retrans_time; } - if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { + if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && + atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { struct sk_buff *skb; neigh->nud_state = NUD_FAILED; + neigh->updated = jiffies; notify = 1; - neigh->tbl->stats.res_failed++; + NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); NEIGH_PRINTK2("neigh %p is failed.\n", neigh); /* It is very thin place. 
report_unreachable is very complicated @@ -697,19 +802,31 @@ static void neigh_timer_handler(unsigned long arg) write_lock(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); - goto out; } - neigh->timer.expires = now + neigh->parms->retrans_time; - add_timer(&neigh->timer); - write_unlock(&neigh->lock); - - neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); - atomic_inc(&neigh->probes); - return; - + if (neigh->nud_state & NUD_IN_TIMER) { + if (time_before(next, jiffies + HZ/2)) + next = jiffies + HZ/2; + if (!mod_timer(&neigh->timer, next)) + neigh_hold(neigh); + } + if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb_get(skb); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + if (skb) + kfree_skb(skb); + } else { out: - write_unlock(&neigh->lock); + write_unlock(&neigh->lock); + } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); @@ -720,6 +837,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; + unsigned long now; write_lock_bh(&neigh->lock); @@ -727,26 +845,31 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + now = jiffies; + if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; + neigh->updated = jiffies; neigh_hold(neigh); - neigh->timer.expires = jiffies + - neigh->parms->retrans_time; - add_timer(&neigh->timer); - write_unlock_bh(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - write_lock_bh(&neigh->lock); + neigh_add_timer(neigh, now + 1); } else { neigh->nud_state = NUD_FAILED; + neigh->updated = jiffies; write_unlock_bh(&neigh->lock); if (skb) kfree_skb(skb); return 1; } + } else if (neigh->nud_state & NUD_STALE) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_hold(neigh); + neigh->nud_state = NUD_DELAY; + neigh->updated = jiffies; + neigh_add_timer(neigh, + jiffies + neigh->parms->delay_probe_time); } if (neigh->nud_state == NUD_INCOMPLETE) { @@ -761,20 +884,13 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) __skb_queue_tail(&neigh->arp_queue, skb); } rc = 1; - } else if (neigh->nud_state == NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); - neigh_hold(neigh); - neigh->nud_state = NUD_DELAY; - neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; - add_timer(&neigh->timer); - rc = 0; } out_unlock_bh: write_unlock_bh(&neigh->lock); return rc; } -static __inline__ void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = @@ -782,9 +898,9 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh) if (update) { for (hh = neigh->hh; hh; hh = hh->hh_next) { - write_lock_bh(&hh->hh_lock); + write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); - write_unlock_bh(&hh->hh_lock); + write_sequnlock_bh(&hh->hh_lock); } } } @@ -794,21 +910,32 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh) /* Generic update routine. 
-- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. - -- override == 1 allows to override existing lladdr, if it is different. - -- arp == 0 means that the change is administrative. + -- flags + NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, + if it is different. + NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" + lladdr instead of overriding it + if it is different. + It also allows to retain current state + if lladdr is unchanged. + NEIGH_UPDATE_F_ADMIN means that the change is administrative. + + NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing + NTF_ROUTER flag. + NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as + a router. Caller MUST hold reference count on the entry. */ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - int override, int arp) + u32 flags) { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; + int update_isrouter = 0; write_lock_bh(&neigh->lock); @@ -816,7 +943,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, old = neigh->nud_state; err = -EPERM; - if (arp && (old & (NUD_NOARP | NUD_PERMANENT))) + if (!(flags & NEIGH_UPDATE_F_ADMIN) && + (old & (NUD_NOARP | NUD_PERMANENT))) goto out; if (!(new & NUD_VALID)) { @@ -825,9 +953,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify = old & NUD_VALID; -#endif goto out; } @@ -841,12 +967,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - compare new & old - if they are different, check override flag */ - if (old & NUD_VALID) { - if (!memcmp(lladdr, neigh->ha, dev->addr_len)) - lladdr = neigh->ha; - else if (!override) - goto out; - } + if ((old & NUD_VALID) && + !memcmp(lladdr, neigh->ha, dev->addr_len)) + lladdr = neigh->ha; } else { /* No address is supplied; if we know something, use it, otherwise discard the request. @@ -857,8 +980,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - neigh_sync(neigh); - old = neigh->nud_state; if (new & NUD_CONNECTED) neigh->confirmed = jiffies; neigh->updated = jiffies; @@ -867,21 +988,44 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, do not change entry state, if new one is STALE. */ err = 0; - if ((old & NUD_VALID) && lladdr == neigh->ha && - (new == old || (new == NUD_STALE && (old & NUD_CONNECTED)))) - goto out; + update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + if (old & NUD_VALID) { + if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { + update_isrouter = 0; + if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && + (old & NUD_CONNECTED)) { + lladdr = neigh->ha; + new = NUD_STALE; + } else + goto out; + } else { + if (lladdr == neigh->ha && new == NUD_STALE && + ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || + (old & NUD_CONNECTED)) + ) + new = old; + } + } + + if (new != old) { + neigh_del_timer(neigh); + if (new & NUD_IN_TIMER) { + neigh_hold(neigh); + neigh_add_timer(neigh, (jiffies + + ((new & NUD_REACHABLE) ? 
+ neigh->parms->reachable_time : + 0))); + } + neigh->nud_state = new; + } - neigh_del_timer(neigh); - neigh->nud_state = new; if (lladdr != neigh->ha) { memcpy(&neigh->ha, lladdr, dev->addr_len); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -907,7 +1051,15 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, skb_queue_purge(&neigh->arp_queue); } out: + if (update_isrouter) { + neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? + (neigh->flags | NTF_ROUTER) : + (neigh->flags & ~NTF_ROUTER); + } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); @@ -922,12 +1074,13 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); if (neigh) - neigh_update(neigh, lladdr, NUD_STALE, 1, 1); + neigh_update(neigh, lladdr, NUD_STALE, + NEIGH_UPDATE_F_OVERRIDE); return neigh; } static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - u16 protocol) + __be16 protocol) { struct hh_cache *hh; struct net_device *dev = dst->dev; @@ -936,9 +1089,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, if (hh->hh_type == protocol) break; - if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { - memset(hh, 0, sizeof(struct hh_cache)); - hh->hh_lock = RW_LOCK_UNLOCKED; + if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { + seqlock_init(&hh->hh_lock); hh->hh_type = protocol; atomic_set(&hh->hh_refcnt, 0); hh->hh_next = NULL; @@ -1063,7 +1215,7 @@ static void neigh_proxy_process(unsigned long arg) while (skb != (struct sk_buff *)&tbl->proxy_queue) { struct sk_buff *back = skb; - long tdif = back->stamp.tv_usec - now; + long tdif = NEIGH_CB(back)->sched_next - now; skb = skb->next; if (tdif <= 0) { @@ -1088,26 +1240,26 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; - long sched_next = net_random() % p->proxy_delay; + unsigned long sched_next = now + (net_random() % p->proxy_delay); if (tbl->proxy_queue.qlen > p->proxy_qlen) { kfree_skb(skb); return; } - skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = now + sched_next; + + NEIGH_CB(skb)->sched_next = sched_next; + NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { - long tval = tbl->proxy_timer.expires - now; - if (tval < sched_next) - sched_next = tval; + if (time_before(tbl->proxy_timer.expires, sched_next)) + sched_next = tbl->proxy_timer.expires; } dst_release(skb->dst); skb->dst = NULL; dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); - mod_timer(&tbl->proxy_timer, now + sched_next); + mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } @@ -1115,16 +1267,22 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { - memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; + atomic_set(&p->refcnt, 1); + INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev 
&& dev->neigh_setup && dev->neigh_setup(dev, p)) { - kfree(p); - return NULL; + if (dev) { + if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + kfree(p); + return NULL; + } + + dev_hold(dev); + p->dev = dev; } p->sysctl_table = NULL; write_lock_bh(&tbl->lock); @@ -1135,6 +1293,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, return p; } +static void neigh_rcu_free_parms(struct rcu_head *head) +{ + struct neigh_parms *parms = + container_of(head, struct neigh_parms, rcu_head); + + neigh_parms_put(parms); +} + void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { struct neigh_parms **p; @@ -1145,8 +1311,11 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) for (p = &tbl->parms.next; *p; p = &(*p)->next) { if (*p == parms) { *p = parms->next; + parms->dead = 1; write_unlock_bh(&tbl->lock); - kfree(parms); + if (parms->dev) + dev_put(parms->dev); + call_rcu(&parms->rcu_head, neigh_rcu_free_parms); return; } } @@ -1154,29 +1323,54 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) NEIGH_PRINTK1("neigh_parms_release: not found\n"); } +void neigh_parms_destroy(struct neigh_parms *parms) +{ + kfree(parms); +} -void neigh_table_init(struct neigh_table *tbl) +void neigh_table_init_no_netlink(struct neigh_table *tbl) { unsigned long now = jiffies; + unsigned long phsize; + atomic_set(&tbl->parms.refcnt, 1); + INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); if (!tbl->kmem_cachep) - tbl->kmem_cachep = kmem_cache_create(tbl->id, - tbl->entry_size, - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); + tbl->kmem_cachep = + kmem_cache_create(tbl->id, tbl->entry_size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); + tbl->stats = alloc_percpu(struct neigh_statistics); + if (!tbl->stats) + panic("cannot create neighbour cache statistics"); + +#ifdef CONFIG_PROC_FS + tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + if (!tbl->pde) + panic("cannot create neighbour proc dir entry"); + tbl->pde->proc_fops = &neigh_stat_seq_fops; + tbl->pde->data = tbl; +#endif - if (!tbl->kmem_cachep) - panic("cannot create neighbour cache"); + tbl->hash_mask = 1; + tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); - tbl->lock = RW_LOCK_UNLOCKED; + phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); + tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); + + if (!tbl->hash_buckets || !tbl->phash_buckets) + panic("cannot allocate neighbour cache hashes"); + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + + rwlock_init(&tbl->lock); init_timer(&tbl->gc_timer); tbl->gc_timer.data = (unsigned long)tbl; tbl->gc_timer.function = neigh_periodic_timer; - tbl->gc_timer.expires = now + tbl->gc_interval + - tbl->parms.reachable_time; + tbl->gc_timer.expires = now + 1; add_timer(&tbl->gc_timer); init_timer(&tbl->proxy_timer); @@ -1186,10 +1380,27 @@ void neigh_table_init(struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; +} + +void neigh_table_init(struct neigh_table *tbl) +{ + struct neigh_table *tmp; + + neigh_table_init_no_netlink(tbl); write_lock(&neigh_tbl_lock); + for (tmp = neigh_tables; tmp; tmp = tmp->next) { + if (tmp->family == tbl->family) + break; + } tbl->next = neigh_tables; neigh_tables = tbl; write_unlock(&neigh_tbl_lock); + + if (unlikely(tmp)) { + printk(KERN_ERR "NEIGH: Registering multiple tables for " + "family %d\n", tbl->family); + dump_stack(); + } } int 
neigh_table_clear(struct neigh_table *tbl) @@ -1201,7 +1412,7 @@ int neigh_table_clear(struct neigh_table *tbl) del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); - if (tbl->entries) + if (atomic_read(&tbl->entries)) printk(KERN_CRIT "neighbour leakage\n"); write_lock(&neigh_tbl_lock); for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { @@ -1211,52 +1422,77 @@ int neigh_table_clear(struct neigh_table *tbl) } } write_unlock(&neigh_tbl_lock); + + neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); + tbl->hash_buckets = NULL; + + kfree(tbl->phash_buckets); + tbl->phash_buckets = NULL; + + free_percpu(tbl->stats); + tbl->stats = NULL; + return 0; } int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *dst_attr; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err = -EINVAL; + + if (nlmsg_len(nlh) < sizeof(*ndm)) + goto out; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); + if (dst_attr == NULL) goto out; + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } + } + read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct neighbour *n; + struct neighbour *neigh; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - err = -EINVAL; - if (!nda[NDA_DST - 1] || - nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len)) + if (nla_len(dst_attr) < tbl->key_len) goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, - RTA_DATA(nda[NDA_DST - 1]), dev); + err = pneigh_delete(tbl, nla_data(dst_attr), dev); goto out_dev_put; } - if (!dev) - goto out; + if (dev == NULL) + goto out_dev_put; - n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev); - if (n) { - err = neigh_update(n, NULL, NUD_FAILED, 1, 0); - neigh_release(n); + neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); + if (neigh == NULL) { + err = -ENOENT; + goto out_dev_put; } + + err = neigh_update(neigh, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE | + NEIGH_UPDATE_F_ADMIN); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); @@ -1266,72 +1502,93 @@ out: int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm = NLMSG_DATA(nlh); - struct rtattr **nda = arg; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -ENODEV; + int err; - if (ndm->ndm_ifindex && - (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); + if (err < 0) goto out; - read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables; tbl; tbl = tbl->next) { - int override = 1; - struct neighbour *n; + err = -EINVAL; + if (tb[NDA_DST] == NULL) + goto out; - if (tbl->family != ndm->ndm_family) - continue; - read_unlock(&neigh_tbl_lock); + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { + dev = dev_get_by_index(ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; + } - err = -EINVAL; - if (!nda[NDA_DST - 1] || - nda[NDA_DST - 1]->rta_len != RTA_LENGTH(tbl->key_len)) + if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) + goto out_dev_put; + } + + 
read_lock(&neigh_tbl_lock); + for (tbl = neigh_tables; tbl; tbl = tbl->next) { + int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; + struct neighbour *neigh; + void *dst, *lladdr; + + if (tbl->family != ndm->ndm_family) + continue; + read_unlock(&neigh_tbl_lock); + + if (nla_len(tb[NDA_DST]) < tbl->key_len) goto out_dev_put; + dst = nla_data(tb[NDA_DST]); + lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; + if (ndm->ndm_flags & NTF_PROXY) { + struct pneigh_entry *pn; + err = -ENOBUFS; - if (pneigh_lookup(tbl, - RTA_DATA(nda[NDA_DST - 1]), dev, 1)) + pn = pneigh_lookup(tbl, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; err = 0; + } goto out_dev_put; } - err = -EINVAL; - if (!dev) - goto out; - if (nda[NDA_LLADDR - 1] && - nda[NDA_LLADDR - 1]->rta_len != RTA_LENGTH(dev->addr_len)) + + if (dev == NULL) goto out_dev_put; - err = 0; - n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev); - if (n) { - if (nlh->nlmsg_flags & NLM_F_EXCL) + + neigh = neigh_lookup(tbl, dst, dev); + if (neigh == NULL) { + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; + goto out_dev_put; + } + + neigh = __neigh_lookup_errno(tbl, dst, dev); + if (IS_ERR(neigh)) { + err = PTR_ERR(neigh); + goto out_dev_put; + } + } else { + if (nlh->nlmsg_flags & NLM_F_EXCL) { err = -EEXIST; - override = nlh->nlmsg_flags & NLM_F_REPLACE; - } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) - err = -ENOENT; - else { - n = __neigh_lookup_errno(tbl, RTA_DATA(nda[NDA_DST - 1]), - dev); - if (IS_ERR(n)) { - err = PTR_ERR(n); - n = NULL; + neigh_release(neigh); + goto out_dev_put; } + + if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) + flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - if (!err) { - err = neigh_update(n, nda[NDA_LLADDR - 1] ? - RTA_DATA(nda[NDA_LLADDR - 1]) : - NULL, - ndm->ndm_state, - override, 0); - } - if (n) - neigh_release(n); + + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EADDRNOTAVAIL; + err = -EAFNOSUPPORT; + out_dev_put: if (dev) dev_put(dev); @@ -1339,44 +1596,398 @@ out: return err; } +static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, NDTA_PARMS); + if (nest == NULL) + return -ENOBUFS; + + if (parms->dev) + NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); + + NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); + NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); + NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); + NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); + NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); + NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); + NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); + NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, + parms->base_reachable_time); + NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); + NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); + NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); + NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); + NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); + NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); + + return nla_nest_end(skb, nest); + +nla_put_failure: + return nla_nest_cancel(skb, nest); +} + +static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, + u32 pid, u32 seq, int type, int flags) +{ + struct nlmsghdr *nlh; + struct ndtmsg *ndtmsg; + + nlh = nlmsg_put(skb, 
pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + ndtmsg = nlmsg_data(nlh); + + read_lock_bh(&tbl->lock); + ndtmsg->ndtm_family = tbl->family; + ndtmsg->ndtm_pad1 = 0; + ndtmsg->ndtm_pad2 = 0; + + NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); + NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); + NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); + NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); + NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); + + { + unsigned long now = jiffies; + unsigned int flush_delta = now - tbl->last_flush; + unsigned int rand_delta = now - tbl->last_rand; + + struct ndt_config ndc = { + .ndtc_key_len = tbl->key_len, + .ndtc_entry_size = tbl->entry_size, + .ndtc_entries = atomic_read(&tbl->entries), + .ndtc_last_flush = jiffies_to_msecs(flush_delta), + .ndtc_last_rand = jiffies_to_msecs(rand_delta), + .ndtc_hash_rnd = tbl->hash_rnd, + .ndtc_hash_mask = tbl->hash_mask, + .ndtc_hash_chain_gc = tbl->hash_chain_gc, + .ndtc_proxy_qlen = tbl->proxy_queue.qlen, + }; + + NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + } + + { + int cpu; + struct ndt_stats ndst; + + memset(&ndst, 0, sizeof(ndst)); + + for_each_possible_cpu(cpu) { + struct neigh_statistics *st; + + st = per_cpu_ptr(tbl->stats, cpu); + ndst.ndts_allocs += st->allocs; + ndst.ndts_destroys += st->destroys; + ndst.ndts_hash_grows += st->hash_grows; + ndst.ndts_res_failed += st->res_failed; + ndst.ndts_lookups += st->lookups; + ndst.ndts_hits += st->hits; + ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast; + ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast; + ndst.ndts_periodic_gc_runs += st->periodic_gc_runs; + ndst.ndts_forced_gc_runs += st->forced_gc_runs; + } + + NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + } + + BUG_ON(tbl->parms.dev); + if (neightbl_fill_parms(skb, &tbl->parms) < 0) + goto nla_put_failure; + + read_unlock_bh(&tbl->lock); + return nlmsg_end(skb, nlh); + +nla_put_failure: + read_unlock_bh(&tbl->lock); + return nlmsg_cancel(skb, nlh); +} + +static int neightbl_fill_param_info(struct sk_buff *skb, + struct neigh_table *tbl, + struct neigh_parms *parms, + u32 pid, u32 seq, int type, + unsigned int flags) +{ + struct ndtmsg *ndtmsg; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); + if (nlh == NULL) + return -ENOBUFS; + + ndtmsg = nlmsg_data(nlh); + + read_lock_bh(&tbl->lock); + ndtmsg->ndtm_family = tbl->family; + ndtmsg->ndtm_pad1 = 0; + ndtmsg->ndtm_pad2 = 0; + + if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || + neightbl_fill_parms(skb, parms) < 0) + goto errout; + + read_unlock_bh(&tbl->lock); + return nlmsg_end(skb, nlh); +errout: + read_unlock_bh(&tbl->lock); + return nlmsg_cancel(skb, nlh); +} + +static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, + int ifindex) +{ + struct neigh_parms *p; + + for (p = &tbl->parms; p; p = p->next) + if ((p->dev && p->dev->ifindex == ifindex) || + (!p->dev && !ifindex)) + return p; + + return NULL; +} + +static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { + [NDTA_NAME] = { .type = NLA_STRING }, + [NDTA_THRESH1] = { .type = NLA_U32 }, + [NDTA_THRESH2] = { .type = NLA_U32 }, + [NDTA_THRESH3] = { .type = NLA_U32 }, + [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, + [NDTA_PARMS] = { .type = NLA_NESTED }, +}; + +static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { + [NDTPA_IFINDEX] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, + [NDTPA_APP_PROBES] = 
{ .type = NLA_U32 }, + [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, + [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, + [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, + [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, + [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, + [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, + [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, + [NDTPA_LOCKTIME] = { .type = NLA_U64 }, +}; + +int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct neigh_table *tbl; + struct ndtmsg *ndtmsg; + struct nlattr *tb[NDTA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, + nl_neightbl_policy); + if (err < 0) + goto errout; + + if (tb[NDTA_NAME] == NULL) { + err = -EINVAL; + goto errout; + } + + ndtmsg = nlmsg_data(nlh); + read_lock(&neigh_tbl_lock); + for (tbl = neigh_tables; tbl; tbl = tbl->next) { + if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) + continue; + + if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) + break; + } + + if (tbl == NULL) { + err = -ENOENT; + goto errout_locked; + } + + /* + * We acquire tbl->lock to be nice to the periodic timers and + * make sure they always see a consistent set of values. + */ + write_lock_bh(&tbl->lock); + + if (tb[NDTA_PARMS]) { + struct nlattr *tbp[NDTPA_MAX+1]; + struct neigh_parms *p; + int i, ifindex = 0; + + err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], + nl_ntbl_parm_policy); + if (err < 0) + goto errout_tbl_lock; + + if (tbp[NDTPA_IFINDEX]) + ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); + + p = lookup_neigh_params(tbl, ifindex); + if (p == NULL) { + err = -ENOENT; + goto errout_tbl_lock; + } + + for (i = 1; i <= NDTPA_MAX; i++) { + if (tbp[i] == NULL) + continue; + + switch (i) { + case NDTPA_QUEUE_LEN: + p->queue_len = nla_get_u32(tbp[i]); + break; + case NDTPA_PROXY_QLEN: + p->proxy_qlen = nla_get_u32(tbp[i]); + break; + case NDTPA_APP_PROBES: + p->app_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_UCAST_PROBES: + p->ucast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_MCAST_PROBES: + p->mcast_probes = nla_get_u32(tbp[i]); + break; + case NDTPA_BASE_REACHABLE_TIME: + p->base_reachable_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_GC_STALETIME: + p->gc_staletime = nla_get_msecs(tbp[i]); + break; + case NDTPA_DELAY_PROBE_TIME: + p->delay_probe_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_RETRANS_TIME: + p->retrans_time = nla_get_msecs(tbp[i]); + break; + case NDTPA_ANYCAST_DELAY: + p->anycast_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_PROXY_DELAY: + p->proxy_delay = nla_get_msecs(tbp[i]); + break; + case NDTPA_LOCKTIME: + p->locktime = nla_get_msecs(tbp[i]); + break; + } + } + } + + if (tb[NDTA_THRESH1]) + tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); + + if (tb[NDTA_THRESH2]) + tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); + + if (tb[NDTA_THRESH3]) + tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); + + if (tb[NDTA_GC_INTERVAL]) + tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); + + err = 0; + +errout_tbl_lock: + write_unlock_bh(&tbl->lock); +errout_locked: + read_unlock(&neigh_tbl_lock); +errout: + return err; +} + +int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) +{ + int family, tidx, nidx = 0; + int tbl_skip = cb->args[0]; + int neigh_skip = cb->args[1]; + struct neigh_table *tbl; + + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + + read_lock(&neigh_tbl_lock); + for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { + 
struct neigh_parms *p; + + if (tidx < tbl_skip || (family && tbl->family != family)) + continue; + + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) + break; + + for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { + if (nidx < neigh_skip) + continue; + + if (neightbl_fill_param_info(skb, tbl, p, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGHTBL, + NLM_F_MULTI) <= 0) + goto out; + } + + neigh_skip = 0; + } +out: + read_unlock(&neigh_tbl_lock); + cb->args[0] = tidx; + cb->args[1] = nidx; + + return skb->len; +} -static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, - u32 pid, u32 seq, int event) +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, + u32 pid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; - unsigned char *b = skb->tail; struct nda_cacheinfo ci; - int locked = 0; - struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq, event, - sizeof(struct ndmsg)); - struct ndmsg *ndm = NLMSG_DATA(nlh); - - ndm->ndm_family = n->ops->family; - ndm->ndm_flags = n->flags; - ndm->ndm_type = n->type; - ndm->ndm_ifindex = n->dev->ifindex; - RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); - read_lock_bh(&n->lock); - locked = 1; - ndm->ndm_state = n->nud_state; - if (n->nud_state & NUD_VALID) - RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); - ci.ndm_used = now - n->used; - ci.ndm_confirmed = now - n->confirmed; - ci.ndm_updated = now - n->updated; - ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; - read_unlock_bh(&n->lock); - locked = 0; - RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = neigh->ops->family; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = neigh->flags; + ndm->ndm_type = neigh->type; + ndm->ndm_ifindex = neigh->dev->ifindex; -nlmsg_failure: -rtattr_failure: - if (locked) - read_unlock_bh(&n->lock); - skb_trim(skb, b - skb->data); - return -1; + NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); + + read_lock_bh(&neigh->lock); + ndm->ndm_state = neigh->nud_state; + if ((neigh->nud_state & NUD_VALID) && + nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } + + ci.ndm_used = now - neigh->used; + ci.ndm_confirmed = now - neigh->confirmed; + ci.ndm_updated = now - neigh->updated; + ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; + read_unlock_bh(&neigh->lock); + + NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); + NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } @@ -1387,25 +1998,26 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; - for (h = 0; h <= NEIGH_HASHMASK; h++) { + read_lock_bh(&tbl->lock); + for (h = 0; h <= tbl->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; - read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { if (idx < s_idx) continue; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - RTM_NEWNEIGH) <= 0) { + RTM_NEWNEIGH, + NLM_F_MULTI) <= 0) { read_unlock_bh(&tbl->lock); rc = -1; goto out; } } - 
read_unlock_bh(&tbl->lock); } + read_unlock_bh(&tbl->lock); rc = skb->len; out: cb->args[1] = h; @@ -1419,7 +2031,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int t, family, s_t; read_lock(&neigh_tbl_lock); - family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; s_t = cb->args[0]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { @@ -1437,42 +2049,403 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -#ifdef CONFIG_ARPD -void neigh_app_ns(struct neighbour *n) +void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + int chain; - if (!skb) - return; + read_lock_bh(&tbl->lock); + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n; - if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) { - kfree_skb(skb); - return; + for (n = tbl->hash_buckets[chain]; n; n = n->next) + cb(n, cookie); } - nlh = (struct nlmsghdr *)skb->data; - nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + read_unlock_bh(&tbl->lock); } +EXPORT_SYMBOL(neigh_for_each); -static void neigh_app_notify(struct neighbour *n) +/* The tbl->lock must be held as a writer and BH disabled. */ +void __neigh_for_each_release(struct neigh_table *tbl, + int (*cb)(struct neighbour *)) { - struct nlmsghdr *nlh; - int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); - struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + int chain; - if (!skb) - return; + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n, **np; - if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) { - kfree_skb(skb); - return; + np = &tbl->hash_buckets[chain]; + while ((n = *np) != NULL) { + int release; + + write_lock(&n->lock); + release = cb(n); + if (release) { + *np = n->next; + n->dead = 1; + } else + np = &n->next; + write_unlock(&n->lock); + if (release) + neigh_release(n); + } + } +} +EXPORT_SYMBOL(__neigh_for_each_release); + +#ifdef CONFIG_PROC_FS + +static struct neighbour *neigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct neighbour *n = NULL; + int bucket = state->bucket; + + state->flags &= ~NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { + n = tbl->hash_buckets[bucket]; + + while (n) { + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; + + v = state->neigh_sub_iter(state, n, &fakep); + if (!v) + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = n->next; + } + + if (n) + break; + } + state->bucket = bucket; + + return n; +} + +static struct neighbour *neigh_get_next(struct seq_file *seq, + struct neighbour *n, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + } + n = n->next; + + while (1) { + while (n) { + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = 
n->next; + } + + if (n) + break; + + if (++state->bucket > tbl->hash_mask) + break; + + n = tbl->hash_buckets[state->bucket]; + } + + if (n && pos) + --(*pos); + return n; +} + +static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct neighbour *n = neigh_get_first(seq); + + if (n) { + while (*pos) { + n = neigh_get_next(seq, n, pos); + if (!n) + break; + } + } + return *pos ? NULL : n; +} + +static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct pneigh_entry *pn = NULL; + int bucket = state->bucket; + + state->flags |= NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { + pn = tbl->phash_buckets[bucket]; + if (pn) + break; + } + state->bucket = bucket; + + return pn; +} + +static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, + struct pneigh_entry *pn, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + pn = pn->next; + while (!pn) { + if (++state->bucket > PNEIGH_HASHMASK) + break; + pn = tbl->phash_buckets[state->bucket]; + if (pn) + break; + } + + if (pn && pos) + --(*pos); + + return pn; +} + +static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct pneigh_entry *pn = pneigh_get_first(seq); + + if (pn) { + while (*pos) { + pn = pneigh_get_next(seq, pn, pos); + if (!pn) + break; + } + } + return *pos ? NULL : pn; +} + +static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + void *rc; + + rc = neigh_get_idx(seq, pos); + if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) + rc = pneigh_get_idx(seq, pos); + + return rc; +} + +void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) +{ + struct neigh_seq_state *state = seq->private; + loff_t pos_minus_one; + + state->tbl = tbl; + state->bucket = 0; + state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); + + read_lock_bh(&tbl->lock); + + pos_minus_one = *pos - 1; + return *pos ? 
neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN; +} +EXPORT_SYMBOL(neigh_seq_start); + +void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct neigh_seq_state *state; + void *rc; + + if (v == SEQ_START_TOKEN) { + rc = neigh_get_idx(seq, pos); + goto out; } - nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + + state = seq->private; + if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { + rc = neigh_get_next(seq, v, NULL); + if (rc) + goto out; + if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) + rc = pneigh_get_first(seq); + } else { + BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); + rc = pneigh_get_next(seq, v, NULL); + } +out: + ++(*pos); + return rc; +} +EXPORT_SYMBOL(neigh_seq_next); + +void neigh_seq_stop(struct seq_file *seq, void *v) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + read_unlock_bh(&tbl->lock); +} +EXPORT_SYMBOL(neigh_seq_stop); + +/* statistics via seq_file */ + +static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct proc_dir_entry *pde = seq->private; + struct neigh_table *tbl = pde->data; + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(tbl->stats, cpu); + } + return NULL; +} + +static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct proc_dir_entry *pde = seq->private; + struct neigh_table *tbl = pde->data; + int cpu; + + for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(tbl->stats, cpu); + } + return NULL; +} + +static void neigh_stat_seq_stop(struct seq_file *seq, void *v) +{ + +} + +static int neigh_stat_seq_show(struct seq_file *seq, void *v) +{ + struct proc_dir_entry *pde = seq->private; + struct neigh_table *tbl = pde->data; + struct neigh_statistics *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + return 0; + } + + seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " + "%08lx %08lx %08lx %08lx\n", + atomic_read(&tbl->entries), + + st->allocs, + st->destroys, + st->hash_grows, + + st->lookups, + st->hits, + + st->res_failed, + + st->rcv_probes_mcast, + st->rcv_probes_ucast, + + st->periodic_gc_runs, + st->forced_gc_runs + ); + + return 0; +} + +static struct seq_operations neigh_stat_seq_ops = { + .start = neigh_stat_seq_start, + .next = neigh_stat_seq_next, + .stop = neigh_stat_seq_stop, + .show = neigh_stat_seq_show, +}; + +static int neigh_stat_seq_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &neigh_stat_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + sf->private = PDE(inode); + } + return ret; +}; + +static struct file_operations neigh_stat_seq_fops = { + .owner = THIS_MODULE, + .open = neigh_stat_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_ARPD +static inline size_t neigh_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(4); /* NDA_PROBES */ +} + +static void __neigh_notify(struct neighbour 
*n, int type, int flags) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = neigh_fill_info(skb, n, 0, 0, type, flags); + /* failure implies BUG in neigh_nlmsg_size() */ + BUG_ON(err < 0); + + err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_NEIGH, err); +} + +void neigh_app_ns(struct neighbour *n) +{ + __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); +} + +static void neigh_app_notify(struct neighbour *n) +{ + __neigh_notify(n, RTM_NEWNEIGH, 0); } #endif /* CONFIG_ARPD */ @@ -1481,12 +2454,12 @@ static void neigh_app_notify(struct neighbour *n) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table neigh_vars[17]; + ctl_table neigh_vars[__NET_NEIGH_MAX]; ctl_table neigh_dev[2]; ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; ctl_table neigh_root_dir[2]; -} neigh_sysctl_template = { +} neigh_sysctl_template __read_mostly = { .neigh_vars = { { .ctl_name = NET_NEIGH_MCAST_SOLICIT, @@ -1604,6 +2577,22 @@ static struct neigh_sysctl_table { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = NET_NEIGH_RETRANS_TIME_MS, + .procname = "retrans_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, + { + .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, + .procname = "base_reachable_time_ms", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies, + }, }, .neigh_dev = { { @@ -1634,24 +2623,20 @@ static struct neigh_sysctl_table { int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *handler) + proc_handler *handler, ctl_handler *strategy) { - struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, + sizeof(*t), GFP_KERNEL); const char *dev_name_source = NULL; char *dev_name = NULL; int err = 0; if (!t) return -ENOBUFS; - memcpy(t, &neigh_sysctl_template, sizeof(*t)); t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; t->neigh_vars[3].data = &p->retrans_time; - if (handler) { - t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].extra1 = dev; - } t->neigh_vars[4].data = &p->base_reachable_time; t->neigh_vars[5].data = &p->delay_probe_time; t->neigh_vars[6].data = &p->gc_staletime; @@ -1661,19 +2646,44 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[10].data = &p->proxy_delay; t->neigh_vars[11].data = &p->locktime; - dev_name_source = t->neigh_dev[0].procname; if (dev) { dev_name_source = dev->name; t->neigh_dev[0].ctl_name = dev->ifindex; - memset(&t->neigh_vars[12], 0, sizeof(ctl_table)); + t->neigh_vars[12].procname = NULL; + t->neigh_vars[13].procname = NULL; + t->neigh_vars[14].procname = NULL; + t->neigh_vars[15].procname = NULL; } else { + dev_name_source = t->neigh_dev[0].procname; t->neigh_vars[12].data = (int *)(p + 1); t->neigh_vars[13].data = (int *)(p + 1) + 1; t->neigh_vars[14].data = (int *)(p + 1) + 2; t->neigh_vars[15].data = (int *)(p + 1) + 3; } - dev_name = net_sysctl_strdup(dev_name_source); + t->neigh_vars[16].data = &p->retrans_time; + t->neigh_vars[17].data = &p->base_reachable_time; + + if (handler || strategy) { + /* RetransTime */ + t->neigh_vars[3].proc_handler = 
handler; + t->neigh_vars[3].strategy = strategy; + t->neigh_vars[3].extra1 = dev; + /* ReachableTime */ + t->neigh_vars[4].proc_handler = handler; + t->neigh_vars[4].strategy = strategy; + t->neigh_vars[4].extra1 = dev; + /* RetransTime (in milliseconds)*/ + t->neigh_vars[16].proc_handler = handler; + t->neigh_vars[16].strategy = strategy; + t->neigh_vars[16].extra1 = dev; + /* ReachableTime (in milliseconds) */ + t->neigh_vars[17].proc_handler = handler; + t->neigh_vars[17].strategy = strategy; + t->neigh_vars[17].extra1 = dev; + } + + dev_name = kstrdup(dev_name_source, GFP_KERNEL); if (!dev_name) { err = -ENOBUFS; goto free; @@ -1722,7 +2732,6 @@ void neigh_sysctl_unregister(struct neigh_parms *p) #endif /* CONFIG_SYSCTL */ EXPORT_SYMBOL(__neigh_event_send); -EXPORT_SYMBOL(neigh_add); EXPORT_SYMBOL(neigh_changeaddr); EXPORT_SYMBOL(neigh_compat_output); EXPORT_SYMBOL(neigh_connected_output); @@ -1733,14 +2742,15 @@ EXPORT_SYMBOL(neigh_dump_info); EXPORT_SYMBOL(neigh_event_ns); EXPORT_SYMBOL(neigh_ifdown); EXPORT_SYMBOL(neigh_lookup); +EXPORT_SYMBOL(neigh_lookup_nodev); EXPORT_SYMBOL(neigh_parms_alloc); EXPORT_SYMBOL(neigh_parms_release); EXPORT_SYMBOL(neigh_rand_reach_time); EXPORT_SYMBOL(neigh_resolve_output); EXPORT_SYMBOL(neigh_table_clear); EXPORT_SYMBOL(neigh_table_init); +EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); -EXPORT_SYMBOL(neigh_update_hhs); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup);
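The central structural change in this patch is the move from a fixed NEIGH_HASHMASK-sized table to per-table buckets that neigh_create() doubles once entries outnumber buckets, with neigh_hash_grow() rehashing every chain under a freshly drawn hash_rnd. What follows is a minimal userspace sketch of that grow-and-rehash discipline, not kernel code: entry, hash_key() and hash_grow() are illustrative stand-ins, and the multiplicative hash is an assumption in place of tbl->hash().

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	uint32_t key;
};

static struct entry **buckets;
static unsigned int hash_mask;		/* bucket count - 1; count is a power of 2 */
static uint32_t hash_rnd;		/* per-resize salt, like tbl->hash_rnd */

static unsigned int hash_key(uint32_t key)
{
	/* stand-in for tbl->hash(); the salt keeps chain layout unpredictable */
	return (key * 2654435761u) ^ hash_rnd;
}

static void hash_grow(unsigned int new_entries)
{
	unsigned int i, old_entries = hash_mask + 1, new_mask = new_entries - 1;
	struct entry **new_buckets;

	assert((new_entries & (new_entries - 1)) == 0);	/* power of two, cf. BUG_ON */
	new_buckets = calloc(new_entries, sizeof(*new_buckets));
	if (!new_buckets)
		return;			/* best-effort, as in neigh_hash_grow() */

	hash_rnd = (uint32_t)rand();	/* kernel draws get_random_bytes() here */
	for (i = 0; i < old_entries; i++) {
		struct entry *e, *next;

		for (e = buckets[i]; e; e = next) {
			unsigned int h = hash_key(e->key) & new_mask;

			next = e->next;	/* unlink and push onto the new chain */
			e->next = new_buckets[h];
			new_buckets[h] = e;
		}
	}
	free(buckets);
	buckets = new_buckets;
	hash_mask = new_mask;
}

int main(void)
{
	unsigned int i, n = 0;

	hash_mask = 1;			/* the patch also starts tables at 2 buckets */
	buckets = calloc(hash_mask + 1, sizeof(*buckets));

	for (i = 0; i < 64; i++) {
		struct entry *e = calloc(1, sizeof(*e));
		unsigned int h;

		e->key = i * 2654435761u;	/* arbitrary demo keys */
		h = hash_key(e->key) & hash_mask;
		e->next = buckets[h];
		buckets[h] = e;
		/* grow once load factor exceeds 1, as neigh_create() does */
		if (++n > hash_mask + 1)
			hash_grow((hash_mask + 1) << 1);
	}
	printf("%u entries in %u buckets\n", n, hash_mask + 1);
	return 0;
}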
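A worked example of the new gc_timer arithmetic in neigh_periodic_timer(), assuming HZ=100 and the stock ARP base_reachable_time of 30 seconds: base_reachable_time >> 1 is 1500 jiffies, so with 256 buckets the timer re-arms every 1500/256 = 5 jiffies (about 50 ms), visiting one bucket per run via hash_chain_gc and sweeping the whole table in roughly base_reachable_time/2, exactly as the comment in the hunk promises. The if (!expire) clause only matters when the bucket count exceeds base_reachable_time/2 measured in jiffies, where integer division would otherwise yield zero and disarm the sweep.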
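The hh_cache changes (write_seqlock_bh() in neigh_destroy() and neigh_update_hhs()) convert hh_lock from an rwlock to a seqlock so the output fast path can copy the cached hardware header without ever blocking the updater. Below is a single-writer C11 sketch of that reader-retry discipline; seq, hdr and the function names are illustrative, and a production version needs the kernel seqlock's exact barrier and access rules — the plain memcpy on shared data here is a simplification.

#include <stdatomic.h>
#include <string.h>

#define HDR_LEN 16

static atomic_uint seq;			/* even: stable, odd: writer active */
static unsigned char hdr[HDR_LEN];	/* the cached header, like hh->hh_data */

/* Single writer, cf. write_seqlock_bh()/write_sequnlock_bh(). */
static void write_header(const unsigned char *src)
{
	atomic_fetch_add(&seq, 1);	/* sequence becomes odd */
	memcpy(hdr, src, HDR_LEN);
	atomic_fetch_add(&seq, 1);	/* sequence becomes even again */
}

/* Readers never block: copy, then retry if a write overlapped. */
static void read_header(unsigned char *dst)
{
	unsigned int start;

	for (;;) {
		start = atomic_load(&seq);
		if (start & 1)
			continue;	/* write in progress; try again */
		memcpy(dst, hdr, HDR_LEN);
		if (atomic_load(&seq) == start)
			return;		/* no writer interleaved; copy is consistent */
	}
}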
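Finally, the single tbl->stats record becomes alloc_percpu() data: NEIGH_CACHE_STAT_INC() increments only the local CPU's slot, and the readers (neigh_stat_seq_show(), neightbl_fill_info()) sum across for_each_possible_cpu(). Here is a pthreads approximation of the same pattern, with per-thread slots standing in for per-CPU memory; note the kernel additionally keeps each slot on its own cache line, which this flat array does not.

#include <pthread.h>
#include <stdio.h>

#define NSLOTS 4			/* stands in for NR_CPUS */

struct stats { unsigned long lookups; };

static struct stats stats[NSLOTS];	/* stands in for alloc_percpu() */

static void *worker(void *arg)
{
	struct stats *my = &stats[(long)arg];	/* "this CPU's" private slot */
	long i;

	/* like NEIGH_CACHE_STAT_INC(tbl, lookups): no shared counter touched */
	for (i = 0; i < 1000000; i++)
		my->lookups++;
	return NULL;
}

int main(void)
{
	pthread_t tid[NSLOTS];
	unsigned long total = 0;
	long i;

	for (i = 0; i < NSLOTS; i++)
		pthread_create(&tid[i], NULL, worker, (void *)i);
	for (i = 0; i < NSLOTS; i++)
		pthread_join(tid[i], NULL);

	/* read side, like neightbl_fill_info(): sum over all slots */
	for (i = 0; i < NSLOTS; i++)
		total += stats[i].lookups;
	printf("lookups: %lu\n", total);
	return 0;
}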