diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ee410cb7d..c9bd1b74f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -12,6 +12,7 @@
  *
  *	Fixes:
  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
+ *	Harald Welte		Add neighbour cache statistics like rtstat
  */
 
 #include <linux/config.h>
@@ -21,6 +22,7 @@
 #include <linux/socket.h>
 #include <linux/sched.h>
 #include <linux/netdevice.h>
+#include <linux/proc_fs.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -29,6 +31,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
 #include <linux/string.h>
+#include <linux/seq_file.h>
 
 #define NEIGH_DEBUG 1
 
@@ -47,6 +50,8 @@
 #define NEIGH_PRINTK2 NEIGH_PRINTK
 #endif
 
+#define PNEIGH_HASHMASK 0xF
+
 static void neigh_timer_handler(unsigned long arg);
 #ifdef CONFIG_ARPD
 static void neigh_app_notify(struct neighbour *n);
@@ -56,6 +61,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 
 static int neigh_glbl_allocs;
 static struct neigh_table *neigh_tables;
+static struct file_operations neigh_stat_seq_fops;
 
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
@@ -113,27 +119,21 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 	int shrunk = 0;
 	int i;
 
-	for (i = 0; i <= NEIGH_HASHMASK; i++) {
+	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	for (i = 0; i <= tbl->hash_mask; i++) {
 		struct neighbour *n, **np;
 
 		np = &tbl->hash_buckets[i];
-		write_lock_bh(&tbl->lock);
 		while ((n = *np) != NULL) {
 			/* Neighbour record may be discarded if:
-			   - nobody refers to it.
-			   - it is not permanent
-			   - (NEW and probably wrong)
-			     INCOMPLETE entries are kept at least for
-			     n->parms->retrans_time, otherwise we could
-			     flood network with resolution requests.
-			     It is not clear, what is better table overflow
-			     or flooding.
+			 * - nobody refers to it.
+			 * - it is not permanent
 			 */
 			write_lock(&n->lock);
 			if (atomic_read(&n->refcnt) == 1 &&
-			    !(n->nud_state & NUD_PERMANENT) &&
-			    (n->nud_state != NUD_INCOMPLETE ||
-			     jiffies - n->used > n->parms->retrans_time)) {
+			    !(n->nud_state & NUD_PERMANENT)) {
 				*np = n->next;
 				n->dead = 1;
 				shrunk = 1;
@@ -144,10 +144,12 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			write_unlock(&n->lock);
 			np = &n->next;
 		}
-		write_unlock_bh(&tbl->lock);
 	}
 
 	tbl->last_flush = jiffies;
+
+	write_unlock_bh(&tbl->lock);
+
 	return shrunk;
 }
 
@@ -177,7 +179,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 
 	write_lock_bh(&tbl->lock);
 
-	for (i=0; i <= NEIGH_HASHMASK; i++) {
+	for (i=0; i <= tbl->hash_mask; i++) {
 		struct neighbour *n, **np;
 
 		np = &tbl->hash_buckets[i];
@@ -204,7 +206,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 
 	write_lock_bh(&tbl->lock);
 
-	for (i = 0; i <= NEIGH_HASHMASK; i++) {
+	for (i = 0; i <= tbl->hash_mask; i++) {
 		struct neighbour *n, **np = &tbl->hash_buckets[i];
 
 		while ((n = *np) != NULL) {
@@ -227,7 +229,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 			   we must kill timers etc. and move
 			   it to safe state.
*/ - n->parms = &tbl->parms; skb_queue_purge(&n->arp_queue); n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -256,7 +257,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) if (tbl->entries > tbl->gc_thresh3 || (tbl->entries > tbl->gc_thresh2 && - now - tbl->last_flush > 5 * HZ)) { + time_after(now, tbl->last_flush + 5 * HZ))) { if (!neigh_forced_gc(tbl) && tbl->entries > tbl->gc_thresh3) goto out; @@ -273,11 +274,12 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; - n->parms = &tbl->parms; + n->parms = neigh_parms_clone(&tbl->parms); init_timer(&n->timer); n->timer.function = neigh_timer_handler; n->timer.data = (unsigned long)n; - tbl->stats.allocs++; + + NEIGH_CACHE_STAT_INC(tbl, allocs); neigh_glbl_allocs++; tbl->entries++; n->tbl = tbl; @@ -287,17 +289,103 @@ out: return n; } +static struct neighbour **neigh_hash_alloc(unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + struct neighbour **ret; + + if (size <= PAGE_SIZE) { + ret = kmalloc(size, GFP_ATOMIC); + } else { + ret = (struct neighbour **) + __get_free_pages(GFP_ATOMIC, get_order(size)); + } + if (ret) + memset(ret, 0, size); + + return ret; +} + +static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + + if (size <= PAGE_SIZE) + kfree(hash); + else + free_pages((unsigned long)hash, get_order(size)); +} + +static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +{ + struct neighbour **new_hash, **old_hash; + unsigned int i, new_hash_mask, old_entries; + + NEIGH_CACHE_STAT_INC(tbl, hash_grows); + + BUG_ON(new_entries & (new_entries - 1)); + new_hash = neigh_hash_alloc(new_entries); + if (!new_hash) + return; + + old_entries = tbl->hash_mask + 1; + new_hash_mask = new_entries - 1; + old_hash = tbl->hash_buckets; + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + for (i = 0; i < old_entries; i++) { + struct neighbour *n, *next; + + for (n = old_hash[i]; n; n = next) { + unsigned int hash_val = tbl->hash(n->primary_key, n->dev); + + hash_val &= new_hash_mask; + next = n->next; + + n->next = new_hash[hash_val]; + new_hash[hash_val] = n; + } + } + tbl->hash_buckets = new_hash; + tbl->hash_mask = new_hash_mask; + + neigh_hash_free(old_hash, old_entries); +} + struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, dev); + u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + + NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); + break; + } + } + read_unlock_bh(&tbl->lock); + return n; +} + +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +{ + struct neighbour *n; + int key_len = tbl->key_len; + u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; + + NEIGH_CACHE_STAT_INC(tbl, lookups); + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->primary_key, pkey, key_len)) { + neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); break; } } @@ -337,15 +425,23 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n->confirmed = jiffies - 
(n->parms->base_reachable_time << 1); - hash_val = tbl->hash(pkey, dev); - write_lock_bh(&tbl->lock); + + if (tbl->entries > (tbl->hash_mask + 1)) + neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + + hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); + goto out_tbl_unlock; + } + for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); - write_unlock_bh(&tbl->lock); rc = n1; - goto out_neigh_release; + goto out_tbl_unlock; } } @@ -358,6 +454,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, rc = n; out: return rc; +out_tbl_unlock: + write_unlock_bh(&tbl->lock); out_neigh_release: neigh_release(n); goto out; @@ -395,8 +493,12 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, memcpy(n->key, pkey, key_len); n->dev = dev; + if (dev) + dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { + if (dev) + dev_put(dev); kfree(n); n = NULL; goto out; @@ -423,18 +525,21 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; + write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { - write_lock_bh(&tbl->lock); *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); kfree(n); return 0; } } + write_unlock_bh(&tbl->lock); return -ENOENT; } @@ -450,6 +555,8 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) *np = n->next; if (tbl->pdestructor) tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); kfree(n); continue; } @@ -468,6 +575,8 @@ void neigh_destroy(struct neighbour *neigh) { struct hh_cache *hh; + NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); + if (!neigh->dead) { printk(KERN_WARNING "Destroying alive neighbour %p\n", neigh); @@ -494,6 +603,7 @@ void neigh_destroy(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); + neigh_parms_put(neigh->parms); NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); @@ -536,46 +646,13 @@ static void neigh_connect(struct neighbour *neigh) hh->hh_output = neigh->ops->hh_output; } -/* - Transitions NUD_STALE <-> NUD_REACHABLE do not occur - when fast path is built: we have no timers associated with - these states, we do not have time to check state when sending. - neigh_periodic_timer check periodically neigh->confirmed - time and moves NUD_REACHABLE -> NUD_STALE. - - If a routine wants to know TRUE entry state, it calls - neigh_sync before checking state. - - Called with write_locked neigh. 
- */ - -static void neigh_sync(struct neighbour *n) -{ - unsigned long now = jiffies; - u8 state = n->nud_state; - - if (state & (NUD_NOARP | NUD_PERMANENT)) - return; - if (state & NUD_REACHABLE) { - if (now - n->confirmed > n->parms->reachable_time) { - n->nud_state = NUD_STALE; - neigh_suspect(n); - } - } else if (state & NUD_VALID) { - if (now - n->confirmed < n->parms->reachable_time) { - neigh_del_timer(n); - n->nud_state = NUD_REACHABLE; - neigh_connect(n); - } - } -} - static void neigh_periodic_timer(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table *)arg; - unsigned long now = jiffies; - int i; + struct neighbour *n, **np; + unsigned long expire, now = jiffies; + NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock(&tbl->lock); @@ -583,7 +660,7 @@ static void neigh_periodic_timer(unsigned long arg) * periodically recompute ReachableTime from random function */ - if (now - tbl->last_rand > 300 * HZ) { + if (time_after(now, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; tbl->last_rand = now; for (p = &tbl->parms; p; p = p->next) @@ -591,47 +668,49 @@ static void neigh_periodic_timer(unsigned long arg) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0; i <= NEIGH_HASHMASK; i++) { - struct neighbour *n, **np; - - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - unsigned state; + np = &tbl->hash_buckets[tbl->hash_chain_gc]; + tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); - write_lock(&n->lock); + while ((n = *np) != NULL) { + unsigned int state; - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + write_lock(&n->lock); - if ((long)(n->used - n->confirmed) < 0) - n->used = n->confirmed; + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - now - n->used > n->parms->gc_staletime)) { - *np = n->next; - n->dead = 1; - write_unlock(&n->lock); - neigh_release(n); - continue; - } + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (n->nud_state & NUD_REACHABLE && - now - n->confirmed > n->parms->reachable_time) { - n->nud_state = NUD_STALE; - neigh_suspect(n); - } + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(now, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; write_unlock(&n->lock); + neigh_release(n); + continue; + } + write_unlock(&n->lock); next_elt: - np = &n->next; - } + np = &n->next; } - mod_timer(&tbl->gc_timer, now + tbl->gc_interval); + /* Cycle through all hash buckets every base_reachable_time/2 ticks. + * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 + * base_reachable_time. 
+ */ + expire = tbl->parms.base_reachable_time >> 1; + expire /= (tbl->hash_mask + 1); + if (!expire) + expire = 1; + + mod_timer(&tbl->gc_timer, now + expire); + write_unlock(&tbl->lock); } @@ -648,7 +727,7 @@ static __inline__ int neigh_max_probes(struct neighbour *n) static void neigh_timer_handler(unsigned long arg) { - unsigned long now = jiffies; + unsigned long now, next; struct neighbour *neigh = (struct neighbour *)arg; unsigned state; int notify = 0; @@ -656,6 +735,8 @@ static void neigh_timer_handler(unsigned long arg) write_lock(&neigh->lock); state = neigh->nud_state; + now = jiffies; + next = now + HZ; if (!(state & NUD_IN_TIMER)) { #ifndef CONFIG_SMP @@ -664,25 +745,47 @@ static void neigh_timer_handler(unsigned long arg) goto out; } - if ((state & NUD_VALID) && - now - neigh->confirmed < neigh->parms->reachable_time) { - neigh->nud_state = NUD_REACHABLE; - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); - neigh_connect(neigh); - goto out; - } - if (state == NUD_DELAY) { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, 0); + if (state & NUD_REACHABLE) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->reachable_time)) { + NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + next = neigh->confirmed + neigh->parms->reachable_time; + } else if (time_before_eq(now, + neigh->used + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->nud_state = NUD_DELAY; + neigh_suspect(neigh); + next = now + neigh->parms->delay_probe_time; + } else { + NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh->nud_state = NUD_STALE; + neigh_suspect(neigh); + } + } else if (state & NUD_DELAY) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh->nud_state = NUD_REACHABLE; + neigh_connect(neigh); + next = neigh->confirmed + neigh->parms->reachable_time; + } else { + NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh->nud_state = NUD_PROBE; + atomic_set(&neigh->probes, 0); + next = now + neigh->parms->retrans_time; + } + } else { + /* NUD_PROBE|NUD_INCOMPLETE */ + next = now + neigh->parms->retrans_time; } - if (atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { + if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && + atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { struct sk_buff *skb; neigh->nud_state = NUD_FAILED; notify = 1; - neigh->tbl->stats.res_failed++; + NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); NEIGH_PRINTK2("neigh %p is failed.\n", neigh); /* It is very thin place. 
report_unreachable is very complicated @@ -697,19 +800,30 @@ static void neigh_timer_handler(unsigned long arg) write_lock(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); - goto out; } - neigh->timer.expires = now + neigh->parms->retrans_time; - add_timer(&neigh->timer); - write_unlock(&neigh->lock); - - neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue)); - atomic_inc(&neigh->probes); - return; - + if (neigh->nud_state & NUD_IN_TIMER) { + neigh_hold(neigh); + if (time_before(next, jiffies + HZ/2)) + next = jiffies + HZ/2; + neigh->timer.expires = next; + add_timer(&neigh->timer); + } + if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb_get(skb); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + if (skb) + kfree_skb(skb); + } else { out: - write_unlock(&neigh->lock); + write_unlock(&neigh->lock); + } + #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); @@ -720,6 +834,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; + unsigned long now; write_lock_bh(&neigh->lock); @@ -727,18 +842,15 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; + now = jiffies; + if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh_hold(neigh); - neigh->timer.expires = jiffies + - neigh->parms->retrans_time; + neigh->timer.expires = now + 1; add_timer(&neigh->timer); - write_unlock_bh(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - write_lock_bh(&neigh->lock); } else { neigh->nud_state = NUD_FAILED; write_unlock_bh(&neigh->lock); @@ -747,6 +859,12 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) kfree_skb(skb); return 1; } + } else if (neigh->nud_state & NUD_STALE) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_hold(neigh); + neigh->nud_state = NUD_DELAY; + neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; + add_timer(&neigh->timer); } if (neigh->nud_state == NUD_INCOMPLETE) { @@ -761,13 +879,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) __skb_queue_tail(&neigh->arp_queue, skb); } rc = 1; - } else if (neigh->nud_state == NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); - neigh_hold(neigh); - neigh->nud_state = NUD_DELAY; - neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; - add_timer(&neigh->timer); - rc = 0; } out_unlock_bh: write_unlock_bh(&neigh->lock); @@ -794,14 +905,26 @@ static __inline__ void neigh_update_hhs(struct neighbour *neigh) /* Generic update routine. -- lladdr is new lladdr or NULL, if it is not supplied. -- new is new state. - -- override == 1 allows to override existing lladdr, if it is different. - -- arp == 0 means that the change is administrative. + -- flags + NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, + if it is different. + NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" + lladdr instead of overriding it + if it is different. + It also allows to retain current state + if lladdr is unchanged. + NEIGH_UPDATE_F_ADMIN means that the change is administrative. 
+ + NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing + NTF_ROUTER flag. + NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as + a router. Caller MUST hold reference count on the entry. */ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - int override, int arp) + u32 flags) { u8 old; int err; @@ -809,6 +932,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int notify = 0; #endif struct net_device *dev; + int update_isrouter = 0; write_lock_bh(&neigh->lock); @@ -816,7 +940,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, old = neigh->nud_state; err = -EPERM; - if (arp && (old & (NUD_NOARP | NUD_PERMANENT))) + if (!(flags & NEIGH_UPDATE_F_ADMIN) && + (old & (NUD_NOARP | NUD_PERMANENT))) goto out; if (!(new & NUD_VALID)) { @@ -841,12 +966,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, - compare new & old - if they are different, check override flag */ - if (old & NUD_VALID) { - if (!memcmp(lladdr, neigh->ha, dev->addr_len)) - lladdr = neigh->ha; - else if (!override) - goto out; - } + if ((old & NUD_VALID) && + !memcmp(lladdr, neigh->ha, dev->addr_len)) + lladdr = neigh->ha; } else { /* No address is supplied; if we know something, use it, otherwise discard the request. @@ -857,8 +979,6 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, lladdr = neigh->ha; } - neigh_sync(neigh); - old = neigh->nud_state; if (new & NUD_CONNECTED) neigh->confirmed = jiffies; neigh->updated = jiffies; @@ -867,12 +987,37 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, do not change entry state, if new one is STALE. */ err = 0; - if ((old & NUD_VALID) && lladdr == neigh->ha && - (new == old || (new == NUD_STALE && (old & NUD_CONNECTED)))) - goto out; + update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + if (old & NUD_VALID) { + if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { + update_isrouter = 0; + if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && + (old & NUD_CONNECTED)) { + lladdr = neigh->ha; + new = NUD_STALE; + } else + goto out; + } else { + if (lladdr == neigh->ha && new == NUD_STALE && + ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || + (old & NUD_CONNECTED)) + ) + new = old; + } + } + + if (new != old) { + neigh_del_timer(neigh); + if (new & NUD_IN_TIMER) { + neigh_hold(neigh); + neigh->timer.expires = jiffies + + ((new & NUD_REACHABLE) ? + neigh->parms->reachable_time : 0); + add_timer(&neigh->timer); + } + neigh->nud_state = new; + } - neigh_del_timer(neigh); - neigh->nud_state = new; if (lladdr != neigh->ha) { memcpy(&neigh->ha, lladdr, dev->addr_len); neigh_update_hhs(neigh); @@ -907,6 +1052,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, skb_queue_purge(&neigh->arp_queue); } out: + if (update_isrouter) { + neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? 
+ (neigh->flags | NTF_ROUTER) : + (neigh->flags & ~NTF_ROUTER); + } write_unlock_bh(&neigh->lock); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -922,7 +1072,8 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len); if (neigh) - neigh_update(neigh, lladdr, NUD_STALE, 1, 1); + neigh_update(neigh, lladdr, NUD_STALE, + NEIGH_UPDATE_F_OVERRIDE); return neigh; } @@ -1088,26 +1239,25 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; - long sched_next = net_random() % p->proxy_delay; + unsigned long sched_next = now + (net_random() % p->proxy_delay); if (tbl->proxy_queue.qlen > p->proxy_qlen) { kfree_skb(skb); return; } skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = now + sched_next; + skb->stamp.tv_usec = sched_next; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { - long tval = tbl->proxy_timer.expires - now; - if (tval < sched_next) - sched_next = tval; + if (time_before(tbl->proxy_timer.expires, sched_next)) + sched_next = tbl->proxy_timer.expires; } dst_release(skb->dst); skb->dst = NULL; dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); - mod_timer(&tbl->proxy_timer, now + sched_next); + mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } @@ -1120,6 +1270,8 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; + atomic_set(&p->refcnt, 1); + INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); if (dev && dev->neigh_setup && dev->neigh_setup(dev, p)) { @@ -1135,6 +1287,14 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, return p; } +static void neigh_rcu_free_parms(struct rcu_head *head) +{ + struct neigh_parms *parms = + container_of(head, struct neigh_parms, rcu_head); + + neigh_parms_put(parms); +} + void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) { struct neigh_parms **p; @@ -1145,8 +1305,9 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) for (p = &tbl->parms.next; *p; p = &(*p)->next) { if (*p == parms) { *p = parms->next; + parms->dead = 1; write_unlock_bh(&tbl->lock); - kfree(parms); + call_rcu(&parms->rcu_head, neigh_rcu_free_parms); return; } } @@ -1154,11 +1315,19 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) NEIGH_PRINTK1("neigh_parms_release: not found\n"); } +void neigh_parms_destroy(struct neigh_parms *parms) +{ + kfree(parms); +} + void neigh_table_init(struct neigh_table *tbl) { unsigned long now = jiffies; + unsigned long phsize; + atomic_set(&tbl->parms.refcnt, 1); + INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); @@ -1171,12 +1340,36 @@ void neigh_table_init(struct neigh_table *tbl) if (!tbl->kmem_cachep) panic("cannot create neighbour cache"); + tbl->stats = alloc_percpu(struct neigh_statistics); + if (!tbl->stats) + panic("cannot create neighbour cache statistics"); + +#ifdef CONFIG_PROC_FS + tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + if (!tbl->pde) + panic("cannot create neighbour proc dir entry"); + tbl->pde->proc_fops = &neigh_stat_seq_fops; + tbl->pde->data = tbl; +#endif + + tbl->hash_mask = 1; + tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); + + phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct 
pneigh_entry *); + tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL); + + if (!tbl->hash_buckets || !tbl->phash_buckets) + panic("cannot allocate neighbour cache hashes"); + + memset(tbl->phash_buckets, 0, phsize); + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + tbl->lock = RW_LOCK_UNLOCKED; init_timer(&tbl->gc_timer); tbl->gc_timer.data = (unsigned long)tbl; tbl->gc_timer.function = neigh_periodic_timer; - tbl->gc_timer.expires = now + tbl->gc_interval + - tbl->parms.reachable_time; + tbl->gc_timer.expires = now + 1; add_timer(&tbl->gc_timer); init_timer(&tbl->proxy_timer); @@ -1211,6 +1404,13 @@ int neigh_table_clear(struct neigh_table *tbl) } } write_unlock(&neigh_tbl_lock); + + neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); + tbl->hash_buckets = NULL; + + kfree(tbl->phash_buckets); + tbl->phash_buckets = NULL; + return 0; } @@ -1250,7 +1450,9 @@ int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST - 1]), dev); if (n) { - err = neigh_update(n, NULL, NUD_FAILED, 1, 0); + err = neigh_update(n, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE| + NEIGH_UPDATE_F_ADMIN); neigh_release(n); } goto out_dev_put; @@ -1323,7 +1525,8 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) RTA_DATA(nda[NDA_LLADDR - 1]) : NULL, ndm->ndm_state, - override, 0); + (override ? NEIGH_UPDATE_F_OVERRIDE : 0) | + NEIGH_UPDATE_F_ADMIN); } if (n) neigh_release(n); @@ -1387,7 +1590,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; - for (h = 0; h <= NEIGH_HASHMASK; h++) { + for (h = 0; h <= tbl->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) @@ -1437,6 +1640,366 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) +{ + int chain; + + read_lock_bh(&tbl->lock); + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n; + + for (n = tbl->hash_buckets[chain]; n; n = n->next) + cb(n, cookie); + } + read_unlock_bh(&tbl->lock); +} +EXPORT_SYMBOL(neigh_for_each); + +/* The tbl->lock must be held as a writer and BH disabled. 
*/ +void __neigh_for_each_release(struct neigh_table *tbl, + int (*cb)(struct neighbour *)) +{ + int chain; + + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[chain]; + while ((n = *np) != NULL) { + int release; + + write_lock(&n->lock); + release = cb(n); + if (release) { + *np = n->next; + n->dead = 1; + } else + np = &n->next; + write_unlock(&n->lock); + if (release) + neigh_release(n); + } + } +} +EXPORT_SYMBOL(__neigh_for_each_release); + +#ifdef CONFIG_PROC_FS + +static struct neighbour *neigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct neighbour *n = NULL; + int bucket = state->bucket; + + state->flags &= ~NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { + n = tbl->hash_buckets[bucket]; + + while (n) { + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; + + v = state->neigh_sub_iter(state, n, &fakep); + if (!v) + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = n->next; + } + + if (n) + break; + } + state->bucket = bucket; + + return n; +} + +static struct neighbour *neigh_get_next(struct seq_file *seq, + struct neighbour *n, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + } + n = n->next; + + while (1) { + while (n) { + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = n->next; + } + + if (n) + break; + + if (++state->bucket > tbl->hash_mask) + break; + + n = tbl->hash_buckets[state->bucket]; + } + + if (n && pos) + --(*pos); + return n; +} + +static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct neighbour *n = neigh_get_first(seq); + + if (n) { + while (*pos) { + n = neigh_get_next(seq, n, pos); + if (!n) + break; + } + } + return *pos ? NULL : n; +} + +static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct pneigh_entry *pn = NULL; + int bucket = state->bucket; + + state->flags |= NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { + pn = tbl->phash_buckets[bucket]; + if (pn) + break; + } + state->bucket = bucket; + + return pn; +} + +static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, + struct pneigh_entry *pn, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + pn = pn->next; + while (!pn) { + if (++state->bucket > PNEIGH_HASHMASK) + break; + pn = tbl->phash_buckets[state->bucket]; + if (pn) + break; + } + + if (pn && pos) + --(*pos); + + return pn; +} + +static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct pneigh_entry *pn = pneigh_get_first(seq); + + if (pn) { + while (*pos) { + pn = pneigh_get_next(seq, pn, pos); + if (!pn) + break; + } + } + return *pos ? 
NULL : pn;
+}
+
+static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
+{
+	struct neigh_seq_state *state = seq->private;
+	void *rc;
+
+	rc = neigh_get_idx(seq, pos);
+	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+		rc = pneigh_get_idx(seq, pos);
+
+	return rc;
+}
+
+void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+{
+	struct neigh_seq_state *state = seq->private;
+	loff_t pos_minus_one;
+
+	state->tbl = tbl;
+	state->bucket = 0;
+	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
+
+	read_lock_bh(&tbl->lock);
+
+	pos_minus_one = *pos - 1;
+	return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
+}
+EXPORT_SYMBOL(neigh_seq_start);
+
+void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct neigh_seq_state *state;
+	void *rc;
+
+	if (v == SEQ_START_TOKEN) {
+		rc = neigh_get_idx(seq, pos);
+		goto out;
+	}
+
+	state = seq->private;
+	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
+		rc = neigh_get_next(seq, v, NULL);
+		if (rc)
+			goto out;
+		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
+			rc = pneigh_get_first(seq);
+	} else {
+		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
+		rc = pneigh_get_next(seq, v, NULL);
+	}
+out:
+	++(*pos);
+	return rc;
+}
+EXPORT_SYMBOL(neigh_seq_next);
+
+void neigh_seq_stop(struct seq_file *seq, void *v)
+{
+	struct neigh_seq_state *state = seq->private;
+	struct neigh_table *tbl = state->tbl;
+
+	read_unlock_bh(&tbl->lock);
+}
+EXPORT_SYMBOL(neigh_seq_stop);
+
+/* statistics via seq_file */
+
+static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	int cpu;
+
+	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu+1;
+		return per_cpu_ptr(tbl->stats, cpu);
+	}
+	return NULL;
+}
+
+static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
+{
+
+}
+
+static int neigh_stat_seq_show(struct seq_file *seq, void *v)
+{
+	struct proc_dir_entry *pde = seq->private;
+	struct neigh_table *tbl = pde->data;
+	struct neigh_statistics *st = v;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
+		return 0;
+	}
+
+	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+			"%08lx %08lx %08lx %08lx\n",
+		   tbl->entries,
+
+		   st->allocs,
+		   st->destroys,
+		   st->hash_grows,
+
+		   st->lookups,
+		   st->hits,
+
+		   st->res_failed,
+
+		   st->rcv_probes_mcast,
+		   st->rcv_probes_ucast,
+
+		   st->periodic_gc_runs,
+		   st->forced_gc_runs
+		   );
+
+	return 0;
+}
+
+static struct seq_operations neigh_stat_seq_ops = {
+	.start	= neigh_stat_seq_start,
+	.next	= neigh_stat_seq_next,
+	.stop	= neigh_stat_seq_stop,
+	.show	= neigh_stat_seq_show,
+};
+
+static int neigh_stat_seq_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &neigh_stat_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+		sf->private = PDE(inode);
+	}
+	return ret;
+}
+
+static struct file_operations neigh_stat_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open	 = neigh_stat_seq_open,
+	.read	 = seq_read,
+	.llseek	 = seq_lseek,
+	.release = seq_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
 #ifdef CONFIG_ARPD
 void neigh_app_ns(struct neighbour *n)
 {
@@ -1733,6 +2296,7 @@ EXPORT_SYMBOL(neigh_dump_info);
 EXPORT_SYMBOL(neigh_event_ns);
 EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
+EXPORT_SYMBOL(neigh_lookup_nodev);
 EXPORT_SYMBOL(neigh_parms_alloc);
 EXPORT_SYMBOL(neigh_parms_release);
 EXPORT_SYMBOL(neigh_rand_reach_time);
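---
Notes on the techniques introduced above. The sketches below are plain userspace C with illustrative names; they model the ideas, not the kernel's actual code.

Many hunks replace open-coded jiffies arithmetic such as "now - tbl->last_flush > 5 * HZ" with time_after(now, tbl->last_flush + 5 * HZ), and likewise time_before/time_before_eq elsewhere. The point is wraparound safety. A minimal sketch of the same subtract-and-sign trick; my_time_after and the sample values are mine, not kernel API:

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wrap-safe comparison, same trick as the kernel's time_after():
 * subtract first, then test the sign of the result.
 */
static int my_time_after(jiffies_t a, jiffies_t b)
{
	return (long)(b - a) < 0;
}

int main(void)
{
	jiffies_t last = (jiffies_t)-3;	/* counter about to wrap */
	jiffies_t now  = 2;		/* five ticks later, after the wrap */

	/* A naive comparison fails across the wrap... */
	printf("naive:      %d\n", now > last);		/* 0: wrong */
	/* ...the subtract-and-sign form does not. */
	printf("time_after: %d\n", my_time_after(now, last));	/* 1: right */
	return 0;
}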
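neigh_hash_grow() doubles a power-of-two bucket array and rehashes every chain by recomputing hash & new_mask; the BUG_ON(new_entries & (new_entries - 1)) enforces the power-of-two invariant. A simplified single-threaded model of that rehash, assuming a toy multiplicative hash and no locking; all identifiers are illustrative:

#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned int key;
	struct node *next;
};

struct table {
	struct node **buckets;
	unsigned int mask;		/* size - 1, size a power of two */
};

static unsigned int toy_hash(unsigned int key)
{
	return key * 2654435761u;	/* Knuth multiplicative mix */
}

static void grow(struct table *t)
{
	unsigned int old_size = t->mask + 1, new_size = old_size << 1;
	struct node **nb = calloc(new_size, sizeof(*nb));
	unsigned int i;

	if (!nb)
		return;			/* keep the old table on failure */
	for (i = 0; i < old_size; i++) {
		struct node *n, *next;

		for (n = t->buckets[i]; n; n = next) {
			/* re-mask with the wider mask, as the patch does */
			unsigned int h = toy_hash(n->key) & (new_size - 1);

			next = n->next;
			n->next = nb[h];	/* push onto the new chain */
			nb[h] = n;
		}
	}
	free(t->buckets);
	t->buckets = nb;
	t->mask = new_size - 1;
}

int main(void)
{
	struct table t = { calloc(2, sizeof(struct node *)), 1 };
	unsigned int k;

	for (k = 0; k < 8; k++) {	/* insert a few keys */
		struct node *n = malloc(sizeof(*n));
		unsigned int h = toy_hash(k) & t.mask;

		n->key = k;
		n->next = t.buckets[h];
		t.buckets[h] = n;
	}
	grow(&t);			/* 2 -> 4 buckets */
	printf("mask is now %u\n", t.mask);	/* prints: mask is now 3 */
	return 0;
}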
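pneigh_delete() folds its hash with hash_val ^= hash_val >> 4 and masks with PNEIGH_HASHMASK (0xF), so the proxy-neighbour table is a fixed 16-bucket hash. A sketch of the fold; only the final shift and mask are visible in the diff, so deriving hash_val from four key bytes here is my assumption for illustration:

#include <stdio.h>

int main(void)
{
	unsigned char key[4] = { 192, 168, 1, 42 };	/* sample IPv4 key */
	unsigned int hash_val;

	hash_val = (unsigned int)key[0] << 24 | (unsigned int)key[1] << 16 |
		   (unsigned int)key[2] << 8  | key[3];
	hash_val ^= hash_val >> 16;	/* fold high bits down (illustrative) */
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;	/* as in pneigh_delete() above */
	hash_val &= 0xF;		/* PNEIGH_HASHMASK */
	printf("bucket %u\n", hash_val);
	return 0;
}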
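The reworked neigh_periodic_timer() no longer sweeps the whole table on every run; it garbage-collects one chain (tbl->hash_chain_gc) and rearms with expire = (base_reachable_time / 2) / (hash_mask + 1), so a full sweep still takes roughly base_reachable_time / 2 regardless of table size. A worked example of that arithmetic with an assumed HZ and assumed table sizes:

#include <stdio.h>

#define HZ 1000	/* assumed tick rate for the example */

int main(void)
{
	unsigned long base_reachable_time = 30 * HZ;	/* common default: 30 s */
	unsigned int sizes[] = { 2, 16, 512 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		unsigned long hash_mask = sizes[i] - 1;
		/* same arithmetic as the patched neigh_periodic_timer() */
		unsigned long expire = base_reachable_time >> 1;

		expire /= (hash_mask + 1);
		if (!expire)
			expire = 1;
		printf("%3u buckets: rearm every %5lu ticks, full sweep ~%lu s\n",
		       sizes[i], expire, expire * (hash_mask + 1) / HZ);
	}
	return 0;
}

With the values above this prints a 7500-tick interval for 2 buckets and a 29-tick interval for 512 buckets, both completing a sweep in about 15 seconds.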
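The neigh_update() rewrite replaces the old (override, arp) arguments with a flags word. The interesting case is a link-layer address conflict on a valid entry: NEIGH_UPDATE_F_OVERRIDE takes the new address, NEIGH_UPDATE_F_WEAK_OVERRIDE on a connected entry keeps the old address but demotes the entry to stale, and otherwise the update is dropped. A pure-function model of just that decision; the flag values and enum are mine, not the kernel's:

#include <stdio.h>

#define F_OVERRIDE	0x1
#define F_WEAK_OVERRIDE	0x2

enum outcome { IGNORE_UPDATE, TAKE_NEW_LLADDR, KEEP_LLADDR_MARK_STALE };

/* "old_connected" stands for the entry being in NUD_CONNECTED state. */
static enum outcome on_lladdr_conflict(unsigned int flags, int old_connected)
{
	if (flags & F_OVERRIDE)
		return TAKE_NEW_LLADDR;		/* replace the address */
	if ((flags & F_WEAK_OVERRIDE) && old_connected)
		return KEEP_LLADDR_MARK_STALE;	/* keep ours, stop trusting it */
	return IGNORE_UPDATE;			/* conflicting update is dropped */
}

int main(void)
{
	printf("%d\n", on_lladdr_conflict(F_OVERRIDE, 1));	/* 1 */
	printf("%d\n", on_lladdr_conflict(F_WEAK_OVERRIDE, 1));	/* 2 */
	printf("%d\n", on_lladdr_conflict(0, 1));		/* 0 */
	return 0;
}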
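Finally, the statistics machinery (NEIGH_CACHE_STAT_INC, alloc_percpu, and the /proc/net/stat seq_file that walks cpu_possible CPUs) keeps one counter set per CPU, so hot-path increments never contend and only the reader pays the cost of summing. A userspace model of the pattern with one array slot per CPU; NCPUS and the struct layout are illustrative, not the kernel's neigh_statistics:

#include <stdio.h>

#define NCPUS 4

struct stats {
	unsigned long lookups;
	unsigned long hits;
};

static struct stats per_cpu[NCPUS];	/* stands in for alloc_percpu() */

static void inc_lookups(int cpu)
{
	per_cpu[cpu].lookups++;		/* NEIGH_CACHE_STAT_INC analogue */
}

static unsigned long sum_lookups(void)
{
	unsigned long sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)	/* reader sums all slots */
		sum += per_cpu[cpu].lookups;
	return sum;
}

int main(void)
{
	inc_lookups(0);
	inc_lookups(3);
	printf("lookups: %lu\n", sum_lookups());	/* prints: lookups: 2 */
	return 0;
}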