X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=net%2Fipv4%2Froute.c;fp=net%2Fipv4%2Froute.c;h=c83066cc342aa6449f6a4edebb8df8eff769fc3c;hb=64ba3f394c830ec48a1c31b53dcae312c56f1604;hp=c7a806b826ddebcbab3cde45133362a3f8bbf560;hpb=be1e6109ac94a859551f8e1774eb9a8469fe055c;p=linux-2.6.git diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c7a806b82..c83066cc3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -55,8 +55,6 @@ * Robert Olsson : Added rt_cache statistics * Arnaldo C. Melo : Convert proc stuff to seq_file * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. - * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect - * Ilia Sotnikov : Removed TOS from hash calculations * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -64,6 +62,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -104,7 +103,6 @@ #include #include #include -#include #ifdef CONFIG_SYSCTL #include #endif @@ -206,27 +204,21 @@ __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) /* * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks * The size of this table is a power of two and depends on the number of CPUS. - * (on lockdep we have a quite big spinlock_t, so keep the size down there) */ -#ifdef CONFIG_LOCKDEP -# define RT_HASH_LOCK_SZ 256 +#if NR_CPUS >= 32 +#define RT_HASH_LOCK_SZ 4096 +#elif NR_CPUS >= 16 +#define RT_HASH_LOCK_SZ 2048 +#elif NR_CPUS >= 8 +#define RT_HASH_LOCK_SZ 1024 +#elif NR_CPUS >= 4 +#define RT_HASH_LOCK_SZ 512 #else -# if NR_CPUS >= 32 -# define RT_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define RT_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define RT_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define RT_HASH_LOCK_SZ 512 -# else -# define RT_HASH_LOCK_SZ 256 -# endif +#define RT_HASH_LOCK_SZ 256 #endif static spinlock_t *rt_hash_locks; @@ -250,14 +242,14 @@ static unsigned int rt_hash_rnd; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ - (__raw_get_cpu_var(rt_cache_stat).field++) + (per_cpu(rt_cache_stat, raw_smp_processor_id()).field++) static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); -static unsigned int rt_hash_code(u32 daddr, u32 saddr) +static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) { - return (jhash_2words(daddr, saddr, rt_hash_rnd) + return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) & rt_hash_mask); } @@ -1119,14 +1111,15 @@ static void rt_del(unsigned hash, struct rtable *rt) } void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, - u32 saddr, struct net_device *dev) + u32 saddr, u8 tos, struct net_device *dev) { int i, k; struct in_device *in_dev = in_dev_get(dev); struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - struct netevent_redirect netevent; + + tos &= IPTOS_RT_MASK; if (!in_dev) return; @@ -1148,7 +1141,8 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { unsigned hash = rt_hash_code(daddr, - skeys[i] ^ (ikeys[k] << 5)); + skeys[i] ^ (ikeys[k] << 5), + tos); rthp=&rt_hash_table[hash].chain; @@ -1158,6 +1152,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || + rth->fl.fl4_tos != tos || rth->fl.oif != ikeys[k] || rth->fl.iif != 0) { rthp = &rth->u.rt_next; @@ -1218,11 +1213,6 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } - - netevent.old = &rth->u.dst; - netevent.new = &rt->u.dst; - call_netevent_notifiers(NETEVENT_REDIRECT, - &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1242,9 +1232,10 @@ reject_redirect: if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about " "%u.%u.%u.%u ignored.\n" - " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n", + " Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, " + "tos %02x\n", NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw), - NIPQUAD(saddr), NIPQUAD(daddr)); + NIPQUAD(saddr), NIPQUAD(daddr), tos); #endif in_dev_put(in_dev); } @@ -1262,7 +1253,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt->u.dst.expires) { unsigned hash = rt_hash_code(rt->fl.fl4_dst, rt->fl.fl4_src ^ - (rt->fl.oif << 5)); + (rt->fl.oif << 5), + rt->fl.fl4_tos); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to " "%u.%u.%u.%u/%02x dropped\n", @@ -1399,13 +1391,14 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) struct rtable *rth; u32 skeys[2] = { iph->saddr, 0, }; u32 daddr = iph->daddr; + u8 tos = iph->tos & IPTOS_RT_MASK; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i]); + unsigned hash = rt_hash_code(daddr, skeys[i], tos); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1414,6 +1407,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) rth->fl.fl4_src == skeys[i] && rth->rt_dst == daddr && rth->rt_src == iph->saddr && + rth->fl.fl4_tos == tos && rth->fl.iif == 0 && !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) { unsigned short mtu = new_mtu; @@ -1459,7 +1453,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1665,7 +1658,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); + hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); e_nobufs: @@ -1830,7 +1823,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } @@ -1871,7 +1864,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos); err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -2048,7 +2041,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); + hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); goto done; @@ -2095,7 +2088,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, int iif = dev->ifindex; tos &= IPTOS_RT_MASK; - hash = rt_hash_code(daddr, saddr ^ (iif << 5)); + hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2293,8 +2286,10 @@ static inline int ip_mkroute_output_def(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { + u32 tos = RT_FL_TOS(oldflp); + hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ (oldflp->oif << 5)); + oldflp->fl4_src ^ (oldflp->oif << 5), tos); err = rt_intern_hash(hash, rth, rp); } @@ -2309,6 +2304,7 @@ static inline int ip_mkroute_output(struct rtable** rp, unsigned flags) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED + u32 tos = RT_FL_TOS(oldflp); unsigned char hop; unsigned hash; int err = -EINVAL; @@ -2338,7 +2334,7 @@ static inline int ip_mkroute_output(struct rtable** rp, hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ - (oldflp->oif << 5)); + (oldflp->oif << 5), tos); err = rt_intern_hash(hash, rth, rp); /* forward hop information to multipath impl. */ @@ -2567,7 +2563,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) unsigned hash; struct rtable *rth; - hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); + hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -3099,7 +3095,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset, memcpy(dst, src, length); /* Add the other cpus in, one int at a time */ - for_each_possible_cpu(i) { + for_each_cpu(i) { unsigned int j; src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;