linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / net / ipv4 / route.c
index 5fe2fcf..c83066c 100644 (file)
@@ -55,8 +55,6 @@
  *     Robert Olsson           :       Added rt_cache statistics
  *     Arnaldo C. Melo         :       Convert proc stuff to seq_file
  *     Eric Dumazet            :       hashed spinlocks and rt_check_expire() fixes.
- *     Ilia Sotnikov           :       Ignore TOS on PMTUD and Redirect
- *     Ilia Sotnikov           :       Removed TOS from hash calculations
  *
  *             This program is free software; you can redistribute it and/or
  *             modify it under the terms of the GNU General Public License
@@ -249,9 +247,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 static int rt_intern_hash(unsigned hash, struct rtable *rth,
                                struct rtable **res);
 
-static unsigned int rt_hash_code(u32 daddr, u32 saddr)
+static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos)
 {
-       return (jhash_2words(daddr, saddr, rt_hash_rnd)
+       return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd)
                & rt_hash_mask);
 }
 
@@ -1113,7 +1111,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 }
 
 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
-                   u32 saddr, struct net_device *dev)
+                   u32 saddr, u8 tos, struct net_device *dev)
 {
        int i, k;
        struct in_device *in_dev = in_dev_get(dev);
@@ -1121,6 +1119,8 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
        u32  skeys[2] = { saddr, 0 };
        int  ikeys[2] = { dev->ifindex, 0 };
 
+       tos &= IPTOS_RT_MASK;
+
        if (!in_dev)
                return;
 
@@ -1141,7 +1141,8 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
        for (i = 0; i < 2; i++) {
                for (k = 0; k < 2; k++) {
                        unsigned hash = rt_hash_code(daddr,
-                                                    skeys[i] ^ (ikeys[k] << 5));
+                                                    skeys[i] ^ (ikeys[k] << 5),
+                                                    tos);
 
                        rthp=&rt_hash_table[hash].chain;
 
@@ -1151,6 +1152,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
 
                                if (rth->fl.fl4_dst != daddr ||
                                    rth->fl.fl4_src != skeys[i] ||
+                                   rth->fl.fl4_tos != tos ||
                                    rth->fl.oif != ikeys[k] ||
                                    rth->fl.iif != 0) {
                                        rthp = &rth->u.rt_next;
@@ -1230,9 +1232,10 @@ reject_redirect:
        if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
                printk(KERN_INFO "Redirect from %u.%u.%u.%u on %s about "
                        "%u.%u.%u.%u ignored.\n"
-                       "  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u\n",
+                       "  Advised path = %u.%u.%u.%u -> %u.%u.%u.%u, "
+                       "tos %02x\n",
                       NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
-                      NIPQUAD(saddr), NIPQUAD(daddr));
+                      NIPQUAD(saddr), NIPQUAD(daddr), tos);
 #endif
        in_dev_put(in_dev);
 }
@@ -1250,7 +1253,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
                           rt->u.dst.expires) {
                        unsigned hash = rt_hash_code(rt->fl.fl4_dst,
                                                     rt->fl.fl4_src ^
-                                                       (rt->fl.oif << 5));
+                                                       (rt->fl.oif << 5),
+                                                    rt->fl.fl4_tos);
 #if RT_CACHE_DEBUG >= 1
                        printk(KERN_DEBUG "ip_rt_advice: redirect to "
                                          "%u.%u.%u.%u/%02x dropped\n",
@@ -1387,13 +1391,14 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
        struct rtable *rth;
        u32  skeys[2] = { iph->saddr, 0, };
        u32  daddr = iph->daddr;
+       u8   tos = iph->tos & IPTOS_RT_MASK;
        unsigned short est_mtu = 0;
 
        if (ipv4_config.no_pmtu_disc)
                return 0;
 
        for (i = 0; i < 2; i++) {
-               unsigned hash = rt_hash_code(daddr, skeys[i]);
+               unsigned hash = rt_hash_code(daddr, skeys[i], tos);
 
                rcu_read_lock();
                for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1402,6 +1407,7 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu)
                            rth->fl.fl4_src == skeys[i] &&
                            rth->rt_dst  == daddr &&
                            rth->rt_src  == iph->saddr &&
+                           rth->fl.fl4_tos == tos &&
                            rth->fl.iif == 0 &&
                            !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
                                unsigned short mtu = new_mtu;
@@ -1652,7 +1658,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
        RT_CACHE_STAT_INC(in_slow_mc);
 
        in_dev_put(in_dev);
-       hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5));
+       hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos);
        return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst);
 
 e_nobufs:
@@ -1762,7 +1768,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 #endif
        if (in_dev->cnf.no_policy)
                rth->u.dst.flags |= DST_NOPOLICY;
-       if (in_dev->cnf.no_xfrm)
+       if (out_dev->cnf.no_xfrm)
                rth->u.dst.flags |= DST_NOXFRM;
        rth->fl.fl4_dst = daddr;
        rth->rt_dst     = daddr;
@@ -1817,7 +1823,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb,
                return err;
 
        /* put it into the cache */
-       hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+       hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
        return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);   
 }
 
@@ -1858,7 +1864,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb,
                        return err;
 
                /* put it into the cache */
-               hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5));
+               hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5), tos);
                err = rt_intern_hash(hash, rth, &rtres);
                if (err)
                        return err;
@@ -2035,7 +2041,7 @@ local_input:
                rth->rt_flags   &= ~RTCF_LOCAL;
        }
        rth->rt_type    = res.type;
-       hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5));
+       hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
        err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
        goto done;
 
@@ -2082,7 +2088,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr,
        int iif = dev->ifindex;
 
        tos &= IPTOS_RT_MASK;
-       hash = rt_hash_code(daddr, saddr ^ (iif << 5));
+       hash = rt_hash_code(daddr, saddr ^ (iif << 5), tos);
 
        rcu_read_lock();
        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2280,8 +2286,10 @@ static inline int ip_mkroute_output_def(struct rtable **rp,
        int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
        unsigned hash;
        if (err == 0) {
+               u32 tos = RT_FL_TOS(oldflp);
+
                hash = rt_hash_code(oldflp->fl4_dst, 
-                                   oldflp->fl4_src ^ (oldflp->oif << 5));
+                                   oldflp->fl4_src ^ (oldflp->oif << 5), tos);
                err = rt_intern_hash(hash, rth, rp);
        }
        
@@ -2296,6 +2304,7 @@ static inline int ip_mkroute_output(struct rtable** rp,
                                    unsigned flags)
 {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+       u32 tos = RT_FL_TOS(oldflp);
        unsigned char hop;
        unsigned hash;
        int err = -EINVAL;
@@ -2325,7 +2334,7 @@ static inline int ip_mkroute_output(struct rtable** rp,
 
                        hash = rt_hash_code(oldflp->fl4_dst, 
                                            oldflp->fl4_src ^
-                                           (oldflp->oif << 5));
+                                           (oldflp->oif << 5), tos);
                        err = rt_intern_hash(hash, rth, rp);
 
                        /* forward hop information to multipath impl. */
@@ -2554,7 +2563,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
        unsigned hash;
        struct rtable *rth;
 
-       hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5));
+       hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
 
        rcu_read_lock_bh();
        for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -3086,7 +3095,7 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
                memcpy(dst, src, length);
 
                /* Add the other cpus in, one int at a time */
-               for_each_possible_cpu(i) {
+               for_each_cpu(i) {
                        unsigned int j;
 
                        src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;