Revert to Fedora kernel-2.6.17-1.2187_FC5 patched with vs2.0.2.1; there are too many...
[linux-2.6.git] / net / ipv4 / fib_hash.c
index 21e11b4..c9b54bb 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
@@ -45,8 +46,8 @@
 
 #include "fib_lookup.h"
 
-static kmem_cache_t *fn_hash_kmem;
-static kmem_cache_t *fn_alias_kmem;
+static kmem_cache_t *fn_hash_kmem __read_mostly;
+static kmem_cache_t *fn_alias_kmem __read_mostly;
 
 struct fib_node {
        struct hlist_node       fn_hash;
@@ -92,7 +93,8 @@ static inline u32 fz_key(u32 dst, struct fn_zone *fz)
        return dst & FZ_MASK(fz);
 }
 
-static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(fib_hash_lock);
+static unsigned int fib_hash_genid;
 
 #define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
 
@@ -181,6 +183,7 @@ static void fn_rehash_zone(struct fn_zone *fz)
                fz->fz_hashmask = new_hashmask;
                fz->fz_divisor = new_divisor;
                fn_rebuild_zone(fz, old_ht, old_divisor);
+               fib_hash_genid++;
                write_unlock_bh(&fib_hash_lock);
 
                fz_hash_free(old_ht, old_divisor);
@@ -236,6 +239,7 @@ fn_new_zone(struct fn_hash *table, int z)
                table->fn_zones[i]->fz_next = fz;
        }
        table->fn_zones[z] = fz;
+       fib_hash_genid++;
        write_unlock_bh(&fib_hash_lock);
        return fz;
 }
@@ -261,6 +265,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 
                        err = fib_semantic_match(&f->fn_alias,
                                                 flp, res,
+                                                f->fn_key, fz->fz_mask,
                                                 fz->fz_order);
                        if (err <= 0)
                                goto out;
@@ -274,29 +279,6 @@ out:
 
 static int fn_hash_last_dflt=-1;
 
-static int fib_detect_death(struct fib_info *fi, int order,
-                           struct fib_info **last_resort, int *last_idx)
-{
-       struct neighbour *n;
-       int state = NUD_NONE;
-
-       n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
-       if (n) {
-               state = n->nud_state;
-               neigh_release(n);
-       }
-       if (state==NUD_REACHABLE)
-               return 0;
-       if ((state&NUD_VALID) && order != fn_hash_last_dflt)
-               return 0;
-       if ((state&NUD_VALID) ||
-           (*last_idx<0 && order > fn_hash_last_dflt)) {
-               *last_resort = fi;
-               *last_idx = order;
-       }
-       return 1;
-}
-
 static void
 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 {
@@ -337,7 +319,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
                                if (next_fi != res->fi)
                                        break;
                        } else if (!fib_detect_death(fi, order, &last_resort,
-                                                    &last_idx)) {
+                                                    &last_idx, &fn_hash_last_dflt)) {
                                if (res->fi)
                                        fib_info_put(res->fi);
                                res->fi = fi;
@@ -355,7 +337,7 @@ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib
                goto out;
        }
 
-       if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
+       if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
                if (res->fi)
                        fib_info_put(res->fi);
                res->fi = fi;
@@ -376,11 +358,6 @@ out:
        read_unlock(&fib_hash_lock);
 }
 
-static void rtmsg_fib(int, struct fib_node *, struct fib_alias *,
-                     int, int,
-                     struct nlmsghdr *n,
-                     struct netlink_skb_parms *);
-
 /* Insert node F to FZ. */
 static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 {
@@ -404,26 +381,6 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
        return NULL;
 }
 
-/* Return the first fib alias matching TOS with
- * priority less than or equal to PRIO.
- */
-static struct fib_alias *fib_find_alias(struct fib_node *fn, u8 tos, u32 prio)
-{
-       if (fn) {
-               struct list_head *head = &fn->fn_alias;
-               struct fib_alias *fa;
-
-               list_for_each_entry(fa, head, fa_list) {
-                       if (fa->fa_tos > tos)
-                               continue;
-                       if (fa->fa_info->fib_priority >= prio ||
-                           fa->fa_tos < tos)
-                               return fa;
-               }
-       }
-       return NULL;
-}
-
 static int
 fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
               struct nlmsghdr *n, struct netlink_skb_parms *req)
@@ -463,7 +420,11 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                fn_rehash_zone(fz);
 
        f = fib_find_node(fz, key);
-       fa = fib_find_alias(f, tos, fi->fib_priority);
+
+       if (!f)
+               fa = NULL;
+       else
+               fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
 
        /* Now fa, if non-NULL, points to the first fib alias
         * with the same keys [prefix,tos,priority], if such key already
@@ -495,6 +456,7 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                        fa->fa_scope = r->rtm_scope;
                        state = fa->fa_state;
                        fa->fa_state &= ~FA_S_ACCESSED;
+                       fib_hash_genid++;
                        write_unlock_bh(&fib_hash_lock);
 
                        fib_release_info(fi_drop);
@@ -559,13 +521,14 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                fib_insert_node(fz, new_f);
        list_add_tail(&new_fa->fa_list,
                 (fa ? &fa->fa_list : &f->fn_alias));
+       fib_hash_genid++;
        write_unlock_bh(&fib_hash_lock);
 
        if (new_f)
                fz->fz_nent++;
        rt_cache_flush(-1);
 
-       rtmsg_fib(RTM_NEWROUTE, f, new_fa, z, tb->tb_id, n, req);
+       rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req);
        return 0;
 
 out_free_new_fa:
@@ -603,7 +566,11 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
        }
 
        f = fib_find_node(fz, key);
-       fa = fib_find_alias(f, tos, 0);
+
+       if (!f)
+               fa = NULL;
+       else
+               fa = fib_find_alias(&f->fn_alias, tos, 0);
        if (!fa)
                return -ESRCH;
 
@@ -631,7 +598,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                int kill_fn;
 
                fa = fa_to_delete;
-               rtmsg_fib(RTM_DELROUTE, f, fa, z, tb->tb_id, n, req);
+               rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req);
 
                kill_fn = 0;
                write_lock_bh(&fib_hash_lock);
@@ -640,6 +607,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
                        hlist_del(&f->fn_hash);
                        kill_fn = 1;
                }
+               fib_hash_genid++;
                write_unlock_bh(&fib_hash_lock);
 
                if (fa->fa_state & FA_S_ACCESSED)
@@ -677,6 +645,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
                                        hlist_del(&f->fn_hash);
                                        kill_f = 1;
                                }
+                               fib_hash_genid++;
                                write_unlock_bh(&fib_hash_lock);
 
                                fn_free_alias(fa);
@@ -724,7 +693,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 
                list_for_each_entry(fa, &f->fn_alias, fa_list) {
                        if (i < s_i)
-                               continue;
+                               goto next;
 
                        if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
                                          cb->nlh->nlmsg_seq,
@@ -735,11 +704,12 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
                                          &f->fn_key,
                                          fz->fz_order,
                                          fa->fa_tos,
-                                         fa->fa_info) < 0) {
+                                         fa->fa_info,
+                                         NLM_F_MULTI) < 0) {
                                cb->args[3] = i;
                                return -1;
                        }
-
+               next:
                        i++;
                }
        }
@@ -796,33 +766,6 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin
        return skb->len;
 }
 
-static void rtmsg_fib(int event, struct fib_node *f, struct fib_alias *fa,
-                     int z, int tb_id,
-                     struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
-       struct sk_buff *skb;
-       u32 pid = req ? req->pid : 0;
-       int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
-
-       skb = alloc_skb(size, GFP_KERNEL);
-       if (!skb)
-               return;
-
-       if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
-                         fa->fa_type, fa->fa_scope, &f->fn_key, z,
-                         fa->fa_tos,
-                         fa->fa_info) < 0) {
-               kfree_skb(skb);
-               return;
-       }
-       NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
-       if (n->nlmsg_flags&NLM_F_ECHO)
-               atomic_inc(&skb->users);
-       netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
-       if (n->nlmsg_flags&NLM_F_ECHO)
-               netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
-}
-
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 struct fib_table * fib_hash_init(int id)
 #else
@@ -868,6 +811,9 @@ struct fib_iter_state {
        struct hlist_head *hash_head;
        struct fib_node *fn;
        struct fib_alias *fa;
+       loff_t pos;
+       unsigned int genid;
+       int valid;
 };
 
 static struct fib_alias *fib_get_first(struct seq_file *seq)
@@ -879,6 +825,9 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
        iter->hash_head = NULL;
        iter->fn        = NULL;
        iter->fa        = NULL;
+       iter->pos       = 0;
+       iter->genid     = fib_hash_genid;
+       iter->valid     = 1;
 
        for (iter->zone = table->fn_zone_list; iter->zone;
             iter->zone = iter->zone->fz_next) {
@@ -983,16 +932,34 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
                }
        }
 out:
+       iter->pos++;
        return fa;
 }
 
+static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
+{
+       struct fib_iter_state *iter = seq->private;
+       struct fib_alias *fa;
+       
+       if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
+               fa   = iter->fa;
+               pos -= iter->pos;
+       } else
+               fa = fib_get_first(seq);
+
+       if (fa)
+               while (pos && (fa = fib_get_next(seq)))
+                       --pos;
+       return pos ? NULL : fa;
+}
+
 static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 {
        void *v = NULL;
 
        read_lock(&fib_hash_lock);
        if (ip_fib_main_table)
-               v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
+               v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
        return v;
 }
 
@@ -1009,7 +976,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v)
 
 static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
 {
-       static unsigned type2flags[RTN_MAX + 1] = {
+       static const unsigned type2flags[RTN_MAX + 1] = {
                [7] = RTF_REJECT, [8] = RTF_REJECT,
        };
        unsigned flags = type2flags[type];
@@ -1022,6 +989,8 @@ static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
        return flags;
 }
 
+extern int dev_in_nx_info(struct net_device *, struct nx_info *);
+
 /* 
  *     This outputs /proc/net/route.
  *
@@ -1052,7 +1021,8 @@ static int fib_seq_show(struct seq_file *seq, void *v)
        prefix  = f->fn_key;
        mask    = FZ_MASK(iter->zone);
        flags   = fib_flag_trans(fa->fa_type, mask, fi);
-       if (fi)
+       if (fi && (!vx_flags(VXF_HIDE_NETIF, 0) ||
+               dev_in_nx_info(fi->fib_dev, current->nx_info)))
                snprintf(bf, sizeof(bf),
                         "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
                         fi->fib_dev ? fi->fib_dev->name : "*", prefix,