-+ register_pernet_subsys(&fib_net_ops);
-+ register_netdevice_notifier(&fib_netdev_notifier);
-+ register_inetaddr_notifier(&fib_inetaddr_notifier);
- }
-
- EXPORT_SYMBOL(inet_addr_type);
-diff -Nurb linux-2.6.22-570/net/ipv4/fib_hash.c linux-2.6.22-590/net/ipv4/fib_hash.c
---- linux-2.6.22-570/net/ipv4/fib_hash.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/fib_hash.c 2008-01-29 22:12:32.000000000 -0500
-@@ -40,6 +40,7 @@
- #include <net/route.h>
- #include <net/tcp.h>
- #include <net/sock.h>
-+#include <net/net_namespace.h>
- #include <net/ip_fib.h>
-
- #include "fib_lookup.h"
-@@ -274,11 +275,10 @@
- return err;
- }
-
--static int fn_hash_last_dflt=-1;
--
- static void
- fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
- {
-+ struct net *net = flp->fl_net;
- int order, last_idx;
- struct hlist_node *node;
- struct fib_node *f;
-@@ -316,12 +316,12 @@
- if (next_fi != res->fi)
- break;
- } else if (!fib_detect_death(fi, order, &last_resort,
-- &last_idx, &fn_hash_last_dflt)) {
-+ &last_idx, &net->fn_hash_last_dflt)) {
- if (res->fi)
- fib_info_put(res->fi);
- res->fi = fi;
- atomic_inc(&fi->fib_clntref);
-- fn_hash_last_dflt = order;
-+ net->fn_hash_last_dflt = order;
- goto out;
- }
- fi = next_fi;
-@@ -330,16 +330,16 @@
- }
-
- if (order <= 0 || fi == NULL) {
-- fn_hash_last_dflt = -1;
-+ net->fn_hash_last_dflt = -1;
- goto out;
- }
-
-- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
-+ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) {
- if (res->fi)
- fib_info_put(res->fi);
- res->fi = fi;
- atomic_inc(&fi->fib_clntref);
-- fn_hash_last_dflt = order;
-+ net->fn_hash_last_dflt = order;
- goto out;
- }
-
-@@ -350,7 +350,7 @@
- if (last_resort)
- atomic_inc(&last_resort->fib_clntref);
- }
-- fn_hash_last_dflt = last_idx;
-+ net->fn_hash_last_dflt = last_idx;
- out:
- read_unlock(&fib_hash_lock);
- }
-@@ -759,11 +759,15 @@
- return skb->len;
- }
-
--#ifdef CONFIG_IP_MULTIPLE_TABLES
-+void fib_hash_exit(struct fib_table *tb)
-+{
-+ if (!tb)
-+ return;
-+ fn_hash_flush(tb);
-+ kfree(tb);
-+}
-+
- struct fib_table * fib_hash_init(u32 id)
--#else
--struct fib_table * __init fib_hash_init(u32 id)
--#endif
- {
- struct fib_table *tb;
-
-@@ -799,6 +803,7 @@
- #ifdef CONFIG_PROC_FS
-
- struct fib_iter_state {
-+ struct net *net;
- struct fn_zone *zone;
- int bucket;
- struct hlist_head *hash_head;
-@@ -812,7 +817,8 @@
- static struct fib_alias *fib_get_first(struct seq_file *seq)
- {
- struct fib_iter_state *iter = seq->private;
-- struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;
-+ struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN);
-+ struct fn_hash *table = (struct fn_hash *) main_table->tb_data;
-
- iter->bucket = 0;
- iter->hash_head = NULL;
-@@ -948,10 +954,11 @@
-
- static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
- {
-+ struct fib_iter_state *iter = seq->private;
- void *v = NULL;
-
- read_lock(&fib_hash_lock);
-- if (ip_fib_main_table)
-+ if (fib_get_table(iter->net, RT_TABLE_MAIN))
- v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
- return v;
- }
-@@ -1051,6 +1058,7 @@
-
- seq = file->private_data;
- seq->private = s;
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -1058,23 +1066,32 @@
- goto out;
- }
-
-+static int fib_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct fib_iter_state *iter = seq->private;
-+ put_net(iter->net);
-+ return seq_release_private(inode, file);
-+}
-+
- static const struct file_operations fib_seq_fops = {
- .owner = THIS_MODULE,
- .open = fib_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = seq_release_private,
-+ .release = fib_seq_release,
- };
-
--int __init fib_proc_init(void)
-+int fib_proc_init(struct net *net)
- {
-- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
-+ net->fn_hash_last_dflt = -1;
-+ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
- return -ENOMEM;
- return 0;
- }
-
--void __init fib_proc_exit(void)
-+void fib_proc_exit(struct net *net)
- {
-- proc_net_remove("route");
-+ proc_net_remove(net, "route");
- }
- #endif /* CONFIG_PROC_FS */
-diff -Nurb linux-2.6.22-570/net/ipv4/fib_rules.c linux-2.6.22-590/net/ipv4/fib_rules.c
---- linux-2.6.22-570/net/ipv4/fib_rules.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/fib_rules.c 2008-01-29 22:12:32.000000000 -0500
-@@ -32,8 +32,6 @@
- #include <net/ip_fib.h>
- #include <net/fib_rules.h>
-
--static struct fib_rules_ops fib4_rules_ops;
--
- struct fib4_rule
- {
- struct fib_rule common;
-@@ -49,35 +47,14 @@
- #endif
- };
-
--static struct fib4_rule default_rule = {
-- .common = {
-- .refcnt = ATOMIC_INIT(2),
-- .pref = 0x7FFF,
-- .table = RT_TABLE_DEFAULT,
-- .action = FR_ACT_TO_TBL,
-- },
-+struct fib4_rule_table {
-+ struct list_head fib4_rules;
-+ struct fib4_rule default_rule;
-+ struct fib4_rule main_rule;
-+ struct fib4_rule local_rule;
-+ struct fib_rules_ops fib4_rules_ops;
- };
-
--static struct fib4_rule main_rule = {
-- .common = {
-- .refcnt = ATOMIC_INIT(2),
-- .pref = 0x7FFE,
-- .table = RT_TABLE_MAIN,
-- .action = FR_ACT_TO_TBL,
-- },
--};
--
--static struct fib4_rule local_rule = {
-- .common = {
-- .refcnt = ATOMIC_INIT(2),
-- .table = RT_TABLE_LOCAL,
-- .action = FR_ACT_TO_TBL,
-- .flags = FIB_RULE_PERMANENT,
-- },
--};
--
--static LIST_HEAD(fib4_rules);
--
- #ifdef CONFIG_NET_CLS_ROUTE
- u32 fib_rules_tclass(struct fib_result *res)
- {
-@@ -87,12 +64,14 @@
-
- int fib_lookup(struct flowi *flp, struct fib_result *res)
- {
-+ struct net *net = flp->fl_net;
-+ struct fib4_rule_table *table = net->fib4_table;
- struct fib_lookup_arg arg = {
- .result = res,
- };
- int err;
-
-- err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
-+ err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg);
- res->r = arg.rule;
-
- return err;
-@@ -122,7 +101,7 @@
- goto errout;
- }
-
-- if ((tbl = fib_get_table(rule->table)) == NULL)
-+ if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL)
- goto errout;
-
- err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
-@@ -138,7 +117,7 @@
- if (res->r && res->r->action == FR_ACT_TO_TBL &&
- FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
- struct fib_table *tb;
-- if ((tb = fib_get_table(res->r->table)) != NULL)
-+ if ((tb = fib_get_table(flp->fl_net, res->r->table)) != NULL)
- tb->tb_select_default(tb, flp, res);
- }
- }
-@@ -159,13 +138,13 @@
- return 1;
- }
-
--static struct fib_table *fib_empty_table(void)
-+static struct fib_table *fib_empty_table(struct net *net)
- {
- u32 id;
-
- for (id = 1; id <= RT_TABLE_MAX; id++)
-- if (fib_get_table(id) == NULL)
-- return fib_new_table(id);
-+ if (fib_get_table(net, id) == NULL)
-+ return fib_new_table(net, id);
- return NULL;
- }
-
-@@ -178,6 +157,7 @@
- struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
- struct nlattr **tb)
- {
-+ struct net *net = skb->sk->sk_net;
- int err = -EINVAL;
- struct fib4_rule *rule4 = (struct fib4_rule *) rule;
-
-@@ -188,7 +168,7 @@
- if (rule->action == FR_ACT_TO_TBL) {
- struct fib_table *table;
-
-- table = fib_empty_table();
-+ table = fib_empty_table(net);
- if (table == NULL) {
- err = -ENOBUFS;
- goto errout;
-@@ -274,14 +254,15 @@
- return -ENOBUFS;
- }
-
--static u32 fib4_rule_default_pref(void)
-+static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
- {
-- struct list_head *pos;
-+ struct list_head *list, *pos;
- struct fib_rule *rule;
-
-- if (!list_empty(&fib4_rules)) {
-- pos = fib4_rules.next;
-- if (pos->next != &fib4_rules) {
-+ list = ops->rules_list;
-+ if (!list_empty(list)) {
-+ pos = list->next;
-+ if (pos->next != list) {
- rule = list_entry(pos->next, struct fib_rule, list);
- if (rule->pref)
- return rule->pref - 1;
-@@ -298,12 +279,37 @@
- + nla_total_size(4); /* flow */
- }
-
--static void fib4_rule_flush_cache(void)
-+static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
- {
- rt_cache_flush(-1);
- }
-
--static struct fib_rules_ops fib4_rules_ops = {
-+static struct fib4_rule_table fib4_rule_table = {
-+ .default_rule = {
-+ .common = {
-+ .refcnt = ATOMIC_INIT(2),
-+ .pref = 0x7FFF,
-+ .table = RT_TABLE_DEFAULT,
-+ .action = FR_ACT_TO_TBL,
-+ },
-+ },
-+ .main_rule = {
-+ .common = {
-+ .refcnt = ATOMIC_INIT(2),
-+ .pref = 0x7FFE,
-+ .table = RT_TABLE_MAIN,
-+ .action = FR_ACT_TO_TBL,
-+ },
-+ },
-+ .local_rule = {
-+ .common = {
-+ .refcnt = ATOMIC_INIT(2),
-+ .table = RT_TABLE_LOCAL,
-+ .action = FR_ACT_TO_TBL,
-+ .flags = FIB_RULE_PERMANENT,
-+ },
-+ },
-+ .fib4_rules_ops = {
- .family = AF_INET,
- .rule_size = sizeof(struct fib4_rule),
- .addr_size = sizeof(u32),
-@@ -317,15 +323,34 @@
- .flush_cache = fib4_rule_flush_cache,
- .nlgroup = RTNLGRP_IPV4_RULE,
- .policy = fib4_rule_policy,
-- .rules_list = &fib4_rules,
-+ .rules_list = &fib4_rule_table.fib4_rules, /* &fib4_rules, */
- .owner = THIS_MODULE,
-+ },
- };
-
--void __init fib4_rules_init(void)
-+
-+void fib4_rules_init(struct net *net)
- {
-- list_add_tail(&local_rule.common.list, &fib4_rules);
-- list_add_tail(&main_rule.common.list, &fib4_rules);
-- list_add_tail(&default_rule.common.list, &fib4_rules);
-+ struct fib4_rule_table *table;
-+ table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL);
-+ if (!table)
-+ return;
-+ INIT_LIST_HEAD(&table->fib4_rules);
-+ list_add_tail(&table->local_rule.common.list, &table->fib4_rules);
-+ list_add_tail(&table->main_rule.common.list, &table->fib4_rules);
-+ list_add_tail(&table->default_rule.common.list, &table->fib4_rules);
-+ table->fib4_rules_ops.rules_list = &table->fib4_rules;
-+ if (fib_rules_register(net, &table->fib4_rules_ops)) {
-+ kfree(table);
-+ return;
-+ }
-+ net->fib4_table = table;
-+}
-
-- fib_rules_register(&fib4_rules_ops);
-+void fib4_rules_exit(struct net *net)
-+{
-+ struct fib4_rule_table *table = net->fib4_table;
-+ if (table)
-+ fib_rules_unregister(net, &table->fib4_rules_ops);
-+ kfree(table);
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/fib_semantics.c linux-2.6.22-590/net/ipv4/fib_semantics.c
---- linux-2.6.22-570/net/ipv4/fib_semantics.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/fib_semantics.c 2008-01-29 22:12:32.000000000 -0500
-@@ -42,7 +42,6 @@
- #include <net/tcp.h>
- #include <net/sock.h>
- #include <net/ip_fib.h>
--#include <net/ip_mp_alg.h>
- #include <net/netlink.h>
- #include <net/nexthop.h>
-
-@@ -51,14 +50,9 @@
- #define FSprintk(a...)
-
- static DEFINE_SPINLOCK(fib_info_lock);
--static struct hlist_head *fib_info_hash;
--static struct hlist_head *fib_info_laddrhash;
--static unsigned int fib_hash_size;
--static unsigned int fib_info_cnt;
-
- #define DEVINDEX_HASHBITS 8
- #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
--static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-
- #ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-@@ -154,7 +148,8 @@
- dev_put(nh->nh_dev);
- nh->nh_dev = NULL;
- } endfor_nexthops(fi);
-- fib_info_cnt--;
-+ fi->fib_net->fib_info_cnt--;
-+ release_net(fi->fib_net);
- kfree(fi);
- }
-
-@@ -197,9 +192,9 @@
- return 0;
- }
-
--static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
-+static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi)
- {
-- unsigned int mask = (fib_hash_size - 1);
-+ unsigned int mask = net->fib_info_hash_size - 1;
- unsigned int val = fi->fib_nhs;
-
- val ^= fi->fib_protocol;
-@@ -209,15 +204,15 @@
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
- }
-
--static struct fib_info *fib_find_info(const struct fib_info *nfi)
-+static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi)
- {
- struct hlist_head *head;
- struct hlist_node *node;
- struct fib_info *fi;
- unsigned int hash;
-
-- hash = fib_info_hashfn(nfi);
-- head = &fib_info_hash[hash];
-+ hash = fib_info_hashfn(net, nfi);
-+ head = &net->fib_info_hash[hash];
-
- hlist_for_each_entry(fi, node, head, fib_hash) {
- if (fi->fib_nhs != nfi->fib_nhs)
-@@ -250,6 +245,7 @@
-
- int ip_fib_check_default(__be32 gw, struct net_device *dev)
- {
-+ struct net *net = dev->nd_net;
- struct hlist_head *head;
- struct hlist_node *node;
- struct fib_nh *nh;
-@@ -258,7 +254,7 @@
- spin_lock(&fib_info_lock);
-
- hash = fib_devindex_hashfn(dev->ifindex);
-- head = &fib_info_devhash[hash];
-+ head = &net->fib_info_devhash[hash];
- hlist_for_each_entry(nh, node, head, nh_hash) {
- if (nh->nh_dev == dev &&
- nh->nh_gw == gw &&
-@@ -321,11 +317,11 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
-+ err = rtnl_notify(skb, info->net, info->pid, RTNLGRP_IPV4_ROUTE,
- info->nlh, GFP_KERNEL);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
-+ rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err);
- }
-
- /* Return the first fib alias matching TOS with
-@@ -518,6 +514,7 @@
- static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh)
- {
-+ struct net *net = cfg->fc_nlinfo.net;
- int err;
-
- if (nh->nh_gw) {
-@@ -532,9 +529,9 @@
-
- if (cfg->fc_scope >= RT_SCOPE_LINK)
- return -EINVAL;
-- if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
-+ if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
-- if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
-+ if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
-@@ -545,6 +542,7 @@
- }
- {
- struct flowi fl = {
-+ .fl_net = net,
- .nl_u = {
- .ip4_u = {
- .daddr = nh->nh_gw,
-@@ -581,7 +579,7 @@
- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
- return -EINVAL;
-
-- in_dev = inetdev_by_index(nh->nh_oif);
-+ in_dev = inetdev_by_index(net, nh->nh_oif);
- if (in_dev == NULL)
- return -ENODEV;
- if (!(in_dev->dev->flags&IFF_UP)) {
-@@ -596,9 +594,9 @@
- return 0;
- }
-
--static inline unsigned int fib_laddr_hashfn(__be32 val)
-+static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val)
- {
-- unsigned int mask = (fib_hash_size - 1);
-+ unsigned int mask = net->fib_info_hash_size - 1;
-
- return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
- }
-@@ -623,21 +621,22 @@
- free_pages((unsigned long) hash, get_order(bytes));
- }
-
--static void fib_hash_move(struct hlist_head *new_info_hash,
-+static void fib_hash_move(struct net *net,
-+ struct hlist_head *new_info_hash,
- struct hlist_head *new_laddrhash,
- unsigned int new_size)
- {
- struct hlist_head *old_info_hash, *old_laddrhash;
-- unsigned int old_size = fib_hash_size;
-+ unsigned int old_size = net->fib_info_hash_size;
- unsigned int i, bytes;
-
- spin_lock_bh(&fib_info_lock);
-- old_info_hash = fib_info_hash;
-- old_laddrhash = fib_info_laddrhash;
-- fib_hash_size = new_size;
-+ old_info_hash = net->fib_info_hash;
-+ old_laddrhash = net->fib_info_laddrhash;
-+ net->fib_info_hash_size = new_size;
-
- for (i = 0; i < old_size; i++) {
-- struct hlist_head *head = &fib_info_hash[i];
-+ struct hlist_head *head = &net->fib_info_hash[i];
- struct hlist_node *node, *n;
- struct fib_info *fi;
-
-@@ -647,15 +646,15 @@
-
- hlist_del(&fi->fib_hash);
-
-- new_hash = fib_info_hashfn(fi);
-+ new_hash = fib_info_hashfn(net, fi);
- dest = &new_info_hash[new_hash];
- hlist_add_head(&fi->fib_hash, dest);
- }
- }
-- fib_info_hash = new_info_hash;
-+ net->fib_info_hash = new_info_hash;
-
- for (i = 0; i < old_size; i++) {
-- struct hlist_head *lhead = &fib_info_laddrhash[i];
-+ struct hlist_head *lhead = &net->fib_info_laddrhash[i];
- struct hlist_node *node, *n;
- struct fib_info *fi;
-
-@@ -665,12 +664,12 @@
-
- hlist_del(&fi->fib_lhash);
-
-- new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
-+ new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
- ldest = &new_laddrhash[new_hash];
- hlist_add_head(&fi->fib_lhash, ldest);
- }
- }
-- fib_info_laddrhash = new_laddrhash;
-+ net->fib_info_laddrhash = new_laddrhash;
-
- spin_unlock_bh(&fib_info_lock);
-
-@@ -681,6 +680,7 @@
-
- struct fib_info *fib_create_info(struct fib_config *cfg)
- {
-+ struct net *net = cfg->fc_nlinfo.net;
- int err;
- struct fib_info *fi = NULL;
- struct fib_info *ofi;
-@@ -697,17 +697,10 @@
- goto err_inval;
- }
- #endif
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- if (cfg->fc_mp_alg) {
-- if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
-- cfg->fc_mp_alg > IP_MP_ALG_MAX)
-- goto err_inval;
-- }
--#endif
-
- err = -ENOBUFS;
-- if (fib_info_cnt >= fib_hash_size) {
-- unsigned int new_size = fib_hash_size << 1;
-+ if (net->fib_info_cnt >= net->fib_info_hash_size) {
-+ unsigned int new_size = net->fib_info_hash_size << 1;
- struct hlist_head *new_info_hash;
- struct hlist_head *new_laddrhash;
- unsigned int bytes;
-@@ -724,18 +717,19 @@
- memset(new_info_hash, 0, bytes);
- memset(new_laddrhash, 0, bytes);
-
-- fib_hash_move(new_info_hash, new_laddrhash, new_size);
-+ fib_hash_move(net, new_info_hash, new_laddrhash, new_size);
- }
-
-- if (!fib_hash_size)
-+ if (!net->fib_info_hash_size)
- goto failure;
- }
-
- fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
- if (fi == NULL)
- goto failure;
-- fib_info_cnt++;
-+ net->fib_info_cnt++;
-
-+ fi->fib_net = hold_net(net);
- fi->fib_protocol = cfg->fc_protocol;
- fi->fib_flags = cfg->fc_flags;
- fi->fib_priority = cfg->fc_priority;
-@@ -791,10 +785,6 @@
- #endif
- }
-
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- fi->fib_mp_alg = cfg->fc_mp_alg;
--#endif
--
- if (fib_props[cfg->fc_type].error) {
- if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
- goto err_inval;
-@@ -811,7 +801,7 @@
- if (nhs != 1 || nh->nh_gw)
- goto err_inval;
- nh->nh_scope = RT_SCOPE_NOWHERE;
-- nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
-+ nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
- err = -ENODEV;
- if (nh->nh_dev == NULL)
- goto failure;
-@@ -825,12 +815,12 @@
- if (fi->fib_prefsrc) {
- if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
- fi->fib_prefsrc != cfg->fc_dst)
-- if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
-+ if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
-
- link_it:
-- if ((ofi = fib_find_info(fi)) != NULL) {
-+ if ((ofi = fib_find_info(net, fi)) != NULL) {
- fi->fib_dead = 1;
- free_fib_info(fi);
- ofi->fib_treeref++;
-@@ -841,11 +831,13 @@
- atomic_inc(&fi->fib_clntref);
- spin_lock_bh(&fib_info_lock);
- hlist_add_head(&fi->fib_hash,
-- &fib_info_hash[fib_info_hashfn(fi)]);
-+ &net->fib_info_hash[fib_info_hashfn(net, fi)]);
- if (fi->fib_prefsrc) {
- struct hlist_head *head;
-+ unsigned int hash;
-
-- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
-+ hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
-+ head = &net->fib_info_laddrhash[hash];
- hlist_add_head(&fi->fib_lhash, head);
- }
- change_nexthops(fi) {
-@@ -855,7 +847,7 @@
- if (!nh->nh_dev)
- continue;
- hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
-- head = &fib_info_devhash[hash];
-+ head = &net->fib_info_devhash[hash];
- hlist_add_head(&nh->nh_hash, head);
- } endfor_nexthops(fi)
- spin_unlock_bh(&fib_info_lock);
-@@ -940,10 +932,6 @@
- res->type = fa->fa_type;
- res->scope = fa->fa_scope;
- res->fi = fa->fa_info;
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- res->netmask = mask;
-- res->network = zone & inet_make_mask(prefixlen);
--#endif
- atomic_inc(&res->fi->fib_clntref);
- return 0;
- }
-@@ -1046,7 +1034,7 @@
- - device went down -> we must shutdown all nexthops going via it.
- */
-
--int fib_sync_down(__be32 local, struct net_device *dev, int force)
-+int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force)
- {
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
-@@ -1054,9 +1042,9 @@
- if (force)
- scope = -1;
-
-- if (local && fib_info_laddrhash) {
-- unsigned int hash = fib_laddr_hashfn(local);
-- struct hlist_head *head = &fib_info_laddrhash[hash];
-+ if (local && net->fib_info_laddrhash) {
-+ unsigned int hash = fib_laddr_hashfn(net, local);
-+ struct hlist_head *head = &net->fib_info_laddrhash[hash];
- struct hlist_node *node;
- struct fib_info *fi;
-
-@@ -1071,7 +1059,7 @@
- if (dev) {
- struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-- struct hlist_head *head = &fib_info_devhash[hash];
-+ struct hlist_head *head = &net->fib_info_devhash[hash];
- struct hlist_node *node;
- struct fib_nh *nh;
-
-@@ -1124,6 +1112,7 @@
-
- int fib_sync_up(struct net_device *dev)
- {
-+ struct net *net = dev->nd_net;
- struct fib_info *prev_fi;
- unsigned int hash;
- struct hlist_head *head;
-@@ -1136,7 +1125,7 @@
-
- prev_fi = NULL;
- hash = fib_devindex_hashfn(dev->ifindex);
-- head = &fib_info_devhash[hash];
-+ head = &net->fib_info_devhash[hash];
- ret = 0;
-
- hlist_for_each_entry(nh, node, head, nh_hash) {
-@@ -1226,3 +1215,17 @@
- spin_unlock_bh(&fib_multipath_lock);
- }
- #endif
-+
-+int fib_info_init(struct net *net)
-+{
-+ net->fib_info_devhash = kzalloc(
-+ sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL);
-+ if (!net->fib_info_devhash)
-+ return -ENOMEM;
-+ return 0;
-+}
-+
-+void fib_info_exit(struct net *net)
-+{
-+ kfree(net->fib_info_devhash);
-+}
-diff -Nurb linux-2.6.22-570/net/ipv4/fib_trie.c linux-2.6.22-590/net/ipv4/fib_trie.c
---- linux-2.6.22-570/net/ipv4/fib_trie.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/fib_trie.c 2008-01-29 22:12:32.000000000 -0500
-@@ -78,6 +78,7 @@
- #include <net/route.h>
- #include <net/tcp.h>
- #include <net/sock.h>
-+#include <net/net_namespace.h>
- #include <net/ip_fib.h>
- #include "fib_lookup.h"
-
-@@ -172,7 +173,6 @@
- static void tnode_free(struct tnode *tn);
-
- static struct kmem_cache *fn_alias_kmem __read_mostly;
--static struct trie *trie_local = NULL, *trie_main = NULL;
-
-
- /* rcu_read_lock needs to be hold by caller from readside */
-@@ -290,11 +290,10 @@
- WARN_ON(tn && tn->pos+tn->bits > 32);
- }
-
--static int halve_threshold = 25;
--static int inflate_threshold = 50;
--static int halve_threshold_root = 8;
--static int inflate_threshold_root = 15;
--
-+static const int halve_threshold = 25;
-+static const int inflate_threshold = 50;
-+static const int halve_threshold_root = 15;
-+static const int inflate_threshold_root = 25;
-
- static void __alias_free_mem(struct rcu_head *head)
- {
-@@ -1771,11 +1770,10 @@
- return found;
- }
-
--static int trie_last_dflt = -1;
--
- static void
- fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
- {
-+ struct net *net = flp->fl_net;
- struct trie *t = (struct trie *) tb->tb_data;
- int order, last_idx;
- struct fib_info *fi = NULL;
-@@ -1819,28 +1817,28 @@
- if (next_fi != res->fi)
- break;
- } else if (!fib_detect_death(fi, order, &last_resort,
-- &last_idx, &trie_last_dflt)) {
-+ &last_idx, &net->trie_last_dflt)) {
- if (res->fi)
- fib_info_put(res->fi);
- res->fi = fi;
- atomic_inc(&fi->fib_clntref);
-- trie_last_dflt = order;
-+ net->trie_last_dflt = order;
- goto out;
- }
- fi = next_fi;
- order++;
- }
- if (order <= 0 || fi == NULL) {
-- trie_last_dflt = -1;
-+ net->trie_last_dflt = -1;
- goto out;
- }
-
-- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
-+ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) {
- if (res->fi)
- fib_info_put(res->fi);
- res->fi = fi;
- atomic_inc(&fi->fib_clntref);
-- trie_last_dflt = order;
-+ net->trie_last_dflt = order;
- goto out;
- }
- if (last_idx >= 0) {
-@@ -1850,7 +1848,7 @@
- if (last_resort)
- atomic_inc(&last_resort->fib_clntref);
- }
-- trie_last_dflt = last_idx;
-+ net->trie_last_dflt = last_idx;
- out:;
- rcu_read_unlock();
- }
-@@ -1957,11 +1955,15 @@
-
- /* Fix more generic FIB names for init later */
-
--#ifdef CONFIG_IP_MULTIPLE_TABLES
-+void fib_hash_exit(struct fib_table *tb)
-+{
-+ if (!tb)
-+ return;
-+ fn_trie_flush(tb);
-+ kfree(tb);
-+}
-+
- struct fib_table * fib_hash_init(u32 id)
--#else
--struct fib_table * __init fib_hash_init(u32 id)
--#endif
- {
- struct fib_table *tb;
- struct trie *t;
-@@ -1991,11 +1993,6 @@
- trie_init(t);
-
- if (id == RT_TABLE_LOCAL)
-- trie_local = t;
-- else if (id == RT_TABLE_MAIN)
-- trie_main = t;
--
-- if (id == RT_TABLE_LOCAL)
- printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
-
- return tb;
-@@ -2004,6 +2001,8 @@
- #ifdef CONFIG_PROC_FS
- /* Depth first Trie walk iterator */
- struct fib_trie_iter {
-+ struct net *net;
-+ struct trie *trie_local, *trie_main;
- struct tnode *tnode;
- struct trie *trie;
- unsigned index;
-@@ -2170,7 +2169,21 @@
-
- static int fib_triestat_seq_show(struct seq_file *seq, void *v)
- {
-+ struct net *net = seq->private;
-+ struct trie *trie_local, *trie_main;
- struct trie_stat *stat;
-+ struct fib_table *tb;
-+
-+ trie_local = NULL;
-+ tb = fib_get_table(net, RT_TABLE_LOCAL);
-+ if (tb)
-+ trie_local = (struct trie *) tb->tb_data;
-+
-+ trie_main = NULL;
-+ tb = fib_get_table(net, RT_TABLE_MAIN);
-+ if (tb)
-+ trie_main = (struct trie *) tb->tb_data;
-+
-
- stat = kmalloc(sizeof(*stat), GFP_KERNEL);
- if (!stat)
-@@ -2197,7 +2210,15 @@
-
- static int fib_triestat_seq_open(struct inode *inode, struct file *file)
- {
-- return single_open(file, fib_triestat_seq_show, NULL);
-+ return single_open(file, fib_triestat_seq_show,
-+ get_net(PROC_NET(inode)));
-+}
-+
-+static int fib_triestat_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ put_net(seq->private);
-+ return single_release(inode, file);
- }
-
- static const struct file_operations fib_triestat_fops = {
-@@ -2205,7 +2226,7 @@
- .open = fib_triestat_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = single_release,
-+ .release = fib_triestat_seq_release,
- };
-
- static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
-@@ -2214,13 +2235,13 @@
- loff_t idx = 0;
- struct node *n;
-
-- for (n = fib_trie_get_first(iter, trie_local);
-+ for (n = fib_trie_get_first(iter, iter->trie_local);
- n; ++idx, n = fib_trie_get_next(iter)) {
- if (pos == idx)
- return n;
- }
-
-- for (n = fib_trie_get_first(iter, trie_main);
-+ for (n = fib_trie_get_first(iter, iter->trie_main);
- n; ++idx, n = fib_trie_get_next(iter)) {
- if (pos == idx)
- return n;
-@@ -2230,10 +2251,23 @@
-
- static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
- {
-+ struct fib_trie_iter *iter = seq->private;
-+ struct fib_table *tb;
-+
-+ if (!iter->trie_local) {
-+ tb = fib_get_table(iter->net, RT_TABLE_LOCAL);
-+ if (tb)
-+ iter->trie_local = (struct trie *) tb->tb_data;
-+ }
-+ if (!iter->trie_main) {
-+ tb = fib_get_table(iter->net, RT_TABLE_MAIN);
-+ if (tb)
-+ iter->trie_main = (struct trie *) tb->tb_data;
-+ }
- rcu_read_lock();
- if (*pos == 0)
- return SEQ_START_TOKEN;
-- return fib_trie_get_idx(seq->private, *pos - 1);
-+ return fib_trie_get_idx(iter, *pos - 1);
- }
-
- static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-@@ -2251,8 +2285,8 @@
- return v;
-
- /* continue scan in next trie */
-- if (iter->trie == trie_local)
-- return fib_trie_get_first(iter, trie_main);
-+ if (iter->trie == iter->trie_local)
-+ return fib_trie_get_first(iter, iter->trie_main);
-
- return NULL;
- }
-@@ -2318,7 +2352,7 @@
- return 0;
-
- if (!NODE_PARENT(n)) {
-- if (iter->trie == trie_local)
-+ if (iter->trie == iter->trie_local)
- seq_puts(seq, "<local>:\n");
- else
- seq_puts(seq, "<main>:\n");
-@@ -2384,6 +2418,7 @@
- seq = file->private_data;
- seq->private = s;
- memset(s, 0, sizeof(*s));
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -2391,12 +2426,20 @@
- goto out;
- }
-
-+static int fib_trie_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct fib_trie_iter *iter = seq->private;
-+ put_net(iter->net);
-+ return seq_release_private(inode, file);
-+}
-+
- static const struct file_operations fib_trie_fops = {
- .owner = THIS_MODULE,
- .open = fib_trie_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = seq_release_private,
-+ .release = fib_trie_seq_release,
- };
-
- static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
-@@ -2434,7 +2477,7 @@
- return 0;
- }
-
-- if (iter->trie == trie_local)
-+ if (iter->trie == iter->trie_local)
- return 0;
- if (IS_TNODE(l))
- return 0;
-@@ -2505,6 +2548,7 @@
- seq = file->private_data;
- seq->private = s;
- memset(s, 0, sizeof(*s));
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -2517,35 +2561,37 @@
- .open = fib_route_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = seq_release_private,
-+ .release = fib_trie_seq_release,
- };
-
--int __init fib_proc_init(void)
-+int fib_proc_init(struct net *net)
- {
-- if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
-+ net->trie_last_dflt = -1;
-+
-+ if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
- goto out1;
-
-- if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
-+ if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops))
- goto out2;
-
-- if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
-+ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
- goto out3;
-
- return 0;
-
- out3:
-- proc_net_remove("fib_triestat");
-+ proc_net_remove(net, "fib_triestat");
- out2:
-- proc_net_remove("fib_trie");
-+ proc_net_remove(net, "fib_trie");
- out1:
- return -ENOMEM;
- }
-
--void __init fib_proc_exit(void)
-+void fib_proc_exit(struct net *net)
- {
-- proc_net_remove("fib_trie");
-- proc_net_remove("fib_triestat");
-- proc_net_remove("route");
-+ proc_net_remove(net, "fib_trie");
-+ proc_net_remove(net, "fib_triestat");
-+ proc_net_remove(net, "route");
- }
-
- #endif /* CONFIG_PROC_FS */
-diff -Nurb linux-2.6.22-570/net/ipv4/icmp.c linux-2.6.22-590/net/ipv4/icmp.c
---- linux-2.6.22-570/net/ipv4/icmp.c 2008-01-29 22:12:24.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/icmp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -229,14 +229,13 @@
- *
- * On SMP we have one ICMP socket per-cpu.
- */
--static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
--#define icmp_socket __get_cpu_var(__icmp_socket)
-+#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id()))
-
--static __inline__ int icmp_xmit_lock(void)
-+static __inline__ int icmp_xmit_lock(struct net *net)
- {
- local_bh_disable();
-
-- if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
-+ if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) {
- /* This can happen if the output path signals a
- * dst_link_failure() for an outgoing ICMP packet.
- */
-@@ -246,9 +245,9 @@
- return 0;
- }
-
--static void icmp_xmit_unlock(void)
-+static void icmp_xmit_unlock(struct net *net)
- {
-- spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
-+ spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock);
- }
-
- /*
-@@ -347,19 +346,20 @@
- static void icmp_push_reply(struct icmp_bxm *icmp_param,
- struct ipcm_cookie *ipc, struct rtable *rt)
- {
-+ struct net *net = icmp_param->skb->dev->nd_net;
- struct sk_buff *skb;
-
-- if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
-+ if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param,
- icmp_param->data_len+icmp_param->head_len,
- icmp_param->head_len,
- ipc, rt, MSG_DONTWAIT) < 0)
-- ip_flush_pending_frames(icmp_socket->sk);
-- else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
-+ ip_flush_pending_frames(icmp_socket(net)->sk);
-+ else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) {
- struct icmphdr *icmph = icmp_hdr(skb);
- __wsum csum = 0;
- struct sk_buff *skb1;
-
-- skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
-+ skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) {
- csum = csum_add(csum, skb1->csum);
- }
- csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
-@@ -367,7 +367,7 @@
- icmp_param->head_len, csum);
- icmph->checksum = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-- ip_push_pending_frames(icmp_socket->sk);
-+ ip_push_pending_frames(icmp_socket(net)->sk);
- }
- }
-
-@@ -377,7 +377,8 @@
-
- static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
- {
-- struct sock *sk = icmp_socket->sk;
-+ struct net *net = icmp_param->skb->dev->nd_net;
-+ struct sock *sk = icmp_socket(net)->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipcm_cookie ipc;
- struct rtable *rt = (struct rtable *)skb->dst;
-@@ -386,7 +387,7 @@
- if (ip_options_echo(&icmp_param->replyopts, skb))
- return;
-
-- if (icmp_xmit_lock())
-+ if (icmp_xmit_lock(net))
- return;
-
- icmp_param->data.icmph.checksum = 0;
-@@ -401,7 +402,8 @@
- daddr = icmp_param->replyopts.faddr;
- }
- {
-- struct flowi fl = { .nl_u = { .ip4_u =
-+ struct flowi fl = { .fl_net = net,
-+ .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = rt->rt_spec_dst,
- .tos = RT_TOS(ip_hdr(skb)->tos) } },
-@@ -415,7 +417,7 @@
- icmp_push_reply(icmp_param, &ipc, rt);
- ip_rt_put(rt);
- out_unlock:
-- icmp_xmit_unlock();
-+ icmp_xmit_unlock(net);
- }
-
-
-@@ -436,6 +438,7 @@
- int room;
- struct icmp_bxm icmp_param;
- struct rtable *rt = (struct rtable *)skb_in->dst;
-+ struct net *net;
- struct ipcm_cookie ipc;
- __be32 saddr;
- u8 tos;
-@@ -443,6 +446,7 @@
- if (!rt)
- goto out;
-
-+ net = rt->fl.fl_net;
- /*
- * Find the original header. It is expected to be valid, of course.
- * Check this, icmp_send is called from the most obscure devices
-@@ -505,7 +509,7 @@
- }
- }
-
-- if (icmp_xmit_lock())
-+ if (icmp_xmit_lock(net))
- return;
-
- /*
-@@ -517,7 +521,7 @@
- struct net_device *dev = NULL;
-
- if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
-- dev = dev_get_by_index(rt->fl.iif);
-+ dev = dev_get_by_index(&init_net, rt->fl.iif);
-
- if (dev) {
- saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-@@ -545,12 +549,13 @@
- icmp_param.skb = skb_in;
- icmp_param.offset = skb_network_offset(skb_in);
- icmp_out_count(icmp_param.data.icmph.type);
-- inet_sk(icmp_socket->sk)->tos = tos;
-+ inet_sk(icmp_socket(net)->sk)->tos = tos;
- ipc.addr = iph->saddr;
- ipc.opt = &icmp_param.replyopts;
-
- {
- struct flowi fl = {
-+ .fl_net = net,
- .nl_u = {
- .ip4_u = {
- .daddr = icmp_param.replyopts.srr ?
-@@ -593,7 +598,7 @@
- ende:
- ip_rt_put(rt);
- out_unlock:
-- icmp_xmit_unlock();
-+ icmp_xmit_unlock(net);
- out:;
- }
-
-@@ -604,6 +609,7 @@
-
- static void icmp_unreach(struct sk_buff *skb)
- {
-+ struct net *net = skb->dev->nd_net;
- struct iphdr *iph;
- struct icmphdr *icmph;
- int hash, protocol;
-@@ -634,7 +640,7 @@
- case ICMP_PORT_UNREACH:
- break;
- case ICMP_FRAG_NEEDED:
-- if (ipv4_config.no_pmtu_disc) {
-+ if (net->sysctl_ipv4_no_pmtu_disc) {
- LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
- "fragmentation needed "
- "and DF set.\n",
-@@ -678,7 +684,7 @@
- */
-
- if (!sysctl_icmp_ignore_bogus_error_responses &&
-- inet_addr_type(iph->daddr) == RTN_BROADCAST) {
-+ inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
- if (net_ratelimit())
- printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
- "type %u, code %u "
-@@ -707,7 +713,7 @@
- hash = protocol & (MAX_INET_PROTOS - 1);
- read_lock(&raw_v4_lock);
- if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
-- while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
-+ while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr,
- iph->saddr,
- skb->dev->ifindex, skb->skb_tag)) != NULL) {
- raw_err(raw_sk, skb, info);
-@@ -1179,29 +1185,54 @@
- },
- };
-
--void __init icmp_init(struct net_proto_family *ops)
-+static void icmp_net_exit(struct net *net)
- {
-- struct inet_sock *inet;
-+ struct socket **sock;
- int i;
-
- for_each_possible_cpu(i) {
-+ sock = percpu_ptr(net->__icmp_socket, i);
-+ if (!*sock)
-+ continue;
-+ /* At the last minute lie and say this is a socket for
-+ * the initial network namespace. So the socket will
-+ * be safe to free.
-+ */
-+ (*sock)->sk->sk_net = get_net(&init_net);
-+ sock_release(*sock);
-+ *sock = NULL;
-+ }
-+ percpu_free(net->__icmp_socket);
-+}
-+
-+static int icmp_net_init(struct net *net)
-+{
-+ struct socket **sock;
-+ struct inet_sock *inet;
- int err;
-+ int i;
-+
-+ net->__icmp_socket = alloc_percpu(struct socket *);
-+ if (!net->__icmp_socket)
-+ return -ENOMEM;
-+
-+ for_each_possible_cpu(i) {
-
-- err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
-- &per_cpu(__icmp_socket, i));
-+ sock = percpu_ptr(net->__icmp_socket, i);
-
-+ err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock);
- if (err < 0)
-- panic("Failed to create the ICMP control socket.\n");
-+ goto fail;
-
-- per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
-+ (*sock)->sk->sk_allocation = GFP_ATOMIC;
-
- /* Enough space for 2 64K ICMP packets, including
- * sk_buff struct overhead.
- */
-- per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
-+ (*sock)->sk->sk_sndbuf =
- (2 * ((64 * 1024) + sizeof(struct sk_buff)));
-
-- inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
-+ inet = inet_sk((*sock)->sk);
- inet->uc_ttl = -1;
- inet->pmtudisc = IP_PMTUDISC_DONT;
-
-@@ -1209,8 +1240,27 @@
- * see it, we do not wish this socket to see incoming
- * packets.
- */
-- per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
-+ (*sock)->sk->sk_prot->unhash((*sock)->sk);
-+
-+ /* Don't hold an extra reference on the namespace */
-+ put_net((*sock)->sk->sk_net);
- }
-+ return 0;
-+fail:
-+ icmp_net_exit(net);
-+ return err;
-+
-+}
-+
-+static struct pernet_operations icmp_net_ops = {
-+ .init = icmp_net_init,
-+ .exit = icmp_net_exit,
-+};
-+
-+void __init icmp_init(struct net_proto_family *ops)
-+{
-+ if (register_pernet_subsys(&icmp_net_ops))
-+ panic("Failed to create the ICMP control socket.\n");
- }
-
- EXPORT_SYMBOL(icmp_err_convert);
-diff -Nurb linux-2.6.22-570/net/ipv4/igmp.c linux-2.6.22-590/net/ipv4/igmp.c
---- linux-2.6.22-570/net/ipv4/igmp.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/igmp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -97,6 +97,7 @@
- #include <net/route.h>
- #include <net/sock.h>
- #include <net/checksum.h>
-+#include <net/net_namespace.h>
- #include <linux/netfilter_ipv4.h>
- #ifdef CONFIG_IP_MROUTE
- #include <linux/mroute.h>
-@@ -129,12 +130,12 @@
- */
-
- #define IGMP_V1_SEEN(in_dev) \
-- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
-+ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
- IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
- ((in_dev)->mr_v1_seen && \
- time_before(jiffies, (in_dev)->mr_v1_seen)))
- #define IGMP_V2_SEEN(in_dev) \
-- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
-+ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
- IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
- ((in_dev)->mr_v2_seen && \
- time_before(jiffies, (in_dev)->mr_v2_seen)))
-@@ -296,7 +297,8 @@
- return NULL;
-
- {
-- struct flowi fl = { .oif = dev->ifindex,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = dev->ifindex,
- .nl_u = { .ip4_u = {
- .daddr = IGMPV3_ALL_MCR } },
- .proto = IPPROTO_IGMP };
-@@ -646,7 +648,8 @@
- dst = group;
-
- {
-- struct flowi fl = { .oif = dev->ifindex,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = dev->ifindex,
- .nl_u = { .ip4_u = { .daddr = dst } },
- .proto = IPPROTO_IGMP };
- if (ip_route_output_key(&rt, &fl))
-@@ -929,6 +932,11 @@
- struct in_device *in_dev = in_dev_get(skb->dev);
- int len = skb->len;
-
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
-+
- if (in_dev==NULL) {
- kfree_skb(skb);
- return 0;
-@@ -1393,20 +1401,22 @@
-
- static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
- {
-- struct flowi fl = { .nl_u = { .ip4_u =
-- { .daddr = imr->imr_multiaddr.s_addr } } };
-+ struct flowi fl = {
-+ .fl_net = &init_net,
-+ .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } }
-+ };
- struct rtable *rt;
- struct net_device *dev = NULL;
- struct in_device *idev = NULL;
-
- if (imr->imr_ifindex) {
-- idev = inetdev_by_index(imr->imr_ifindex);
-+ idev = inetdev_by_index(&init_net, imr->imr_ifindex);
- if (idev)
- __in_dev_put(idev);
- return idev;
- }
- if (imr->imr_address.s_addr) {
-- dev = ip_dev_find(imr->imr_address.s_addr);
-+ dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
- if (!dev)
- return NULL;
- dev_put(dev);
-@@ -2234,7 +2244,7 @@
- struct in_device *in_dev;
- inet->mc_list = iml->next;
-
-- in_dev = inetdev_by_index(iml->multi.imr_ifindex);
-+ in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
- (void) ip_mc_leave_src(sk, iml, in_dev);
- if (in_dev != NULL) {
- ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
-@@ -2291,7 +2301,7 @@
- struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
-
- state->in_dev = NULL;
-- for_each_netdev(state->dev) {
-+ for_each_netdev(&init_net, state->dev) {
- struct in_device *in_dev;
- in_dev = in_dev_get(state->dev);
- if (!in_dev)
-@@ -2453,7 +2463,7 @@
-
- state->idev = NULL;
- state->im = NULL;
-- for_each_netdev(state->dev) {
-+ for_each_netdev(&init_net, state->dev) {
- struct in_device *idev;
- idev = in_dev_get(state->dev);
- if (unlikely(idev == NULL))
-@@ -2613,8 +2623,8 @@
-
- int __init igmp_mc_proc_init(void)
- {
-- proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
-- proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
-+ proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
-+ proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
- return 0;
- }
- #endif
-diff -Nurb linux-2.6.22-570/net/ipv4/inet_connection_sock.c linux-2.6.22-590/net/ipv4/inet_connection_sock.c
---- linux-2.6.22-570/net/ipv4/inet_connection_sock.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/inet_connection_sock.c 2008-01-29 22:12:32.000000000 -0500
-@@ -32,7 +32,7 @@
- /*
- * This array holds the first and last local port number.
- */
--int sysctl_local_port_range[2] = { 32768, 61000 };
-+//int sysctl_local_port_range[2] = { 32768, 61000 };
-
- int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
- {
-@@ -74,6 +74,7 @@
-
- sk_for_each_bound(sk2, node, &tb->owners) {
- if (sk != sk2 &&
-+ (sk->sk_net == sk2->sk_net) &&
- !inet_v6_ipv6only(sk2) &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
-@@ -98,6 +99,7 @@
- int (*bind_conflict)(const struct sock *sk,
- const struct inet_bind_bucket *tb))
- {
-+ struct net *net = sk->sk_net;
- struct inet_bind_hashbucket *head;
- struct hlist_node *node;
- struct inet_bind_bucket *tb;
-@@ -105,16 +107,16 @@
-
- local_bh_disable();
- if (!snum) {
-- int low = sysctl_local_port_range[0];
-- int high = sysctl_local_port_range[1];
-+ int low = sk->sk_net->sysctl_local_port_range[0];
-+ int high = sk->sk_net->sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- int rover = net_random() % (high - low) + low;
-
- do {
-- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
-+ head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, node, &head->chain)
-- if (tb->port == rover)
-+ if ((tb->port == rover) && (tb->net == net))
- goto next;
- break;
- next:
-@@ -138,10 +140,10 @@
- */
- snum = rover;
- } else {
-- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
-+ head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)];
- spin_lock(&head->lock);
- inet_bind_bucket_for_each(tb, node, &head->chain)
-- if (tb->port == snum)
-+ if ((tb->port == snum) && (tb->net==net))
- goto tb_found;
- }
- tb = NULL;
-@@ -161,7 +163,7 @@
- }
- tb_not_found:
- ret = 1;
-- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
-+ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL)
- goto fail_unlock;
- if (hlist_empty(&tb->owners)) {
- if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
-@@ -341,7 +343,8 @@
- struct rtable *rt;
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct ip_options *opt = inet_rsk(req)->opt;
-- struct flowi fl = { .oif = sk->sk_bound_dev_if,
-+ struct flowi fl = { .fl_net = sk->sk_net,
-+ .oif = sk->sk_bound_dev_if,
- .nl_u = { .ip4_u =
- { .daddr = ((opt && opt->srr) ?
- opt->faddr :
-diff -Nurb linux-2.6.22-570/net/ipv4/inet_diag.c linux-2.6.22-590/net/ipv4/inet_diag.c
---- linux-2.6.22-570/net/ipv4/inet_diag.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/inet_diag.c 2008-01-29 22:12:32.000000000 -0500
-@@ -227,6 +227,7 @@
- static int inet_diag_get_exact(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh)
- {
-+ struct net *net = in_skb->sk->sk_net;
- int err;
- struct sock *sk;
- struct inet_diag_req *req = NLMSG_DATA(nlh);
-@@ -242,7 +243,7 @@
- /* TODO: lback */
- sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
- req->id.idiag_dport, req->id.idiag_src[0],
-- req->id.idiag_sport, req->id.idiag_if);
-+ req->id.idiag_sport, req->id.idiag_if, net);
- }
- #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- else if (req->idiag_family == AF_INET6) {
-@@ -251,7 +252,7 @@
- req->id.idiag_dport,
- (struct in6_addr *)req->id.idiag_src,
- req->id.idiag_sport,
-- req->id.idiag_if);
-+ req->id.idiag_if, net);
- }
- #endif
- else {
-@@ -906,8 +907,8 @@
- if (!inet_diag_table)
- goto out;
-
-- idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
-- NULL, THIS_MODULE);
-+ idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
-+ inet_diag_rcv, NULL, THIS_MODULE);
- if (idiagnl == NULL)
- goto out_free_table;
- err = 0;
-diff -Nurb linux-2.6.22-570/net/ipv4/inet_hashtables.c linux-2.6.22-590/net/ipv4/inet_hashtables.c
---- linux-2.6.22-570/net/ipv4/inet_hashtables.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/inet_hashtables.c 2008-01-29 22:12:32.000000000 -0500
-@@ -29,11 +29,13 @@
- */
- struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
- struct inet_bind_hashbucket *head,
-+ struct net *net,
- const unsigned short snum)
- {
- struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb != NULL) {
-+ tb->net = net;
- tb->port = snum;
- tb->fastreuse = 0;
- INIT_HLIST_HEAD(&tb->owners);
-@@ -66,7 +68,7 @@
- */
- static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
- {
-- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
-+ const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size);
- struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind_bucket *tb;
-
-@@ -127,7 +129,7 @@
- static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
- const __be32 daddr,
- const unsigned short hnum,
-- const int dif)
-+ const int dif, struct net *net)
- {
- struct sock *result = NULL, *sk;
- const struct hlist_node *node;
-@@ -149,6 +151,8 @@
- continue;
- score += 2;
- }
-+ if (sk->sk_net != net)
-+ continue;
- if (score == 5)
- return sk;
- if (score > hiscore) {
-@@ -163,22 +167,22 @@
- /* Optimize the common listener case. */
- struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
- const __be32 daddr, const unsigned short hnum,
-- const int dif)
-+ const int dif, struct net *net)
- {
- struct sock *sk = NULL;
- const struct hlist_head *head;
-
- read_lock(&hashinfo->lhash_lock);
-- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
-+ head = &hashinfo->listening_hash[net, inet_lhashfn(net, hnum)];
- if (!hlist_empty(head)) {
- const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
-
- if (inet->num == hnum && !sk->sk_node.next &&
- v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
- (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
-- !sk->sk_bound_dev_if)
-+ !sk->sk_bound_dev_if && (sk->sk_net == net))
- goto sherry_cache;
-- sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
-+ sk = inet_lookup_listener_slow(head, daddr, hnum, dif,net );
- }
- if (sk) {
- sherry_cache:
-@@ -196,12 +200,13 @@
- {
- struct inet_hashinfo *hinfo = death_row->hashinfo;
- struct inet_sock *inet = inet_sk(sk);
-+ struct net *net = sk->sk_net;
- __be32 daddr = inet->rcv_saddr;
- __be32 saddr = inet->daddr;
- int dif = sk->sk_bound_dev_if;
- INET_ADDR_COOKIE(acookie, saddr, daddr)
- const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
-- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-+ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
- struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
- struct sock *sk2;
- const struct hlist_node *node;
-@@ -214,7 +219,7 @@
- sk_for_each(sk2, node, &head->twchain) {
- tw = inet_twsk(sk2);
-
-- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-+ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) {
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
-@@ -225,7 +230,7 @@
-
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
-- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-+ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net))
- goto not_unique;
- }
-
-@@ -271,6 +276,7 @@
- int inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk)
- {
-+ struct net *net = sk->sk_net;
- struct inet_hashinfo *hinfo = death_row->hashinfo;
- const unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
-@@ -278,8 +284,8 @@
- int ret;
-
- if (!snum) {
-- int low = sysctl_local_port_range[0];
-- int high = sysctl_local_port_range[1];
-+ int low = sk->sk_net->sysctl_local_port_range[0];
-+ int high = sk->sk_net->sysctl_local_port_range[1];
- int range = high - low;
- int i;
- int port;
-@@ -291,7 +297,7 @@
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
-- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
-@@ -299,7 +305,7 @@
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
-- if (tb->port == port) {
-+ if ((tb->port == port) && (tb->net == net)) {
- BUG_TRAP(!hlist_empty(&tb->owners));
- if (tb->fastreuse >= 0)
- goto next_port;
-@@ -311,7 +317,7 @@
- }
- }
-
-- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
-+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
-@@ -346,7 +352,7 @@
- goto out;
- }
-
-- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-diff -Nurb linux-2.6.22-570/net/ipv4/inet_timewait_sock.c linux-2.6.22-590/net/ipv4/inet_timewait_sock.c
---- linux-2.6.22-570/net/ipv4/inet_timewait_sock.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/inet_timewait_sock.c 2008-01-29 22:12:32.000000000 -0500
-@@ -31,7 +31,7 @@
- write_unlock(&ehead->lock);
-
- /* Disassociate with bind bucket. */
-- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
-+ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)];
- spin_lock(&bhead->lock);
- tb = tw->tw_tb;
- __hlist_del(&tw->tw_bind_node);
-@@ -65,7 +65,7 @@
- Note, that any socket with inet->num != 0 MUST be bound in
- binding cache, even if it is closed.
- */
-- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
-+ bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)];
- spin_lock(&bhead->lock);
- tw->tw_tb = icsk->icsk_bind_hash;
- BUG_TRAP(icsk->icsk_bind_hash);
-diff -Nurb linux-2.6.22-570/net/ipv4/inetpeer.c linux-2.6.22-590/net/ipv4/inetpeer.c
---- linux-2.6.22-570/net/ipv4/inetpeer.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/inetpeer.c 2008-01-29 22:12:32.000000000 -0500
-@@ -81,71 +81,94 @@
- .avl_height = 0
- };
- #define peer_avl_empty (&peer_fake_node)
--static struct inet_peer *peer_root = peer_avl_empty;
- static DEFINE_RWLOCK(peer_pool_lock);
- #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
-
--static int peer_total;
--/* Exported for sysctl_net_ipv4. */
--int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
-- * aggressively at this stage */
--int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
--int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
--int inet_peer_gc_mintime __read_mostly = 10 * HZ;
--int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
--
--static struct inet_peer *inet_peer_unused_head;
--static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
- static DEFINE_SPINLOCK(inet_peer_unused_lock);
-
- static void peer_check_expire(unsigned long dummy);
--static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
-
-+static int inet_peers_net_init(struct net *net);
-+static void inet_peers_net_exit(struct net *net);
-+static struct pernet_operations inet_peers_net_ops = {
-+ .init = inet_peers_net_init,
-+ .exit = inet_peers_net_exit,
-+};
-
- /* Called from ip_output.c:ip_init */
- void __init inet_initpeers(void)
- {
-+ peer_cachep = kmem_cache_create("inet_peer_cache",
-+ sizeof(struct inet_peer),
-+ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
-+ NULL, NULL);
-+
-+ register_pernet_subsys(&inet_peers_net_ops);
-+}
-+
-+static int inet_peers_net_init(struct net *net)
-+{
- struct sysinfo si;
-
-+ net->peer_root = peer_avl_empty;
-+ net->inet_peer_unused_tailp = &net->inet_peer_unused_head;
-+
-+ net->inet_peer_threshold = 65536 + 128; /* start to throw entries more
-+ * aggressively at this stage */
-+ net->inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */
-+ net->inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */
-+ net->inet_peer_gc_mintime = 10 * HZ;
-+ net->inet_peer_gc_maxtime = 120 * HZ;
-+
- /* Use the straight interface to information about memory. */
- si_meminfo(&si);
-+
- /* The values below were suggested by Alexey Kuznetsov
- * <kuznet@ms2.inr.ac.ru>. I don't have any opinion about the values
- * myself. --SAW
- */
- if (si.totalram <= (32768*1024)/PAGE_SIZE)
-- inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
-+ net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
- if (si.totalram <= (16384*1024)/PAGE_SIZE)
-- inet_peer_threshold >>= 1; /* about 512KB */
-+ net->inet_peer_threshold >>= 1; /* about 512KB */
- if (si.totalram <= (8192*1024)/PAGE_SIZE)
-- inet_peer_threshold >>= 2; /* about 128KB */
-+ net->inet_peer_threshold >>= 2; /* about 128KB */
-
-- peer_cachep = kmem_cache_create("inet_peer_cache",
-- sizeof(struct inet_peer),
-- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
-- NULL, NULL);
-
-+ init_timer(&net->peer_periodic_timer);
-+ net->peer_periodic_timer.function = peer_check_expire;
- /* All the timers, started at system startup tend
- to synchronize. Perturb it a bit.
- */
-- peer_periodic_timer.expires = jiffies
-- + net_random() % inet_peer_gc_maxtime
-- + inet_peer_gc_maxtime;
-- add_timer(&peer_periodic_timer);
-+ net->peer_periodic_timer.expires = jiffies
-+ + net_random() % net->inet_peer_gc_maxtime
-+ + net->inet_peer_gc_maxtime;
-+ /* Remember our namespace */
-+ net->peer_periodic_timer.data = (unsigned long)net;
-+ add_timer(&net->peer_periodic_timer);
-+
-+ return 0;
-+}
-+
-+static void inet_peers_net_exit(struct net *net)
-+{
-+ del_timer(&net->peer_periodic_timer);
-+ /* CHECKME do I need to do something to release all of the peers */
- }
-
- /* Called with or without local BH being disabled. */
--static void unlink_from_unused(struct inet_peer *p)
-+static void unlink_from_unused(struct net *net, struct inet_peer *p)
- {
- spin_lock_bh(&inet_peer_unused_lock);
- if (p->unused_prevp != NULL) {
- /* On unused list. */
-- *p->unused_prevp = p->unused_next;
-- if (p->unused_next != NULL)
-- p->unused_next->unused_prevp = p->unused_prevp;
-+ *p->unused_prevp = p->u.unused_next;
-+ if (p->u.unused_next != NULL)
-+ p->u.unused_next->unused_prevp = p->unused_prevp;
- else
-- inet_peer_unused_tailp = p->unused_prevp;
-+ net->inet_peer_unused_tailp = p->unused_prevp;
- p->unused_prevp = NULL; /* mark it as removed */
-+ p->u.net = hold_net(net); /* Remember the net */
- }
- spin_unlock_bh(&inet_peer_unused_lock);
- }
-@@ -160,9 +183,9 @@
- struct inet_peer *u, **v; \
- if (_stack) { \
- stackptr = _stack; \
-- *stackptr++ = &peer_root; \
-+ *stackptr++ = &net->peer_root; \
- } \
-- for (u = peer_root; u != peer_avl_empty; ) { \
-+ for (u = net->peer_root; u != peer_avl_empty; ) { \
- if (_daddr == u->v4daddr) \
- break; \
- if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
-@@ -279,7 +302,7 @@
- } while(0)
-
- /* May be called with local BH enabled. */
--static void unlink_from_pool(struct inet_peer *p)
-+static void unlink_from_pool(struct net *net, struct inet_peer *p)
- {
- int do_free;
-
-@@ -317,7 +340,7 @@
- delp[1] = &t->avl_left; /* was &p->avl_left */
- }
- peer_avl_rebalance(stack, stackptr);
-- peer_total--;
-+ net->peer_total--;
- do_free = 1;
- }
- write_unlock_bh(&peer_pool_lock);
-@@ -335,13 +358,13 @@
- }
-
- /* May be called with local BH enabled. */
--static int cleanup_once(unsigned long ttl)
-+static int cleanup_once(struct net *net, unsigned long ttl)
- {
- struct inet_peer *p;
-
- /* Remove the first entry from the list of unused nodes. */
- spin_lock_bh(&inet_peer_unused_lock);
-- p = inet_peer_unused_head;
-+ p = net->inet_peer_unused_head;
- if (p != NULL) {
- __u32 delta = (__u32)jiffies - p->dtime;
- if (delta < ttl) {
-@@ -349,12 +372,13 @@
- spin_unlock_bh(&inet_peer_unused_lock);
- return -1;
- }
-- inet_peer_unused_head = p->unused_next;
-- if (p->unused_next != NULL)
-- p->unused_next->unused_prevp = p->unused_prevp;
-+ net->inet_peer_unused_head = p->u.unused_next;
-+ if (p->u.unused_next != NULL)
-+ p->u.unused_next->unused_prevp = p->unused_prevp;
- else
-- inet_peer_unused_tailp = p->unused_prevp;
-+ net->inet_peer_unused_tailp = p->unused_prevp;
- p->unused_prevp = NULL; /* mark as not on the list */
-+ p->u.net = hold_net(net);
- /* Grab an extra reference to prevent node disappearing
- * before unlink_from_pool() call. */
- atomic_inc(&p->refcnt);
-@@ -367,12 +391,12 @@
- * happen because of entry limits in route cache. */
- return -1;
-
-- unlink_from_pool(p);
-+ unlink_from_pool(net, p);
- return 0;
- }
-
- /* Called with or without local BH being disabled. */
--struct inet_peer *inet_getpeer(__be32 daddr, int create)
-+struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create)
- {
- struct inet_peer *p, *n;
- struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
-@@ -387,7 +411,7 @@
- if (p != peer_avl_empty) {
- /* The existing node has been found. */
- /* Remove the entry from unused list if it was there. */
-- unlink_from_unused(p);
-+ unlink_from_unused(net, p);
- return p;
- }
-
-@@ -413,13 +437,13 @@
- /* Link the node. */
- link_to_pool(n);
- n->unused_prevp = NULL; /* not on the list */
-- peer_total++;
-+ n->u.net = hold_net(net); /* Remember the net */
-+ net->peer_total++;
- write_unlock_bh(&peer_pool_lock);
-
-- if (peer_total >= inet_peer_threshold)
-+ if (net->peer_total >= net->inet_peer_threshold)
- /* Remove one less-recently-used entry. */
-- cleanup_once(0);
--
-+ cleanup_once(net, 0);
- return n;
-
- out_free:
-@@ -427,25 +451,26 @@
- atomic_inc(&p->refcnt);
- write_unlock_bh(&peer_pool_lock);
- /* Remove the entry from unused list if it was there. */
-- unlink_from_unused(p);
-+ unlink_from_unused(net, p);
- /* Free preallocated the preallocated node. */
- kmem_cache_free(peer_cachep, n);
- return p;
- }
-
- /* Called with local BH disabled. */
--static void peer_check_expire(unsigned long dummy)
-+static void peer_check_expire(unsigned long arg)
- {
-+ struct net *net = (void *)arg;
- unsigned long now = jiffies;
- int ttl;
-
-- if (peer_total >= inet_peer_threshold)
-- ttl = inet_peer_minttl;
-+ if (net->peer_total >= net->inet_peer_threshold)
-+ ttl = net->inet_peer_minttl;
- else
-- ttl = inet_peer_maxttl
-- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
-- peer_total / inet_peer_threshold * HZ;
-- while (!cleanup_once(ttl)) {
-+ ttl = net->inet_peer_maxttl
-+ - (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ *
-+ net->peer_total / net->inet_peer_threshold * HZ;
-+ while (!cleanup_once(net, ttl)) {
- if (jiffies != now)
- break;
- }
-@@ -453,25 +478,30 @@
- /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
- * interval depending on the total number of entries (more entries,
- * less interval). */
-- if (peer_total >= inet_peer_threshold)
-- peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
-+ if (net->peer_total >= net->inet_peer_threshold)
-+ net->peer_periodic_timer.expires = jiffies
-+ + net->inet_peer_gc_mintime;
- else
-- peer_periodic_timer.expires = jiffies
-- + inet_peer_gc_maxtime
-- - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-- peer_total / inet_peer_threshold * HZ;
-- add_timer(&peer_periodic_timer);
-+ net->peer_periodic_timer.expires = jiffies
-+ + net->inet_peer_gc_maxtime
-+ - (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ *
-+ net->peer_total / net->inet_peer_threshold * HZ;
-+ add_timer(&net->peer_periodic_timer);
- }
-
- void inet_putpeer(struct inet_peer *p)
- {
- spin_lock_bh(&inet_peer_unused_lock);
- if (atomic_dec_and_test(&p->refcnt)) {
-- p->unused_prevp = inet_peer_unused_tailp;
-- p->unused_next = NULL;
-- *inet_peer_unused_tailp = p;
-- inet_peer_unused_tailp = &p->unused_next;
-+ struct net *net = p->u.net;
-+
-+ p->unused_prevp = net->inet_peer_unused_tailp;
-+ p->u.unused_next = NULL;
-+ *net->inet_peer_unused_tailp = p;
-+ net->inet_peer_unused_tailp = &p->u.unused_next;
- p->dtime = (__u32)jiffies;
-+
-+ release_net(net);
- }
- spin_unlock_bh(&inet_peer_unused_lock);
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_fragment.c linux-2.6.22-590/net/ipv4/ip_fragment.c
---- linux-2.6.22-570/net/ipv4/ip_fragment.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ip_fragment.c 2008-01-29 22:12:32.000000000 -0500
-@@ -49,21 +49,6 @@
- * as well. Or notify me, at least. --ANK
- */
-
--/* Fragment cache limits. We will commit 256K at one time. Should we
-- * cross that limit we will prune down to 192K. This should cope with
-- * even the most extreme cases without allowing an attacker to measurably
-- * harm machine performance.
-- */
--int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
--int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
--
--int sysctl_ipfrag_max_dist __read_mostly = 64;
--
--/* Important NOTE! Fragment queue must be destroyed before MSL expires.
-- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
-- */
--int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
--
- struct ipfrag_skb_cb
- {
- struct inet_skb_parm h;
-@@ -96,6 +81,7 @@
- int iif;
- unsigned int rid;
- struct inet_peer *peer;
-+ struct net *net;
- };
-
- /* Hash table. */
-@@ -103,17 +89,13 @@
- #define IPQ_HASHSZ 64
-
- /* Per-bucket lock is easy to add now. */
--static struct hlist_head ipq_hash[IPQ_HASHSZ];
- static DEFINE_RWLOCK(ipfrag_lock);
--static u32 ipfrag_hash_rnd;
--static LIST_HEAD(ipq_lru_list);
--int ip_frag_nqueues = 0;
-
- static __inline__ void __ipq_unlink(struct ipq *qp)
- {
- hlist_del(&qp->list);
- list_del(&qp->lru_list);
-- ip_frag_nqueues--;
-+ qp->net->ip_frag_nqueues--;
- }
-
- static __inline__ void ipq_unlink(struct ipq *ipq)
-@@ -123,70 +105,71 @@
- write_unlock(&ipfrag_lock);
- }
-
--static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-+static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot)
- {
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
-- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
-+ net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
- }
-
--static struct timer_list ipfrag_secret_timer;
--int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
--
--static void ipfrag_secret_rebuild(unsigned long dummy)
-+static void ipfrag_secret_rebuild(unsigned long arg)
- {
-+ struct net *net = (void *)arg;
- unsigned long now = jiffies;
- int i;
-
- write_lock(&ipfrag_lock);
-- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
-+ get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32));
- for (i = 0; i < IPQ_HASHSZ; i++) {
- struct ipq *q;
-+ struct hlist_head *head;
- struct hlist_node *p, *n;
-
-- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
-- unsigned int hval = ipqhashfn(q->id, q->saddr,
-+ head = &net->ipq_hash[i];
-+ hlist_for_each_entry_safe(q, p, n, head, list) {
-+ unsigned int hval = ipqhashfn(net, q->id, q->saddr,
- q->daddr, q->protocol);
-
- if (hval != i) {
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
-- hlist_add_head(&q->list, &ipq_hash[hval]);
-+ hlist_add_head(&q->list, &net->ipq_hash[hval]);
- }
- }
- }
- write_unlock(&ipfrag_lock);
-
-- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
-+ mod_timer(&net->ipfrag_secret_timer,
-+ now + net->sysctl_ipfrag_secret_interval);
- }
-
--atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
--
- /* Memory Tracking Functions. */
--static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
-+static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work)
- {
- if (work)
- *work -= skb->truesize;
-- atomic_sub(skb->truesize, &ip_frag_mem);
-+ atomic_sub(skb->truesize, &net->ip_frag_mem);
- kfree_skb(skb);
- }
-
- static __inline__ void frag_free_queue(struct ipq *qp, int *work)
- {
-+ struct net *net = qp->net;
- if (work)
- *work -= sizeof(struct ipq);
-- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
-+ atomic_sub(sizeof(struct ipq), &net->ip_frag_mem);
-+ release_net(net);
- kfree(qp);
- }
-
--static __inline__ struct ipq *frag_alloc_queue(void)
-+static __inline__ struct ipq *frag_alloc_queue(struct net *net)
- {
- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
-
- if (!qp)
- return NULL;
-- atomic_add(sizeof(struct ipq), &ip_frag_mem);
-+ atomic_add(sizeof(struct ipq), &net->ip_frag_mem);
- return qp;
- }
-
-@@ -209,7 +192,7 @@
- while (fp) {
- struct sk_buff *xp = fp->next;
-
-- frag_kfree_skb(fp, work);
-+ frag_kfree_skb(qp->net, fp, work);
- fp = xp;
- }
-
-@@ -241,23 +224,23 @@
- /* Memory limiting on fragments. Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
--static void ip_evictor(void)
-+static void ip_evictor(struct net *net)
- {
- struct ipq *qp;
- struct list_head *tmp;
- int work;
-
-- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
-+ work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh;
- if (work <= 0)
- return;
-
- while (work > 0) {
- read_lock(&ipfrag_lock);
-- if (list_empty(&ipq_lru_list)) {
-+ if (list_empty(&net->ipq_lru_list)) {
- read_unlock(&ipfrag_lock);
- return;
- }
-- tmp = ipq_lru_list.next;
-+ tmp = net->ipq_lru_list.next;
- qp = list_entry(tmp, struct ipq, lru_list);
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
-@@ -292,7 +275,7 @@
- if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
- struct sk_buff *head = qp->fragments;
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
-- if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
-+ if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) {
- icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
- dev_put(head->dev);
- }
-@@ -304,7 +287,7 @@
-
- /* Creation primitives. */
-
--static struct ipq *ip_frag_intern(struct ipq *qp_in)
-+static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in)
- {
- struct ipq *qp;
- #ifdef CONFIG_SMP
-@@ -313,14 +296,14 @@
- unsigned int hash;
-
- write_lock(&ipfrag_lock);
-- hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
-+ hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr,
- qp_in->protocol);
- #ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could be created on other cpu, while we
- * promoted read lock to write lock.
- */
-- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-+ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
- if (qp->id == qp_in->id &&
- qp->saddr == qp_in->saddr &&
- qp->daddr == qp_in->daddr &&
-@@ -336,26 +319,27 @@
- #endif
- qp = qp_in;
-
-- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
-+ if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time))
- atomic_inc(&qp->refcnt);
-
- atomic_inc(&qp->refcnt);
-- hlist_add_head(&qp->list, &ipq_hash[hash]);
-+ hlist_add_head(&qp->list, &net->ipq_hash[hash]);
- INIT_LIST_HEAD(&qp->lru_list);
-- list_add_tail(&qp->lru_list, &ipq_lru_list);
-- ip_frag_nqueues++;
-+ list_add_tail(&qp->lru_list, &net->ipq_lru_list);
-+ net->ip_frag_nqueues++;
- write_unlock(&ipfrag_lock);
- return qp;
- }
-
- /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
--static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
-+static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user)
- {
- struct ipq *qp;
-
-- if ((qp = frag_alloc_queue()) == NULL)
-+ if ((qp = frag_alloc_queue(net)) == NULL)
- goto out_nomem;
-
-+ qp->net = hold_net(net);
- qp->protocol = iph->protocol;
- qp->last_in = 0;
- qp->id = iph->id;
-@@ -366,7 +350,8 @@
- qp->meat = 0;
- qp->fragments = NULL;
- qp->iif = 0;
-- qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
-+ qp->peer = net->sysctl_ipfrag_max_dist ?
-+ inet_getpeer(net, iph->saddr, 1) : NULL;
-
- /* Initialize a timer for this entry. */
- init_timer(&qp->timer);
-@@ -375,7 +360,7 @@
- spin_lock_init(&qp->lock);
- atomic_set(&qp->refcnt, 1);
-
-- return ip_frag_intern(qp);
-+ return ip_frag_intern(net, qp);
-
- out_nomem:
- LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
-@@ -385,7 +370,7 @@
- /* Find the correct entry in the "incomplete datagrams" queue for
- * this IP datagram, and create new one, if nothing is found.
- */
--static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
-+static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
- {
- __be16 id = iph->id;
- __be32 saddr = iph->saddr;
-@@ -396,8 +381,8 @@
- struct hlist_node *n;
-
- read_lock(&ipfrag_lock);
-- hash = ipqhashfn(id, saddr, daddr, protocol);
-- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-+ hash = ipqhashfn(net, id, saddr, daddr, protocol);
-+ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
- if (qp->id == id &&
- qp->saddr == saddr &&
- qp->daddr == daddr &&
-@@ -410,14 +395,14 @@
- }
- read_unlock(&ipfrag_lock);
-
-- return ip_frag_create(iph, user);
-+ return ip_frag_create(net, iph, user);
- }
-
- /* Is the fragment too far ahead to be part of ipq? */
- static inline int ip_frag_too_far(struct ipq *qp)
- {
- struct inet_peer *peer = qp->peer;
-- unsigned int max = sysctl_ipfrag_max_dist;
-+ unsigned int max = qp->net->sysctl_ipfrag_max_dist;
- unsigned int start, end;
-
- int rc;
-@@ -442,7 +427,7 @@
- {
- struct sk_buff *fp;
-
-- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
-+ if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) {
- atomic_inc(&qp->refcnt);
- return -ETIMEDOUT;
- }
-@@ -450,7 +435,7 @@
- fp = qp->fragments;
- do {
- struct sk_buff *xp = fp->next;
-- frag_kfree_skb(fp, NULL);
-+ frag_kfree_skb(qp->net, fp, NULL);
- fp = xp;
- } while (fp);
-
-@@ -466,6 +451,7 @@
- /* Add new segment to existing queue. */
- static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
- {
-+ struct net *net = qp->net;
- struct sk_buff *prev, *next;
- int flags, offset;
- int ihl, end;
-@@ -576,7 +562,7 @@
- qp->fragments = next;
-
- qp->meat -= free_it->len;
-- frag_kfree_skb(free_it, NULL);
-+ frag_kfree_skb(net, free_it, NULL);
- }
- }
-
-@@ -594,12 +580,12 @@
- skb->dev = NULL;
- qp->stamp = skb->tstamp;
- qp->meat += skb->len;
-- atomic_add(skb->truesize, &ip_frag_mem);
-+ atomic_add(skb->truesize, &net->ip_frag_mem);
- if (offset == 0)
- qp->last_in |= FIRST_IN;
-
- write_lock(&ipfrag_lock);
-- list_move_tail(&qp->lru_list, &ipq_lru_list);
-+ list_move_tail(&qp->lru_list, &net->ipq_lru_list);
- write_unlock(&ipfrag_lock);
-
- return;
-@@ -613,6 +599,7 @@
-
- static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
- {
-+ struct net *net = qp->net;
- struct iphdr *iph;
- struct sk_buff *fp, *head = qp->fragments;
- int len;
-@@ -654,12 +641,12 @@
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-- atomic_add(clone->truesize, &ip_frag_mem);
-+ atomic_add(clone->truesize, &net->ip_frag_mem);
- }
-
- skb_shinfo(head)->frag_list = head->next;
- skb_push(head, head->data - skb_network_header(head));
-- atomic_sub(head->truesize, &ip_frag_mem);
-+ atomic_sub(head->truesize, &net->ip_frag_mem);
-
- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
-@@ -669,7 +656,7 @@
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
-- atomic_sub(fp->truesize, &ip_frag_mem);
-+ atomic_sub(fp->truesize, &net->ip_frag_mem);
- }
-
- head->next = NULL;
-@@ -700,19 +687,20 @@
- /* Process an incoming IP datagram fragment. */
- struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
- {
-+ struct net *net = skb->dev->nd_net;
- struct ipq *qp;
- struct net_device *dev;
-
- IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
-
- /* Start by cleaning up the memory. */
-- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
-- ip_evictor();
-+ if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh)
-+ ip_evictor(net);
-
- dev = skb->dev;
-
- /* Lookup (or create) queue header */
-- if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
-+ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
- struct sk_buff *ret = NULL;
-
- spin_lock(&qp->lock);
-@@ -733,15 +721,70 @@
- return NULL;
- }
-
--void __init ipfrag_init(void)
-+static int ipfrag_net_init(struct net *net)
- {
-- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-+ struct timer_list *secret_timer;
-+ int i;
-+
-+ /* Fragment cache limits. We will commit 256K at one time. Should we
-+ * cross that limit we will prune down to 192K. This should cope with
-+ * even the most extreme cases without allowing an attacker to measurably
-+ * harm machine performance.
-+ */
-+ net->sysctl_ipfrag_high_thresh = 256*1024;
-+ net->sysctl_ipfrag_low_thresh = 192*1024;
-+ net->sysctl_ipfrag_max_dist = 64;
-+
-+ /* Important NOTE! Fragment queue must be destroyed before MSL expires.
-+ * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
-+ */
-+ net->sysctl_ipfrag_time = IP_FRAG_TIME;
-+
-+ net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
-+
-+ net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL);
-+ if (!net->ipq_hash)
-+ return -ENOMEM;
-+
-+ for (i = 0; i < IPQ_HASHSZ; i++)
-+ INIT_HLIST_HEAD(&net->ipq_hash[i]);
-+ INIT_LIST_HEAD(&net->ipq_lru_list);
-+ net->ip_frag_nqueues = 0;
-+ atomic_set(&net->ip_frag_mem, 0);
-+
-+
-+ net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
-- init_timer(&ipfrag_secret_timer);
-- ipfrag_secret_timer.function = ipfrag_secret_rebuild;
-- ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
-- add_timer(&ipfrag_secret_timer);
-+ secret_timer = &net->ipfrag_secret_timer;
-+ init_timer(secret_timer);
-+ secret_timer->function = ipfrag_secret_rebuild;
-+ secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval;
-+ secret_timer->data = (unsigned long)net;
-+ add_timer(secret_timer);
-+
-+ return 0;
-+}
-+
-+static void ipfrag_net_exit(struct net *net)
-+{
-+ del_timer(&net->ipfrag_secret_timer);
-+
-+ net->sysctl_ipfrag_low_thresh = 0;
-+ while (atomic_read(&net->ip_frag_mem))
-+ ip_evictor(net);
-+
-+ kfree(net->ipq_hash);
-+}
-+
-+static struct pernet_operations ipfrag_net_ops = {
-+ .init = ipfrag_net_init,
-+ .exit = ipfrag_net_exit,
-+};
-+
-+void ipfrag_init(void)
-+{
-+ register_pernet_subsys(&ipfrag_net_ops);
- }
-
- EXPORT_SYMBOL(ip_defrag);
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_gre.c linux-2.6.22-590/net/ipv4/ip_gre.c
---- linux-2.6.22-570/net/ipv4/ip_gre.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ip_gre.c 2008-01-29 22:12:32.000000000 -0500
-@@ -262,7 +262,7 @@
- int i;
- for (i=1; i<100; i++) {
- sprintf(name, "gre%d", i);
-- if (__dev_get_by_name(name) == NULL)
-+ if (__dev_get_by_name(&init_net, name) == NULL)
- break;
- }
- if (i==100)
-@@ -397,6 +397,9 @@
- struct flowi fl;
- struct rtable *rt;
-
-+ if (skb->dev->nd_net != &init_net)
-+ return;
-+
- if (p[1] != htons(ETH_P_IP))
- return;
-
-@@ -475,6 +478,7 @@
-
- /* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.fl4_dst = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_GRE;
-@@ -559,6 +563,10 @@
- struct ip_tunnel *tunnel;
- int offset = 4;
-
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
- if (!pskb_may_pull(skb, 16))
- goto drop_nolock;
-
-@@ -740,7 +748,8 @@
- }
-
- {
-- struct flowi fl = { .oif = tunnel->parms.link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
-@@ -1095,7 +1104,8 @@
- struct ip_tunnel *t = netdev_priv(dev);
-
- if (MULTICAST(t->parms.iph.daddr)) {
-- struct flowi fl = { .oif = t->parms.link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = t->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = t->parms.iph.daddr,
- .saddr = t->parms.iph.saddr,
-@@ -1118,7 +1128,7 @@
- {
- struct ip_tunnel *t = netdev_priv(dev);
- if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
-- struct in_device *in_dev = inetdev_by_index(t->mlink);
-+ struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink);
- if (in_dev) {
- ip_mc_dec_group(in_dev, t->parms.iph.daddr);
- in_dev_put(in_dev);
-@@ -1168,7 +1178,8 @@
- /* Guess output device to choose reasonable mtu and hard_header_len */
-
- if (iph->daddr) {
-- struct flowi fl = { .oif = tunnel->parms.link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
-@@ -1195,7 +1206,7 @@
- }
-
- if (!tdev && tunnel->parms.link)
-- tdev = __dev_get_by_index(tunnel->parms.link);
-+ tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
- if (tdev) {
- hlen = tdev->hard_header_len;
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_input.c linux-2.6.22-590/net/ipv4/ip_input.c
---- linux-2.6.22-570/net/ipv4/ip_input.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ip_input.c 2008-01-29 22:12:32.000000000 -0500
-@@ -280,6 +280,10 @@
- struct iphdr *iph;
- struct net_device *dev = skb->dev;
-
-+
-+ if (skb->dev->nd_net != &init_net)
-+ goto drop;
-+
- /* It looks as overkill, because not all
- IP options require packet mangling.
- But it is the easiest for now, especially taking
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_options.c linux-2.6.22-590/net/ipv4/ip_options.c
---- linux-2.6.22-570/net/ipv4/ip_options.c 2008-01-29 22:12:24.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/ip_options.c 2008-01-29 22:12:32.000000000 -0500
-@@ -151,7 +151,7 @@
- __be32 addr;
-
- memcpy(&addr, sptr+soffset-1, 4);
-- if (inet_addr_type(addr) != RTN_LOCAL) {
-+ if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
- dopt->ts_needtime = 1;
- soffset += 8;
- }
-@@ -400,7 +400,7 @@
- {
- __be32 addr;
- memcpy(&addr, &optptr[optptr[2]-1], 4);
-- if (inet_addr_type(addr) == RTN_UNICAST)
-+ if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
- break;
- if (skb)
- timeptr = (__be32*)&optptr[optptr[2]+3];
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_output.c linux-2.6.22-590/net/ipv4/ip_output.c
---- linux-2.6.22-570/net/ipv4/ip_output.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ip_output.c 2008-01-29 22:12:32.000000000 -0500
-@@ -83,8 +83,6 @@
- #include <linux/netlink.h>
- #include <linux/tcp.h>
-
--int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
--
- /* Generate a checksum for an outgoing IP datagram. */
- __inline__ void ip_send_check(struct iphdr *iph)
- {
-@@ -317,7 +315,8 @@
- daddr = opt->faddr;
-
- {
-- struct flowi fl = { .oif = sk->sk_bound_dev_if,
-+ struct flowi fl = { .fl_net = sk->sk_net,
-+ .oif = sk->sk_bound_dev_if,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = inet->saddr,
-@@ -837,7 +836,7 @@
- */
- if (transhdrlen &&
- length + fragheaderlen <= mtu &&
-- rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
-+ rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
- !exthdrlen)
- csummode = CHECKSUM_PARTIAL;
-
-@@ -1352,7 +1351,8 @@
- }
-
- {
-- struct flowi fl = { .oif = arg->bound_dev_if,
-+ struct flowi fl = { .fl_net = sk->sk_net,
-+ .oif = arg->bound_dev_if,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = rt->rt_spec_dst,
-diff -Nurb linux-2.6.22-570/net/ipv4/ip_sockglue.c linux-2.6.22-590/net/ipv4/ip_sockglue.c
---- linux-2.6.22-570/net/ipv4/ip_sockglue.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ip_sockglue.c 2008-01-29 22:12:32.000000000 -0500
-@@ -411,6 +411,7 @@
- static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
- {
-+ struct net *net = sk->sk_net;
- struct inet_sock *inet = inet_sk(sk);
- int val=0,err;
-
-@@ -596,13 +597,13 @@
- err = 0;
- break;
- }
-- dev = ip_dev_find(mreq.imr_address.s_addr);
-+ dev = ip_dev_find(net, mreq.imr_address.s_addr);
- if (dev) {
- mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
- } else
-- dev = __dev_get_by_index(mreq.imr_ifindex);
-+ dev = __dev_get_by_index(net, mreq.imr_ifindex);
-
-
- err = -EADDRNOTAVAIL;
-@@ -956,6 +957,7 @@
- static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
- {
-+ struct net *net = sk->sk_net;
- struct inet_sock *inet = inet_sk(sk);
- int val;
- int len;
-@@ -1023,7 +1025,7 @@
- break;
- case IP_TTL:
- val = (inet->uc_ttl == -1 ?
-- sysctl_ip_default_ttl :
-+ net->sysctl_ip_default_ttl :
- inet->uc_ttl);
- break;
- case IP_HDRINCL:
-diff -Nurb linux-2.6.22-570/net/ipv4/ipcomp.c linux-2.6.22-590/net/ipv4/ipcomp.c
---- linux-2.6.22-570/net/ipv4/ipcomp.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/ipcomp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -175,6 +175,9 @@
- struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
- struct xfrm_state *x;
-
-+ if (skb->dev->nd_net != &init_net)
-+ return;
-+
- if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
- icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
- return;
-@@ -486,3 +489,4 @@
- MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
- MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-
-+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
-diff -Nurb linux-2.6.22-570/net/ipv4/ipconfig.c linux-2.6.22-590/net/ipv4/ipconfig.c
---- linux-2.6.22-570/net/ipv4/ipconfig.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipconfig.c 2008-01-29 22:12:32.000000000 -0500
-@@ -59,6 +59,7 @@
- #include <net/ip.h>
- #include <net/ipconfig.h>
- #include <net/route.h>
-+#include <net/net_namespace.h>
-
- #include <asm/uaccess.h>
- #include <net/checksum.h>
-@@ -184,16 +185,18 @@
- struct ic_device *d, **last;
- struct net_device *dev;
- unsigned short oflags;
-+ struct net_device *lo;
-
- last = &ic_first_dev;
- rtnl_lock();
-
- /* bring loopback device up first */
-- if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
-- printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
-+ lo = &init_net.loopback_dev;
-+ if (dev_change_flags(lo, lo->flags | IFF_UP) < 0)
-+ printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name);
-
-- for_each_netdev(dev) {
-- if (dev == &loopback_dev)
-+ for_each_netdev(&init_net, dev) {
-+ if (dev == lo)
- continue;
- if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
- (!(dev->flags & IFF_LOOPBACK) &&
-@@ -283,7 +286,7 @@
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
-- res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
-+ res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
- }
-@@ -294,7 +297,7 @@
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
-- res = ip_rt_ioctl(cmd, (void __user *) arg);
-+ res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
- set_fs(oldfs);
- return res;
- }
-@@ -425,6 +428,9 @@
- unsigned char *sha, *tha; /* s for "source", t for "target" */
- struct ic_device *d;
-
-+ if (dev->nd_net != &init_net)
-+ goto drop;
-+
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
- return NET_RX_DROP;
-
-@@ -834,6 +840,9 @@
- struct ic_device *d;
- int len, ext_len;
-
-+ if (dev->nd_net != &init_net)
-+ goto drop;
-+
- /* Perform verifications before taking the lock. */
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop;
-@@ -1253,7 +1262,7 @@
- __be32 addr;
-
- #ifdef CONFIG_PROC_FS
-- proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops);
-+ proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
- #endif /* CONFIG_PROC_FS */
-
- if (!ic_enable)
-diff -Nurb linux-2.6.22-570/net/ipv4/ipip.c linux-2.6.22-590/net/ipv4/ipip.c
---- linux-2.6.22-570/net/ipv4/ipip.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipip.c 2008-01-29 22:12:32.000000000 -0500
-@@ -225,7 +225,7 @@
- int i;
- for (i=1; i<100; i++) {
- sprintf(name, "tunl%d", i);
-- if (__dev_get_by_name(name) == NULL)
-+ if (__dev_get_by_name(&init_net, name) == NULL)
- break;
- }
- if (i==100)
-@@ -403,6 +403,7 @@
-
- /* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.fl4_daddr = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_IPIP;
-@@ -542,7 +543,8 @@
- }
-
- {
-- struct flowi fl = { .oif = tunnel->parms.link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = dst,
- .saddr = tiph->saddr,
-@@ -806,7 +808,8 @@
- memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
-
- if (iph->daddr) {
-- struct flowi fl = { .oif = tunnel->parms.link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = tunnel->parms.link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .saddr = iph->saddr,
-@@ -821,7 +824,7 @@
- }
-
- if (!tdev && tunnel->parms.link)
-- tdev = __dev_get_by_index(tunnel->parms.link);
-+ tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
-
- if (tdev) {
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
-diff -Nurb linux-2.6.22-570/net/ipv4/ipmr.c linux-2.6.22-590/net/ipv4/ipmr.c
---- linux-2.6.22-570/net/ipv4/ipmr.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipmr.c 2008-01-29 22:12:32.000000000 -0500
-@@ -62,6 +62,7 @@
- #include <linux/netfilter_ipv4.h>
- #include <net/ipip.h>
- #include <net/checksum.h>
-+#include <net/net_namespace.h>
- #include <net/netlink.h>
-
- #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
-@@ -124,7 +125,7 @@
- {
- struct net_device *dev;
-
-- dev = __dev_get_by_name("tunl0");
-+ dev = __dev_get_by_name(&init_net, "tunl0");
-
- if (dev) {
- int err;
-@@ -148,7 +149,7 @@
-
- dev = NULL;
-
-- if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
-+ if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
- dev->flags |= IFF_MULTICAST;
-
- in_dev = __in_dev_get_rtnl(dev);
-@@ -320,7 +321,7 @@
- e->error = -ETIMEDOUT;
- memset(&e->msg, 0, sizeof(e->msg));
-
-- rtnl_unicast(skb, NETLINK_CB(skb).pid);
-+ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
- } else
- kfree_skb(skb);
- }
-@@ -422,7 +423,7 @@
- return -ENOBUFS;
- break;
- case 0:
-- dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
-+ dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
- if (!dev)
- return -EADDRNOTAVAIL;
- dev_put(dev);
-@@ -532,7 +533,7 @@
- memset(&e->msg, 0, sizeof(e->msg));
- }
-
-- rtnl_unicast(skb, NETLINK_CB(skb).pid);
-+ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
- } else
- ip_mr_forward(skb, c, 0);
- }
-@@ -848,7 +849,7 @@
- {
- rtnl_lock();
- if (sk == mroute_socket) {
-- IPV4_DEVCONF_ALL(MC_FORWARDING)--;
-+ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
-
- write_lock_bh(&mrt_lock);
- mroute_socket=NULL;
-@@ -897,7 +898,7 @@
- mroute_socket=sk;
- write_unlock_bh(&mrt_lock);
-
-- IPV4_DEVCONF_ALL(MC_FORWARDING)++;
-+ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
- }
- rtnl_unlock();
- return ret;
-@@ -1082,13 +1083,18 @@
-
- static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
- {
-+ struct net_device *dev = ptr;
- struct vif_device *v;
- int ct;
-+
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- if (event != NETDEV_UNREGISTER)
- return NOTIFY_DONE;
- v=&vif_table[0];
- for (ct=0;ct<maxvif;ct++,v++) {
-- if (v->dev==ptr)
-+ if (v->dev==dev)
- vif_delete(ct);
- }
- return NOTIFY_DONE;
-@@ -1171,7 +1177,8 @@
- #endif
-
- if (vif->flags&VIFF_TUNNEL) {
-- struct flowi fl = { .oif = vif->link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = vif->link,
- .nl_u = { .ip4_u =
- { .daddr = vif->remote,
- .saddr = vif->local,
-@@ -1181,7 +1188,8 @@
- goto out_free;
- encap = sizeof(struct iphdr);
- } else {
-- struct flowi fl = { .oif = vif->link,
-+ struct flowi fl = { .fl_net = &init_net,
-+ .oif = vif->link,
- .nl_u = { .ip4_u =
- { .daddr = iph->daddr,
- .tos = RT_TOS(iph->tos) } },
-@@ -1498,6 +1506,10 @@
- struct iphdr *encap;
- struct net_device *reg_dev = NULL;
-
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
- if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
- goto drop;
-
-@@ -1922,7 +1934,7 @@
- ipmr_expire_timer.function=ipmr_expire_process;
- register_netdevice_notifier(&ip_mr_notifier);
- #ifdef CONFIG_PROC_FS
-- proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
-- proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
-+ proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
-+ proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
- #endif
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_app.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_app.c 2008-01-29 22:12:32.000000000 -0500
-@@ -32,6 +32,7 @@
- #include <linux/proc_fs.h>
- #include <linux/seq_file.h>
- #include <linux/mutex.h>
-+#include <net/net_namespace.h>
-
- #include <net/ip_vs.h>
-
-@@ -616,12 +617,12 @@
- int ip_vs_app_init(void)
- {
- /* we will replace it with proc_net_ipvs_create() soon */
-- proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
-+ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
- return 0;
- }
-
-
- void ip_vs_app_cleanup(void)
- {
-- proc_net_remove("ip_vs_app");
-+ proc_net_remove(&init_net, "ip_vs_app");
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_conn.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_conn.c 2008-01-29 22:12:32.000000000 -0500
-@@ -34,6 +34,7 @@
- #include <linux/seq_file.h>
- #include <linux/jhash.h>
- #include <linux/random.h>
-+#include <net/net_namespace.h>
-
- #include <net/ip_vs.h>
-
-@@ -922,7 +923,7 @@
- rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
- }
-
-- proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
-+ proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-
- /* calculate the random value for connection hash */
- get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
-@@ -938,6 +939,6 @@
-
- /* Release the empty cache */
- kmem_cache_destroy(ip_vs_conn_cachep);
-- proc_net_remove("ip_vs_conn");
-+ proc_net_remove(&init_net, "ip_vs_conn");
- vfree(ip_vs_conn_tab);
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_core.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_core.c 2008-01-29 22:12:32.000000000 -0500
-@@ -460,7 +460,7 @@
- and the destination is RTN_UNICAST (and not local), then create
- a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark
-- && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
-+ && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
- int ret, cs;
- struct ip_vs_conn *cp;
-
-@@ -530,6 +530,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- if (!((*pskb)->ipvs_property))
- return NF_ACCEPT;
- /* The packet was sent from IPVS, exit this chain */
-@@ -734,6 +738,10 @@
- struct ip_vs_conn *cp;
- int ihl;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- EnterFunction(11);
-
- if (skb->ipvs_property)
-@@ -818,7 +826,7 @@
- * if it came from this machine itself. So re-compute
- * the routing information.
- */
-- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
-+ if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0)
- goto drop;
- skb = *pskb;
-
-@@ -956,12 +964,16 @@
- int ret, restart;
- int ihl;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /*
- * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
- * ... don't know why 1st test DOES NOT include 2nd (?)
- */
- if (unlikely(skb->pkt_type != PACKET_HOST
-- || skb->dev == &loopback_dev || skb->sk)) {
-+ || skb->dev == &init_net.loopback_dev || skb->sk)) {
- IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
- skb->pkt_type,
- ip_hdr(skb)->protocol,
-@@ -1062,6 +1074,10 @@
- {
- int r;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
- return NF_ACCEPT;
-
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_ctl.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_ctl.c 2008-01-29 22:12:32.000000000 -0500
-@@ -39,6 +39,7 @@
- #include <net/ip.h>
- #include <net/route.h>
- #include <net/sock.h>
-+#include <net/net_namespace.h>
-
- #include <asm/uaccess.h>
-
-@@ -679,7 +680,7 @@
- conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
-
- /* check if local node and update the flags */
-- if (inet_addr_type(udest->addr) == RTN_LOCAL) {
-+ if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
- conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
- | IP_VS_CONN_F_LOCALNODE;
- }
-@@ -731,7 +732,7 @@
-
- EnterFunction(2);
-
-- atype = inet_addr_type(udest->addr);
-+ atype = inet_addr_type(&init_net, udest->addr);
- if (atype != RTN_LOCAL && atype != RTN_UNICAST)
- return -EINVAL;
-
-@@ -1932,6 +1933,9 @@
- struct ip_vs_service *svc;
- struct ip_vs_dest_user *udest;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -2196,6 +2200,9 @@
- unsigned char arg[128];
- int ret = 0;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -2356,8 +2363,8 @@
- return ret;
- }
-
-- proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
-- proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
-+ proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-+ proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
- sysctl_header = register_sysctl_table(vs_root_table);
-
-@@ -2390,8 +2397,8 @@
- cancel_work_sync(&defense_work.work);
- ip_vs_kill_estimator(&ip_vs_stats);
- unregister_sysctl_table(sysctl_header);
-- proc_net_remove("ip_vs_stats");
-- proc_net_remove("ip_vs");
-+ proc_net_remove(&init_net, "ip_vs_stats");
-+ proc_net_remove(&init_net, "ip_vs");
- nf_unregister_sockopt(&ip_vs_sockopts);
- LeaveFunction(2);
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_lblcr.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_lblcr.c 2008-01-29 22:12:32.000000000 -0500
-@@ -843,7 +843,7 @@
- INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
- sysctl_header = register_sysctl_table(lblcr_root_table);
- #ifdef CONFIG_IP_VS_LBLCR_DEBUG
-- proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
-+ proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
- #endif
- return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
- }
-@@ -852,7 +852,7 @@
- static void __exit ip_vs_lblcr_cleanup(void)
- {
- #ifdef CONFIG_IP_VS_LBLCR_DEBUG
-- proc_net_remove("ip_vs_lblcr");
-+ proc_net_remove(&init_net, "ip_vs_lblcr");
- #endif
- unregister_sysctl_table(sysctl_header);
- unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_sync.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_sync.c 2008-01-29 22:12:32.000000000 -0500
-@@ -387,7 +387,7 @@
- struct net_device *dev;
- struct inet_sock *inet = inet_sk(sk);
-
-- if ((dev = __dev_get_by_name(ifname)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
-
- if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
-@@ -412,7 +412,7 @@
- int num;
-
- if (sync_state == IP_VS_STATE_MASTER) {
-- if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
- return -ENODEV;
-
- num = (dev->mtu - sizeof(struct iphdr) -
-@@ -423,7 +423,7 @@
- IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
- } else if (sync_state == IP_VS_STATE_BACKUP) {
-- if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
- return -ENODEV;
-
- sync_recv_mesg_maxlen = dev->mtu -
-@@ -451,7 +451,7 @@
- memset(&mreq, 0, sizeof(mreq));
- memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
-
-- if ((dev = __dev_get_by_name(ifname)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
- if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
- return -EINVAL;
-@@ -472,7 +472,7 @@
- __be32 addr;
- struct sockaddr_in sin;
-
-- if ((dev = __dev_get_by_name(ifname)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
-
- addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
-diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.22-590/net/ipv4/ipvs/ip_vs_xmit.c
---- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/ipvs/ip_vs_xmit.c 2008-01-29 22:12:32.000000000 -0500
-@@ -70,6 +70,7 @@
- if (!(rt = (struct rtable *)
- __ip_vs_dst_check(dest, rtos, 0))) {
- struct flowi fl = {
-+ .fl_net = &init_net,
- .oif = 0,
- .nl_u = {
- .ip4_u = {
-@@ -93,6 +94,7 @@
- spin_unlock(&dest->dst_lock);
- } else {
- struct flowi fl = {
-+ .fl_net = &init_net,
- .oif = 0,
- .nl_u = {
- .ip4_u = {
-@@ -160,6 +162,7 @@
- u8 tos = iph->tos;
- int mtu;
- struct flowi fl = {
-+ .fl_net = &init_net,
- .oif = 0,
- .nl_u = {
- .ip4_u = {
-diff -Nurb linux-2.6.22-570/net/ipv4/multipath.c linux-2.6.22-590/net/ipv4/multipath.c
---- linux-2.6.22-570/net/ipv4/multipath.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/multipath.c 1969-12-31 19:00:00.000000000 -0500
-@@ -1,55 +0,0 @@
--/* multipath.c: IPV4 multipath algorithm support.
-- *
-- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
-- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
-- */
--
--#include <linux/module.h>
--#include <linux/errno.h>
--#include <linux/netdevice.h>
--#include <linux/spinlock.h>
--
--#include <net/ip_mp_alg.h>
--
--static DEFINE_SPINLOCK(alg_table_lock);
--struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
--
--int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
--{
-- struct ip_mp_alg_ops **slot;
-- int err;
--
-- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
-- !ops->mp_alg_select_route)
-- return -EINVAL;
--
-- spin_lock(&alg_table_lock);
-- slot = &ip_mp_alg_table[n];
-- if (*slot != NULL) {
-- err = -EBUSY;
-- } else {
-- *slot = ops;
-- err = 0;
-- }
-- spin_unlock(&alg_table_lock);
--
-- return err;
--}
--EXPORT_SYMBOL(multipath_alg_register);
--
--void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
--{
-- struct ip_mp_alg_ops **slot;
--
-- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
-- return;
--
-- spin_lock(&alg_table_lock);
-- slot = &ip_mp_alg_table[n];
-- if (*slot == ops)
-- *slot = NULL;
-- spin_unlock(&alg_table_lock);
--
-- synchronize_net();
--}
--EXPORT_SYMBOL(multipath_alg_unregister);
-diff -Nurb linux-2.6.22-570/net/ipv4/multipath_drr.c linux-2.6.22-590/net/ipv4/multipath_drr.c
---- linux-2.6.22-570/net/ipv4/multipath_drr.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/multipath_drr.c 1969-12-31 19:00:00.000000000 -0500
-@@ -1,249 +0,0 @@
--/*
-- * Device round robin policy for multipath.
-- *
-- *
-- * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
-- *
-- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <linux/types.h>
--#include <linux/errno.h>
--#include <linux/timer.h>
--#include <linux/mm.h>
--#include <linux/kernel.h>
--#include <linux/fcntl.h>
--#include <linux/stat.h>
--#include <linux/socket.h>
--#include <linux/in.h>
--#include <linux/inet.h>
--#include <linux/netdevice.h>
--#include <linux/inetdevice.h>
--#include <linux/igmp.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/module.h>
--#include <linux/mroute.h>
--#include <linux/init.h>
--#include <net/ip.h>
--#include <net/protocol.h>
--#include <linux/skbuff.h>
--#include <net/sock.h>
--#include <net/icmp.h>
--#include <net/udp.h>
--#include <net/raw.h>
--#include <linux/notifier.h>
--#include <linux/if_arp.h>
--#include <linux/netfilter_ipv4.h>
--#include <net/ipip.h>
--#include <net/checksum.h>
--#include <net/ip_mp_alg.h>
--
--struct multipath_device {
-- int ifi; /* interface index of device */
-- atomic_t usecount;
-- int allocated;
--};
--
--#define MULTIPATH_MAX_DEVICECANDIDATES 10
--
--static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
--static DEFINE_SPINLOCK(state_lock);
--
--static int inline __multipath_findslot(void)
--{
-- int i;
--
-- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
-- if (state[i].allocated == 0)
-- return i;
-- }
-- return -1;
--}
--
--static int inline __multipath_finddev(int ifindex)
--{
-- int i;
--
-- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
-- if (state[i].allocated != 0 &&
-- state[i].ifi == ifindex)
-- return i;
-- }
-- return -1;
--}
--
--static int drr_dev_event(struct notifier_block *this,
-- unsigned long event, void *ptr)
--{
-- struct net_device *dev = ptr;
-- int devidx;
--
-- switch (event) {
-- case NETDEV_UNREGISTER:
-- case NETDEV_DOWN:
-- spin_lock_bh(&state_lock);
--
-- devidx = __multipath_finddev(dev->ifindex);
-- if (devidx != -1) {
-- state[devidx].allocated = 0;
-- state[devidx].ifi = 0;
-- atomic_set(&state[devidx].usecount, 0);
-- }
--
-- spin_unlock_bh(&state_lock);
-- break;
-- }
--
-- return NOTIFY_DONE;
--}
--
--static struct notifier_block drr_dev_notifier = {
-- .notifier_call = drr_dev_event,
--};
--
--
--static void drr_safe_inc(atomic_t *usecount)
--{
-- int n;
--
-- atomic_inc(usecount);
--
-- n = atomic_read(usecount);
-- if (n <= 0) {
-- int i;
--
-- spin_lock_bh(&state_lock);
--
-- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
-- atomic_set(&state[i].usecount, 0);
--
-- spin_unlock_bh(&state_lock);
-- }
--}
--
--static void drr_select_route(const struct flowi *flp,
-- struct rtable *first, struct rtable **rp)
--{
-- struct rtable *nh, *result, *cur_min;
-- int min_usecount = -1;
-- int devidx = -1;
-- int cur_min_devidx = -1;
--
-- /* 1. make sure all alt. nexthops have the same GC related data */
-- /* 2. determine the new candidate to be returned */
-- result = NULL;
-- cur_min = NULL;
-- for (nh = rcu_dereference(first); nh;
-- nh = rcu_dereference(nh->u.dst.rt_next)) {
-- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
-- multipath_comparekeys(&nh->fl, flp)) {
-- int nh_ifidx = nh->u.dst.dev->ifindex;
--
-- nh->u.dst.lastuse = jiffies;
-- nh->u.dst.__use++;
-- if (result != NULL)
-- continue;
--
-- /* search for the output interface */
--
-- /* this is not SMP safe, only add/remove are
-- * SMP safe as wrong usecount updates have no big
-- * impact
-- */
-- devidx = __multipath_finddev(nh_ifidx);
-- if (devidx == -1) {
-- /* add the interface to the array
-- * SMP safe
-- */
-- spin_lock_bh(&state_lock);
--
-- /* due to SMP: search again */
-- devidx = __multipath_finddev(nh_ifidx);
-- if (devidx == -1) {
-- /* add entry for device */
-- devidx = __multipath_findslot();
-- if (devidx == -1) {
-- /* unlikely but possible */
-- continue;
-- }
--
-- state[devidx].allocated = 1;
-- state[devidx].ifi = nh_ifidx;
-- atomic_set(&state[devidx].usecount, 0);
-- min_usecount = 0;
-- }
--
-- spin_unlock_bh(&state_lock);
-- }
--
-- if (min_usecount == 0) {
-- /* if the device has not been used it is
-- * the primary target
-- */
-- drr_safe_inc(&state[devidx].usecount);
-- result = nh;
-- } else {
-- int count =
-- atomic_read(&state[devidx].usecount);
--
-- if (min_usecount == -1 ||
-- count < min_usecount) {
-- cur_min = nh;
-- cur_min_devidx = devidx;
-- min_usecount = count;
-- }
-- }
-- }
-- }
--
-- if (!result) {
-- if (cur_min) {
-- drr_safe_inc(&state[cur_min_devidx].usecount);
-- result = cur_min;
-- } else {
-- result = first;
-- }
-- }
--
-- *rp = result;
--}
--
--static struct ip_mp_alg_ops drr_ops = {
-- .mp_alg_select_route = drr_select_route,
--};
--
--static int __init drr_init(void)
--{
-- int err = register_netdevice_notifier(&drr_dev_notifier);
--
-- if (err)
-- return err;
--
-- err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
-- if (err)
-- goto fail;
--
-- return 0;
--
--fail:
-- unregister_netdevice_notifier(&drr_dev_notifier);
-- return err;
--}
--
--static void __exit drr_exit(void)
--{
-- unregister_netdevice_notifier(&drr_dev_notifier);
-- multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
--}
--
--module_init(drr_init);
--module_exit(drr_exit);
--MODULE_LICENSE("GPL");
-diff -Nurb linux-2.6.22-570/net/ipv4/multipath_random.c linux-2.6.22-590/net/ipv4/multipath_random.c
---- linux-2.6.22-570/net/ipv4/multipath_random.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/multipath_random.c 1969-12-31 19:00:00.000000000 -0500
-@@ -1,114 +0,0 @@
--/*
-- * Random policy for multipath.
-- *
-- *
-- * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
-- *
-- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <linux/types.h>
--#include <linux/errno.h>
--#include <linux/timer.h>
--#include <linux/mm.h>
--#include <linux/kernel.h>
--#include <linux/fcntl.h>
--#include <linux/stat.h>
--#include <linux/socket.h>
--#include <linux/in.h>
--#include <linux/inet.h>
--#include <linux/netdevice.h>
--#include <linux/inetdevice.h>
--#include <linux/igmp.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/module.h>
--#include <linux/mroute.h>
--#include <linux/init.h>
--#include <linux/random.h>
--#include <net/ip.h>
--#include <net/protocol.h>
--#include <linux/skbuff.h>
--#include <net/sock.h>
--#include <net/icmp.h>
--#include <net/udp.h>
--#include <net/raw.h>
--#include <linux/notifier.h>
--#include <linux/if_arp.h>
--#include <linux/netfilter_ipv4.h>
--#include <net/ipip.h>
--#include <net/checksum.h>
--#include <net/ip_mp_alg.h>
--
--#define MULTIPATH_MAX_CANDIDATES 40
--
--static void random_select_route(const struct flowi *flp,
-- struct rtable *first,
-- struct rtable **rp)
--{
-- struct rtable *rt;
-- struct rtable *decision;
-- unsigned char candidate_count = 0;
--
-- /* count all candidate */
-- for (rt = rcu_dereference(first); rt;
-- rt = rcu_dereference(rt->u.dst.rt_next)) {
-- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-- multipath_comparekeys(&rt->fl, flp))
-- ++candidate_count;
-- }
--
-- /* choose a random candidate */
-- decision = first;
-- if (candidate_count > 1) {
-- unsigned char i = 0;
-- unsigned char candidate_no = (unsigned char)
-- (random32() % candidate_count);
--
-- /* find chosen candidate and adjust GC data for all candidates
-- * to ensure they stay in cache
-- */
-- for (rt = first; rt; rt = rt->u.dst.rt_next) {
-- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-- multipath_comparekeys(&rt->fl, flp)) {
-- rt->u.dst.lastuse = jiffies;
--
-- if (i == candidate_no)
-- decision = rt;
--
-- if (i >= candidate_count)
-- break;
--
-- i++;
-- }
-- }
-- }
--
-- decision->u.dst.__use++;
-- *rp = decision;
--}
--
--static struct ip_mp_alg_ops random_ops = {
-- .mp_alg_select_route = random_select_route,
--};
--
--static int __init random_init(void)
--{
-- return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
--}
--
--static void __exit random_exit(void)
--{
-- multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
--}
--
--module_init(random_init);
--module_exit(random_exit);
--MODULE_LICENSE("GPL");
-diff -Nurb linux-2.6.22-570/net/ipv4/multipath_rr.c linux-2.6.22-590/net/ipv4/multipath_rr.c
---- linux-2.6.22-570/net/ipv4/multipath_rr.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/multipath_rr.c 1969-12-31 19:00:00.000000000 -0500
-@@ -1,95 +0,0 @@
--/*
-- * Round robin policy for multipath.
-- *
-- *
-- * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
-- *
-- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <linux/types.h>
--#include <linux/errno.h>
--#include <linux/timer.h>
--#include <linux/mm.h>
--#include <linux/kernel.h>
--#include <linux/fcntl.h>
--#include <linux/stat.h>
--#include <linux/socket.h>
--#include <linux/in.h>
--#include <linux/inet.h>
--#include <linux/netdevice.h>
--#include <linux/inetdevice.h>
--#include <linux/igmp.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/module.h>
--#include <linux/mroute.h>
--#include <linux/init.h>
--#include <net/ip.h>
--#include <net/protocol.h>
--#include <linux/skbuff.h>
--#include <net/sock.h>
--#include <net/icmp.h>
--#include <net/udp.h>
--#include <net/raw.h>
--#include <linux/notifier.h>
--#include <linux/if_arp.h>
--#include <linux/netfilter_ipv4.h>
--#include <net/ipip.h>
--#include <net/checksum.h>
--#include <net/ip_mp_alg.h>
--
--static void rr_select_route(const struct flowi *flp,
-- struct rtable *first, struct rtable **rp)
--{
-- struct rtable *nh, *result, *min_use_cand = NULL;
-- int min_use = -1;
--
-- /* 1. make sure all alt. nexthops have the same GC related data
-- * 2. determine the new candidate to be returned
-- */
-- result = NULL;
-- for (nh = rcu_dereference(first); nh;
-- nh = rcu_dereference(nh->u.dst.rt_next)) {
-- if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
-- multipath_comparekeys(&nh->fl, flp)) {
-- nh->u.dst.lastuse = jiffies;
--
-- if (min_use == -1 || nh->u.dst.__use < min_use) {
-- min_use = nh->u.dst.__use;
-- min_use_cand = nh;
-- }
-- }
-- }
-- result = min_use_cand;
-- if (!result)
-- result = first;
--
-- result->u.dst.__use++;
-- *rp = result;
--}
--
--static struct ip_mp_alg_ops rr_ops = {
-- .mp_alg_select_route = rr_select_route,
--};
--
--static int __init rr_init(void)
--{
-- return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
--}
--
--static void __exit rr_exit(void)
--{
-- multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
--}
--
--module_init(rr_init);
--module_exit(rr_exit);
--MODULE_LICENSE("GPL");
-diff -Nurb linux-2.6.22-570/net/ipv4/multipath_wrandom.c linux-2.6.22-590/net/ipv4/multipath_wrandom.c
---- linux-2.6.22-570/net/ipv4/multipath_wrandom.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/multipath_wrandom.c 1969-12-31 19:00:00.000000000 -0500
-@@ -1,329 +0,0 @@
--/*
-- * Weighted random policy for multipath.
-- *
-- *
-- * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
-- *
-- * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--#include <asm/system.h>
--#include <asm/uaccess.h>
--#include <linux/types.h>
--#include <linux/errno.h>
--#include <linux/timer.h>
--#include <linux/mm.h>
--#include <linux/kernel.h>
--#include <linux/fcntl.h>
--#include <linux/stat.h>
--#include <linux/socket.h>
--#include <linux/in.h>
--#include <linux/inet.h>
--#include <linux/netdevice.h>
--#include <linux/inetdevice.h>
--#include <linux/igmp.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--#include <linux/module.h>
--#include <linux/mroute.h>
--#include <linux/init.h>
--#include <linux/random.h>
--#include <net/ip.h>
--#include <net/protocol.h>
--#include <linux/skbuff.h>
--#include <net/sock.h>
--#include <net/icmp.h>
--#include <net/udp.h>
--#include <net/raw.h>
--#include <linux/notifier.h>
--#include <linux/if_arp.h>
--#include <linux/netfilter_ipv4.h>
--#include <net/ipip.h>
--#include <net/checksum.h>
--#include <net/ip_fib.h>
--#include <net/ip_mp_alg.h>
--
--#define MULTIPATH_STATE_SIZE 15
--
--struct multipath_candidate {
-- struct multipath_candidate *next;
-- int power;
-- struct rtable *rt;
--};
--
--struct multipath_dest {
-- struct list_head list;
--
-- const struct fib_nh *nh_info;
-- __be32 netmask;
-- __be32 network;
-- unsigned char prefixlen;
--
-- struct rcu_head rcu;
--};
--
--struct multipath_bucket {
-- struct list_head head;
-- spinlock_t lock;
--};
--
--struct multipath_route {
-- struct list_head list;
--
-- int oif;
-- __be32 gw;
-- struct list_head dests;
--
-- struct rcu_head rcu;
--};
--
--/* state: primarily weight per route information */
--static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
--
--static unsigned char __multipath_lookup_weight(const struct flowi *fl,
-- const struct rtable *rt)
--{
-- const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
-- struct multipath_route *r;
-- struct multipath_route *target_route = NULL;
-- struct multipath_dest *d;
-- int weight = 1;
--
-- /* lookup the weight information for a certain route */
-- rcu_read_lock();
--
-- /* find state entry for gateway or add one if necessary */
-- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
-- if (r->gw == rt->rt_gateway &&
-- r->oif == rt->idev->dev->ifindex) {
-- target_route = r;
-- break;
-- }
-- }
--
-- if (!target_route) {
-- /* this should not happen... but we are prepared */
-- printk( KERN_CRIT"%s: missing state for gateway: %u and " \
-- "device %d\n", __FUNCTION__, rt->rt_gateway,
-- rt->idev->dev->ifindex);
-- goto out;
-- }
--
-- /* find state entry for destination */
-- list_for_each_entry_rcu(d, &target_route->dests, list) {
-- __be32 targetnetwork = fl->fl4_dst &
-- inet_make_mask(d->prefixlen);
--
-- if ((targetnetwork & d->netmask) == d->network) {
-- weight = d->nh_info->nh_weight;
-- goto out;
-- }
-- }
--
--out:
-- rcu_read_unlock();
-- return weight;
--}
--
--static void wrandom_init_state(void)
--{
-- int i;
--
-- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
-- INIT_LIST_HEAD(&state[i].head);
-- spin_lock_init(&state[i].lock);
-- }
--}
--
--static void wrandom_select_route(const struct flowi *flp,
-- struct rtable *first,
-- struct rtable **rp)
--{
-- struct rtable *rt;
-- struct rtable *decision;
-- struct multipath_candidate *first_mpc = NULL;
-- struct multipath_candidate *mpc, *last_mpc = NULL;
-- int power = 0;
-- int last_power;
-- int selector;
-- const size_t size_mpc = sizeof(struct multipath_candidate);
--
-- /* collect all candidates and identify their weights */
-- for (rt = rcu_dereference(first); rt;
-- rt = rcu_dereference(rt->u.dst.rt_next)) {
-- if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
-- multipath_comparekeys(&rt->fl, flp)) {
-- struct multipath_candidate* mpc =
-- (struct multipath_candidate*)
-- kmalloc(size_mpc, GFP_ATOMIC);
--
-- if (!mpc)
-- return;
--
-- power += __multipath_lookup_weight(flp, rt) * 10000;
--
-- mpc->power = power;
-- mpc->rt = rt;
-- mpc->next = NULL;
--
-- if (!first_mpc)
-- first_mpc = mpc;
-- else
-- last_mpc->next = mpc;
--
-- last_mpc = mpc;
-- }
-- }
--
-- /* choose a weighted random candidate */
-- decision = first;
-- selector = random32() % power;
-- last_power = 0;
--
-- /* select candidate, adjust GC data and cleanup local state */
-- decision = first;
-- last_mpc = NULL;
-- for (mpc = first_mpc; mpc; mpc = mpc->next) {
-- mpc->rt->u.dst.lastuse = jiffies;
-- if (last_power <= selector && selector < mpc->power)
-- decision = mpc->rt;
--
-- last_power = mpc->power;
-- kfree(last_mpc);
-- last_mpc = mpc;
-- }
--
-- /* concurrent __multipath_flush may lead to !last_mpc */
-- kfree(last_mpc);
--
-- decision->u.dst.__use++;
-- *rp = decision;
--}
--
--static void wrandom_set_nhinfo(__be32 network,
-- __be32 netmask,
-- unsigned char prefixlen,
-- const struct fib_nh *nh)
--{
-- const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
-- struct multipath_route *r, *target_route = NULL;
-- struct multipath_dest *d, *target_dest = NULL;
--
-- /* store the weight information for a certain route */
-- spin_lock_bh(&state[state_idx].lock);
--
-- /* find state entry for gateway or add one if necessary */
-- list_for_each_entry_rcu(r, &state[state_idx].head, list) {
-- if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
-- target_route = r;
-- break;
-- }
-- }
--
-- if (!target_route) {
-- const size_t size_rt = sizeof(struct multipath_route);
-- target_route = (struct multipath_route *)
-- kmalloc(size_rt, GFP_ATOMIC);
--
-- target_route->gw = nh->nh_gw;
-- target_route->oif = nh->nh_oif;
-- memset(&target_route->rcu, 0, sizeof(struct rcu_head));
-- INIT_LIST_HEAD(&target_route->dests);
--
-- list_add_rcu(&target_route->list, &state[state_idx].head);
-- }
--
-- /* find state entry for destination or add one if necessary */
-- list_for_each_entry_rcu(d, &target_route->dests, list) {
-- if (d->nh_info == nh) {
-- target_dest = d;
-- break;
-- }
-- }
--
-- if (!target_dest) {
-- const size_t size_dst = sizeof(struct multipath_dest);
-- target_dest = (struct multipath_dest*)
-- kmalloc(size_dst, GFP_ATOMIC);
--
-- target_dest->nh_info = nh;
-- target_dest->network = network;
-- target_dest->netmask = netmask;
-- target_dest->prefixlen = prefixlen;
-- memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
--
-- list_add_rcu(&target_dest->list, &target_route->dests);
-- }
-- /* else: we already stored this info for another destination =>
-- * we are finished
-- */
--
-- spin_unlock_bh(&state[state_idx].lock);
--}
--
--static void __multipath_free(struct rcu_head *head)
--{
-- struct multipath_route *rt = container_of(head, struct multipath_route,
-- rcu);
-- kfree(rt);
--}
--
--static void __multipath_free_dst(struct rcu_head *head)
--{
-- struct multipath_dest *dst = container_of(head,
-- struct multipath_dest,
-- rcu);
-- kfree(dst);
--}
--
--static void wrandom_flush(void)
--{
-- int i;
--
-- /* defere delete to all entries */
-- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
-- struct multipath_route *r;
--
-- spin_lock_bh(&state[i].lock);
-- list_for_each_entry_rcu(r, &state[i].head, list) {
-- struct multipath_dest *d;
-- list_for_each_entry_rcu(d, &r->dests, list) {
-- list_del_rcu(&d->list);
-- call_rcu(&d->rcu,
-- __multipath_free_dst);
-- }
-- list_del_rcu(&r->list);
-- call_rcu(&r->rcu,
-- __multipath_free);
-- }
--
-- spin_unlock_bh(&state[i].lock);
-- }
--}
--
--static struct ip_mp_alg_ops wrandom_ops = {
-- .mp_alg_select_route = wrandom_select_route,
-- .mp_alg_flush = wrandom_flush,
-- .mp_alg_set_nhinfo = wrandom_set_nhinfo,
--};
--
--static int __init wrandom_init(void)
--{
-- wrandom_init_state();
--
-- return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
--}
--
--static void __exit wrandom_exit(void)
--{
-- multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
--}
--
--module_init(wrandom_init);
--module_exit(wrandom_exit);
--MODULE_LICENSE("GPL");
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c linux-2.6.22-590/net/ipv4/netfilter/arp_tables.c
---- linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/arp_tables.c 2008-01-29 22:12:32.000000000 -0500
-@@ -19,6 +19,7 @@
- #include <linux/proc_fs.h>
- #include <linux/module.h>
- #include <linux/init.h>
-+#include <net/sock.h>
-
- #include <asm/uaccess.h>
- #include <linux/mutex.h>
-@@ -773,7 +774,7 @@
- int ret;
- struct arpt_table *t;
-
-- t = xt_find_table_lock(NF_ARP, entries->name);
-+ t = xt_find_table_lock(&init_net, NF_ARP, entries->name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
-@@ -843,7 +844,7 @@
-
- duprintf("arp_tables: Translated table\n");
-
-- t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
-+ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name),
- "arptable_%s", tmp.name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
-@@ -936,7 +937,7 @@
- goto free;
- }
-
-- t = xt_find_table_lock(NF_ARP, tmp.name);
-+ t = xt_find_table_lock(&init_net, NF_ARP, tmp.name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
- goto free;
-@@ -971,6 +972,9 @@
- {
- int ret;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -995,6 +999,9 @@
- {
- int ret;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -1016,7 +1023,7 @@
- }
- name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
-
-- t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
-+ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name),
- "arptable_%s", name);
- if (t && !IS_ERR(t)) {
- struct arpt_getinfo info;
-@@ -1116,7 +1123,7 @@
- return ret;
- }
-
-- ret = xt_register_table(table, &bootstrap, newinfo);
-+ ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-590/net/ipv4/netfilter/arptable_filter.c
---- linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/arptable_filter.c 2008-01-29 22:12:32.000000000 -0500
-@@ -61,6 +61,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return arpt_do_table(pskb, hook, in, out, &packet_filter);
- }
-
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c linux-2.6.22-590/net/ipv4/netfilter/ip_queue.c
---- linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ip_queue.c 2008-01-29 22:12:32.000000000 -0500
-@@ -26,6 +26,7 @@
- #include <linux/mutex.h>
- #include <net/sock.h>
- #include <net/route.h>
-+#include <net/net_namespace.h>
-
- #define IPQ_QMAX_DEFAULT 1024
- #define IPQ_PROC_FS_NAME "ip_queue"
-@@ -556,6 +557,9 @@
- {
- struct net_device *dev = ptr;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- /* Drop any packets associated with the downed device */
- if (event == NETDEV_DOWN)
- ipq_dev_drop(dev->ifindex);
-@@ -575,7 +579,7 @@
- if (event == NETLINK_URELEASE &&
- n->protocol == NETLINK_FIREWALL && n->pid) {
- write_lock_bh(&queue_lock);
-- if (n->pid == peer_pid)
-+ if ((n->net == &init_net) && (n->pid == peer_pid))
- __ipq_reset();
- write_unlock_bh(&queue_lock);
- }
-@@ -667,14 +671,14 @@
- struct proc_dir_entry *proc;
-
- netlink_register_notifier(&ipq_nl_notifier);
-- ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
-- NULL, THIS_MODULE);
-+ ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
-+ ipq_rcv_sk, NULL, THIS_MODULE);
- if (ipqnl == NULL) {
- printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
-
-- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
-+ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
- if (proc)
- proc->owner = THIS_MODULE;
- else {
-@@ -695,8 +699,7 @@
- cleanup_sysctl:
- unregister_sysctl_table(ipq_sysctl_header);
- unregister_netdevice_notifier(&ipq_dev_notifier);
-- proc_net_remove(IPQ_PROC_FS_NAME);
--
-+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
- cleanup_ipqnl:
- sock_release(ipqnl->sk_socket);
- mutex_lock(&ipqnl_mutex);
-@@ -715,7 +718,7 @@
-
- unregister_sysctl_table(ipq_sysctl_header);
- unregister_netdevice_notifier(&ipq_dev_notifier);
-- proc_net_remove(IPQ_PROC_FS_NAME);
-+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
- sock_release(ipqnl->sk_socket);
- mutex_lock(&ipqnl_mutex);
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c linux-2.6.22-590/net/ipv4/netfilter/ip_tables.c
---- linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ip_tables.c 2008-01-29 22:12:32.000000000 -0500
-@@ -1039,7 +1039,7 @@
- }
- #endif
-
--static int get_info(void __user *user, int *len, int compat)
-+static int get_info(struct net *net, void __user *user, int *len, int compat)
- {
- char name[IPT_TABLE_MAXNAMELEN];
- struct xt_table *t;
-@@ -1059,7 +1059,7 @@
- if (compat)
- xt_compat_lock(AF_INET);
- #endif
-- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (t && !IS_ERR(t)) {
- struct ipt_getinfo info;
-@@ -1099,7 +1099,7 @@
- }
-
- static int
--get_entries(struct ipt_get_entries __user *uptr, int *len)
-+get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
- {
- int ret;
- struct ipt_get_entries get;
-@@ -1119,7 +1119,7 @@
- return -EINVAL;
- }
-
-- t = xt_find_table_lock(AF_INET, get.name);
-+ t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
-@@ -1142,7 +1142,7 @@
- }
-
- static int
--__do_replace(const char *name, unsigned int valid_hooks,
-+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
- struct xt_table_info *newinfo, unsigned int num_counters,
- void __user *counters_ptr)
- {
-@@ -1159,7 +1159,7 @@
- goto out;
- }
-
-- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
-+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
- "iptable_%s", name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
-@@ -1211,7 +1211,7 @@
- }
-
- static int
--do_replace(void __user *user, unsigned int len)
-+do_replace(struct net *net, void __user *user, unsigned int len)
- {
- int ret;
- struct ipt_replace tmp;
-@@ -1252,7 +1252,7 @@
-
- duprintf("ip_tables: Translated table\n");
-
-- ret = __do_replace(tmp.name, tmp.valid_hooks,
-+ ret = __do_replace(net, tmp.name, tmp.valid_hooks,
- newinfo, tmp.num_counters,
- tmp.counters);
- if (ret)
-@@ -1289,7 +1289,7 @@
- }
-
- static int
--do_add_counters(void __user *user, unsigned int len, int compat)
-+do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
- {
- unsigned int i;
- struct xt_counters_info tmp;
-@@ -1341,7 +1341,7 @@
- goto free;
- }
-
-- t = xt_find_table_lock(AF_INET, name);
-+ t = xt_find_table_lock(net, AF_INET, name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
- goto free;
-@@ -1745,7 +1745,7 @@
- }
-
- static int
--compat_do_replace(void __user *user, unsigned int len)
-+compat_do_replace(struct net *net, void __user *user, unsigned int len)
- {
- int ret;
- struct compat_ipt_replace tmp;
-@@ -1786,7 +1786,7 @@
-
- duprintf("compat_do_replace: Translated table\n");
-
-- ret = __do_replace(tmp.name, tmp.valid_hooks,
-+ ret = __do_replace(net, tmp.name, tmp.valid_hooks,
- newinfo, tmp.num_counters,
- compat_ptr(tmp.counters));
- if (ret)
-@@ -1811,11 +1811,11 @@
-
- switch (cmd) {
- case IPT_SO_SET_REPLACE:
-- ret = compat_do_replace(user, len);
-+ ret = compat_do_replace(sk->sk_net, user, len);
- break;
-
- case IPT_SO_SET_ADD_COUNTERS:
-- ret = do_add_counters(user, len, 1);
-+ ret = do_add_counters(sk->sk_net, user, len, 1);
- break;
-
- default:
-@@ -1904,7 +1904,7 @@
- }
-
- static int
--compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
-+compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len)
- {
- int ret;
- struct compat_ipt_get_entries get;
-@@ -1928,7 +1928,7 @@
- }
-
- xt_compat_lock(AF_INET);
-- t = xt_find_table_lock(AF_INET, get.name);
-+ t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- struct xt_table_info info;
-@@ -1966,10 +1966,10 @@
-
- switch (cmd) {
- case IPT_SO_GET_INFO:
-- ret = get_info(user, len, 1);
-+ ret = get_info(sk->sk_net, user, len, 1);
- break;
- case IPT_SO_GET_ENTRIES:
-- ret = compat_get_entries(user, len);
-+ ret = compat_get_entries(sk->sk_net, user, len);
- break;
- default:
- ret = do_ipt_get_ctl(sk, cmd, user, len);
-@@ -1988,11 +1988,11 @@
-
- switch (cmd) {
- case IPT_SO_SET_REPLACE:
-- ret = do_replace(user, len);
-+ ret = do_replace(sk->sk_net, user, len);
- break;
-
- case IPT_SO_SET_ADD_COUNTERS:
-- ret = do_add_counters(user, len, 0);
-+ ret = do_add_counters(sk->sk_net, user, len, 0);
- break;
-
- default:
-@@ -2013,11 +2013,11 @@
-
- switch (cmd) {
- case IPT_SO_GET_INFO:
-- ret = get_info(user, len, 0);
-+ ret = get_info(sk->sk_net, user, len, 0);
- break;
-
- case IPT_SO_GET_ENTRIES:
-- ret = get_entries(user, len);
-+ ret = get_entries(sk->sk_net, user, len);
- break;
-
- case IPT_SO_GET_REVISION_MATCH:
-@@ -2054,7 +2054,7 @@
- return ret;
- }
-
--int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
-+int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl)
- {
- int ret;
- struct xt_table_info *newinfo;
-@@ -2082,7 +2082,7 @@
- return ret;
- }
-
-- ret = xt_register_table(table, &bootstrap, newinfo);
-+ ret = xt_register_table(net, table, &bootstrap, newinfo);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-590/net/ipv4/netfilter/ipt_CLUSTERIP.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_CLUSTERIP.c 2008-01-29 22:12:32.000000000 -0500
-@@ -27,6 +27,7 @@
- #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
- #include <net/netfilter/nf_conntrack.h>
- #include <net/checksum.h>
-+#include <net/net_namespace.h>
-
- #define CLUSTERIP_VERSION "0.8"
-
-@@ -427,7 +428,7 @@
- return 0;
- }
-
-- dev = dev_get_by_name(e->ip.iniface);
-+ dev = dev_get_by_name(&init_net, e->ip.iniface);
- if (!dev) {
- printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
- return 0;
-@@ -523,6 +524,10 @@
- struct arp_payload *payload;
- struct clusterip_config *c;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* we don't care about non-ethernet and non-ipv4 ARP */
- if (arp->ar_hrd != htons(ARPHRD_ETHER)
- || arp->ar_pro != htons(ETH_P_IP)
-@@ -735,7 +740,7 @@
- goto cleanup_target;
-
- #ifdef CONFIG_PROC_FS
-- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
-+ clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
- if (!clusterip_procdir) {
- printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
- ret = -ENOMEM;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-590/net/ipv4/netfilter/ipt_MASQUERADE.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-01-29 22:12:32.000000000 -0500
-@@ -131,6 +131,9 @@
- {
- struct net_device *dev = ptr;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- if (event == NETDEV_DOWN) {
- /* Device was downed. Search entire table for
- conntracks which were associated with that device,
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-590/net/ipv4/netfilter/ipt_REJECT.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_REJECT.c 2008-01-29 22:12:32.000000000 -0500
-@@ -137,7 +137,7 @@
- )
- addr_type = RTN_LOCAL;
-
-- if (ip_route_me_harder(&nskb, addr_type))
-+ if (ip_route_me_harder(&init_net, &nskb, addr_type))
- goto free_nskb;
-
- nskb->ip_summed = CHECKSUM_NONE;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-590/net/ipv4/netfilter/ipt_ULOG.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_ULOG.c 2008-01-29 22:12:32.000000000 -0500
-@@ -419,7 +419,8 @@
- for (i = 0; i < ULOG_MAXNLGROUPS; i++)
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
-
-- nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
-+ nflognl = netlink_kernel_create(&init_net,
-+ NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- NULL, THIS_MODULE);
- if (!nflognl)
- return -ENOMEM;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-590/net/ipv4/netfilter/ipt_addrtype.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_addrtype.c 2008-01-29 22:12:32.000000000 -0500
-@@ -24,7 +24,7 @@
-
- static inline int match_type(__be32 addr, u_int16_t mask)
- {
-- return !!(mask & (1 << inet_addr_type(addr)));
-+ return !!(mask & (1 << inet_addr_type(&init_net, addr)));
- }
-
- static int match(const struct sk_buff *skb,
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-590/net/ipv4/netfilter/ipt_recent.c
---- linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/ipt_recent.c 2008-01-29 22:12:32.000000000 -0500
-@@ -24,6 +24,7 @@
- #include <linux/bitops.h>
- #include <linux/skbuff.h>
- #include <linux/inet.h>
-+#include <net/net_namespace.h>
-
- #include <linux/netfilter/x_tables.h>
- #include <linux/netfilter_ipv4/ipt_recent.h>
-@@ -485,7 +486,7 @@
- #ifdef CONFIG_PROC_FS
- if (err)
- return err;
-- proc_dir = proc_mkdir("ipt_recent", proc_net);
-+ proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
- if (proc_dir == NULL) {
- xt_unregister_match(&recent_match);
- err = -ENOMEM;
-@@ -499,7 +500,7 @@
- BUG_ON(!list_empty(&tables));
- xt_unregister_match(&recent_match);
- #ifdef CONFIG_PROC_FS
-- remove_proc_entry("ipt_recent", proc_net);
-+ remove_proc_entry("ipt_recent", init_net.proc_net);
- #endif
- }
-
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-590/net/ipv4/netfilter/iptable_filter.c
---- linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/iptable_filter.c 2008-01-29 22:12:32.000000000 -0500
-@@ -26,7 +26,7 @@
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
--} initial_table __initdata = {
-+} initial_table = {
- .repl = {
- .name = "filter",
- .valid_hooks = FILTER_VALID_HOOKS,
-@@ -51,7 +51,7 @@
- .term = IPT_ERROR_INIT, /* ERROR */
- };
-
--static struct xt_table packet_filter = {
-+static struct xt_table ip_packet_filter_dflt = {
- .name = "filter",
- .valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
-@@ -67,7 +67,9 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-- return ipt_do_table(pskb, hook, in, out, &packet_filter);
-+ struct net *net = (in?in:out)->nd_net;
-+
-+ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
- }
-
- static unsigned int
-@@ -77,6 +79,8 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ struct net *net = (in?in:out)->nd_net;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
-@@ -86,7 +90,7 @@
- return NF_ACCEPT;
- }
-
-- return ipt_do_table(pskb, hook, in, out, &packet_filter);
-+ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
- }
-
- static struct nf_hook_ops ipt_ops[] = {
-@@ -117,6 +121,30 @@
- static int forward = NF_ACCEPT;
- module_param(forward, bool, 0000);
-
-+static int iptable_filter_net_init(struct net *net)
-+{
-+ /* Allocate the table */
-+ net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt,
-+ sizeof(*net->ip_packet_filter),
-+ GFP_KERNEL);
-+ if (!net->ip_packet_filter)
-+ return -ENOMEM;
-+
-+ /* Register table */
-+ return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl);
-+}
-+
-+static void iptable_filter_net_exit(struct net *net)
-+{
-+ ipt_unregister_table(net->ip_packet_filter);
-+ kfree(net->ip_packet_filter);
-+}
-+
-+static struct pernet_operations iptable_filter_net_ops = {
-+ .init = iptable_filter_net_init,
-+ .exit = iptable_filter_net_exit,
-+};
-+
- static int __init iptable_filter_init(void)
- {
- int ret;
-@@ -130,7 +158,7 @@
- initial_table.entries[1].target.verdict = -forward - 1;
-
- /* Register table */
-- ret = ipt_register_table(&packet_filter, &initial_table.repl);
-+ ret = register_pernet_subsys(&iptable_filter_net_ops);
- if (ret < 0)
- return ret;
-
-@@ -142,14 +170,14 @@
- return ret;
-
- cleanup_table:
-- ipt_unregister_table(&packet_filter);
-+ unregister_pernet_subsys(&iptable_filter_net_ops);
- return ret;
- }
-
- static void __exit iptable_filter_fini(void)
- {
- nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
-- ipt_unregister_table(&packet_filter);
-+ unregister_pernet_subsys(&iptable_filter_net_ops);
- }
-
- module_init(iptable_filter_init);
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-590/net/ipv4/netfilter/iptable_mangle.c
---- linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/iptable_mangle.c 2008-01-29 22:12:32.000000000 -0500
-@@ -80,6 +80,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return ipt_do_table(pskb, hook, in, out, &packet_mangler);
- }
-
-@@ -96,6 +100,10 @@
- __be32 saddr, daddr;
- u_int32_t mark;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
-@@ -121,7 +129,7 @@
- iph->daddr != daddr ||
- (*pskb)->mark != mark ||
- iph->tos != tos)
-- if (ip_route_me_harder(pskb, RTN_UNSPEC))
-+ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
- ret = NF_DROP;
- }
-
-@@ -171,7 +179,7 @@
- int ret;
-
- /* Register table */
-- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
-+ ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl);
- if (ret < 0)
- return ret;
-
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-590/net/ipv4/netfilter/iptable_raw.c
---- linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/iptable_raw.c 2008-01-29 22:12:32.000000000 -0500
-@@ -52,6 +52,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return ipt_do_table(pskb, hook, in, out, &packet_raw);
- }
-
-@@ -96,7 +100,7 @@
- int ret;
-
- /* Register table */
-- ret = ipt_register_table(&packet_raw, &initial_table.repl);
-+ ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl);
- if (ret < 0)
- return ret;
-
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.22-590/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
---- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2008-01-29 22:12:32.000000000 -0500
-@@ -120,6 +120,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
- }
-@@ -135,6 +139,10 @@
- struct nf_conn_help *help;
- struct nf_conntrack_helper *helper;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
-@@ -157,6 +165,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
- if ((*pskb)->nfct)
-@@ -180,6 +192,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return nf_conntrack_in(PF_INET, hooknum, pskb);
- }
-
-@@ -189,6 +205,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
-@@ -325,6 +345,9 @@
- struct nf_conntrack_tuple_hash *h;
- struct nf_conntrack_tuple tuple;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- NF_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.u3.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-590/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
---- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2008-01-29 22:12:32.000000000 -0500
-@@ -11,6 +11,7 @@
- #include <linux/proc_fs.h>
- #include <linux/seq_file.h>
- #include <linux/percpu.h>
-+#include <net/net_namespace.h>
-
- #include <linux/netfilter.h>
- #include <net/netfilter/nf_conntrack_core.h>
-@@ -378,16 +379,16 @@
- {
- struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-
-- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
-+ proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops);
- if (!proc)
- goto err1;
-
-- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
-+ proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440,
- &ip_exp_file_ops);
- if (!proc_exp)
- goto err2;
-
-- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-+ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat);
- if (!proc_stat)
- goto err3;
-
-@@ -397,16 +398,16 @@
- return 0;
-
- err3:
-- proc_net_remove("ip_conntrack_expect");
-+ proc_net_remove(&init_net, "ip_conntrack_expect");
- err2:
-- proc_net_remove("ip_conntrack");
-+ proc_net_remove(&init_net, "ip_conntrack");
- err1:
- return -ENOMEM;
- }
-
- void __exit nf_conntrack_ipv4_compat_fini(void)
- {
-- remove_proc_entry("ip_conntrack", proc_net_stat);
-- proc_net_remove("ip_conntrack_expect");
-- proc_net_remove("ip_conntrack");
-+ remove_proc_entry("ip_conntrack", init_net.proc_net_stat);
-+ proc_net_remove(&init_net, "ip_conntrack_expect");
-+ proc_net_remove(&init_net, "ip_conntrack");
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.22-590/net/ipv4/netfilter/nf_nat_helper.c
---- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/nf_nat_helper.c 2008-01-29 22:12:32.000000000 -0500
-@@ -178,7 +178,7 @@
- datalen = (*pskb)->len - iph->ihl*4;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- if (!(rt->rt_flags & RTCF_LOCAL) &&
-- (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
-+ (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
-@@ -265,7 +265,7 @@
-
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
- if (!(rt->rt_flags & RTCF_LOCAL) &&
-- (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
-+ (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-590/net/ipv4/netfilter/nf_nat_rule.c
---- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/nf_nat_rule.c 2008-01-29 22:12:32.000000000 -0500
-@@ -98,7 +98,10 @@
- static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
- {
- static int warned = 0;
-- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
-+ struct flowi fl = {
-+ .fl_net = &init_net,
-+ .nl_u = { .ip4_u = { .daddr = dstip } }
-+ };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
-@@ -252,7 +255,7 @@
- {
- int ret;
-
-- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-+ ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl);
- if (ret != 0)
- return ret;
- ret = xt_register_target(&ipt_snat_reg);
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-590/net/ipv4/netfilter/nf_nat_standalone.c
---- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter/nf_nat_standalone.c 2008-01-29 22:12:32.000000000 -0500
-@@ -83,6 +83,10 @@
- /* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* We never see fragments: conntrack defrags on pre-routing
- and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
-@@ -172,6 +176,10 @@
- unsigned int ret;
- __be32 daddr = ip_hdr(*pskb)->daddr;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(*pskb)->daddr) {
-@@ -194,6 +202,10 @@
- #endif
- unsigned int ret;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
-@@ -227,6 +239,10 @@
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
-@@ -239,7 +255,7 @@
-
- if (ct->tuplehash[dir].tuple.dst.u3.ip !=
- ct->tuplehash[!dir].tuple.src.u3.ip) {
-- if (ip_route_me_harder(pskb, RTN_UNSPEC))
-+ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
- ret = NF_DROP;
- }
- #ifdef CONFIG_XFRM
-@@ -262,6 +278,10 @@
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- ct = nf_ct_get(*pskb, &ctinfo);
- if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- DEBUGP("nf_nat_standalone: adjusting sequence number\n");
-diff -Nurb linux-2.6.22-570/net/ipv4/netfilter.c linux-2.6.22-590/net/ipv4/netfilter.c
---- linux-2.6.22-570/net/ipv4/netfilter.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/netfilter.c 2008-01-29 22:12:32.000000000 -0500
-@@ -8,7 +8,7 @@
- #include <net/ip.h>
-
- /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
--int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
-+int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type)
- {
- const struct iphdr *iph = ip_hdr(*pskb);
- struct rtable *rt;
-@@ -17,7 +17,8 @@
- unsigned int hh_len;
- unsigned int type;
-
-- type = inet_addr_type(iph->saddr);
-+ fl.fl_net = net;
-+ type = inet_addr_type(net, iph->saddr);
- if (addr_type == RTN_UNSPEC)
- addr_type = type;
-
-@@ -155,12 +156,13 @@
- const struct ip_rt_info *rt_info = nf_info_reroute(info);
-
- if (info->hook == NF_IP_LOCAL_OUT) {
-+ struct net *net = (info->indev?info->indev:info->outdev)->nd_net;
- const struct iphdr *iph = ip_hdr(*pskb);
-
- if (!(iph->tos == rt_info->tos
- && iph->daddr == rt_info->daddr
- && iph->saddr == rt_info->saddr))
-- return ip_route_me_harder(pskb, RTN_UNSPEC);
-+ return ip_route_me_harder(net, pskb, RTN_UNSPEC);
- }
- return 0;
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/proc.c linux-2.6.22-590/net/ipv4/proc.c
---- linux-2.6.22-570/net/ipv4/proc.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/proc.c 2008-01-29 22:12:32.000000000 -0500
-@@ -44,6 +44,7 @@
- #include <linux/seq_file.h>
- #include <net/sock.h>
- #include <net/raw.h>
-+#include <net/net_namespace.h>
-
- static int fold_prot_inuse(struct proto *proto)
- {
-@@ -69,8 +70,9 @@
- seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
- seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
- seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
-- seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
-- atomic_read(&ip_frag_mem));
-+ seq_printf(seq, "FRAG: inuse %d memory %d\n",
-+ init_net.ip_frag_nqueues,
-+ atomic_read(&init_net.ip_frag_mem));
- return 0;
- }
-
-@@ -260,7 +262,8 @@
- seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
-
- seq_printf(seq, "\nIp: %d %d",
-- IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
-+ IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
-+ init_net.sysctl_ip_default_ttl);
-
- for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
- seq_printf(seq, " %lu",
-@@ -380,20 +383,20 @@
- {
- int rc = 0;
-
-- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
- goto out_netstat;
-
-- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
- goto out_snmp;
-
-- if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
- goto out_sockstat;
- out:
- return rc;
- out_sockstat:
-- proc_net_remove("snmp");
-+ proc_net_remove(&init_net, "snmp");
- out_snmp:
-- proc_net_remove("netstat");
-+ proc_net_remove(&init_net, "netstat");
- out_netstat:
- rc = -ENOMEM;
- goto out;
-diff -Nurb linux-2.6.22-570/net/ipv4/raw.c linux-2.6.22-590/net/ipv4/raw.c
---- linux-2.6.22-570/net/ipv4/raw.c 2008-01-29 22:12:24.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/raw.c 2008-01-29 22:12:32.000000000 -0500
-@@ -73,6 +73,7 @@
- #include <net/inet_common.h>
- #include <net/checksum.h>
- #include <net/xfrm.h>
-+#include <net/net_namespace.h>
- #include <linux/rtnetlink.h>
- #include <linux/proc_fs.h>
- #include <linux/seq_file.h>
-@@ -101,7 +102,7 @@
- write_unlock_bh(&raw_v4_lock);
- }
-
--struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
-+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num,
- __be32 raddr, __be32 laddr,
- int dif, int tag)
- {
-@@ -110,6 +111,9 @@
- sk_for_each_from(sk, node) {
- struct inet_sock *inet = inet_sk(sk);
-
-+ if (sk->sk_net != net)
-+ continue;
-+
- if (inet->num == num &&
- !(inet->daddr && inet->daddr != raddr) &&
- (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) &&
-@@ -152,6 +156,7 @@
- */
- int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
- {
-+ struct net *net = skb->dev->nd_net;
- struct sock *sk;
- struct hlist_head *head;
- int delivered = 0;
-@@ -160,7 +165,7 @@
- head = &raw_v4_htable[hash];
- if (hlist_empty(head))
- goto out;
-- sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
-+ sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex, skb->skb_tag);
-
-@@ -173,7 +178,7 @@
- if (clone)
- raw_rcv(sk, clone);
- }
-- sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
-+ sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
- iph->saddr, iph->daddr,
- skb->dev->ifindex, skb->skb_tag);
- }
-@@ -484,7 +489,8 @@
- }
-
- {
-- struct flowi fl = { .oif = ipc.oif,
-+ struct flowi fl = { .fl_net = sk->sk_net,
-+ .oif = ipc.oif,
- .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = saddr,
-@@ -574,7 +580,7 @@
- if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
- goto out;
- v4_map_sock_addr(inet, addr, &nsa);
-- chk_addr_ret = inet_addr_type(nsa.saddr);
-+ chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr);
- ret = -EADDRNOTAVAIL;
- if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
- chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
-@@ -798,6 +804,7 @@
-
- #ifdef CONFIG_PROC_FS
- struct raw_iter_state {
-+ struct net *net;
- int bucket;
- };
-
-@@ -811,11 +818,14 @@
- for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
- struct hlist_node *node;
-
-- sk_for_each(sk, node, &raw_v4_htable[state->bucket])
-+ sk_for_each(sk, node, &raw_v4_htable[state->bucket]) {
-+ if (sk->sk_net != state->net)
-+ continue;
- if (sk->sk_family == PF_INET &&
- nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
- goto found;
- }
-+ }
- sk = NULL;
- found:
- return sk;
-@@ -830,7 +840,7 @@
- try_again:
- ;
- } while (sk && (sk->sk_family != PF_INET ||
-- !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
-+ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net)));
-
- if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
- sk = sk_head(&raw_v4_htable[state->bucket]);
-@@ -933,6 +943,7 @@
- seq = file->private_data;
- seq->private = s;
- memset(s, 0, sizeof(*s));
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -940,23 +951,46 @@
- goto out;
- }
-
-+static int raw_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct raw_iter_state *state = seq->private;
-+ put_net(state->net);
-+ return seq_release_private(inode, file);
-+}
-+
- static const struct file_operations raw_seq_fops = {
- .owner = THIS_MODULE,
- .open = raw_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = seq_release_private,
-+ .release = raw_seq_release,
- };
-
--int __init raw_proc_init(void)
-+static int raw_proc_net_init(struct net *net)
- {
-- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
-+ if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
- return -ENOMEM;
- return 0;
- }
-
-+static void raw_proc_net_exit(struct net *net)
-+{
-+ proc_net_remove(net, "raw");
-+}
-+
-+static struct pernet_operations raw_proc_net_ops = {
-+ .init = raw_proc_net_init,
-+ .exit = raw_proc_net_exit,
-+};
-+
-+int __init raw_proc_init(void)
-+{
-+ return register_pernet_subsys(&raw_proc_net_ops);
-+}
-+
- void __init raw_proc_exit(void)
- {
-- proc_net_remove("raw");
-+ unregister_pernet_subsys(&raw_proc_net_ops);
- }
- #endif /* CONFIG_PROC_FS */
-diff -Nurb linux-2.6.22-570/net/ipv4/route.c linux-2.6.22-590/net/ipv4/route.c
---- linux-2.6.22-570/net/ipv4/route.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/route.c 2008-01-29 22:12:32.000000000 -0500
-@@ -101,8 +101,8 @@
- #include <net/tcp.h>
- #include <net/icmp.h>
- #include <net/xfrm.h>
--#include <net/ip_mp_alg.h>
- #include <net/netevent.h>
-+#include <net/net_namespace.h>
- #include <net/rtnetlink.h>
- #ifdef CONFIG_SYSCTL
- #include <linux/sysctl.h>
-@@ -266,6 +266,7 @@
-
- #ifdef CONFIG_PROC_FS
- struct rt_cache_iter_state {
-+ struct net *net;
- int bucket;
- };
-
-@@ -334,6 +335,7 @@
-
- static int rt_cache_seq_show(struct seq_file *seq, void *v)
- {
-+ struct rt_cache_iter_state *st = seq->private;
- if (v == SEQ_START_TOKEN)
- seq_printf(seq, "%-127s\n",
- "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
-@@ -343,6 +345,9 @@
- struct rtable *r = v;
- char temp[256];
-
-+ if (r->fl.fl_net != st->net)
-+ return 0;
-+
- sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
- "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
- r->u.dst.dev ? r->u.dst.dev->name : "*",
-@@ -385,6 +390,7 @@
- seq = file->private_data;
- seq->private = s;
- memset(s, 0, sizeof(*s));
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -392,12 +398,20 @@
- goto out;
- }
-
-+static int rt_cache_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct rt_cache_iter_state *st = seq->private;
-+ put_net(st->net);
-+ return seq_release_private(inode, file);
-+}
-+
- static const struct file_operations rt_cache_seq_fops = {
- .owner = THIS_MODULE,
- .open = rt_cache_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
-- .release = seq_release_private,
-+ .release = rt_cache_seq_release,
- };
-
-
-@@ -495,13 +509,11 @@
-
- static __inline__ void rt_free(struct rtable *rt)
- {
-- multipath_remove(rt);
- call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
- }
-
- static __inline__ void rt_drop(struct rtable *rt)
- {
-- multipath_remove(rt);
- ip_rt_put(rt);
- call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
- }
-@@ -565,61 +577,16 @@
-
- static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
- {
-- return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
-+ return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
- (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
- (fl1->mark ^ fl2->mark) |
- (*(u16 *)&fl1->nl_u.ip4_u.tos ^
- *(u16 *)&fl2->nl_u.ip4_u.tos) |
- (fl1->oif ^ fl2->oif) |
-- (fl1->iif ^ fl2->iif)) == 0;
-+ (fl1->iif ^ fl2->iif)) == 0) &&
-+ fl1->fl_net == fl2->fl_net;
- }
-
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
--static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
-- struct rtable *expentry,
-- int *removed_count)
--{
-- int passedexpired = 0;
-- struct rtable **nextstep = NULL;
-- struct rtable **rthp = chain_head;
-- struct rtable *rth;
--
-- if (removed_count)
-- *removed_count = 0;
--
-- while ((rth = *rthp) != NULL) {
-- if (rth == expentry)
-- passedexpired = 1;
--
-- if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 &&
-- compare_keys(&(*rthp)->fl, &expentry->fl)) {
-- if (*rthp == expentry) {
-- *rthp = rth->u.dst.rt_next;
-- continue;
-- } else {
-- *rthp = rth->u.dst.rt_next;
-- rt_free(rth);
-- if (removed_count)
-- ++(*removed_count);
-- }
-- } else {
-- if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
-- passedexpired && !nextstep)
-- nextstep = &rth->u.dst.rt_next;
--
-- rthp = &rth->u.dst.rt_next;
-- }
-- }
--
-- rt_free(expentry);
-- if (removed_count)
-- ++(*removed_count);
--
-- return nextstep;
--}
--#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
--
--
- /* This runs via a timer and thus is always in BH context. */
- static void rt_check_expire(unsigned long dummy)
- {
-@@ -658,23 +625,9 @@
- }
-
- /* Cleanup aged off entries. */
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- /* remove all related balanced entries if necessary */
-- if (rth->u.dst.flags & DST_BALANCED) {
-- rthp = rt_remove_balanced_route(
-- &rt_hash_table[i].chain,
-- rth, NULL);
-- if (!rthp)
-- break;
-- } else {
- *rthp = rth->u.dst.rt_next;
- rt_free(rth);
- }
--#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- *rthp = rth->u.dst.rt_next;
-- rt_free(rth);
--#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- }
- spin_unlock(rt_hash_lock_addr(i));
-
- /* Fallback loop breaker. */
-@@ -721,9 +674,6 @@
- if (delay < 0)
- delay = ip_rt_min_delay;
-
-- /* flush existing multipath state*/
-- multipath_flush();
--
- spin_lock_bh(&rt_flush_lock);
-
- if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
-@@ -842,31 +792,10 @@
- rthp = &rth->u.dst.rt_next;
- continue;
- }
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- /* remove all related balanced entries
-- * if necessary
-- */
-- if (rth->u.dst.flags & DST_BALANCED) {
-- int r;
--
-- rthp = rt_remove_balanced_route(
-- &rt_hash_table[k].chain,
-- rth,
-- &r);
-- goal -= r;
-- if (!rthp)
-- break;
-- } else {
- *rthp = rth->u.dst.rt_next;
- rt_free(rth);
- goal--;
- }
--#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- *rthp = rth->u.dst.rt_next;
-- rt_free(rth);
-- goal--;
--#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- }
- spin_unlock_bh(rt_hash_lock_addr(k));
- if (goal <= 0)
- break;
-@@ -939,12 +868,7 @@
-
- spin_lock_bh(rt_hash_lock_addr(hash));
- while ((rth = *rthp) != NULL) {
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- if (!(rth->u.dst.flags & DST_BALANCED) &&
-- compare_keys(&rth->fl, &rt->fl)) {
--#else
- if (compare_keys(&rth->fl, &rt->fl)) {
--#endif
- /* Put it first */
- *rthp = rth->u.dst.rt_next;
- /*
-@@ -1055,7 +979,7 @@
- static DEFINE_SPINLOCK(rt_peer_lock);
- struct inet_peer *peer;
-
-- peer = inet_getpeer(rt->rt_dst, create);
-+ peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create);
-
- spin_lock_bh(&rt_peer_lock);
- if (rt->peer == NULL) {
-@@ -1148,7 +1072,7 @@
- if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
- goto reject_redirect;
- } else {
-- if (inet_addr_type(new_gw) != RTN_UNICAST)
-+ if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST)
- goto reject_redirect;
- }
-
-@@ -1189,6 +1113,7 @@
-
- /* Copy all the information. */
- *rt = *rth;
-+ hold_net(rt->fl.fl_net);
- INIT_RCU_HEAD(&rt->u.dst.rcu_head);
- rt->u.dst.__use = 1;
- atomic_set(&rt->u.dst.__refcnt, 1);
-@@ -1407,7 +1332,7 @@
- __be32 daddr = iph->daddr;
- unsigned short est_mtu = 0;
-
-- if (ipv4_config.no_pmtu_disc)
-+ if (init_net.sysctl_ipv4_no_pmtu_disc)
- return 0;
-
- for (i = 0; i < 2; i++) {
-@@ -1489,6 +1414,7 @@
- rt->idev = NULL;
- in_dev_put(idev);
- }
-+ release_net(rt->fl.fl_net);
- }
-
- static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-@@ -1496,8 +1422,9 @@
- {
- struct rtable *rt = (struct rtable *) dst;
- struct in_device *idev = rt->idev;
-- if (dev != &loopback_dev && idev && idev->dev == dev) {
-- struct in_device *loopback_idev = in_dev_get(&loopback_dev);
-+ struct net *net = dev->nd_net;
-+ if (dev != &net->loopback_dev && idev && idev->dev == dev) {
-+ struct in_device *loopback_idev = in_dev_get(&net->loopback_dev);
- if (loopback_idev) {
- rt->idev = loopback_idev;
- in_dev_put(idev);
-@@ -1584,7 +1511,7 @@
- rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
-
- if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
-- rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-+ rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl;
- if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
- rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
- if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
-@@ -1605,6 +1532,7 @@
- static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
- {
-+ struct net *net = dev->nd_net;
- unsigned hash;
- struct rtable *rth;
- __be32 spec_dst;
-@@ -1638,6 +1566,7 @@
- rth->u.dst.flags= DST_HOST;
- if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
- rth->u.dst.flags |= DST_NOPOLICY;
-+ rth->fl.fl_net = hold_net(net);
- rth->fl.fl4_dst = daddr;
- rth->rt_dst = daddr;
- rth->fl.fl4_tos = tos;
-@@ -1649,7 +1578,7 @@
- #endif
- rth->rt_iif =
- rth->fl.iif = dev->ifindex;
-- rth->u.dst.dev = &loopback_dev;
-+ rth->u.dst.dev = &net->loopback_dev;
- dev_hold(rth->u.dst.dev);
- rth->idev = in_dev_get(rth->u.dst.dev);
- rth->fl.oif = 0;
-@@ -1774,14 +1703,11 @@
-
- atomic_set(&rth->u.dst.__refcnt, 1);
- rth->u.dst.flags= DST_HOST;
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- if (res->fi->fib_nhs > 1)
-- rth->u.dst.flags |= DST_BALANCED;
--#endif
- if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
- rth->u.dst.flags |= DST_NOPOLICY;
- if (IN_DEV_CONF_GET(out_dev, NOXFRM))
- rth->u.dst.flags |= DST_NOXFRM;
-+ rth->fl.fl_net = hold_net(in_dev->dev->nd_net);
- rth->fl.fl4_dst = daddr;
- rth->rt_dst = daddr;
- rth->fl.fl4_tos = tos;
-@@ -1812,7 +1738,7 @@
- return err;
- }
-
--static inline int ip_mkroute_input_def(struct sk_buff *skb,
-+static inline int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result* res,
- const struct flowi *fl,
- struct in_device *in_dev,
-@@ -1837,63 +1763,6 @@
- return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
- }
-
--static inline int ip_mkroute_input(struct sk_buff *skb,
-- struct fib_result* res,
-- const struct flowi *fl,
-- struct in_device *in_dev,
-- __be32 daddr, __be32 saddr, u32 tos)
--{
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- struct rtable* rth = NULL, *rtres;
-- unsigned char hop, hopcount;
-- int err = -EINVAL;
-- unsigned int hash;
--
-- if (res->fi)
-- hopcount = res->fi->fib_nhs;
-- else
-- hopcount = 1;
--
-- /* distinguish between multipath and singlepath */
-- if (hopcount < 2)
-- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
-- saddr, tos);
--
-- /* add all alternatives to the routing cache */
-- for (hop = 0; hop < hopcount; hop++) {
-- res->nh_sel = hop;
--
-- /* put reference to previous result */
-- if (hop)
-- ip_rt_put(rtres);
--
-- /* create a routing cache entry */
-- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
-- &rth);
-- if (err)
-- return err;
--
-- /* put it into the cache */
-- hash = rt_hash(daddr, saddr, fl->iif);
-- err = rt_intern_hash(hash, rth, &rtres);
-- if (err)
-- return err;
--
-- /* forward hop information to multipath impl. */
-- multipath_set_nhinfo(rth,
-- FIB_RES_NETWORK(*res),
-- FIB_RES_NETMASK(*res),
-- res->prefixlen,
-- &FIB_RES_NH(*res));
-- }
-- skb->dst = &rtres->u.dst;
-- return err;
--#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
--#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
--}
--
--
- /*
- * NOTE. We drop all the packets that has local source
- * addresses, because every properly looped back packet
-@@ -1907,9 +1776,11 @@
- static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
- {
-+ struct net *net = dev->nd_net;
- struct fib_result res;
- struct in_device *in_dev = in_dev_get(dev);
-- struct flowi fl = { .nl_u = { .ip4_u =
-+ struct flowi fl = { .fl_net = net,
-+ .nl_u = { .ip4_u =
- { .daddr = daddr,
- .saddr = saddr,
- .tos = tos,
-@@ -1967,7 +1838,7 @@
- if (res.type == RTN_LOCAL) {
- int result;
- result = fib_validate_source(saddr, daddr, tos,
-- loopback_dev.ifindex,
-+ net->loopback_dev.ifindex,
- dev, &spec_dst, &itag);
- if (result < 0)
- goto martian_source;
-@@ -2023,6 +1894,7 @@
- rth->u.dst.flags= DST_HOST;
- if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
- rth->u.dst.flags |= DST_NOPOLICY;
-+ rth->fl.fl_net = hold_net(net);
- rth->fl.fl4_dst = daddr;
- rth->rt_dst = daddr;
- rth->fl.fl4_tos = tos;
-@@ -2034,7 +1906,7 @@
- #endif
- rth->rt_iif =
- rth->fl.iif = dev->ifindex;
-- rth->u.dst.dev = &loopback_dev;
-+ rth->u.dst.dev = &net->loopback_dev;
- dev_hold(rth->u.dst.dev);
- rth->idev = in_dev_get(rth->u.dst.dev);
- rth->rt_gateway = daddr;
-@@ -2092,6 +1964,7 @@
- struct rtable * rth;
- unsigned hash;
- int iif = dev->ifindex;
-+ struct net *net = dev->nd_net;
-
- tos &= IPTOS_RT_MASK;
- hash = rt_hash(daddr, saddr, iif);
-@@ -2104,7 +1977,8 @@
- rth->fl.iif == iif &&
- rth->fl.oif == 0 &&
- rth->fl.mark == skb->mark &&
-- rth->fl.fl4_tos == tos) {
-+ rth->fl.fl4_tos == tos &&
-+ rth->fl.fl_net == net) {
- rth->u.dst.lastuse = jiffies;
- dst_hold(&rth->u.dst);
- rth->u.dst.__use++;
-@@ -2211,18 +2085,12 @@
-
- atomic_set(&rth->u.dst.__refcnt, 1);
- rth->u.dst.flags= DST_HOST;
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- if (res->fi) {
-- rth->rt_multipath_alg = res->fi->fib_mp_alg;
-- if (res->fi->fib_nhs > 1)
-- rth->u.dst.flags |= DST_BALANCED;
-- }
--#endif
- if (IN_DEV_CONF_GET(in_dev, NOXFRM))
- rth->u.dst.flags |= DST_NOXFRM;
- if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
- rth->u.dst.flags |= DST_NOPOLICY;
-
-+ rth->fl.fl_net = hold_net(oldflp->fl_net);
- rth->fl.fl4_dst = oldflp->fl4_dst;
- rth->fl.fl4_tos = tos;
- rth->fl.fl4_src = oldflp->fl4_src;
-@@ -2277,7 +2145,7 @@
- return err;
- }
-
--static inline int ip_mkroute_output_def(struct rtable **rp,
-+static inline int ip_mkroute_output(struct rtable **rp,
- struct fib_result* res,
- const struct flowi *fl,
- const struct flowi *oldflp,
-@@ -2295,68 +2163,6 @@
- return err;
- }
-
--static inline int ip_mkroute_output(struct rtable** rp,
-- struct fib_result* res,
-- const struct flowi *fl,
-- const struct flowi *oldflp,
-- struct net_device *dev_out,
-- unsigned flags)
--{
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- unsigned char hop;
-- unsigned hash;
-- int err = -EINVAL;
-- struct rtable *rth = NULL;
--
-- if (res->fi && res->fi->fib_nhs > 1) {
-- unsigned char hopcount = res->fi->fib_nhs;
--
-- for (hop = 0; hop < hopcount; hop++) {
-- struct net_device *dev2nexthop;
--
-- res->nh_sel = hop;
--
-- /* hold a work reference to the output device */
-- dev2nexthop = FIB_RES_DEV(*res);
-- dev_hold(dev2nexthop);
--
-- /* put reference to previous result */
-- if (hop)
-- ip_rt_put(*rp);
--
-- err = __mkroute_output(&rth, res, fl, oldflp,
-- dev2nexthop, flags);
--
-- if (err != 0)
-- goto cleanup;
--
-- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
-- oldflp->oif);
-- err = rt_intern_hash(hash, rth, rp);
--
-- /* forward hop information to multipath impl. */
-- multipath_set_nhinfo(rth,
-- FIB_RES_NETWORK(*res),
-- FIB_RES_NETMASK(*res),
-- res->prefixlen,
-- &FIB_RES_NH(*res));
-- cleanup:
-- /* release work reference to output device */
-- dev_put(dev2nexthop);
--
-- if (err != 0)
-- return err;
-- }
-- return err;
-- } else {
-- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
-- flags);
-- }
--#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
-- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
--#endif
--}
--
- /*
- * Major route resolver routine.
- */
-@@ -2364,7 +2170,9 @@
- static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
- {
- u32 tos = RT_FL_TOS(oldflp);
-- struct flowi fl = { .nl_u = { .ip4_u =
-+ struct net *net = oldflp->fl_net;
-+ struct flowi fl = { .fl_net = net,
-+ .nl_u = { .ip4_u =
- { .daddr = oldflp->fl4_dst,
- .saddr = oldflp->fl4_src,
- .tos = tos & IPTOS_RT_MASK,
-@@ -2373,7 +2181,7 @@
- RT_SCOPE_UNIVERSE),
- } },
- .mark = oldflp->mark,
-- .iif = loopback_dev.ifindex,
-+ .iif = net->loopback_dev.ifindex,
- .oif = oldflp->oif };
- struct fib_result res;
- unsigned flags = 0;
-@@ -2395,7 +2203,7 @@
- goto out;
-
- /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-- dev_out = ip_dev_find(oldflp->fl4_src);
-+ dev_out = ip_dev_find(net, oldflp->fl4_src);
- if (dev_out == NULL)
- goto out;
-
-@@ -2434,7 +2242,7 @@
-
-
- if (oldflp->oif) {
-- dev_out = dev_get_by_index(oldflp->oif);
-+ dev_out = dev_get_by_index(net, oldflp->oif);
- err = -ENODEV;
- if (dev_out == NULL)
- goto out;
-@@ -2467,9 +2275,9 @@
- fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
- if (dev_out)
- dev_put(dev_out);
-- dev_out = &loopback_dev;
-+ dev_out = &net->loopback_dev;
- dev_hold(dev_out);
-- fl.oif = loopback_dev.ifindex;
-+ fl.oif = net->loopback_dev.ifindex;
- res.type = RTN_LOCAL;
- flags |= RTCF_LOCAL;
- goto make_route;
-@@ -2514,7 +2322,7 @@
- fl.fl4_src = fl.fl4_dst;
- if (dev_out)
- dev_put(dev_out);
-- dev_out = &loopback_dev;
-+ dev_out = &net->loopback_dev;
- dev_hold(dev_out);
- fl.oif = dev_out->ifindex;
- if (res.fi)
-@@ -2568,19 +2376,9 @@
- rth->fl.iif == 0 &&
- rth->fl.oif == flp->oif &&
- rth->fl.mark == flp->mark &&
-+ rth->fl.fl_net == flp->fl_net &&
- !((rth->fl.fl4_tos ^ flp->fl4_tos) &
- (IPTOS_RT_MASK | RTO_ONLINK))) {
--
-- /* check for multipath routes and choose one if
-- * necessary
-- */
-- if (multipath_select_route(flp, rth, rp)) {
-- dst_hold(&(*rp)->u.dst);
-- RT_CACHE_STAT_INC(out_hit);
-- rcu_read_unlock_bh();
-- return 0;
-- }
--
- rth->u.dst.lastuse = jiffies;
- dst_hold(&rth->u.dst);
- rth->u.dst.__use++;
-@@ -2729,10 +2527,6 @@
- if (rt->u.dst.tclassid)
- NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
- #endif
--#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
-- if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
-- NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
--#endif
- if (rt->fl.iif)
- NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
- else if (rt->rt_src != rt->fl.fl4_src)
-@@ -2759,7 +2553,7 @@
- __be32 dst = rt->rt_dst;
-
- if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
-- IPV4_DEVCONF_ALL(MC_FORWARDING)) {
-+ IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
- int err = ipmr_get_route(skb, r, nowait);
- if (err <= 0) {
- if (!nowait) {
-@@ -2790,6 +2584,7 @@
-
- static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
- {
-+ struct net *net = in_skb->sk->sk_net;
- struct rtmsg *rtm;
- struct nlattr *tb[RTA_MAX+1];
- struct rtable *rt = NULL;
-@@ -2828,7 +2623,7 @@
- if (iif) {
- struct net_device *dev;
-
-- dev = __dev_get_by_index(iif);
-+ dev = __dev_get_by_index(net, iif);
- if (dev == NULL) {
- err = -ENODEV;
- goto errout_free;
-@@ -2845,6 +2640,7 @@
- err = -rt->u.dst.error;
- } else {
- struct flowi fl = {
-+ .fl_net = net,
- .nl_u = {
- .ip4_u = {
- .daddr = dst,
-@@ -2869,7 +2665,7 @@
- if (err <= 0)
- goto errout_free;
-
-- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
- errout:
- return err;
-
-@@ -3182,6 +2978,48 @@
- }
- __setup("rhash_entries=", set_rhash_entries);
-
-+
-+static void ip_rt_net_exit(struct net *net)
-+{
-+#ifdef CONFIG_PROC_FS
-+# ifdef CONFIG_NET_CLS_ROUTE
-+ proc_net_remove(net, "rt_acct");
-+# endif
-+ remove_proc_entry("rt_cache", net->proc_net_stat);
-+ proc_net_remove(net, "rt_cache");
-+#endif
-+ rt_run_flush(0);
-+}
-+
-+static int ip_rt_net_init(struct net *net)
-+{
-+ int error = -ENOMEM;
-+#ifdef CONFIG_PROC_FS
-+ struct proc_dir_entry *rtstat_pde;
-+ if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops))
-+ goto out;
-+ if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-+ net->proc_net_stat)))
-+ goto out;
-+ rtstat_pde->proc_fops = &rt_cpu_seq_fops;
-+# ifdef CONFIG_NET_CLS_ROUTE
-+ if (!create_proc_read_entry("rt_acct", 0, net->proc_net,
-+ ip_rt_acct_read, NULL))
-+ goto out;
-+# endif
-+#endif
-+ error = 0;
-+out:
-+ if (error)
-+ ip_rt_net_exit(net);
-+ return error;
-+}
-+
-+struct pernet_operations ip_rt_net_ops = {
-+ .init = ip_rt_net_init,
-+ .exit = ip_rt_net_exit,
-+};
-+
- int __init ip_rt_init(void)
- {
- int rc = 0;
-@@ -3245,20 +3083,7 @@
- ip_rt_secret_interval;
- add_timer(&rt_secret_timer);
-
--#ifdef CONFIG_PROC_FS
-- {
-- struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
-- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-- proc_net_stat))) {
-- return -ENOMEM;
-- }
-- rtstat_pde->proc_fops = &rt_cpu_seq_fops;
-- }
--#ifdef CONFIG_NET_CLS_ROUTE
-- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
--#endif
--#endif
-+ register_pernet_subsys(&ip_rt_net_ops);
- #ifdef CONFIG_XFRM
- xfrm_init();
- xfrm4_init();
-diff -Nurb linux-2.6.22-570/net/ipv4/syncookies.c linux-2.6.22-590/net/ipv4/syncookies.c
---- linux-2.6.22-570/net/ipv4/syncookies.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/syncookies.c 2008-01-29 22:12:32.000000000 -0500
-@@ -253,7 +253,8 @@
- * no easy way to do this.
- */
- {
-- struct flowi fl = { .nl_u = { .ip4_u =
-+ struct flowi fl = { .fl_net = &init_net,
-+ .nl_u = { .ip4_u =
- { .daddr = ((opt && opt->srr) ?
- opt->faddr :
- ireq->rmt_addr),
-diff -Nurb linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-590/net/ipv4/sysctl_net_ipv4.c
---- linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c 2008-01-29 22:12:23.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/sysctl_net_ipv4.c 2008-01-29 22:12:32.000000000 -0500
-@@ -29,21 +29,21 @@
- static int ip_local_port_range_max[] = { 65535, 65535 };
- #endif
-
--struct ipv4_config ipv4_config;
--
- #ifdef CONFIG_SYSCTL
-
- static
- int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
- {
-- int val = IPV4_DEVCONF_ALL(FORWARDING);
-+ struct net *net = ctl->extra2;
-+ int *valp = ctl->data;
-+ int old = *valp;
- int ret;
-
- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-
-- if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
-- inet_forward_change();
-+ if (write && *valp != old)
-+ inet_forward_change(net);
-
- return ret;
- }
-@@ -53,6 +53,7 @@
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
- {
-+ struct net *net = table->extra2;
- int *valp = table->data;
- int new;
-
-@@ -85,7 +86,7 @@
- }
-
- *valp = new;
-- inet_forward_change();
-+ inet_forward_change(net);
- return 1;
- }
-
-@@ -188,22 +189,6 @@
-
- ctl_table ipv4_table[] = {
- {
-- .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
-- .procname = "tcp_timestamps",
-- .data = &sysctl_tcp_timestamps,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_TCP_WINDOW_SCALING,
-- .procname = "tcp_window_scaling",
-- .data = &sysctl_tcp_window_scaling,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
- .ctl_name = NET_IPV4_TCP_SACK,
- .procname = "tcp_sack",
- .data = &sysctl_tcp_sack,
-@@ -220,40 +205,6 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = NET_IPV4_FORWARD,
-- .procname = "ip_forward",
-- .data = &IPV4_DEVCONF_ALL(FORWARDING),
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &ipv4_sysctl_forward,
-- .strategy = &ipv4_sysctl_forward_strategy
-- },
-- {
-- .ctl_name = NET_IPV4_DEFAULT_TTL,
-- .procname = "ip_default_ttl",
-- .data = &sysctl_ip_default_ttl,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &ipv4_doint_and_flush,
-- .strategy = &ipv4_doint_and_flush_strategy,
-- },
-- {
-- .ctl_name = NET_IPV4_NO_PMTU_DISC,
-- .procname = "ip_no_pmtu_disc",
-- .data = &ipv4_config.no_pmtu_disc,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_NONLOCAL_BIND,
-- .procname = "ip_nonlocal_bind",
-- .data = &sysctl_ip_nonlocal_bind,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
- .ctl_name = NET_IPV4_TCP_SYN_RETRIES,
- .procname = "tcp_syn_retries",
- .data = &sysctl_tcp_syn_retries,
-@@ -286,39 +237,6 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
-- .procname = "ipfrag_high_thresh",
-- .data = &sysctl_ipfrag_high_thresh,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
-- .procname = "ipfrag_low_thresh",
-- .data = &sysctl_ipfrag_low_thresh,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_DYNADDR,
-- .procname = "ip_dynaddr",
-- .data = &sysctl_ip_dynaddr,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_IPFRAG_TIME,
-- .procname = "ipfrag_time",
-- .data = &sysctl_ipfrag_time,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
- .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
- .procname = "tcp_keepalive_time",
- .data = &sysctl_tcp_keepalive_time,
-@@ -422,17 +340,6 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
-- .procname = "ip_local_port_range",
-- .data = &sysctl_local_port_range,
-- .maxlen = sizeof(sysctl_local_port_range),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_minmax,
-- .strategy = &sysctl_intvec,
-- .extra1 = ip_local_port_range_min,
-- .extra2 = ip_local_port_range_max
-- },
-- {
- .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
- .procname = "icmp_echo_ignore_all",
- .data = &sysctl_icmp_echo_ignore_all,
-@@ -534,50 +441,6 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
-- .procname = "inet_peer_threshold",
-- .data = &inet_peer_threshold,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec
-- },
-- {
-- .ctl_name = NET_IPV4_INET_PEER_MINTTL,
-- .procname = "inet_peer_minttl",
-- .data = &inet_peer_minttl,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
-- .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
-- .procname = "inet_peer_maxttl",
-- .data = &inet_peer_maxttl,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
-- .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME,
-- .procname = "inet_peer_gc_mintime",
-- .data = &inet_peer_gc_mintime,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
-- .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME,
-- .procname = "inet_peer_gc_maxtime",
-- .data = &inet_peer_gc_maxtime,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
- .ctl_name = NET_TCP_ORPHAN_RETRIES,
- .procname = "tcp_orphan_retries",
- .data = &sysctl_tcp_orphan_retries,
-@@ -706,24 +569,6 @@
- .proc_handler = &proc_dointvec
- },
- {
-- .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
-- .procname = "ipfrag_secret_interval",
-- .data = &sysctl_ipfrag_secret_interval,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies
-- },
-- {
-- .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
-- .procname = "ipfrag_max_dist",
-- .data = &sysctl_ipfrag_max_dist,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_minmax,
-- .extra1 = &zero
-- },
-- {
- .ctl_name = NET_TCP_NO_METRICS_SAVE,
- .procname = "tcp_no_metrics_save",
- .data = &sysctl_tcp_nometrics_save,
-@@ -865,6 +710,181 @@
- { .ctl_name = 0 }
- };
-
--#endif /* CONFIG_SYSCTL */
-+struct ctl_table multi_ipv4_table[] = {
-+ {
-+ /* .data is filled in by devinet_net_init.
-+ * As a consequence this table entry must be the first
-+ * entry in multi_ipv4_table.
-+ */
-+ .ctl_name = NET_IPV4_FORWARD,
-+ .procname = "ip_forward",
-+ .data = NULL,
-+ .extra2 = &init_net,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &ipv4_sysctl_forward,
-+ .strategy = &ipv4_sysctl_forward_strategy
-+ },
-+ {
-+ .ctl_name = NET_IPV4_DEFAULT_TTL,
-+ .procname = "ip_default_ttl",
-+ .data = &init_net.sysctl_ip_default_ttl,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &ipv4_doint_and_flush,
-+ .strategy = &ipv4_doint_and_flush_strategy,
-+ },
-+ {
-+ .ctl_name = NET_IPV4_NO_PMTU_DISC,
-+ .procname = "ip_no_pmtu_disc",
-+ .data = &init_net.sysctl_ipv4_no_pmtu_disc,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_NONLOCAL_BIND,
-+ .procname = "ip_nonlocal_bind",
-+ .data = &init_net.sysctl_ip_nonlocal_bind,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
-+ .procname = "ip_local_port_range",
-+ .data = &init_net.sysctl_local_port_range,
-+ .maxlen = sizeof(init_net.sysctl_local_port_range),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_minmax,
-+ .strategy = &sysctl_intvec,
-+ .extra1 = ip_local_port_range_min,
-+ .extra2 = ip_local_port_range_max
-+ },
-+ {
-+ .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
-+ .procname = "ipfrag_high_thresh",
-+ .data = &init_net.sysctl_ipfrag_high_thresh,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
-+ .procname = "ipfrag_low_thresh",
-+ .data = &init_net.sysctl_ipfrag_low_thresh,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_IPFRAG_TIME,
-+ .procname = "ipfrag_time",
-+ .data = &init_net.sysctl_ipfrag_time,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
-+ .procname = "ipfrag_secret_interval",
-+ .data = &init_net.sysctl_ipfrag_secret_interval,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
-+ .procname = "ipfrag_max_dist",
-+ .data = &init_net.sysctl_ipfrag_max_dist,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_minmax,
-+ .extra1 = &zero
-+ },
-+ {
-+ .ctl_name = NET_IPV4_DYNADDR,
-+ .procname = "ip_dynaddr",
-+ .data = &init_net.sysctl_ip_dynaddr,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
-+ .procname = "ip_local_port_range",
-+ .data = &init_net.sysctl_local_port_range,
-+ .maxlen = sizeof(init_net.sysctl_local_port_range),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_minmax,
-+ .strategy = &sysctl_intvec,
-+ .extra1 = ip_local_port_range_min,
-+ .extra2 = ip_local_port_range_max
-+ },
-+ {
-+ .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
-+ .procname = "inet_peer_threshold",
-+ .data = &init_net.inet_peer_threshold,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {
-+ .ctl_name = NET_IPV4_INET_PEER_MINTTL,
-+ .procname = "inet_peer_minttl",
-+ .data = &init_net.inet_peer_minttl,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
-+ .procname = "inet_peer_maxttl",
-+ .data = &init_net.inet_peer_maxttl,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME,
-+ .procname = "inet_peer_gc_mintime",
-+ .data = &init_net.inet_peer_gc_mintime,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME,
-+ .procname = "inet_peer_gc_maxtime",
-+ .data = &init_net.inet_peer_gc_maxtime,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec_jiffies,
-+ .strategy = &sysctl_jiffies
-+ },
-+ {
-+ .ctl_name = NET_IPV4_TCP_TIMESTAMPS,
-+ .procname = "tcp_timestamps",
-+ .data = &init_net.sysctl_tcp_timestamps,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+
-+ },
-+ {
-+ .ctl_name = NET_IPV4_TCP_WINDOW_SCALING,
-+ .procname = "tcp_window_scaling",
-+ .data = &init_net.sysctl_tcp_window_scaling,
-+ .maxlen = sizeof(int),
-+ .mode = 0644,
-+ .proc_handler = &proc_dointvec
-+ },
-+ {}
-+};
-
--EXPORT_SYMBOL(ipv4_config);
-+#endif /* CONFIG_SYSCTL */
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp.c linux-2.6.22-590/net/ipv4/tcp.c
---- linux-2.6.22-570/net/ipv4/tcp.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/tcp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -2409,6 +2409,23 @@
- }
- __setup("thash_entries=", set_thash_entries);
-
-+static int tcp_net_init(struct net *net)
-+{
-+ /*
-+ * This array holds the first and last local port number.
-+ */
-+ net->sysctl_local_port_range[0] = 32768;
-+ net->sysctl_local_port_range[1] = 61000;
-+
-+ net->sysctl_tcp_timestamps = 1;
-+ net->sysctl_tcp_window_scaling = 1;
-+ return 0;
-+}
-+
-+static struct pernet_operations tcp_net_ops = {
-+ .init = tcp_net_init,
-+};
-+
- void __init tcp_init(void)
- {
- struct sk_buff *skb = NULL;
-@@ -2502,6 +2519,8 @@
- sysctl_tcp_rmem[1] = 87380;
- sysctl_tcp_rmem[2] = max(87380, max_share);
-
-+ register_pernet_subsys(&tcp_net_ops);
-+
- printk(KERN_INFO "TCP: Hash tables configured "
- "(established %d bind %d)\n",
- tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size);
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_input.c linux-2.6.22-590/net/ipv4/tcp_input.c
---- linux-2.6.22-570/net/ipv4/tcp_input.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/tcp_input.c 2008-01-29 22:12:32.000000000 -0500
-@@ -72,8 +72,6 @@
- #include <asm/unaligned.h>
- #include <net/netdma.h>
-
--int sysctl_tcp_timestamps __read_mostly = 1;
--int sysctl_tcp_window_scaling __read_mostly = 1;
- int sysctl_tcp_sack __read_mostly = 1;
- int sysctl_tcp_fack __read_mostly = 1;
- int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
-@@ -2922,7 +2920,7 @@
- break;
- case TCPOPT_WINDOW:
- if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
-- if (sysctl_tcp_window_scaling) {
-+ if (init_net.sysctl_tcp_window_scaling) {
- __u8 snd_wscale = *(__u8 *) ptr;
- opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
-@@ -2938,7 +2936,7 @@
- case TCPOPT_TIMESTAMP:
- if (opsize==TCPOLEN_TIMESTAMP) {
- if ((estab && opt_rx->tstamp_ok) ||
-- (!estab && sysctl_tcp_timestamps)) {
-+ (!estab && init_net.sysctl_tcp_timestamps)) {
- opt_rx->saw_tstamp = 1;
- opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
- opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-590/net/ipv4/tcp_ipv4.c
---- linux-2.6.22-570/net/ipv4/tcp_ipv4.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/tcp_ipv4.c 2008-01-29 22:12:32.000000000 -0500
-@@ -71,6 +71,7 @@
- #include <net/timewait_sock.h>
- #include <net/xfrm.h>
- #include <net/netdma.h>
-+#include <net/net_namespace.h>
-
- #include <linux/inet.h>
- #include <linux/ipv6.h>
-@@ -353,6 +354,7 @@
-
- void tcp_v4_err(struct sk_buff *skb, u32 info)
- {
-+ struct net *net = skb->dev->nd_net;
- struct iphdr *iph = (struct iphdr *)skb->data;
- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
- struct tcp_sock *tp;
-@@ -369,7 +371,7 @@
- }
-
- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
-- th->source, inet_iif(skb));
-+ th->source, inet_iif(skb), net);
- if (!sk) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
- return;
-@@ -1499,7 +1501,8 @@
- return tcp_check_req(sk, skb, req, prev);
-
- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
-+ iph->daddr, th->dest, inet_iif(skb),
-+ sk->sk_net);
-
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
-@@ -1618,6 +1621,7 @@
-
- int tcp_v4_rcv(struct sk_buff *skb)
- {
-+ struct net *net = skb->dev->nd_net;
- const struct iphdr *iph;
- struct tcphdr *th;
- struct sock *sk;
-@@ -1657,7 +1661,7 @@
- TCP_SKB_CB(skb)->sacked = 0;
-
- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
-+ iph->daddr, th->dest, inet_iif(skb), net);
- if (!sk)
- goto no_tcp_socket;
-
-@@ -1732,7 +1736,7 @@
- case TCP_TW_SYN: {
- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
- iph->daddr, th->dest,
-- inet_iif(skb));
-+ inet_iif(skb), net);
- if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
- inet_twsk_put(inet_twsk(sk));
-@@ -1766,7 +1770,7 @@
- int release_it = 0;
-
- if (!rt || rt->rt_dst != inet->daddr) {
-- peer = inet_getpeer(inet->daddr, 1);
-+ peer = inet_getpeer(sk->sk_net, inet->daddr, 1);
- release_it = 1;
- } else {
- if (!rt->peer)
-@@ -1791,7 +1795,7 @@
-
- int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
- {
-- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
-+ struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1);
-
- if (peer) {
- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-@@ -1980,7 +1984,8 @@
- if (req->sk &&
- !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT))
- continue;
-- if (req->rsk_ops->family == st->family) {
-+ if ((req->rsk_ops->family == st->family) &&
-+ (req->sk->sk_net == st->net)) {
- cur = req;
- goto out;
- }
-@@ -2004,6 +2009,8 @@
- }
- get_sk:
- sk_for_each_from(sk, node) {
-+ if (sk->sk_net != st->net)
-+ continue;
- vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
- sk, sk->sk_nid, nx_current_nid());
- if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
-@@ -2054,11 +2061,10 @@
- struct hlist_node *node;
- struct inet_timewait_sock *tw;
-
-- /* We can reschedule _before_ having picked the target: */
-- cond_resched_softirq();
--
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-+ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-+ if (sk->sk_net != st->net)
-+ continue;
- vxdprintk(VXD_CBIT(net, 6),
- "sk,egf: %p [#%d] (from %d)",
- sk, sk->sk_nid, nx_current_nid());
-@@ -2072,6 +2078,8 @@
- st->state = TCP_SEQ_STATE_TIME_WAIT;
- inet_twsk_for_each(tw, node,
- &tcp_hashinfo.ehash[st->bucket].twchain) {
-+ if (tw->tw_net != st->net)
-+ continue;
- vxdprintk(VXD_CBIT(net, 6),
- "tw: %p [#%d] (from %d)",
- tw, tw->tw_nid, nx_current_nid());
-@@ -2082,7 +2090,7 @@
- rc = tw;
- goto out;
- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-+ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
- st->state = TCP_SEQ_STATE_ESTABLISHED;
- }
- out:
-@@ -2102,7 +2110,8 @@
- tw = cur;
- tw = tw_next(tw);
- get_tw:
-- while (tw && (tw->tw_family != st->family ||
-+ while (tw && ((tw->tw_net != st->net) ||
-+ (tw->tw_family != st->family) ||
- !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
- tw = tw_next(tw);
- }
-@@ -2110,14 +2119,11 @@
- cur = tw;
- goto out;
- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-+ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
- st->state = TCP_SEQ_STATE_ESTABLISHED;
-
-- /* We can reschedule between buckets: */
-- cond_resched_softirq();
--
- if (++st->bucket < tcp_hashinfo.ehash_size) {
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-+ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
- } else {
- cur = NULL;
-@@ -2130,6 +2136,8 @@
- vxdprintk(VXD_CBIT(net, 6),
- "sk,egn: %p [#%d] (from %d)",
- sk, sk->sk_nid, nx_current_nid());
-+ if (sk->sk_net != st->net)
-+ continue;
- if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
- continue;
- if (sk->sk_family == st->family)
-@@ -2167,7 +2175,6 @@
-
- if (!rc) {
- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
- st->state = TCP_SEQ_STATE_ESTABLISHED;
- rc = established_get_idx(seq, pos);
- }
-@@ -2200,7 +2207,6 @@
- rc = listening_get_next(seq, v);
- if (!rc) {
- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
- st->state = TCP_SEQ_STATE_ESTABLISHED;
- rc = established_get_first(seq);
- }
-@@ -2232,8 +2238,7 @@
- case TCP_SEQ_STATE_TIME_WAIT:
- case TCP_SEQ_STATE_ESTABLISHED:
- if (v)
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- local_bh_enable();
-+ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
- break;
- }
- }
-@@ -2262,6 +2267,7 @@
- goto out_kfree;
- seq = file->private_data;
- seq->private = s;
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -2269,20 +2275,30 @@
- goto out;
- }
-
--int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
-+static int tcp_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct tcp_iter_state *st = seq->private;
-+ put_net(st->net);
-+ return seq_release_private(inode, file);
-+}
-+
-+int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
- {
- int rc = 0;
- struct proc_dir_entry *p;
-
- if (!afinfo)
- return -EINVAL;
-+ if (net == &init_net) {
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = tcp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
-- afinfo->seq_fops->release = seq_release_private;
-+ afinfo->seq_fops->release = tcp_seq_release;
-+ }
-
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-+ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
- if (p)
- p->data = afinfo;
- else
-@@ -2290,11 +2306,12 @@
- return rc;
- }
-
--void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
-+void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
- {
- if (!afinfo)
- return;
-- proc_net_remove(afinfo->name);
-+ proc_net_remove(net, afinfo->name);
-+ if (net == &init_net)
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
- }
-
-@@ -2439,14 +2456,29 @@
- .seq_fops = &tcp4_seq_fops,
- };
-
-+static int tcp4_proc_net_init(struct net *net)
-+{
-+ return tcp_proc_register(net, &tcp4_seq_afinfo);
-+}
-+
-+static void tcp4_proc_net_exit(struct net *net)
-+{
-+ tcp_proc_unregister(net, &tcp4_seq_afinfo);
-+}
-+
-+static struct pernet_operations tcp4_proc_net_ops = {
-+ .init = tcp4_proc_net_init,
-+ .exit = tcp4_proc_net_exit,
-+};
-+
- int __init tcp4_proc_init(void)
- {
-- return tcp_proc_register(&tcp4_seq_afinfo);
-+ return register_pernet_subsys(&tcp4_proc_net_ops);
- }
-
- void tcp4_proc_exit(void)
- {
-- tcp_proc_unregister(&tcp4_seq_afinfo);
-+ unregister_pernet_subsys(&tcp4_proc_net_ops);
- }
- #endif /* CONFIG_PROC_FS */
-
-@@ -2508,6 +2540,5 @@
- EXPORT_SYMBOL(tcp_proc_register);
- EXPORT_SYMBOL(tcp_proc_unregister);
- #endif
--EXPORT_SYMBOL(sysctl_local_port_range);
- EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig
---- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,2483 +0,0 @@
--/*
-- * INET An implementation of the TCP/IP protocol suite for the LINUX
-- * operating system. INET is implemented using the BSD Socket
-- * interface as the means of communication with the user level.
-- *
-- * Implementation of the Transmission Control Protocol(TCP).
-- *
-- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
-- *
-- * IPv4 specific functions
-- *
-- *
-- * code split from:
-- * linux/ipv4/tcp.c
-- * linux/ipv4/tcp_input.c
-- * linux/ipv4/tcp_output.c
-- *
-- * See tcp.c for author information
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--/*
-- * Changes:
-- * David S. Miller : New socket lookup architecture.
-- * This code is dedicated to John Dyson.
-- * David S. Miller : Change semantics of established hash,
-- * half is devoted to TIME_WAIT sockets
-- * and the rest go in the other half.
-- * Andi Kleen : Add support for syncookies and fixed
-- * some bugs: ip options weren't passed to
-- * the TCP layer, missed a check for an
-- * ACK bit.
-- * Andi Kleen : Implemented fast path mtu discovery.
-- * Fixed many serious bugs in the
-- * request_sock handling and moved
-- * most of it into the af independent code.
-- * Added tail drop and some other bugfixes.
-- * Added new listen semantics.
-- * Mike McLagan : Routing by source
-- * Juan Jose Ciarlante: ip_dynaddr bits
-- * Andi Kleen: various fixes.
-- * Vitaly E. Lavrov : Transparent proxy revived after year
-- * coma.
-- * Andi Kleen : Fix new listen.
-- * Andi Kleen : Fix accept error reporting.
-- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
-- * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
-- * a single port at the same time.
-- */
--
--
--#include <linux/types.h>
--#include <linux/fcntl.h>
--#include <linux/module.h>
--#include <linux/random.h>
--#include <linux/cache.h>
--#include <linux/jhash.h>
--#include <linux/init.h>
--#include <linux/times.h>
--
--#include <net/icmp.h>
--#include <net/inet_hashtables.h>
--#include <net/tcp.h>
--#include <net/transp_v6.h>
--#include <net/ipv6.h>
--#include <net/inet_common.h>
--#include <net/timewait_sock.h>
--#include <net/xfrm.h>
--#include <net/netdma.h>
--
--#include <linux/inet.h>
--#include <linux/ipv6.h>
--#include <linux/stddef.h>
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--
--#include <linux/crypto.h>
--#include <linux/scatterlist.h>
--
--int sysctl_tcp_tw_reuse __read_mostly;
--int sysctl_tcp_low_latency __read_mostly;
--
--/* Check TCP sequence numbers in ICMP packets. */
--#define ICMP_MIN_LENGTH 8
--
--/* Socket used for sending RSTs */
--static struct socket *tcp_socket __read_mostly;
--
--void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
-- __be32 addr);
--static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- __be32 saddr, __be32 daddr,
-- struct tcphdr *th, int protocol,
-- int tcplen);
--#endif
--
--struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
-- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
-- .lhash_users = ATOMIC_INIT(0),
-- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
--};
--
--static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
--{
-- return inet_csk_get_port(&tcp_hashinfo, sk, snum,
-- inet_csk_bind_conflict);
--}
--
--static void tcp_v4_hash(struct sock *sk)
--{
-- inet_hash(&tcp_hashinfo, sk);
--}
--
--void tcp_unhash(struct sock *sk)
--{
-- inet_unhash(&tcp_hashinfo, sk);
--}
--
--static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
--{
-- return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- tcp_hdr(skb)->dest,
-- tcp_hdr(skb)->source);
--}
--
--int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
--{
-- const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- /* With PAWS, it is safe from the viewpoint
-- of data integrity. Even without PAWS it is safe provided sequence
-- spaces do not overlap i.e. at data rates <= 80Mbit/sec.
--
-- Actually, the idea is close to VJ's one, only timestamp cache is
-- held not per host, but per port pair and TW bucket is used as state
-- holder.
--
-- If TW bucket has been already destroyed we fall back to VJ's scheme
-- and use initial timestamp retrieved from peer table.
-- */
-- if (tcptw->tw_ts_recent_stamp &&
-- (twp == NULL || (sysctl_tcp_tw_reuse &&
-- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
-- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-- if (tp->write_seq == 0)
-- tp->write_seq = 1;
-- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
-- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-- sock_hold(sktw);
-- return 1;
-- }
--
-- return 0;
--}
--
--EXPORT_SYMBOL_GPL(tcp_twsk_unique);
--
--/* This will initiate an outgoing connection. */
--int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
-- struct rtable *rt;
-- __be32 daddr, nexthop;
-- int tmp;
-- int err;
--
-- if (addr_len < sizeof(struct sockaddr_in))
-- return -EINVAL;
--
-- if (usin->sin_family != AF_INET)
-- return -EAFNOSUPPORT;
--
-- nexthop = daddr = usin->sin_addr.s_addr;
-- if (inet->opt && inet->opt->srr) {
-- if (!daddr)
-- return -EINVAL;
-- nexthop = inet->opt->faddr;
-- }
--
-- tmp = ip_route_connect(&rt, nexthop, inet->saddr,
-- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
-- IPPROTO_TCP,
-- inet->sport, usin->sin_port, sk, 1);
-- if (tmp < 0) {
-- if (tmp == -ENETUNREACH)
-- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
-- return tmp;
-- }
--
-- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
-- ip_rt_put(rt);
-- return -ENETUNREACH;
-- }
--
-- if (!inet->opt || !inet->opt->srr)
-- daddr = rt->rt_dst;
--
-- if (!inet->saddr)
-- inet->saddr = rt->rt_src;
-- inet->rcv_saddr = inet->saddr;
--
-- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
-- /* Reset inherited state */
-- tp->rx_opt.ts_recent = 0;
-- tp->rx_opt.ts_recent_stamp = 0;
-- tp->write_seq = 0;
-- }
--
-- if (tcp_death_row.sysctl_tw_recycle &&
-- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
-- struct inet_peer *peer = rt_get_peer(rt);
-- /*
-- * VJ's idea. We save last timestamp seen from
-- * the destination in peer table, when entering state
-- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
-- * when trying new connection.
-- */
-- if (peer != NULL &&
-- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
-- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-- tp->rx_opt.ts_recent = peer->tcp_ts;
-- }
-- }
--
-- inet->dport = usin->sin_port;
-- inet->daddr = daddr;
--
-- inet_csk(sk)->icsk_ext_hdr_len = 0;
-- if (inet->opt)
-- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
--
-- tp->rx_opt.mss_clamp = 536;
--
-- /* Socket identity is still unknown (sport may be zero).
-- * However we set state to SYN-SENT and not releasing socket
-- * lock select source port, enter ourselves into the hash tables and
-- * complete initialization after this.
-- */
-- tcp_set_state(sk, TCP_SYN_SENT);
-- err = inet_hash_connect(&tcp_death_row, sk);
-- if (err)
-- goto failure;
--
-- err = ip_route_newports(&rt, IPPROTO_TCP,
-- inet->sport, inet->dport, sk);
-- if (err)
-- goto failure;
--
-- /* OK, now commit destination to socket. */
-- sk->sk_gso_type = SKB_GSO_TCPV4;
-- sk_setup_caps(sk, &rt->u.dst);
--
-- if (!tp->write_seq)
-- tp->write_seq = secure_tcp_sequence_number(inet->saddr,
-- inet->daddr,
-- inet->sport,
-- usin->sin_port);
--
-- inet->id = tp->write_seq ^ jiffies;
--
-- err = tcp_connect(sk);
-- rt = NULL;
-- if (err)
-- goto failure;
--
-- return 0;
--
--failure:
-- /*
-- * This unhashes the socket and releases the local port,
-- * if necessary.
-- */
-- tcp_set_state(sk, TCP_CLOSE);
-- ip_rt_put(rt);
-- sk->sk_route_caps = 0;
-- inet->dport = 0;
-- return err;
--}
--
--/*
-- * This routine does path mtu discovery as defined in RFC1191.
-- */
--static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
--{
-- struct dst_entry *dst;
-- struct inet_sock *inet = inet_sk(sk);
--
-- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
-- * send out by Linux are always <576bytes so they should go through
-- * unfragmented).
-- */
-- if (sk->sk_state == TCP_LISTEN)
-- return;
--
-- /* We don't check in the destentry if pmtu discovery is forbidden
-- * on this route. We just assume that no packet_to_big packets
-- * are send back when pmtu discovery is not active.
-- * There is a small race when the user changes this flag in the
-- * route, but I think that's acceptable.
-- */
-- if ((dst = __sk_dst_check(sk, 0)) == NULL)
-- return;
--
-- dst->ops->update_pmtu(dst, mtu);
--
-- /* Something is about to be wrong... Remember soft error
-- * for the case, if this connection will not able to recover.
-- */
-- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
-- sk->sk_err_soft = EMSGSIZE;
--
-- mtu = dst_mtu(dst);
--
-- if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-- inet_csk(sk)->icsk_pmtu_cookie > mtu) {
-- tcp_sync_mss(sk, mtu);
--
-- /* Resend the TCP packet because it's
-- * clear that the old packet has been
-- * dropped. This is the new "fast" path mtu
-- * discovery.
-- */
-- tcp_simple_retransmit(sk);
-- } /* else let the usual retransmit timer handle it */
--}
--
--/*
-- * This routine is called by the ICMP module when it gets some
-- * sort of error condition. If err < 0 then the socket should
-- * be closed and the error returned to the user. If err > 0
-- * it's just the icmp type << 8 | icmp code. After adjustment
-- * header points to the first 8 bytes of the tcp header. We need
-- * to find the appropriate port.
-- *
-- * The locking strategy used here is very "optimistic". When
-- * someone else accesses the socket the ICMP is just dropped
-- * and for some paths there is no check at all.
-- * A more general error queue to queue errors for later handling
-- * is probably better.
-- *
-- */
--
--void tcp_v4_err(struct sk_buff *skb, u32 info)
--{
-- struct iphdr *iph = (struct iphdr *)skb->data;
-- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
-- struct tcp_sock *tp;
-- struct inet_sock *inet;
-- const int type = icmp_hdr(skb)->type;
-- const int code = icmp_hdr(skb)->code;
-- struct sock *sk;
-- __u32 seq;
-- int err;
--
-- if (skb->len < (iph->ihl << 2) + 8) {
-- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-- return;
-- }
--
-- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
-- th->source, inet_iif(skb));
-- if (!sk) {
-- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-- return;
-- }
-- if (sk->sk_state == TCP_TIME_WAIT) {
-- inet_twsk_put(inet_twsk(sk));
-- return;
-- }
--
-- bh_lock_sock(sk);
-- /* If too many ICMPs get dropped on busy
-- * servers this needs to be solved differently.
-- */
-- if (sock_owned_by_user(sk))
-- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
--
-- if (sk->sk_state == TCP_CLOSE)
-- goto out;
--
-- tp = tcp_sk(sk);
-- seq = ntohl(th->seq);
-- if (sk->sk_state != TCP_LISTEN &&
-- !between(seq, tp->snd_una, tp->snd_nxt)) {
-- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
-- goto out;
-- }
--
-- switch (type) {
-- case ICMP_SOURCE_QUENCH:
-- /* Just silently ignore these. */
-- goto out;
-- case ICMP_PARAMETERPROB:
-- err = EPROTO;
-- break;
-- case ICMP_DEST_UNREACH:
-- if (code > NR_ICMP_UNREACH)
-- goto out;
--
-- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
-- if (!sock_owned_by_user(sk))
-- do_pmtu_discovery(sk, iph, info);
-- goto out;
-- }
--
-- err = icmp_err_convert[code].errno;
-- break;
-- case ICMP_TIME_EXCEEDED:
-- err = EHOSTUNREACH;
-- break;
-- default:
-- goto out;
-- }
--
-- switch (sk->sk_state) {
-- struct request_sock *req, **prev;
-- case TCP_LISTEN:
-- if (sock_owned_by_user(sk))
-- goto out;
--
-- req = inet_csk_search_req(sk, &prev, th->dest,
-- iph->daddr, iph->saddr);
-- if (!req)
-- goto out;
--
-- /* ICMPs are not backlogged, hence we cannot get
-- an established socket here.
-- */
-- BUG_TRAP(!req->sk);
--
-- if (seq != tcp_rsk(req)->snt_isn) {
-- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
-- goto out;
-- }
--
-- /*
-- * Still in SYN_RECV, just remove it silently.
-- * There is no good way to pass the error to the newly
-- * created socket, and POSIX does not want network
-- * errors returned from accept().
-- */
-- inet_csk_reqsk_queue_drop(sk, req, prev);
-- goto out;
--
-- case TCP_SYN_SENT:
-- case TCP_SYN_RECV: /* Cannot happen.
-- It can f.e. if SYNs crossed.
-- */
-- if (!sock_owned_by_user(sk)) {
-- sk->sk_err = err;
--
-- sk->sk_error_report(sk);
--
-- tcp_done(sk);
-- } else {
-- sk->sk_err_soft = err;
-- }
-- goto out;
-- }
--
-- /* If we've already connected we will keep trying
-- * until we time out, or the user gives up.
-- *
-- * rfc1122 4.2.3.9 allows to consider as hard errors
-- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
-- * but it is obsoleted by pmtu discovery).
-- *
-- * Note, that in modern internet, where routing is unreliable
-- * and in each dark corner broken firewalls sit, sending random
-- * errors ordered by their masters even this two messages finally lose
-- * their original sense (even Linux sends invalid PORT_UNREACHs)
-- *
-- * Now we are in compliance with RFCs.
-- * --ANK (980905)
-- */
--
-- inet = inet_sk(sk);
-- if (!sock_owned_by_user(sk) && inet->recverr) {
-- sk->sk_err = err;
-- sk->sk_error_report(sk);
-- } else { /* Only an error on timeout */
-- sk->sk_err_soft = err;
-- }
--
--out:
-- bh_unlock_sock(sk);
-- sock_put(sk);
--}
--
--/* This routine computes an IPv4 TCP checksum. */
--void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcphdr *th = tcp_hdr(skb);
--
-- if (skb->ip_summed == CHECKSUM_PARTIAL) {
-- th->check = ~tcp_v4_check(len, inet->saddr,
-- inet->daddr, 0);
-- skb->csum_start = skb_transport_header(skb) - skb->head;
-- skb->csum_offset = offsetof(struct tcphdr, check);
-- } else {
-- th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
-- csum_partial((char *)th,
-- th->doff << 2,
-- skb->csum));
-- }
--}
--
--int tcp_v4_gso_send_check(struct sk_buff *skb)
--{
-- const struct iphdr *iph;
-- struct tcphdr *th;
--
-- if (!pskb_may_pull(skb, sizeof(*th)))
-- return -EINVAL;
--
-- iph = ip_hdr(skb);
-- th = tcp_hdr(skb);
--
-- th->check = 0;
-- th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
-- skb->csum_start = skb_transport_header(skb) - skb->head;
-- skb->csum_offset = offsetof(struct tcphdr, check);
-- skb->ip_summed = CHECKSUM_PARTIAL;
-- return 0;
--}
--
--/*
-- * This routine will send an RST to the other tcp.
-- *
-- * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
-- * for reset.
-- * Answer: if a packet caused RST, it is not for a socket
-- * existing in our system, if it is matched to a socket,
-- * it is just duplicate segment or bug in other side's TCP.
-- * So that we build reply only basing on parameters
-- * arrived with segment.
-- * Exception: precedence violation. We do not implement it in any case.
-- */
--
--static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- struct {
-- struct tcphdr th;
--#ifdef CONFIG_TCP_MD5SIG
-- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
--#endif
-- } rep;
-- struct ip_reply_arg arg;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
--#endif
--
-- /* Never send a reset in response to a reset. */
-- if (th->rst)
-- return;
--
-- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
-- return;
--
-- /* Swap the send and the receive. */
-- memset(&rep, 0, sizeof(rep));
-- rep.th.dest = th->source;
-- rep.th.source = th->dest;
-- rep.th.doff = sizeof(struct tcphdr) / 4;
-- rep.th.rst = 1;
--
-- if (th->ack) {
-- rep.th.seq = th->ack_seq;
-- } else {
-- rep.th.ack = 1;
-- rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
-- skb->len - (th->doff << 2));
-- }
--
-- memset(&arg, 0, sizeof(arg));
-- arg.iov[0].iov_base = (unsigned char *)&rep;
-- arg.iov[0].iov_len = sizeof(rep.th);
--
--#ifdef CONFIG_TCP_MD5SIG
-- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
-- if (key) {
-- rep.opt[0] = htonl((TCPOPT_NOP << 24) |
-- (TCPOPT_NOP << 16) |
-- (TCPOPT_MD5SIG << 8) |
-- TCPOLEN_MD5SIG);
-- /* Update length and the length the header thinks exists */
-- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
-- rep.th.doff = arg.iov[0].iov_len / 4;
--
-- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
-- key,
-- ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- &rep.th, IPPROTO_TCP,
-- arg.iov[0].iov_len);
-- }
--#endif
-- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr, /* XXX */
-- sizeof(struct tcphdr), IPPROTO_TCP, 0);
-- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
--
-- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
--
-- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
-- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
--}
--
--/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
-- outside socket context is ugly, certainly. What can I do?
-- */
--
--static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
-- struct sk_buff *skb, u32 seq, u32 ack,
-- u32 win, u32 ts)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- struct {
-- struct tcphdr th;
-- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
--#ifdef CONFIG_TCP_MD5SIG
-- + (TCPOLEN_MD5SIG_ALIGNED >> 2)
--#endif
-- ];
-- } rep;
-- struct ip_reply_arg arg;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
-- struct tcp_md5sig_key tw_key;
--#endif
--
-- memset(&rep.th, 0, sizeof(struct tcphdr));
-- memset(&arg, 0, sizeof(arg));
--
-- arg.iov[0].iov_base = (unsigned char *)&rep;
-- arg.iov[0].iov_len = sizeof(rep.th);
-- if (ts) {
-- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
-- (TCPOPT_TIMESTAMP << 8) |
-- TCPOLEN_TIMESTAMP);
-- rep.opt[1] = htonl(tcp_time_stamp);
-- rep.opt[2] = htonl(ts);
-- arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
-- }
--
-- /* Swap the send and the receive. */
-- rep.th.dest = th->source;
-- rep.th.source = th->dest;
-- rep.th.doff = arg.iov[0].iov_len / 4;
-- rep.th.seq = htonl(seq);
-- rep.th.ack_seq = htonl(ack);
-- rep.th.ack = 1;
-- rep.th.window = htons(win);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /*
-- * The SKB holds an imcoming packet, but may not have a valid ->sk
-- * pointer. This is especially the case when we're dealing with a
-- * TIME_WAIT ack, because the sk structure is long gone, and only
-- * the tcp_timewait_sock remains. So the md5 key is stashed in that
-- * structure, and we use it in preference. I believe that (twsk ||
-- * skb->sk) holds true, but we program defensively.
-- */
-- if (!twsk && skb->sk) {
-- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
-- } else if (twsk && twsk->tw_md5_keylen) {
-- tw_key.key = twsk->tw_md5_key;
-- tw_key.keylen = twsk->tw_md5_keylen;
-- key = &tw_key;
-- } else
-- key = NULL;
--
-- if (key) {
-- int offset = (ts) ? 3 : 0;
--
-- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
-- (TCPOPT_NOP << 16) |
-- (TCPOPT_MD5SIG << 8) |
-- TCPOLEN_MD5SIG);
-- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
-- rep.th.doff = arg.iov[0].iov_len/4;
--
-- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
-- key,
-- ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr,
-- &rep.th, IPPROTO_TCP,
-- arg.iov[0].iov_len);
-- }
--#endif
-- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
-- ip_hdr(skb)->saddr, /* XXX */
-- arg.iov[0].iov_len, IPPROTO_TCP, 0);
-- arg.csumoffset = offsetof(struct tcphdr, check) / 2;
-- if (twsk)
-- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
--
-- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
--
-- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
--}
--
--static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
--{
-- struct inet_timewait_sock *tw = inet_twsk(sk);
-- struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
--
-- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
-- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
-- tcptw->tw_ts_recent);
--
-- inet_twsk_put(tw);
--}
--
--static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
-- struct request_sock *req)
--{
-- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
-- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
-- req->ts_recent);
--}
--
--/*
-- * Send a SYN-ACK after having received an ACK.
-- * This still operates on a request_sock only, not on a big
-- * socket.
-- */
--static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
-- struct dst_entry *dst)
--{
-- const struct inet_request_sock *ireq = inet_rsk(req);
-- int err = -1;
-- struct sk_buff * skb;
--
-- /* First, grab a route. */
-- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
-- goto out;
--
-- skb = tcp_make_synack(sk, dst, req);
--
-- if (skb) {
-- struct tcphdr *th = tcp_hdr(skb);
--
-- th->check = tcp_v4_check(skb->len,
-- ireq->loc_addr,
-- ireq->rmt_addr,
-- csum_partial((char *)th, skb->len,
-- skb->csum));
--
-- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-- ireq->rmt_addr,
-- ireq->opt);
-- err = net_xmit_eval(err);
-- }
--
--out:
-- dst_release(dst);
-- return err;
--}
--
--/*
-- * IPv4 request_sock destructor.
-- */
--static void tcp_v4_reqsk_destructor(struct request_sock *req)
--{
-- kfree(inet_rsk(req)->opt);
--}
--
--#ifdef CONFIG_SYN_COOKIES
--static void syn_flood_warning(struct sk_buff *skb)
--{
-- static unsigned long warntime;
--
-- if (time_after(jiffies, (warntime + HZ * 60))) {
-- warntime = jiffies;
-- printk(KERN_INFO
-- "possible SYN flooding on port %d. Sending cookies.\n",
-- ntohs(tcp_hdr(skb)->dest));
-- }
--}
--#endif
--
--/*
-- * Save and compile IPv4 options into the request_sock if needed.
-- */
--static struct ip_options *tcp_v4_save_options(struct sock *sk,
-- struct sk_buff *skb)
--{
-- struct ip_options *opt = &(IPCB(skb)->opt);
-- struct ip_options *dopt = NULL;
--
-- if (opt && opt->optlen) {
-- int opt_size = optlength(opt);
-- dopt = kmalloc(opt_size, GFP_ATOMIC);
-- if (dopt) {
-- if (ip_options_echo(dopt, skb)) {
-- kfree(dopt);
-- dopt = NULL;
-- }
-- }
-- }
-- return dopt;
--}
--
--#ifdef CONFIG_TCP_MD5SIG
--/*
-- * RFC2385 MD5 checksumming requires a mapping of
-- * IP address->MD5 Key.
-- * We need to maintain these in the sk structure.
-- */
--
--/* Find the Key structure for an address. */
--static struct tcp_md5sig_key *
-- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
-- int i;
--
-- if (!tp->md5sig_info || !tp->md5sig_info->entries4)
-- return NULL;
-- for (i = 0; i < tp->md5sig_info->entries4; i++) {
-- if (tp->md5sig_info->keys4[i].addr == addr)
-- return &tp->md5sig_info->keys4[i].base;
-- }
-- return NULL;
--}
--
--struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
-- struct sock *addr_sk)
--{
-- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_lookup);
--
--static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
-- struct request_sock *req)
--{
-- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
--}
--
--/* This can be called on a newly created socket, from other files */
--int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
-- u8 *newkey, u8 newkeylen)
--{
-- /* Add Key to the list */
-- struct tcp4_md5sig_key *key;
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct tcp4_md5sig_key *keys;
--
-- key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
-- if (key) {
-- /* Pre-existing entry - just update that one. */
-- kfree(key->base.key);
-- key->base.key = newkey;
-- key->base.keylen = newkeylen;
-- } else {
-- struct tcp_md5sig_info *md5sig;
--
-- if (!tp->md5sig_info) {
-- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
-- GFP_ATOMIC);
-- if (!tp->md5sig_info) {
-- kfree(newkey);
-- return -ENOMEM;
-- }
-- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-- }
-- if (tcp_alloc_md5sig_pool() == NULL) {
-- kfree(newkey);
-- return -ENOMEM;
-- }
-- md5sig = tp->md5sig_info;
--
-- if (md5sig->alloced4 == md5sig->entries4) {
-- keys = kmalloc((sizeof(*keys) *
-- (md5sig->entries4 + 1)), GFP_ATOMIC);
-- if (!keys) {
-- kfree(newkey);
-- tcp_free_md5sig_pool();
-- return -ENOMEM;
-- }
--
-- if (md5sig->entries4)
-- memcpy(keys, md5sig->keys4,
-- sizeof(*keys) * md5sig->entries4);
--
-- /* Free old key list, and reference new one */
-- if (md5sig->keys4)
-- kfree(md5sig->keys4);
-- md5sig->keys4 = keys;
-- md5sig->alloced4++;
-- }
-- md5sig->entries4++;
-- md5sig->keys4[md5sig->entries4 - 1].addr = addr;
-- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
-- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
-- }
-- return 0;
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_do_add);
--
--static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
-- u8 *newkey, u8 newkeylen)
--{
-- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
-- newkey, newkeylen);
--}
--
--int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
-- int i;
--
-- for (i = 0; i < tp->md5sig_info->entries4; i++) {
-- if (tp->md5sig_info->keys4[i].addr == addr) {
-- /* Free the key */
-- kfree(tp->md5sig_info->keys4[i].base.key);
-- tp->md5sig_info->entries4--;
--
-- if (tp->md5sig_info->entries4 == 0) {
-- kfree(tp->md5sig_info->keys4);
-- tp->md5sig_info->keys4 = NULL;
-- tp->md5sig_info->alloced4 = 0;
-- } else if (tp->md5sig_info->entries4 != i) {
-- /* Need to do some manipulation */
-- memcpy(&tp->md5sig_info->keys4[i],
-- &tp->md5sig_info->keys4[i+1],
-- (tp->md5sig_info->entries4 - i) *
-- sizeof(struct tcp4_md5sig_key));
-- }
-- tcp_free_md5sig_pool();
-- return 0;
-- }
-- }
-- return -ENOENT;
--}
--
--EXPORT_SYMBOL(tcp_v4_md5_do_del);
--
--static void tcp_v4_clear_md5_list(struct sock *sk)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- /* Free each key, then the set of key keys,
-- * the crypto element, and then decrement our
-- * hold on the last resort crypto.
-- */
-- if (tp->md5sig_info->entries4) {
-- int i;
-- for (i = 0; i < tp->md5sig_info->entries4; i++)
-- kfree(tp->md5sig_info->keys4[i].base.key);
-- tp->md5sig_info->entries4 = 0;
-- tcp_free_md5sig_pool();
-- }
-- if (tp->md5sig_info->keys4) {
-- kfree(tp->md5sig_info->keys4);
-- tp->md5sig_info->keys4 = NULL;
-- tp->md5sig_info->alloced4 = 0;
-- }
--}
--
--static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
-- int optlen)
--{
-- struct tcp_md5sig cmd;
-- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
-- u8 *newkey;
--
-- if (optlen < sizeof(cmd))
-- return -EINVAL;
--
-- if (copy_from_user(&cmd, optval, sizeof(cmd)))
-- return -EFAULT;
--
-- if (sin->sin_family != AF_INET)
-- return -EINVAL;
--
-- if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
-- if (!tcp_sk(sk)->md5sig_info)
-- return -ENOENT;
-- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
-- }
--
-- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
-- return -EINVAL;
--
-- if (!tcp_sk(sk)->md5sig_info) {
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
--
-- if (!p)
-- return -EINVAL;
--
-- tp->md5sig_info = p;
-- sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-- }
--
-- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
-- if (!newkey)
-- return -ENOMEM;
-- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
-- newkey, cmd.tcpm_keylen);
--}
--
--static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- __be32 saddr, __be32 daddr,
-- struct tcphdr *th, int protocol,
-- int tcplen)
--{
-- struct scatterlist sg[4];
-- __u16 data_len;
-- int block = 0;
-- __sum16 old_checksum;
-- struct tcp_md5sig_pool *hp;
-- struct tcp4_pseudohdr *bp;
-- struct hash_desc *desc;
-- int err;
-- unsigned int nbytes = 0;
--
-- /*
-- * Okay, so RFC2385 is turned on for this connection,
-- * so we need to generate the MD5 hash for the packet now.
-- */
--
-- hp = tcp_get_md5sig_pool();
-- if (!hp)
-- goto clear_hash_noput;
--
-- bp = &hp->md5_blk.ip4;
-- desc = &hp->md5_desc;
--
-- /*
-- * 1. the TCP pseudo-header (in the order: source IP address,
-- * destination IP address, zero-padded protocol number, and
-- * segment length)
-- */
-- bp->saddr = saddr;
-- bp->daddr = daddr;
-- bp->pad = 0;
-- bp->protocol = protocol;
-- bp->len = htons(tcplen);
-- sg_set_buf(&sg[block++], bp, sizeof(*bp));
-- nbytes += sizeof(*bp);
--
-- /* 2. the TCP header, excluding options, and assuming a
-- * checksum of zero/
-- */
-- old_checksum = th->check;
-- th->check = 0;
-- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
-- nbytes += sizeof(struct tcphdr);
--
-- /* 3. the TCP segment data (if any) */
-- data_len = tcplen - (th->doff << 2);
-- if (data_len > 0) {
-- unsigned char *data = (unsigned char *)th + (th->doff << 2);
-- sg_set_buf(&sg[block++], data, data_len);
-- nbytes += data_len;
-- }
--
-- /* 4. an independently-specified key or password, known to both
-- * TCPs and presumably connection-specific
-- */
-- sg_set_buf(&sg[block++], key->key, key->keylen);
-- nbytes += key->keylen;
--
-- /* Now store the Hash into the packet */
-- err = crypto_hash_init(desc);
-- if (err)
-- goto clear_hash;
-- err = crypto_hash_update(desc, sg, nbytes);
-- if (err)
-- goto clear_hash;
-- err = crypto_hash_final(desc, md5_hash);
-- if (err)
-- goto clear_hash;
--
-- /* Reset header, and free up the crypto */
-- tcp_put_md5sig_pool();
-- th->check = old_checksum;
--
--out:
-- return 0;
--clear_hash:
-- tcp_put_md5sig_pool();
--clear_hash_noput:
-- memset(md5_hash, 0, 16);
-- goto out;
--}
--
--int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
-- struct sock *sk,
-- struct dst_entry *dst,
-- struct request_sock *req,
-- struct tcphdr *th, int protocol,
-- int tcplen)
--{
-- __be32 saddr, daddr;
--
-- if (sk) {
-- saddr = inet_sk(sk)->saddr;
-- daddr = inet_sk(sk)->daddr;
-- } else {
-- struct rtable *rt = (struct rtable *)dst;
-- BUG_ON(!rt);
-- saddr = rt->rt_src;
-- daddr = rt->rt_dst;
-- }
-- return tcp_v4_do_calc_md5_hash(md5_hash, key,
-- saddr, daddr,
-- th, protocol, tcplen);
--}
--
--EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
--
--static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
--{
-- /*
-- * This gets called for each TCP segment that arrives
-- * so we want to be efficient.
-- * We have 3 drop cases:
-- * o No MD5 hash and one expected.
-- * o MD5 hash and we're not expecting one.
-- * o MD5 hash and its wrong.
-- */
-- __u8 *hash_location = NULL;
-- struct tcp_md5sig_key *hash_expected;
-- const struct iphdr *iph = ip_hdr(skb);
-- struct tcphdr *th = tcp_hdr(skb);
-- int length = (th->doff << 2) - sizeof(struct tcphdr);
-- int genhash;
-- unsigned char *ptr;
-- unsigned char newhash[16];
--
-- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
--
-- /*
-- * If the TCP option length is less than the TCP_MD5SIG
-- * option length, then we can shortcut
-- */
-- if (length < TCPOLEN_MD5SIG) {
-- if (hash_expected)
-- return 1;
-- else
-- return 0;
-- }
--
-- /* Okay, we can't shortcut - we have to grub through the options */
-- ptr = (unsigned char *)(th + 1);
-- while (length > 0) {
-- int opcode = *ptr++;
-- int opsize;
--
-- switch (opcode) {
-- case TCPOPT_EOL:
-- goto done_opts;
-- case TCPOPT_NOP:
-- length--;
-- continue;
-- default:
-- opsize = *ptr++;
-- if (opsize < 2)
-- goto done_opts;
-- if (opsize > length)
-- goto done_opts;
--
-- if (opcode == TCPOPT_MD5SIG) {
-- hash_location = ptr;
-- goto done_opts;
-- }
-- }
-- ptr += opsize-2;
-- length -= opsize;
-- }
--done_opts:
-- /* We've parsed the options - do we have a hash? */
-- if (!hash_expected && !hash_location)
-- return 0;
--
-- if (hash_expected && !hash_location) {
-- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest));
-- return 1;
-- }
--
-- if (!hash_expected && hash_location) {
-- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest));
-- return 1;
-- }
--
-- /* Okay, so this is hash_expected and hash_location -
-- * so we need to calculate the checksum.
-- */
-- genhash = tcp_v4_do_calc_md5_hash(newhash,
-- hash_expected,
-- iph->saddr, iph->daddr,
-- th, sk->sk_protocol,
-- skb->len);
--
-- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
-- if (net_ratelimit()) {
-- printk(KERN_INFO "MD5 Hash failed for "
-- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
-- NIPQUAD(iph->saddr), ntohs(th->source),
-- NIPQUAD(iph->daddr), ntohs(th->dest),
-- genhash ? " tcp_v4_calc_md5_hash failed" : "");
-- }
-- return 1;
-- }
-- return 0;
--}
--
--#endif
--
--struct request_sock_ops tcp_request_sock_ops __read_mostly = {
-- .family = PF_INET,
-- .obj_size = sizeof(struct tcp_request_sock),
-- .rtx_syn_ack = tcp_v4_send_synack,
-- .send_ack = tcp_v4_reqsk_send_ack,
-- .destructor = tcp_v4_reqsk_destructor,
-- .send_reset = tcp_v4_send_reset,
--};
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
-- .md5_lookup = tcp_v4_reqsk_md5_lookup,
--};
--#endif
--
--static struct timewait_sock_ops tcp_timewait_sock_ops = {
-- .twsk_obj_size = sizeof(struct tcp_timewait_sock),
-- .twsk_unique = tcp_twsk_unique,
-- .twsk_destructor= tcp_twsk_destructor,
--};
--
--int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
--{
-- struct inet_request_sock *ireq;
-- struct tcp_options_received tmp_opt;
-- struct request_sock *req;
-- __be32 saddr = ip_hdr(skb)->saddr;
-- __be32 daddr = ip_hdr(skb)->daddr;
-- __u32 isn = TCP_SKB_CB(skb)->when;
-- struct dst_entry *dst = NULL;
--#ifdef CONFIG_SYN_COOKIES
-- int want_cookie = 0;
--#else
--#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
--#endif
--
-- /* Never answer to SYNs send to broadcast or multicast */
-- if (((struct rtable *)skb->dst)->rt_flags &
-- (RTCF_BROADCAST | RTCF_MULTICAST))
-- goto drop;
--
-- /* TW buckets are converted to open requests without
-- * limitations, they conserve resources and peer is
-- * evidently real one.
-- */
-- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
--#ifdef CONFIG_SYN_COOKIES
-- if (sysctl_tcp_syncookies) {
-- want_cookie = 1;
-- } else
--#endif
-- goto drop;
-- }
--
-- /* Accept backlog is full. If we have already queued enough
-- * of warm entries in syn queue, drop request. It is better than
-- * clogging syn queue with openreqs with exponentially increasing
-- * timeout.
-- */
-- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
-- goto drop;
--
-- req = reqsk_alloc(&tcp_request_sock_ops);
-- if (!req)
-- goto drop;
--
--#ifdef CONFIG_TCP_MD5SIG
-- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
--#endif
--
-- tcp_clear_options(&tmp_opt);
-- tmp_opt.mss_clamp = 536;
-- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
--
-- tcp_parse_options(skb, &tmp_opt, 0);
--
-- if (want_cookie) {
-- tcp_clear_options(&tmp_opt);
-- tmp_opt.saw_tstamp = 0;
-- }
--
-- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
-- /* Some OSes (unknown ones, but I see them on web server, which
-- * contains information interesting only for windows'
-- * users) do not send their stamp in SYN. It is easy case.
-- * We simply do not advertise TS support.
-- */
-- tmp_opt.saw_tstamp = 0;
-- tmp_opt.tstamp_ok = 0;
-- }
-- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
--
-- tcp_openreq_init(req, &tmp_opt, skb);
--
-- if (security_inet_conn_request(sk, skb, req))
-- goto drop_and_free;
--
-- ireq = inet_rsk(req);
-- ireq->loc_addr = daddr;
-- ireq->rmt_addr = saddr;
-- ireq->opt = tcp_v4_save_options(sk, skb);
-- if (!want_cookie)
-- TCP_ECN_create_request(req, tcp_hdr(skb));
--
-- if (want_cookie) {
--#ifdef CONFIG_SYN_COOKIES
-- syn_flood_warning(skb);
--#endif
-- isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-- } else if (!isn) {
-- struct inet_peer *peer = NULL;
--
-- /* VJ's idea. We save last timestamp seen
-- * from the destination in peer table, when entering
-- * state TIME-WAIT, and check against it before
-- * accepting new connection request.
-- *
-- * If "isn" is not zero, this request hit alive
-- * timewait bucket, so that all the necessary checks
-- * are made in the function processing timewait state.
-- */
-- if (tmp_opt.saw_tstamp &&
-- tcp_death_row.sysctl_tw_recycle &&
-- (dst = inet_csk_route_req(sk, req)) != NULL &&
-- (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
-- peer->v4daddr == saddr) {
-- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
-- (s32)(peer->tcp_ts - req->ts_recent) >
-- TCP_PAWS_WINDOW) {
-- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
-- dst_release(dst);
-- goto drop_and_free;
-- }
-- }
-- /* Kill the following clause, if you dislike this way. */
-- else if (!sysctl_tcp_syncookies &&
-- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
-- (sysctl_max_syn_backlog >> 2)) &&
-- (!peer || !peer->tcp_ts_stamp) &&
-- (!dst || !dst_metric(dst, RTAX_RTT))) {
-- /* Without syncookies last quarter of
-- * backlog is filled with destinations,
-- * proven to be alive.
-- * It means that we continue to communicate
-- * to destinations, already remembered
-- * to the moment of synflood.
-- */
-- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
-- "request from %u.%u.%u.%u/%u\n",
-- NIPQUAD(saddr),
-- ntohs(tcp_hdr(skb)->source));
-- dst_release(dst);
-- goto drop_and_free;
-- }
--
-- isn = tcp_v4_init_sequence(skb);
-- }
-- tcp_rsk(req)->snt_isn = isn;
--
-- if (tcp_v4_send_synack(sk, req, dst))
-- goto drop_and_free;
--
-- if (want_cookie) {
-- reqsk_free(req);
-- } else {
-- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-- }
-- return 0;
--
--drop_and_free:
-- reqsk_free(req);
--drop:
-- return 0;
--}
--
--
--/*
-- * The three way handshake has completed - we got a valid synack -
-- * now create the new socket.
-- */
--struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
-- struct request_sock *req,
-- struct dst_entry *dst)
--{
-- struct inet_request_sock *ireq;
-- struct inet_sock *newinet;
-- struct tcp_sock *newtp;
-- struct sock *newsk;
--#ifdef CONFIG_TCP_MD5SIG
-- struct tcp_md5sig_key *key;
--#endif
--
-- if (sk_acceptq_is_full(sk))
-- goto exit_overflow;
--
-- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
-- goto exit;
--
-- newsk = tcp_create_openreq_child(sk, req, skb);
-- if (!newsk)
-- goto exit;
--
-- newsk->sk_gso_type = SKB_GSO_TCPV4;
-- sk_setup_caps(newsk, dst);
--
-- newtp = tcp_sk(newsk);
-- newinet = inet_sk(newsk);
-- ireq = inet_rsk(req);
-- newinet->daddr = ireq->rmt_addr;
-- newinet->rcv_saddr = ireq->loc_addr;
-- newinet->saddr = ireq->loc_addr;
-- newinet->opt = ireq->opt;
-- ireq->opt = NULL;
-- newinet->mc_index = inet_iif(skb);
-- newinet->mc_ttl = ip_hdr(skb)->ttl;
-- inet_csk(newsk)->icsk_ext_hdr_len = 0;
-- if (newinet->opt)
-- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
-- newinet->id = newtp->write_seq ^ jiffies;
--
-- tcp_mtup_init(newsk);
-- tcp_sync_mss(newsk, dst_mtu(dst));
-- newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
-- tcp_initialize_rcv_mss(newsk);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /* Copy over the MD5 key from the original socket */
-- if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
-- /*
-- * We're using one, so create a matching key
-- * on the newsk structure. If we fail to get
-- * memory, then we end up not copying the key
-- * across. Shucks.
-- */
-- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
-- if (newkey != NULL)
-- tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
-- newkey, key->keylen);
-- }
--#endif
--
-- __inet_hash(&tcp_hashinfo, newsk, 0);
-- __inet_inherit_port(&tcp_hashinfo, sk, newsk);
--
-- return newsk;
--
--exit_overflow:
-- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
--exit:
-- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
-- dst_release(dst);
-- return NULL;
--}
--
--static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
--{
-- struct tcphdr *th = tcp_hdr(skb);
-- const struct iphdr *iph = ip_hdr(skb);
-- struct sock *nsk;
-- struct request_sock **prev;
-- /* Find possible connection requests. */
-- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
-- iph->saddr, iph->daddr);
-- if (req)
-- return tcp_check_req(sk, skb, req, prev);
--
-- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
--
-- if (nsk) {
-- if (nsk->sk_state != TCP_TIME_WAIT) {
-- bh_lock_sock(nsk);
-- return nsk;
-- }
-- inet_twsk_put(inet_twsk(nsk));
-- return NULL;
-- }
--
--#ifdef CONFIG_SYN_COOKIES
-- if (!th->rst && !th->syn && th->ack)
-- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
--#endif
-- return sk;
--}
--
--static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
--{
-- const struct iphdr *iph = ip_hdr(skb);
--
-- if (skb->ip_summed == CHECKSUM_COMPLETE) {
-- if (!tcp_v4_check(skb->len, iph->saddr,
-- iph->daddr, skb->csum)) {
-- skb->ip_summed = CHECKSUM_UNNECESSARY;
-- return 0;
-- }
-- }
--
-- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-- skb->len, IPPROTO_TCP, 0);
--
-- if (skb->len <= 76) {
-- return __skb_checksum_complete(skb);
-- }
-- return 0;
--}
--
--
--/* The socket must have it's spinlock held when we get
-- * here.
-- *
-- * We have a potential double-lock case here, so even when
-- * doing backlog processing we use the BH locking scheme.
-- * This is because we cannot sleep with the original spinlock
-- * held.
-- */
--int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
--{
-- struct sock *rsk;
--#ifdef CONFIG_TCP_MD5SIG
-- /*
-- * We really want to reject the packet as early as possible
-- * if:
-- * o We're expecting an MD5'd packet and this is no MD5 tcp option
-- * o There is an MD5 option and we're not expecting one
-- */
-- if (tcp_v4_inbound_md5_hash(sk, skb))
-- goto discard;
--#endif
--
-- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
-- TCP_CHECK_TIMER(sk);
-- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
-- rsk = sk;
-- goto reset;
-- }
-- TCP_CHECK_TIMER(sk);
-- return 0;
-- }
--
-- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
-- goto csum_err;
--
-- if (sk->sk_state == TCP_LISTEN) {
-- struct sock *nsk = tcp_v4_hnd_req(sk, skb);
-- if (!nsk)
-- goto discard;
--
-- if (nsk != sk) {
-- if (tcp_child_process(sk, nsk, skb)) {
-- rsk = nsk;
-- goto reset;
-- }
-- return 0;
-- }
-- }
--
-- TCP_CHECK_TIMER(sk);
-- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
-- rsk = sk;
-- goto reset;
-- }
-- TCP_CHECK_TIMER(sk);
-- return 0;
--
--reset:
-- tcp_v4_send_reset(rsk, skb);
--discard:
-- kfree_skb(skb);
-- /* Be careful here. If this function gets more complicated and
-- * gcc suffers from register pressure on the x86, sk (in %ebx)
-- * might be destroyed here. This current version compiles correctly,
-- * but you have been warned.
-- */
-- return 0;
--
--csum_err:
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- goto discard;
--}
--
--/*
-- * From tcp_input.c
-- */
--
--int tcp_v4_rcv(struct sk_buff *skb)
--{
-- const struct iphdr *iph;
-- struct tcphdr *th;
-- struct sock *sk;
-- int ret;
--
-- if (skb->pkt_type != PACKET_HOST)
-- goto discard_it;
--
-- /* Count it even if it's bad */
-- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
--
-- if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
-- goto discard_it;
--
-- th = tcp_hdr(skb);
--
-- if (th->doff < sizeof(struct tcphdr) / 4)
-- goto bad_packet;
-- if (!pskb_may_pull(skb, th->doff * 4))
-- goto discard_it;
--
-- /* An explanation is required here, I think.
-- * Packet length and doff are validated by header prediction,
-- * provided case of th->doff==0 is eliminated.
-- * So, we defer the checks. */
-- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
-- goto bad_packet;
--
-- th = tcp_hdr(skb);
-- iph = ip_hdr(skb);
-- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
-- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
-- skb->len - th->doff * 4);
-- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
-- TCP_SKB_CB(skb)->when = 0;
-- TCP_SKB_CB(skb)->flags = iph->tos;
-- TCP_SKB_CB(skb)->sacked = 0;
--
-- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
-- iph->daddr, th->dest, inet_iif(skb));
-- if (!sk)
-- goto no_tcp_socket;
--
--process:
-- if (sk->sk_state == TCP_TIME_WAIT)
-- goto do_time_wait;
--
-- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
-- goto discard_and_relse;
-- nf_reset(skb);
--
-- if (sk_filter(sk, skb))
-- goto discard_and_relse;
--
-- skb->dev = NULL;
--
-- bh_lock_sock_nested(sk);
-- ret = 0;
-- if (!sock_owned_by_user(sk)) {
--#ifdef CONFIG_NET_DMA
-- struct tcp_sock *tp = tcp_sk(sk);
-- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
-- tp->ucopy.dma_chan = get_softnet_dma();
-- if (tp->ucopy.dma_chan)
-- ret = tcp_v4_do_rcv(sk, skb);
-- else
--#endif
-- {
-- if (!tcp_prequeue(sk, skb))
-- ret = tcp_v4_do_rcv(sk, skb);
-- }
-- } else
-- sk_add_backlog(sk, skb);
-- bh_unlock_sock(sk);
--
-- sock_put(sk);
--
-- return ret;
--
--no_tcp_socket:
-- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
-- goto discard_it;
--
-- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
--bad_packet:
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- } else {
-- tcp_v4_send_reset(NULL, skb);
-- }
--
--discard_it:
-- /* Discard frame. */
-- kfree_skb(skb);
-- return 0;
--
--discard_and_relse:
-- sock_put(sk);
-- goto discard_it;
--
--do_time_wait:
-- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-- inet_twsk_put(inet_twsk(sk));
-- goto discard_it;
-- }
--
-- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
-- TCP_INC_STATS_BH(TCP_MIB_INERRS);
-- inet_twsk_put(inet_twsk(sk));
-- goto discard_it;
-- }
-- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
-- case TCP_TW_SYN: {
-- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
-- iph->daddr, th->dest,
-- inet_iif(skb));
-- if (sk2) {
-- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
-- inet_twsk_put(inet_twsk(sk));
-- sk = sk2;
-- goto process;
-- }
-- /* Fall through to ACK */
-- }
-- case TCP_TW_ACK:
-- tcp_v4_timewait_ack(sk, skb);
-- break;
-- case TCP_TW_RST:
-- goto no_tcp_socket;
-- case TCP_TW_SUCCESS:;
-- }
-- goto discard_it;
--}
--
--/* VJ's idea. Save last timestamp seen from this destination
-- * and hold it at least for normal timewait interval to use for duplicate
-- * segment detection in subsequent connections, before they enter synchronized
-- * state.
-- */
--
--int tcp_v4_remember_stamp(struct sock *sk)
--{
-- struct inet_sock *inet = inet_sk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
-- struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
-- struct inet_peer *peer = NULL;
-- int release_it = 0;
--
-- if (!rt || rt->rt_dst != inet->daddr) {
-- peer = inet_getpeer(inet->daddr, 1);
-- release_it = 1;
-- } else {
-- if (!rt->peer)
-- rt_bind_peer(rt, 1);
-- peer = rt->peer;
-- }
--
-- if (peer) {
-- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
-- peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
-- peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
-- peer->tcp_ts = tp->rx_opt.ts_recent;
-- }
-- if (release_it)
-- inet_putpeer(peer);
-- return 1;
-- }
--
-- return 0;
--}
--
--int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
--{
-- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
--
-- if (peer) {
-- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
--
-- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
-- peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
-- peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
-- peer->tcp_ts = tcptw->tw_ts_recent;
-- }
-- inet_putpeer(peer);
-- return 1;
-- }
--
-- return 0;
--}
--
--struct inet_connection_sock_af_ops ipv4_specific = {
-- .queue_xmit = ip_queue_xmit,
-- .send_check = tcp_v4_send_check,
-- .rebuild_header = inet_sk_rebuild_header,
-- .conn_request = tcp_v4_conn_request,
-- .syn_recv_sock = tcp_v4_syn_recv_sock,
-- .remember_stamp = tcp_v4_remember_stamp,
-- .net_header_len = sizeof(struct iphdr),
-- .setsockopt = ip_setsockopt,
-- .getsockopt = ip_getsockopt,
-- .addr2sockaddr = inet_csk_addr2sockaddr,
-- .sockaddr_len = sizeof(struct sockaddr_in),
--#ifdef CONFIG_COMPAT
-- .compat_setsockopt = compat_ip_setsockopt,
-- .compat_getsockopt = compat_ip_getsockopt,
--#endif
--};
--
--#ifdef CONFIG_TCP_MD5SIG
--static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
-- .md5_lookup = tcp_v4_md5_lookup,
-- .calc_md5_hash = tcp_v4_calc_md5_hash,
-- .md5_add = tcp_v4_md5_add_func,
-- .md5_parse = tcp_v4_parse_md5_keys,
--};
--#endif
--
--/* NOTE: A lot of things set to zero explicitly by call to
-- * sk_alloc() so need not be done here.
-- */
--static int tcp_v4_init_sock(struct sock *sk)
--{
-- struct inet_connection_sock *icsk = inet_csk(sk);
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- skb_queue_head_init(&tp->out_of_order_queue);
-- tcp_init_xmit_timers(sk);
-- tcp_prequeue_init(tp);
--
-- icsk->icsk_rto = TCP_TIMEOUT_INIT;
-- tp->mdev = TCP_TIMEOUT_INIT;
--
-- /* So many TCP implementations out there (incorrectly) count the
-- * initial SYN frame in their delayed-ACK and congestion control
-- * algorithms that we must have the following bandaid to talk
-- * efficiently to them. -DaveM
-- */
-- tp->snd_cwnd = 2;
--
-- /* See draft-stevens-tcpca-spec-01 for discussion of the
-- * initialization of these values.
-- */
-- tp->snd_ssthresh = 0x7fffffff; /* Infinity */
-- tp->snd_cwnd_clamp = ~0;
-- tp->mss_cache = 536;
--
-- tp->reordering = sysctl_tcp_reordering;
-- icsk->icsk_ca_ops = &tcp_init_congestion_ops;
--
-- sk->sk_state = TCP_CLOSE;
--
-- sk->sk_write_space = sk_stream_write_space;
-- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
--
-- icsk->icsk_af_ops = &ipv4_specific;
-- icsk->icsk_sync_mss = tcp_sync_mss;
--#ifdef CONFIG_TCP_MD5SIG
-- tp->af_specific = &tcp_sock_ipv4_specific;
--#endif
--
-- sk->sk_sndbuf = sysctl_tcp_wmem[1];
-- sk->sk_rcvbuf = sysctl_tcp_rmem[1];
--
-- atomic_inc(&tcp_sockets_allocated);
--
-- return 0;
--}
--
--int tcp_v4_destroy_sock(struct sock *sk)
--{
-- struct tcp_sock *tp = tcp_sk(sk);
--
-- tcp_clear_xmit_timers(sk);
--
-- tcp_cleanup_congestion_control(sk);
--
-- /* Cleanup up the write buffer. */
-- tcp_write_queue_purge(sk);
--
-- /* Cleans up our, hopefully empty, out_of_order_queue. */
-- __skb_queue_purge(&tp->out_of_order_queue);
--
--#ifdef CONFIG_TCP_MD5SIG
-- /* Clean up the MD5 key list, if any */
-- if (tp->md5sig_info) {
-- tcp_v4_clear_md5_list(sk);
-- kfree(tp->md5sig_info);
-- tp->md5sig_info = NULL;
-- }
--#endif
--
--#ifdef CONFIG_NET_DMA
-- /* Cleans up our sk_async_wait_queue */
-- __skb_queue_purge(&sk->sk_async_wait_queue);
--#endif
--
-- /* Clean prequeue, it must be empty really */
-- __skb_queue_purge(&tp->ucopy.prequeue);
--
-- /* Clean up a referenced TCP bind bucket. */
-- if (inet_csk(sk)->icsk_bind_hash)
-- inet_put_port(&tcp_hashinfo, sk);
--
-- /*
-- * If sendmsg cached page exists, toss it.
-- */
-- if (sk->sk_sndmsg_page) {
-- __free_page(sk->sk_sndmsg_page);
-- sk->sk_sndmsg_page = NULL;
-- }
--
-- atomic_dec(&tcp_sockets_allocated);
--
-- return 0;
--}
--
--EXPORT_SYMBOL(tcp_v4_destroy_sock);
--
--#ifdef CONFIG_PROC_FS
--/* Proc filesystem TCP sock list dumping. */
--
--static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
--{
-- return hlist_empty(head) ? NULL :
-- list_entry(head->first, struct inet_timewait_sock, tw_node);
--}
--
--static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
--{
-- return tw->tw_node.next ?
-- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
--}
--
--static void *listening_get_next(struct seq_file *seq, void *cur)
--{
-- struct inet_connection_sock *icsk;
-- struct hlist_node *node;
-- struct sock *sk = cur;
-- struct tcp_iter_state* st = seq->private;
--
-- if (!sk) {
-- st->bucket = 0;
-- sk = sk_head(&tcp_hashinfo.listening_hash[0]);
-- goto get_sk;
-- }
--
-- ++st->num;
--
-- if (st->state == TCP_SEQ_STATE_OPENREQ) {
-- struct request_sock *req = cur;
--
-- icsk = inet_csk(st->syn_wait_sk);
-- req = req->dl_next;
-- while (1) {
-- while (req) {
-- if (req->rsk_ops->family == st->family) {
-- cur = req;
-- goto out;
-- }
-- req = req->dl_next;
-- }
-- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
-- break;
--get_req:
-- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
-- }
-- sk = sk_next(st->syn_wait_sk);
-- st->state = TCP_SEQ_STATE_LISTENING;
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- } else {
-- icsk = inet_csk(sk);
-- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- if (reqsk_queue_len(&icsk->icsk_accept_queue))
-- goto start_req;
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- sk = sk_next(sk);
-- }
--get_sk:
-- sk_for_each_from(sk, node) {
-- if (sk->sk_family == st->family) {
-- cur = sk;
-- goto out;
-- }
-- icsk = inet_csk(sk);
-- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
--start_req:
-- st->uid = sock_i_uid(sk);
-- st->syn_wait_sk = sk;
-- st->state = TCP_SEQ_STATE_OPENREQ;
-- st->sbucket = 0;
-- goto get_req;
-- }
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- }
-- if (++st->bucket < INET_LHTABLE_SIZE) {
-- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
-- goto get_sk;
-- }
-- cur = NULL;
--out:
-- return cur;
--}
--
--static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
--{
-- void *rc = listening_get_next(seq, NULL);
--
-- while (rc && *pos) {
-- rc = listening_get_next(seq, rc);
-- --*pos;
-- }
-- return rc;
--}
--
--static void *established_get_first(struct seq_file *seq)
--{
-- struct tcp_iter_state* st = seq->private;
-- void *rc = NULL;
--
-- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
-- struct sock *sk;
-- struct hlist_node *node;
-- struct inet_timewait_sock *tw;
--
-- /* We can reschedule _before_ having picked the target: */
-- cond_resched_softirq();
--
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-- if (sk->sk_family != st->family) {
-- continue;
-- }
-- rc = sk;
-- goto out;
-- }
-- st->state = TCP_SEQ_STATE_TIME_WAIT;
-- inet_twsk_for_each(tw, node,
-- &tcp_hashinfo.ehash[st->bucket].twchain) {
-- if (tw->tw_family != st->family) {
-- continue;
-- }
-- rc = tw;
-- goto out;
-- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- }
--out:
-- return rc;
--}
--
--static void *established_get_next(struct seq_file *seq, void *cur)
--{
-- struct sock *sk = cur;
-- struct inet_timewait_sock *tw;
-- struct hlist_node *node;
-- struct tcp_iter_state* st = seq->private;
--
-- ++st->num;
--
-- if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
-- tw = cur;
-- tw = tw_next(tw);
--get_tw:
-- while (tw && tw->tw_family != st->family) {
-- tw = tw_next(tw);
-- }
-- if (tw) {
-- cur = tw;
-- goto out;
-- }
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
--
-- /* We can reschedule between buckets: */
-- cond_resched_softirq();
--
-- if (++st->bucket < tcp_hashinfo.ehash_size) {
-- read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
-- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-- } else {
-- cur = NULL;
-- goto out;
-- }
-- } else
-- sk = sk_next(sk);
--
-- sk_for_each_from(sk, node) {
-- if (sk->sk_family == st->family)
-- goto found;
-- }
--
-- st->state = TCP_SEQ_STATE_TIME_WAIT;
-- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
-- goto get_tw;
--found:
-- cur = sk;
--out:
-- return cur;
--}
--
--static void *established_get_idx(struct seq_file *seq, loff_t pos)
--{
-- void *rc = established_get_first(seq);
--
-- while (rc && pos) {
-- rc = established_get_next(seq, rc);
-- --pos;
-- }
-- return rc;
--}
--
--static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
--{
-- void *rc;
-- struct tcp_iter_state* st = seq->private;
--
-- inet_listen_lock(&tcp_hashinfo);
-- st->state = TCP_SEQ_STATE_LISTENING;
-- rc = listening_get_idx(seq, &pos);
--
-- if (!rc) {
-- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- rc = established_get_idx(seq, pos);
-- }
--
-- return rc;
--}
--
--static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
--{
-- struct tcp_iter_state* st = seq->private;
-- st->state = TCP_SEQ_STATE_LISTENING;
-- st->num = 0;
-- return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
--}
--
--static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- void *rc = NULL;
-- struct tcp_iter_state* st;
--
-- if (v == SEQ_START_TOKEN) {
-- rc = tcp_get_idx(seq, 0);
-- goto out;
-- }
-- st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_OPENREQ:
-- case TCP_SEQ_STATE_LISTENING:
-- rc = listening_get_next(seq, v);
-- if (!rc) {
-- inet_listen_unlock(&tcp_hashinfo);
-- local_bh_disable();
-- st->state = TCP_SEQ_STATE_ESTABLISHED;
-- rc = established_get_first(seq);
-- }
-- break;
-- case TCP_SEQ_STATE_ESTABLISHED:
-- case TCP_SEQ_STATE_TIME_WAIT:
-- rc = established_get_next(seq, v);
-- break;
-- }
--out:
-- ++*pos;
-- return rc;
--}
--
--static void tcp_seq_stop(struct seq_file *seq, void *v)
--{
-- struct tcp_iter_state* st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_OPENREQ:
-- if (v) {
-- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
-- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-- }
-- case TCP_SEQ_STATE_LISTENING:
-- if (v != SEQ_START_TOKEN)
-- inet_listen_unlock(&tcp_hashinfo);
-- break;
-- case TCP_SEQ_STATE_TIME_WAIT:
-- case TCP_SEQ_STATE_ESTABLISHED:
-- if (v)
-- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
-- local_bh_enable();
-- break;
-- }
--}
--
--static int tcp_seq_open(struct inode *inode, struct file *file)
--{
-- struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
-- struct seq_file *seq;
-- struct tcp_iter_state *s;
-- int rc;
--
-- if (unlikely(afinfo == NULL))
-- return -EINVAL;
--
-- s = kzalloc(sizeof(*s), GFP_KERNEL);
-- if (!s)
-- return -ENOMEM;
-- s->family = afinfo->family;
-- s->seq_ops.start = tcp_seq_start;
-- s->seq_ops.next = tcp_seq_next;
-- s->seq_ops.show = afinfo->seq_show;
-- s->seq_ops.stop = tcp_seq_stop;
--
-- rc = seq_open(file, &s->seq_ops);
-- if (rc)
-- goto out_kfree;
-- seq = file->private_data;
-- seq->private = s;
--out:
-- return rc;
--out_kfree:
-- kfree(s);
-- goto out;
--}
--
--int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
--{
-- int rc = 0;
-- struct proc_dir_entry *p;
--
-- if (!afinfo)
-- return -EINVAL;
-- afinfo->seq_fops->owner = afinfo->owner;
-- afinfo->seq_fops->open = tcp_seq_open;
-- afinfo->seq_fops->read = seq_read;
-- afinfo->seq_fops->llseek = seq_lseek;
-- afinfo->seq_fops->release = seq_release_private;
--
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-- if (p)
-- p->data = afinfo;
-- else
-- rc = -ENOMEM;
-- return rc;
--}
--
--void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
--{
-- if (!afinfo)
-- return;
-- proc_net_remove(afinfo->name);
-- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
--}
--
--static void get_openreq4(struct sock *sk, struct request_sock *req,
-- char *tmpbuf, int i, int uid)
--{
-- const struct inet_request_sock *ireq = inet_rsk(req);
-- int ttd = req->expires - jiffies;
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
-- i,
-- ireq->loc_addr,
-- ntohs(inet_sk(sk)->sport),
-- ireq->rmt_addr,
-- ntohs(ireq->rmt_port),
-- TCP_SYN_RECV,
-- 0, 0, /* could print option size, but that is af dependent. */
-- 1, /* timers active (only the expire timer) */
-- jiffies_to_clock_t(ttd),
-- req->retrans,
-- uid,
-- 0, /* non standard timer */
-- 0, /* open_requests have no inode */
-- atomic_read(&sk->sk_refcnt),
-- req);
--}
--
--static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
--{
-- int timer_active;
-- unsigned long timer_expires;
-- struct tcp_sock *tp = tcp_sk(sk);
-- const struct inet_connection_sock *icsk = inet_csk(sk);
-- struct inet_sock *inet = inet_sk(sk);
-- __be32 dest = inet->daddr;
-- __be32 src = inet->rcv_saddr;
-- __u16 destp = ntohs(inet->dport);
-- __u16 srcp = ntohs(inet->sport);
--
-- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
-- timer_active = 1;
-- timer_expires = icsk->icsk_timeout;
-- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
-- timer_active = 4;
-- timer_expires = icsk->icsk_timeout;
-- } else if (timer_pending(&sk->sk_timer)) {
-- timer_active = 2;
-- timer_expires = sk->sk_timer.expires;
-- } else {
-- timer_active = 0;
-- timer_expires = jiffies;
-- }
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-- "%08X %5d %8d %lu %d %p %u %u %u %u %d",
-- i, src, srcp, dest, destp, sk->sk_state,
-- tp->write_seq - tp->snd_una,
-- sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
-- (tp->rcv_nxt - tp->copied_seq),
-- timer_active,
-- jiffies_to_clock_t(timer_expires - jiffies),
-- icsk->icsk_retransmits,
-- sock_i_uid(sk),
-- icsk->icsk_probes_out,
-- sock_i_ino(sk),
-- atomic_read(&sk->sk_refcnt), sk,
-- icsk->icsk_rto,
-- icsk->icsk_ack.ato,
-- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
-- tp->snd_cwnd,
-- tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
--}
--
--static void get_timewait4_sock(struct inet_timewait_sock *tw,
-- char *tmpbuf, int i)
--{
-- __be32 dest, src;
-- __u16 destp, srcp;
-- int ttd = tw->tw_ttd - jiffies;
--
-- if (ttd < 0)
-- ttd = 0;
--
-- dest = tw->tw_daddr;
-- src = tw->tw_rcv_saddr;
-- destp = ntohs(tw->tw_dport);
-- srcp = ntohs(tw->tw_sport);
--
-- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
-- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
-- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
-- atomic_read(&tw->tw_refcnt), tw);
--}
--
--#define TMPSZ 150
--
--static int tcp4_seq_show(struct seq_file *seq, void *v)
--{
-- struct tcp_iter_state* st;
-- char tmpbuf[TMPSZ + 1];
--
-- if (v == SEQ_START_TOKEN) {
-- seq_printf(seq, "%-*s\n", TMPSZ - 1,
-- " sl local_address rem_address st tx_queue "
-- "rx_queue tr tm->when retrnsmt uid timeout "
-- "inode");
-- goto out;
-- }
-- st = seq->private;
--
-- switch (st->state) {
-- case TCP_SEQ_STATE_LISTENING:
-- case TCP_SEQ_STATE_ESTABLISHED:
-- get_tcp4_sock(v, tmpbuf, st->num);
-- break;
-- case TCP_SEQ_STATE_OPENREQ:
-- get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
-- break;
-- case TCP_SEQ_STATE_TIME_WAIT:
-- get_timewait4_sock(v, tmpbuf, st->num);
-- break;
-- }
-- seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
--out:
-- return 0;
--}
--
--static struct file_operations tcp4_seq_fops;
--static struct tcp_seq_afinfo tcp4_seq_afinfo = {
-- .owner = THIS_MODULE,
-- .name = "tcp",
-- .family = AF_INET,
-- .seq_show = tcp4_seq_show,
-- .seq_fops = &tcp4_seq_fops,
--};
--
--int __init tcp4_proc_init(void)
--{
-- return tcp_proc_register(&tcp4_seq_afinfo);
--}
--
--void tcp4_proc_exit(void)
--{
-- tcp_proc_unregister(&tcp4_seq_afinfo);
--}
--#endif /* CONFIG_PROC_FS */
--
--struct proto tcp_prot = {
-- .name = "TCP",
-- .owner = THIS_MODULE,
-- .close = tcp_close,
-- .connect = tcp_v4_connect,
-- .disconnect = tcp_disconnect,
-- .accept = inet_csk_accept,
-- .ioctl = tcp_ioctl,
-- .init = tcp_v4_init_sock,
-- .destroy = tcp_v4_destroy_sock,
-- .shutdown = tcp_shutdown,
-- .setsockopt = tcp_setsockopt,
-- .getsockopt = tcp_getsockopt,
-- .recvmsg = tcp_recvmsg,
-- .backlog_rcv = tcp_v4_do_rcv,
-- .hash = tcp_v4_hash,
-- .unhash = tcp_unhash,
-- .get_port = tcp_v4_get_port,
-- .enter_memory_pressure = tcp_enter_memory_pressure,
-- .sockets_allocated = &tcp_sockets_allocated,
-- .orphan_count = &tcp_orphan_count,
-- .memory_allocated = &tcp_memory_allocated,
-- .memory_pressure = &tcp_memory_pressure,
-- .sysctl_mem = sysctl_tcp_mem,
-- .sysctl_wmem = sysctl_tcp_wmem,
-- .sysctl_rmem = sysctl_tcp_rmem,
-- .max_header = MAX_TCP_HEADER,
-- .obj_size = sizeof(struct tcp_sock),
-- .twsk_prot = &tcp_timewait_sock_ops,
-- .rsk_prot = &tcp_request_sock_ops,
--#ifdef CONFIG_COMPAT
-- .compat_setsockopt = compat_tcp_setsockopt,
-- .compat_getsockopt = compat_tcp_getsockopt,
--#endif
--};
--
--void __init tcp_v4_init(struct net_proto_family *ops)
--{
-- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
-- IPPROTO_TCP) < 0)
-- panic("Failed to create the TCP control socket.\n");
--}
--
--EXPORT_SYMBOL(ipv4_specific);
--EXPORT_SYMBOL(tcp_hashinfo);
--EXPORT_SYMBOL(tcp_prot);
--EXPORT_SYMBOL(tcp_unhash);
--EXPORT_SYMBOL(tcp_v4_conn_request);
--EXPORT_SYMBOL(tcp_v4_connect);
--EXPORT_SYMBOL(tcp_v4_do_rcv);
--EXPORT_SYMBOL(tcp_v4_remember_stamp);
--EXPORT_SYMBOL(tcp_v4_send_check);
--EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
--
--#ifdef CONFIG_PROC_FS
--EXPORT_SYMBOL(tcp_proc_register);
--EXPORT_SYMBOL(tcp_proc_unregister);
--#endif
--EXPORT_SYMBOL(sysctl_local_port_range);
--EXPORT_SYMBOL(sysctl_tcp_low_latency);
--
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-590/net/ipv4/tcp_output.c
---- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/tcp_output.c 2008-01-29 22:12:32.000000000 -0500
-@@ -432,11 +432,11 @@
- sysctl_flags = 0;
- if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-- if (sysctl_tcp_timestamps) {
-+ if (sk->sk_net->sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
-- if (sysctl_tcp_window_scaling) {
-+ if (sk->sk_net->sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
-@@ -2215,7 +2215,7 @@
- * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
- */
- tp->tcp_header_len = sizeof(struct tcphdr) +
-- (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
-+ (sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
-
- #ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk) != NULL)
-@@ -2238,7 +2238,7 @@
- tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
- &tp->rcv_wnd,
- &tp->window_clamp,
-- sysctl_tcp_window_scaling,
-+ sk->sk_net->sysctl_tcp_window_scaling,
- &rcv_wscale);
-
- tp->rx_opt.rcv_wscale = rcv_wscale;
-diff -Nurb linux-2.6.22-570/net/ipv4/tcp_probe.c linux-2.6.22-590/net/ipv4/tcp_probe.c
---- linux-2.6.22-570/net/ipv4/tcp_probe.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/tcp_probe.c 2008-01-29 22:12:32.000000000 -0500
-@@ -172,7 +172,7 @@
- if (IS_ERR(tcpw.fifo))
- return PTR_ERR(tcpw.fifo);
-
-- if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
-+ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops))
- goto err0;
-
- ret = register_jprobe(&tcp_probe);
-@@ -182,7 +182,7 @@
- pr_info("TCP watch registered (port=%d)\n", port);
- return 0;
- err1:
-- proc_net_remove(procname);
-+ proc_net_remove(&init_net, procname);
- err0:
- kfifo_free(tcpw.fifo);
- return ret;
-@@ -192,7 +192,7 @@
- static __exit void tcpprobe_exit(void)
- {
- kfifo_free(tcpw.fifo);
-- proc_net_remove(procname);
-+ proc_net_remove(&init_net, procname);
- unregister_jprobe(&tcp_probe);
-
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/tunnel4.c linux-2.6.22-590/net/ipv4/tunnel4.c
---- linux-2.6.22-570/net/ipv4/tunnel4.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/tunnel4.c 2008-01-29 22:12:32.000000000 -0500
-@@ -75,6 +75,10 @@
- {
- struct xfrm_tunnel *handler;
-
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto drop;
-
-@@ -113,6 +117,9 @@
- {
- struct xfrm_tunnel *handler;
-
-+ if (skb->dev->nd_net != &init_net)
-+ return;
-+
- for (handler = tunnel4_handlers; handler; handler = handler->next)
- if (!handler->err_handler(skb, info))
- break;
-diff -Nurb linux-2.6.22-570/net/ipv4/udp.c linux-2.6.22-590/net/ipv4/udp.c
---- linux-2.6.22-570/net/ipv4/udp.c 2008-01-29 22:12:23.000000000 -0500
-+++ linux-2.6.22-590/net/ipv4/udp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -101,6 +101,7 @@
- #include <net/route.h>
- #include <net/checksum.h>
- #include <net/xfrm.h>
-+#include <net/net_namespace.h>
- #include "udp_impl.h"
-
- /*
-@@ -112,16 +113,17 @@
- struct hlist_head udp_hash[UDP_HTABLE_SIZE];
- DEFINE_RWLOCK(udp_hash_lock);
-
--static int udp_port_rover;
--
--static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
-+static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[])
- {
- struct sock *sk;
- struct hlist_node *node;
-
-- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
-+ sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) {
-+ if (sk->sk_net != net)
-+ continue;
- if (sk->sk_hash == num)
- return 1;
-+ }
- return 0;
- }
-
-@@ -148,9 +150,9 @@
- if (snum == 0) {
- int best_size_so_far, best, result, i;
-
-- if (*port_rover > sysctl_local_port_range[1] ||
-- *port_rover < sysctl_local_port_range[0])
-- *port_rover = sysctl_local_port_range[0];
-+ if (*port_rover > sk->sk_net->sysctl_local_port_range[1] ||
-+ *port_rover < sk->sk_net->sysctl_local_port_range[0])
-+ *port_rover = sk->sk_net->sysctl_local_port_range[0];
- best_size_so_far = 32767;
- best = result = *port_rover;
- for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-@@ -158,9 +160,9 @@
-
- head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
- if (hlist_empty(head)) {
-- if (result > sysctl_local_port_range[1])
-- result = sysctl_local_port_range[0] +
-- ((result - sysctl_local_port_range[0]) &
-+ if (result > sk->sk_net->sysctl_local_port_range[1])
-+ result = sk->sk_net->sysctl_local_port_range[0] +
-+ ((result - sk->sk_net->sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
- goto gotit;
- }
-@@ -177,11 +179,11 @@
- result = best;
- for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
- i++, result += UDP_HTABLE_SIZE) {
-- if (result > sysctl_local_port_range[1])
-- result = sysctl_local_port_range[0]
-- + ((result - sysctl_local_port_range[0]) &
-+ if (result > sk->sk_net->sysctl_local_port_range[1])
-+ result = sk->sk_net->sysctl_local_port_range[0]
-+ + ((result - sk->sk_net->sysctl_local_port_range[0]) &
- (UDP_HTABLE_SIZE - 1));
-- if (! __udp_lib_lport_inuse(result, udptable))
-+ if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable))
- break;
- }
- if (i >= (1 << 16) / UDP_HTABLE_SIZE)
-@@ -194,6 +196,7 @@
- sk_for_each(sk2, node, head)
- if (sk2->sk_hash == snum &&
- sk2 != sk &&
-+ sk->sk_net == sk2->sk_net &&
- (!sk2->sk_reuse || !sk->sk_reuse) &&
- (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
- || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-@@ -216,7 +219,7 @@
- int udp_get_port(struct sock *sk, unsigned short snum,
- int (*scmp)(const struct sock *, const struct sock *))
- {
-- return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
-+ return __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp);
- }
-
- extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2);
-@@ -229,7 +232,8 @@
- /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
- * harder than this. -DaveM
- */
--static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
-+static struct sock *__udp4_lib_lookup(struct net *net,
-+ __be32 saddr, __be16 sport,
- __be32 daddr, __be16 dport,
- int dif, struct hlist_head udptable[])
- {
-@@ -243,6 +247,9 @@
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
- struct inet_sock *inet = inet_sk(sk);
-
-+ if (sk->sk_net != net)
-+ continue;
-+
- if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
- int score = (sk->sk_family == PF_INET ? 1 : 0);
-
-@@ -299,6 +306,9 @@
- sk_for_each_from(s, node) {
- struct inet_sock *inet = inet_sk(s);
-
-+ if (s->sk_net != sk->sk_net)
-+ continue;
-+
- if (s->sk_hash != hnum ||
- (inet->daddr && inet->daddr != rmt_addr) ||
- (inet->dport != rmt_port && inet->dport) ||
-@@ -328,6 +338,7 @@
-
- void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
- {
-+ struct net *net = skb->dev->nd_net;
- struct inet_sock *inet;
- struct iphdr *iph = (struct iphdr*)skb->data;
- struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
-@@ -337,7 +348,7 @@
- int harderr;
- int err;
-
-- sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
-+ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source,
- skb->dev->ifindex, udptable );
- if (sk == NULL) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
-@@ -623,7 +634,8 @@
- rt = (struct rtable*)sk_dst_check(sk, 0);
-
- if (rt == NULL) {
-- struct flowi fl = { .oif = ipc.oif,
-+ struct flowi fl = { .fl_net = sk->sk_net,
-+ .oif = ipc.oif,
- .nl_u = { .ip4_u =
- { .daddr = faddr,
- .saddr = saddr,
-@@ -1288,6 +1300,7 @@
- int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
- int proto)
- {
-+ struct net *net = skb->dev->nd_net;
- struct sock *sk;
- struct udphdr *uh = udp_hdr(skb);
- unsigned short ulen;
-@@ -1322,7 +1335,7 @@
- udp_ping_of_death(skb, uh, saddr);
- #endif
-
-- sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
-+ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest,
- skb->dev->ifindex, udptable );
-
- if (sk != NULL) {
-@@ -1651,7 +1664,7 @@
- sk = sk_next(sk);
- try_again:
- ;
-- } while (sk && (sk->sk_family != state->family ||
-+ } while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family ||
- !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
-
- if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
-@@ -1717,6 +1730,7 @@
-
- seq = file->private_data;
- seq->private = s;
-+ s->net = get_net(PROC_NET(inode));
- out:
- return rc;
- out_kfree:
-@@ -1724,21 +1738,31 @@
- goto out;
- }
-
-+static int udp_seq_release(struct inode *inode, struct file *file)
-+{
-+ struct seq_file *seq = file->private_data;
-+ struct udp_iter_state *state = seq->private;
-+ put_net(state->net);
-+ return seq_release_private(inode, file);
-+}
-+
- /* ------------------------------------------------------------------------ */
--int udp_proc_register(struct udp_seq_afinfo *afinfo)
-+int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
- {
- struct proc_dir_entry *p;
- int rc = 0;
-
- if (!afinfo)
- return -EINVAL;
-+ if (net == &init_net) {
- afinfo->seq_fops->owner = afinfo->owner;
- afinfo->seq_fops->open = udp_seq_open;
- afinfo->seq_fops->read = seq_read;
- afinfo->seq_fops->llseek = seq_lseek;
-- afinfo->seq_fops->release = seq_release_private;
-+ afinfo->seq_fops->release = udp_seq_release;
-+ }
-
-- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
-+ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
- if (p)
- p->data = afinfo;
- else
-@@ -1746,11 +1770,12 @@
- return rc;
- }
-
--void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
-+void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
- {
- if (!afinfo)
- return;
-- proc_net_remove(afinfo->name);
-+ proc_net_remove(net, afinfo->name);
-+ if (net == &init_net)
- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
- }
-
-@@ -1803,14 +1828,30 @@
- .seq_fops = &udp4_seq_fops,
- };
-
-+
-+static int udp4_proc_net_init(struct net *net)
-+{
-+ return udp_proc_register(net, &udp4_seq_afinfo);
-+}
-+
-+static void udp4_proc_net_exit(struct net *net)
-+{
-+ udp_proc_unregister(net, &udp4_seq_afinfo);
-+}
-+
-+static struct pernet_operations udp4_proc_net_ops = {
-+ .init = udp4_proc_net_init,
-+ .exit = udp4_proc_net_exit,
-+};
-+
- int __init udp4_proc_init(void)
- {
-- return udp_proc_register(&udp4_seq_afinfo);
-+ return register_pernet_subsys(&udp4_proc_net_ops);
- }
-
- void udp4_proc_exit(void)
- {
-- udp_proc_unregister(&udp4_seq_afinfo);
-+ unregister_pernet_subsys(&udp4_proc_net_ops);
- }
- #endif /* CONFIG_PROC_FS */
-
-diff -Nurb linux-2.6.22-570/net/ipv4/udplite.c linux-2.6.22-590/net/ipv4/udplite.c
---- linux-2.6.22-570/net/ipv4/udplite.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/udplite.c 2008-01-29 22:12:32.000000000 -0500
-@@ -31,11 +31,18 @@
-
- static int udplite_rcv(struct sk_buff *skb)
- {
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
- return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
- }
-
- static void udplite_err(struct sk_buff *skb, u32 info)
- {
-+ if (skb->dev->nd_net != &init_net)
-+ return;
-+
- return __udp4_lib_err(skb, info, udplite_hash);
- }
-
-@@ -103,7 +110,7 @@
- inet_register_protosw(&udplite4_protosw);
-
- #ifdef CONFIG_PROC_FS
-- if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
-+ if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */
- printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
- #endif
- return;
-diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_input.c linux-2.6.22-590/net/ipv4/xfrm4_input.c
---- linux-2.6.22-570/net/ipv4/xfrm4_input.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/xfrm4_input.c 2008-01-29 22:12:32.000000000 -0500
-@@ -18,6 +18,10 @@
-
- int xfrm4_rcv(struct sk_buff *skb)
- {
-+ if (skb->dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
- return xfrm4_rcv_encap(skb, 0);
- }
-
-diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_policy.c linux-2.6.22-590/net/ipv4/xfrm4_policy.c
---- linux-2.6.22-570/net/ipv4/xfrm4_policy.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/xfrm4_policy.c 2008-01-29 22:12:32.000000000 -0500
-@@ -25,6 +25,7 @@
- {
- struct rtable *rt;
- struct flowi fl_tunnel = {
-+ .fl_net = &init_net,
- .nl_u = {
- .ip4_u = {
- .daddr = daddr->a4,
-@@ -73,6 +74,7 @@
- struct rtable *rt0 = (struct rtable*)(*dst_p);
- struct rtable *rt = rt0;
- struct flowi fl_tunnel = {
-+ .fl_net = &init_net,
- .nl_u = {
- .ip4_u = {
- .saddr = fl->fl4_src,
-@@ -213,6 +215,7 @@
- u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
-
- memset(fl, 0, sizeof(struct flowi));
-+ fl->fl_net = &init_net;
- if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
- switch (iph->protocol) {
- case IPPROTO_UDP:
-@@ -306,7 +309,7 @@
-
- xdst = (struct xfrm_dst *)dst;
- if (xdst->u.rt.idev->dev == dev) {
-- struct in_device *loopback_idev = in_dev_get(&loopback_dev);
-+ struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev);
- BUG_ON(!loopback_idev);
-
- do {
-diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_state.c linux-2.6.22-590/net/ipv4/xfrm4_state.c
---- linux-2.6.22-570/net/ipv4/xfrm4_state.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/xfrm4_state.c 2008-01-29 22:12:32.000000000 -0500
-@@ -16,7 +16,7 @@
-
- static int xfrm4_init_flags(struct xfrm_state *x)
- {
-- if (ipv4_config.no_pmtu_disc)
-+ if (init_net.sysctl_ipv4_no_pmtu_disc)
- x->props.flags |= XFRM_STATE_NOPMTUDISC;
- return 0;
- }
-diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c linux-2.6.22-590/net/ipv4/xfrm4_tunnel.c
---- linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv4/xfrm4_tunnel.c 2008-01-29 22:12:32.000000000 -0500
-@@ -109,3 +109,4 @@
- module_init(ipip_init);
- module_exit(ipip_fini);
- MODULE_LICENSE("GPL");
-+MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
-diff -Nurb linux-2.6.22-570/net/ipv6/Kconfig linux-2.6.22-590/net/ipv6/Kconfig
---- linux-2.6.22-570/net/ipv6/Kconfig 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/Kconfig 2008-01-29 22:12:32.000000000 -0500
-@@ -109,7 +109,7 @@
- If unsure, say Y.
-
- config IPV6_MIP6
-- bool "IPv6: Mobility (EXPERIMENTAL)"
-+ tristate "IPv6: Mobility (EXPERIMENTAL)"
- depends on IPV6 && EXPERIMENTAL
- select XFRM
- ---help---
-diff -Nurb linux-2.6.22-570/net/ipv6/Makefile linux-2.6.22-590/net/ipv6/Makefile
---- linux-2.6.22-570/net/ipv6/Makefile 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/Makefile 2008-01-29 22:12:32.000000000 -0500
-@@ -14,7 +14,6 @@
- xfrm6_output.o
- ipv6-$(CONFIG_NETFILTER) += netfilter.o
- ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
--ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
- ipv6-$(CONFIG_PROC_FS) += proc.o
-
- ipv6-objs += $(ipv6-y)
-@@ -28,6 +27,7 @@
- obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
- obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
- obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
-+obj-$(CONFIG_IPV6_MIP6) += mip6.o
- obj-$(CONFIG_NETFILTER) += netfilter/
-
- obj-$(CONFIG_IPV6_SIT) += sit.o
-diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-590/net/ipv6/addrconf.c
---- linux-2.6.22-570/net/ipv6/addrconf.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/addrconf.c 2008-01-29 22:12:32.000000000 -0500
-@@ -73,6 +73,7 @@
- #include <net/tcp.h>
- #include <net/ip.h>
- #include <net/netlink.h>
-+#include <net/net_namespace.h>
- #include <net/pkt_sched.h>
- #include <linux/if_tunnel.h>
- #include <linux/rtnetlink.h>
-@@ -457,7 +458,7 @@
- struct inet6_dev *idev;
-
- read_lock(&dev_base_lock);
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev) {
-@@ -920,7 +921,7 @@
- read_lock(&dev_base_lock);
- rcu_read_lock();
-
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
-
-@@ -1047,7 +1048,7 @@
- }
-
- /* Rule 4: Prefer home address */
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (hiscore.rule < 4) {
- if (ifa_result->flags & IFA_F_HOMEADDRESS)
- hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
-@@ -1882,7 +1883,7 @@
- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
- goto err_exit;
-
-- dev = __dev_get_by_index(ireq.ifr6_ifindex);
-+ dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex);
-
- err = -ENODEV;
- if (dev == NULL)
-@@ -1913,7 +1914,7 @@
-
- if (err == 0) {
- err = -ENOBUFS;
-- if ((dev = __dev_get_by_name(p.name)) == NULL)
-+ if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL)
- goto err_exit;
- err = dev_open(dev);
- }
-@@ -1943,7 +1944,7 @@
- if (!valid_lft || prefered_lft > valid_lft)
- return -EINVAL;
-
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-+ if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
- return -ENODEV;
-
- if ((idev = addrconf_add_dev(dev)) == NULL)
-@@ -1994,7 +1995,7 @@
- struct inet6_dev *idev;
- struct net_device *dev;
-
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-+ if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
- return -ENODEV;
-
- if ((idev = __in6_dev_get(dev)) == NULL)
-@@ -2089,7 +2090,7 @@
- return;
- }
-
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- struct in_device * in_dev = __in_dev_get_rtnl(dev);
- if (in_dev && (dev->flags & IFF_UP)) {
- struct in_ifaddr * ifa;
-@@ -2245,12 +2246,12 @@
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
-- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
-+ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
-- for_each_netdev(link_dev) {
-+ for_each_netdev(&init_net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
-@@ -2282,6 +2283,9 @@
- struct inet6_dev *idev = __in6_dev_get(dev);
- int run_pending = 0;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- switch(event) {
- case NETDEV_REGISTER:
- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-@@ -2419,7 +2423,7 @@
-
- ASSERT_RTNL();
-
-- if (dev == &loopback_dev && how == 1)
-+ if (dev == &init_net.loopback_dev && how == 1)
- how = 0;
-
- rt6_ifdown(dev);
-@@ -2850,18 +2854,18 @@
-
- int __init if6_proc_init(void)
- {
-- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
-+ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
- return -ENOMEM;
- return 0;
- }
-
- void if6_proc_exit(void)
- {
-- proc_net_remove("if_inet6");
-+ proc_net_remove(&init_net, "if_inet6");
- }
- #endif /* CONFIG_PROC_FS */
-
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- /* Check if address is a home address configured on any interface. */
- int ipv6_chk_home_addr(struct in6_addr *addr)
- {
-@@ -3017,11 +3021,15 @@
- static int
- inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- return err;
-@@ -3074,6 +3082,7 @@
- static int
- inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx;
-@@ -3083,6 +3092,9 @@
- u8 ifa_flags;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- return err;
-@@ -3103,7 +3115,7 @@
- valid_lft = INFINITY_LIFE_TIME;
- }
-
-- dev = __dev_get_by_index(ifm->ifa_index);
-+ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
- if (dev == NULL)
- return -ENODEV;
-
-@@ -3292,7 +3304,7 @@
- s_ip_idx = ip_idx = cb->args[1];
-
- idx = 0;
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
-@@ -3367,26 +3379,42 @@
-
- static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = UNICAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
- static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = MULTICAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
-
- static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- enum addr_type_t type = ANYCAST_ADDR;
-+
-+ if (net != &init_net)
-+ return 0;
-+
- return inet6_dump_addr(skb, cb, type);
- }
-
- static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
- void *arg)
- {
-+ struct net *net = in_skb->sk->sk_net;
- struct ifaddrmsg *ifm;
- struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *addr = NULL;
-@@ -3395,6 +3423,9 @@
- struct sk_buff *skb;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
- if (err < 0)
- goto errout;
-@@ -3407,7 +3438,7 @@
-
- ifm = nlmsg_data(nlh);
- if (ifm->ifa_index)
-- dev = __dev_get_by_index(ifm->ifa_index);
-+ dev = __dev_get_by_index(&init_net, ifm->ifa_index);
-
- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
- err = -EADDRNOTAVAIL;
-@@ -3427,7 +3458,7 @@
- kfree_skb(skb);
- goto errout_ifa;
- }
-- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-+ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
- errout_ifa:
- in6_ifa_put(ifa);
- errout:
-@@ -3450,10 +3481,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
- }
-
- static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
-@@ -3612,19 +3643,22 @@
-
- static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- int idx, err;
- int s_idx = cb->args[0];
- struct net_device *dev;
- struct inet6_dev *idev;
- struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
-
-+ if (net != &init_net)
-+ return 0;
- /* FIXME: maybe disable ipv6 on non v6 guests?
- if (skb->sk && skb->sk->sk_vx_info)
- return skb->len; */
-
- read_lock(&dev_base_lock);
- idx = 0;
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if (idx < s_idx)
- goto cont;
- if (!v6_dev_in_nx_info(dev, nxi))
-@@ -3661,10 +3695,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
- }
-
- static inline size_t inet6_prefix_nlmsg_size(void)
-@@ -3730,10 +3764,10 @@
- kfree_skb(skb);
- goto errout;
- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
-+ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
- errout:
- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
-+ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
- }
-
- static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
-@@ -4244,16 +4278,16 @@
- * device and it being up should be removed.
- */
- rtnl_lock();
-- if (!ipv6_add_dev(&loopback_dev))
-+ if (!ipv6_add_dev(&init_net.loopback_dev))
- err = -ENOMEM;
- rtnl_unlock();
- if (err)
- return err;
-
-- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-+ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-+ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
-+ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
- #endif
-
- register_netdevice_notifier(&ipv6_dev_notf);
-@@ -4304,12 +4338,12 @@
- * clean dev list.
- */
-
-- for_each_netdev(dev) {
-+ for_each_netdev(&init_net, dev) {
- if ((idev = __in6_dev_get(dev)) == NULL)
- continue;
- addrconf_ifdown(dev, 1);
- }
-- addrconf_ifdown(&loopback_dev, 2);
-+ addrconf_ifdown(&init_net.loopback_dev, 2);
-
- /*
- * Check hash table.
-@@ -4335,6 +4369,6 @@
- rtnl_unlock();
-
- #ifdef CONFIG_PROC_FS
-- proc_net_remove("if_inet6");
-+ proc_net_remove(&init_net, "if_inet6");
- #endif
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-590/net/ipv6/addrconf.c.orig
---- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500
-@@ -1,4301 +0,0 @@
--/*
-- * IPv6 Address [auto]configuration
-- * Linux INET6 implementation
-- *
-- * Authors:
-- * Pedro Roque <roque@di.fc.ul.pt>
-- * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
-- *
-- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License
-- * as published by the Free Software Foundation; either version
-- * 2 of the License, or (at your option) any later version.
-- */
--
--/*
-- * Changes:
-- *
-- * Janos Farkas : delete timer on ifdown
-- * <chexum@bankinf.banki.hu>
-- * Andi Kleen : kill double kfree on module
-- * unload.
-- * Maciej W. Rozycki : FDDI support
-- * sekiya@USAGI : Don't send too many RS
-- * packets.
-- * yoshfuji@USAGI : Fixed interval between DAD
-- * packets.
-- * YOSHIFUJI Hideaki @USAGI : improved accuracy of
-- * address validation timer.
-- * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
-- * support.
-- * Yuji SEKIYA @USAGI : Don't assign a same IPv6
-- * address on a same interface.
-- * YOSHIFUJI Hideaki @USAGI : ARCnet support
-- * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
-- * seq_file.
-- * YOSHIFUJI Hideaki @USAGI : improved source address
-- * selection; consider scope,
-- * status etc.
-- */
--
--#include <linux/errno.h>
--#include <linux/types.h>
--#include <linux/socket.h>
--#include <linux/sockios.h>
--#include <linux/net.h>
--#include <linux/in6.h>
--#include <linux/netdevice.h>
--#include <linux/if_addr.h>
--#include <linux/if_arp.h>
--#include <linux/if_arcnet.h>
--#include <linux/if_infiniband.h>
--#include <linux/route.h>
--#include <linux/inetdevice.h>
--#include <linux/init.h>
--#ifdef CONFIG_SYSCTL
--#include <linux/sysctl.h>
--#endif
--#include <linux/capability.h>
--#include <linux/delay.h>
--#include <linux/notifier.h>
--#include <linux/string.h>
--
--#include <net/sock.h>
--#include <net/snmp.h>
--
--#include <net/ipv6.h>
--#include <net/protocol.h>
--#include <net/ndisc.h>
--#include <net/ip6_route.h>
--#include <net/addrconf.h>
--#include <net/tcp.h>
--#include <net/ip.h>
--#include <net/netlink.h>
--#include <net/pkt_sched.h>
--#include <linux/if_tunnel.h>
--#include <linux/rtnetlink.h>
--
--#ifdef CONFIG_IPV6_PRIVACY
--#include <linux/random.h>
--#endif
--
--#include <asm/uaccess.h>
--#include <asm/unaligned.h>
--
--#include <linux/proc_fs.h>
--#include <linux/seq_file.h>
--
--/* Set to 3 to get tracing... */
--#define ACONF_DEBUG 2
--
--#if ACONF_DEBUG >= 3
--#define ADBG(x) printk x
--#else
--#define ADBG(x)
--#endif
--
--#define INFINITY_LIFE_TIME 0xFFFFFFFF
--#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
--
--#ifdef CONFIG_SYSCTL
--static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
--static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
--#endif
--
--#ifdef CONFIG_IPV6_PRIVACY
--static int __ipv6_regen_rndid(struct inet6_dev *idev);
--static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
--static void ipv6_regen_rndid(unsigned long data);
--
--static int desync_factor = MAX_DESYNC_FACTOR * HZ;
--#endif
--
--static int ipv6_count_addresses(struct inet6_dev *idev);
--
--/*
-- * Configured unicast address hash table
-- */
--static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
--static DEFINE_RWLOCK(addrconf_hash_lock);
--
--static void addrconf_verify(unsigned long);
--
--static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
--static DEFINE_SPINLOCK(addrconf_verify_lock);
--
--static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
--static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
--
--static int addrconf_ifdown(struct net_device *dev, int how);
--
--static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
--static void addrconf_dad_timer(unsigned long data);
--static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
--static void addrconf_dad_run(struct inet6_dev *idev);
--static void addrconf_rs_timer(unsigned long data);
--static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
--static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
--
--static void inet6_prefix_notify(int event, struct inet6_dev *idev,
-- struct prefix_info *pinfo);
--static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
--
--static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
--
--struct ipv6_devconf ipv6_devconf __read_mostly = {
-- .forwarding = 0,
-- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
-- .mtu6 = IPV6_MIN_MTU,
-- .accept_ra = 1,
-- .accept_redirects = 1,
-- .autoconf = 1,
-- .force_mld_version = 0,
-- .dad_transmits = 1,
-- .rtr_solicits = MAX_RTR_SOLICITATIONS,
-- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
-- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
--#ifdef CONFIG_IPV6_PRIVACY
-- .use_tempaddr = 0,
-- .temp_valid_lft = TEMP_VALID_LIFETIME,
-- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
-- .regen_max_retry = REGEN_MAX_RETRY,
-- .max_desync_factor = MAX_DESYNC_FACTOR,
--#endif
-- .max_addresses = IPV6_MAX_ADDRESSES,
-- .accept_ra_defrtr = 1,
-- .accept_ra_pinfo = 1,
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- .accept_ra_rtr_pref = 1,
-- .rtr_probe_interval = 60 * HZ,
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- .accept_ra_rt_info_max_plen = 0,
--#endif
--#endif
-- .proxy_ndp = 0,
-- .accept_source_route = 0, /* we do not accept RH0 by default. */
--};
--
--static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
-- .forwarding = 0,
-- .hop_limit = IPV6_DEFAULT_HOPLIMIT,
-- .mtu6 = IPV6_MIN_MTU,
-- .accept_ra = 1,
-- .accept_redirects = 1,
-- .autoconf = 1,
-- .dad_transmits = 1,
-- .rtr_solicits = MAX_RTR_SOLICITATIONS,
-- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
-- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
--#ifdef CONFIG_IPV6_PRIVACY
-- .use_tempaddr = 0,
-- .temp_valid_lft = TEMP_VALID_LIFETIME,
-- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
-- .regen_max_retry = REGEN_MAX_RETRY,
-- .max_desync_factor = MAX_DESYNC_FACTOR,
--#endif
-- .max_addresses = IPV6_MAX_ADDRESSES,
-- .accept_ra_defrtr = 1,
-- .accept_ra_pinfo = 1,
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- .accept_ra_rtr_pref = 1,
-- .rtr_probe_interval = 60 * HZ,
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- .accept_ra_rt_info_max_plen = 0,
--#endif
--#endif
-- .proxy_ndp = 0,
-- .accept_source_route = 0, /* we do not accept RH0 by default. */
--};
--
--/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
--const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
--const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
--
--/* Check if a valid qdisc is available */
--static inline int addrconf_qdisc_ok(struct net_device *dev)
--{
-- return (dev->qdisc != &noop_qdisc);
--}
--
--static void addrconf_del_timer(struct inet6_ifaddr *ifp)
--{
-- if (del_timer(&ifp->timer))
-- __in6_ifa_put(ifp);
--}
--
--enum addrconf_timer_t
--{
-- AC_NONE,
-- AC_DAD,
-- AC_RS,
--};
--
--static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
-- enum addrconf_timer_t what,
-- unsigned long when)
--{
-- if (!del_timer(&ifp->timer))
-- in6_ifa_hold(ifp);
--
-- switch (what) {
-- case AC_DAD:
-- ifp->timer.function = addrconf_dad_timer;
-- break;
-- case AC_RS:
-- ifp->timer.function = addrconf_rs_timer;
-- break;
-- default:;
-- }
-- ifp->timer.expires = jiffies + when;
-- add_timer(&ifp->timer);
--}
--
--static int snmp6_alloc_dev(struct inet6_dev *idev)
--{
-- int err = -ENOMEM;
--
-- if (!idev || !idev->dev)
-- return -EINVAL;
--
-- if (snmp_mib_init((void **)idev->stats.ipv6,
-- sizeof(struct ipstats_mib),
-- __alignof__(struct ipstats_mib)) < 0)
-- goto err_ip;
-- if (snmp_mib_init((void **)idev->stats.icmpv6,
-- sizeof(struct icmpv6_mib),
-- __alignof__(struct icmpv6_mib)) < 0)
-- goto err_icmp;
--
-- return 0;
--
--err_icmp:
-- snmp_mib_free((void **)idev->stats.ipv6);
--err_ip:
-- return err;
--}
--
--static int snmp6_free_dev(struct inet6_dev *idev)
--{
-- snmp_mib_free((void **)idev->stats.icmpv6);
-- snmp_mib_free((void **)idev->stats.ipv6);
-- return 0;
--}
--
--/* Nobody refers to this device, we may destroy it. */
--
--static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
--{
-- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
-- kfree(idev);
--}
--
--void in6_dev_finish_destroy(struct inet6_dev *idev)
--{
-- struct net_device *dev = idev->dev;
-- BUG_TRAP(idev->addr_list==NULL);
-- BUG_TRAP(idev->mc_list==NULL);
--#ifdef NET_REFCNT_DEBUG
-- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
--#endif
-- dev_put(dev);
-- if (!idev->dead) {
-- printk("Freeing alive inet6 device %p\n", idev);
-- return;
-- }
-- snmp6_free_dev(idev);
-- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
--}
--
--EXPORT_SYMBOL(in6_dev_finish_destroy);
--
--static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
--{
-- struct inet6_dev *ndev;
-- struct in6_addr maddr;
--
-- ASSERT_RTNL();
--
-- if (dev->mtu < IPV6_MIN_MTU)
-- return NULL;
--
-- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
--
-- if (ndev == NULL)
-- return NULL;
--
-- rwlock_init(&ndev->lock);
-- ndev->dev = dev;
-- memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
-- ndev->cnf.mtu6 = dev->mtu;
-- ndev->cnf.sysctl = NULL;
-- ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
-- if (ndev->nd_parms == NULL) {
-- kfree(ndev);
-- return NULL;
-- }
-- /* We refer to the device */
-- dev_hold(dev);
--
-- if (snmp6_alloc_dev(ndev) < 0) {
-- ADBG((KERN_WARNING
-- "%s(): cannot allocate memory for statistics; dev=%s.\n",
-- __FUNCTION__, dev->name));
-- neigh_parms_release(&nd_tbl, ndev->nd_parms);
-- ndev->dead = 1;
-- in6_dev_finish_destroy(ndev);
-- return NULL;
-- }
--
-- if (snmp6_register_dev(ndev) < 0) {
-- ADBG((KERN_WARNING
-- "%s(): cannot create /proc/net/dev_snmp6/%s\n",
-- __FUNCTION__, dev->name));
-- neigh_parms_release(&nd_tbl, ndev->nd_parms);
-- ndev->dead = 1;
-- in6_dev_finish_destroy(ndev);
-- return NULL;
-- }
--
-- /* One reference from device. We must do this before
-- * we invoke __ipv6_regen_rndid().
-- */
-- in6_dev_hold(ndev);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- init_timer(&ndev->regen_timer);
-- ndev->regen_timer.function = ipv6_regen_rndid;
-- ndev->regen_timer.data = (unsigned long) ndev;
-- if ((dev->flags&IFF_LOOPBACK) ||
-- dev->type == ARPHRD_TUNNEL ||
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- dev->type == ARPHRD_SIT ||
--#endif
-- dev->type == ARPHRD_NONE) {
-- printk(KERN_INFO
-- "%s: Disabled Privacy Extensions\n",
-- dev->name);
-- ndev->cnf.use_tempaddr = -1;
-- } else {
-- in6_dev_hold(ndev);
-- ipv6_regen_rndid((unsigned long) ndev);
-- }
--#endif
--
-- if (netif_running(dev) && addrconf_qdisc_ok(dev))
-- ndev->if_flags |= IF_READY;
--
-- ipv6_mc_init_dev(ndev);
-- ndev->tstamp = jiffies;
--#ifdef CONFIG_SYSCTL
-- neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
-- NET_IPV6_NEIGH, "ipv6",
-- &ndisc_ifinfo_sysctl_change,
-- NULL);
-- addrconf_sysctl_register(ndev, &ndev->cnf);
--#endif
-- /* protected by rtnl_lock */
-- rcu_assign_pointer(dev->ip6_ptr, ndev);
--
-- /* Join all-node multicast group */
-- ipv6_addr_all_nodes(&maddr);
-- ipv6_dev_mc_inc(dev, &maddr);
--
-- return ndev;
--}
--
--static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = __in6_dev_get(dev)) == NULL) {
-- if ((idev = ipv6_add_dev(dev)) == NULL)
-- return NULL;
-- }
--
-- if (dev->flags&IFF_UP)
-- ipv6_mc_up(idev);
-- return idev;
--}
--
--#ifdef CONFIG_SYSCTL
--static void dev_forward_change(struct inet6_dev *idev)
--{
-- struct net_device *dev;
-- struct inet6_ifaddr *ifa;
-- struct in6_addr addr;
--
-- if (!idev)
-- return;
-- dev = idev->dev;
-- if (dev && (dev->flags & IFF_MULTICAST)) {
-- ipv6_addr_all_routers(&addr);
--
-- if (idev->cnf.forwarding)
-- ipv6_dev_mc_inc(dev, &addr);
-- else
-- ipv6_dev_mc_dec(dev, &addr);
-- }
-- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
-- if (ifa->flags&IFA_F_TENTATIVE)
-- continue;
-- if (idev->cnf.forwarding)
-- addrconf_join_anycast(ifa);
-- else
-- addrconf_leave_anycast(ifa);
-- }
--}
--
--
--static void addrconf_forward_change(void)
--{
-- struct net_device *dev;
-- struct inet6_dev *idev;
--
-- read_lock(&dev_base_lock);
-- for_each_netdev(dev) {
-- rcu_read_lock();
-- idev = __in6_dev_get(dev);
-- if (idev) {
-- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding);
-- idev->cnf.forwarding = ipv6_devconf.forwarding;
-- if (changed)
-- dev_forward_change(idev);
-- }
-- rcu_read_unlock();
-- }
-- read_unlock(&dev_base_lock);
--}
--#endif
--
--/* Nobody refers to this ifaddr, destroy it */
--
--void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
--{
-- BUG_TRAP(ifp->if_next==NULL);
-- BUG_TRAP(ifp->lst_next==NULL);
--#ifdef NET_REFCNT_DEBUG
-- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
--#endif
--
-- in6_dev_put(ifp->idev);
--
-- if (del_timer(&ifp->timer))
-- printk("Timer is still running, when freeing ifa=%p\n", ifp);
--
-- if (!ifp->dead) {
-- printk("Freeing alive inet6 address %p\n", ifp);
-- return;
-- }
-- dst_release(&ifp->rt->u.dst);
--
-- kfree(ifp);
--}
--
--static void
--ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
--{
-- struct inet6_ifaddr *ifa, **ifap;
-- int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
--
-- /*
-- * Each device address list is sorted in order of scope -
-- * global before linklocal.
-- */
-- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
-- ifap = &ifa->if_next) {
-- if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
-- break;
-- }
--
-- ifp->if_next = *ifap;
-- *ifap = ifp;
--}
--
--/* On success it returns ifp with increased reference count */
--
--static struct inet6_ifaddr *
--ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
-- int scope, u32 flags)
--{
-- struct inet6_ifaddr *ifa = NULL;
-- struct rt6_info *rt;
-- int hash;
-- int err = 0;
--
-- rcu_read_lock_bh();
-- if (idev->dead) {
-- err = -ENODEV; /*XXX*/
-- goto out2;
-- }
--
-- write_lock(&addrconf_hash_lock);
--
-- /* Ignore adding duplicate addresses on an interface */
-- if (ipv6_chk_same_addr(addr, idev->dev)) {
-- ADBG(("ipv6_add_addr: already assigned\n"));
-- err = -EEXIST;
-- goto out;
-- }
--
-- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
--
-- if (ifa == NULL) {
-- ADBG(("ipv6_add_addr: malloc failed\n"));
-- err = -ENOBUFS;
-- goto out;
-- }
--
-- rt = addrconf_dst_alloc(idev, addr, 0);
-- if (IS_ERR(rt)) {
-- err = PTR_ERR(rt);
-- goto out;
-- }
--
-- ipv6_addr_copy(&ifa->addr, addr);
--
-- spin_lock_init(&ifa->lock);
-- init_timer(&ifa->timer);
-- ifa->timer.data = (unsigned long) ifa;
-- ifa->scope = scope;
-- ifa->prefix_len = pfxlen;
-- ifa->flags = flags | IFA_F_TENTATIVE;
-- ifa->cstamp = ifa->tstamp = jiffies;
--
-- ifa->rt = rt;
--
-- /*
-- * part one of RFC 4429, section 3.3
-- * We should not configure an address as
-- * optimistic if we do not yet know the link
-- * layer address of our nexhop router
-- */
--
-- if (rt->rt6i_nexthop == NULL)
-- ifa->flags &= ~IFA_F_OPTIMISTIC;
--
-- ifa->idev = idev;
-- in6_dev_hold(idev);
-- /* For caller */
-- in6_ifa_hold(ifa);
--
-- /* Add to big hash table */
-- hash = ipv6_addr_hash(addr);
--
-- ifa->lst_next = inet6_addr_lst[hash];
-- inet6_addr_lst[hash] = ifa;
-- in6_ifa_hold(ifa);
-- write_unlock(&addrconf_hash_lock);
--
-- write_lock(&idev->lock);
-- /* Add to inet6_dev unicast addr list. */
-- ipv6_link_dev_addr(idev, ifa);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- if (ifa->flags&IFA_F_TEMPORARY) {
-- ifa->tmp_next = idev->tempaddr_list;
-- idev->tempaddr_list = ifa;
-- in6_ifa_hold(ifa);
-- }
--#endif
--
-- in6_ifa_hold(ifa);
-- write_unlock(&idev->lock);
--out2:
-- rcu_read_unlock_bh();
--
-- if (likely(err == 0))
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
-- else {
-- kfree(ifa);
-- ifa = ERR_PTR(err);
-- }
--
-- return ifa;
--out:
-- write_unlock(&addrconf_hash_lock);
-- goto out2;
--}
--
--/* This function wants to get referenced ifp and releases it before return */
--
--static void ipv6_del_addr(struct inet6_ifaddr *ifp)
--{
-- struct inet6_ifaddr *ifa, **ifap;
-- struct inet6_dev *idev = ifp->idev;
-- int hash;
-- int deleted = 0, onlink = 0;
-- unsigned long expires = jiffies;
--
-- hash = ipv6_addr_hash(&ifp->addr);
--
-- ifp->dead = 1;
--
-- write_lock_bh(&addrconf_hash_lock);
-- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
-- ifap = &ifa->lst_next) {
-- if (ifa == ifp) {
-- *ifap = ifa->lst_next;
-- __in6_ifa_put(ifp);
-- ifa->lst_next = NULL;
-- break;
-- }
-- }
-- write_unlock_bh(&addrconf_hash_lock);
--
-- write_lock_bh(&idev->lock);
--#ifdef CONFIG_IPV6_PRIVACY
-- if (ifp->flags&IFA_F_TEMPORARY) {
-- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
-- ifap = &ifa->tmp_next) {
-- if (ifa == ifp) {
-- *ifap = ifa->tmp_next;
-- if (ifp->ifpub) {
-- in6_ifa_put(ifp->ifpub);
-- ifp->ifpub = NULL;
-- }
-- __in6_ifa_put(ifp);
-- ifa->tmp_next = NULL;
-- break;
-- }
-- }
-- }
--#endif
--
-- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
-- if (ifa == ifp) {
-- *ifap = ifa->if_next;
-- __in6_ifa_put(ifp);
-- ifa->if_next = NULL;
-- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
-- break;
-- deleted = 1;
-- continue;
-- } else if (ifp->flags & IFA_F_PERMANENT) {
-- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
-- ifp->prefix_len)) {
-- if (ifa->flags & IFA_F_PERMANENT) {
-- onlink = 1;
-- if (deleted)
-- break;
-- } else {
-- unsigned long lifetime;
--
-- if (!onlink)
-- onlink = -1;
--
-- spin_lock(&ifa->lock);
-- lifetime = min_t(unsigned long,
-- ifa->valid_lft, 0x7fffffffUL/HZ);
-- if (time_before(expires,
-- ifa->tstamp + lifetime * HZ))
-- expires = ifa->tstamp + lifetime * HZ;
-- spin_unlock(&ifa->lock);
-- }
-- }
-- }
-- ifap = &ifa->if_next;
-- }
-- write_unlock_bh(&idev->lock);
--
-- ipv6_ifa_notify(RTM_DELADDR, ifp);
--
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
--
-- addrconf_del_timer(ifp);
--
-- /*
-- * Purge or update corresponding prefix
-- *
-- * 1) we don't purge prefix here if address was not permanent.
-- * prefix is managed by its own lifetime.
-- * 2) if there're no addresses, delete prefix.
-- * 3) if there're still other permanent address(es),
-- * corresponding prefix is still permanent.
-- * 4) otherwise, update prefix lifetime to the
-- * longest valid lifetime among the corresponding
-- * addresses on the device.
-- * Note: subsequent RA will update lifetime.
-- *
-- * --yoshfuji
-- */
-- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
-- struct in6_addr prefix;
-- struct rt6_info *rt;
--
-- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
-- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
--
-- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-- if (onlink == 0) {
-- ip6_del_rt(rt);
-- rt = NULL;
-- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
-- rt->rt6i_expires = expires;
-- rt->rt6i_flags |= RTF_EXPIRES;
-- }
-- }
-- dst_release(&rt->u.dst);
-- }
--
-- in6_ifa_put(ifp);
--}
--
--#ifdef CONFIG_IPV6_PRIVACY
--static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
--{
-- struct inet6_dev *idev = ifp->idev;
-- struct in6_addr addr, *tmpaddr;
-- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp;
-- int tmp_plen;
-- int ret = 0;
-- int max_addresses;
-- u32 addr_flags;
--
-- write_lock(&idev->lock);
-- if (ift) {
-- spin_lock_bh(&ift->lock);
-- memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
-- spin_unlock_bh(&ift->lock);
-- tmpaddr = &addr;
-- } else {
-- tmpaddr = NULL;
-- }
--retry:
-- in6_dev_hold(idev);
-- if (idev->cnf.use_tempaddr <= 0) {
-- write_unlock(&idev->lock);
-- printk(KERN_INFO
-- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- spin_lock_bh(&ifp->lock);
-- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
-- idev->cnf.use_tempaddr = -1; /*XXX*/
-- spin_unlock_bh(&ifp->lock);
-- write_unlock(&idev->lock);
-- printk(KERN_WARNING
-- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- in6_ifa_hold(ifp);
-- memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
-- spin_unlock_bh(&ifp->lock);
-- write_unlock(&idev->lock);
-- printk(KERN_WARNING
-- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
-- in6_ifa_put(ifp);
-- in6_dev_put(idev);
-- ret = -1;
-- goto out;
-- }
-- memcpy(&addr.s6_addr[8], idev->rndid, 8);
-- tmp_valid_lft = min_t(__u32,
-- ifp->valid_lft,
-- idev->cnf.temp_valid_lft);
-- tmp_prefered_lft = min_t(__u32,
-- ifp->prefered_lft,
-- idev->cnf.temp_prefered_lft - desync_factor / HZ);
-- tmp_plen = ifp->prefix_len;
-- max_addresses = idev->cnf.max_addresses;
-- tmp_cstamp = ifp->cstamp;
-- tmp_tstamp = ifp->tstamp;
-- spin_unlock_bh(&ifp->lock);
--
-- write_unlock(&idev->lock);
--
-- addr_flags = IFA_F_TEMPORARY;
-- /* set in addrconf_prefix_rcv() */
-- if (ifp->flags & IFA_F_OPTIMISTIC)
-- addr_flags |= IFA_F_OPTIMISTIC;
--
-- ift = !max_addresses ||
-- ipv6_count_addresses(idev) < max_addresses ?
-- ipv6_add_addr(idev, &addr, tmp_plen,
-- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
-- addr_flags) : NULL;
-- if (!ift || IS_ERR(ift)) {
-- in6_ifa_put(ifp);
-- in6_dev_put(idev);
-- printk(KERN_INFO
-- "ipv6_create_tempaddr(): retry temporary address regeneration.\n");
-- tmpaddr = &addr;
-- write_lock(&idev->lock);
-- goto retry;
-- }
--
-- spin_lock_bh(&ift->lock);
-- ift->ifpub = ifp;
-- ift->valid_lft = tmp_valid_lft;
-- ift->prefered_lft = tmp_prefered_lft;
-- ift->cstamp = tmp_cstamp;
-- ift->tstamp = tmp_tstamp;
-- spin_unlock_bh(&ift->lock);
--
-- addrconf_dad_start(ift, 0);
-- in6_ifa_put(ift);
-- in6_dev_put(idev);
--out:
-- return ret;
--}
--#endif
--
--/*
-- * Choose an appropriate source address (RFC3484)
-- */
--struct ipv6_saddr_score {
-- int addr_type;
-- unsigned int attrs;
-- int matchlen;
-- int scope;
-- unsigned int rule;
--};
--
--#define IPV6_SADDR_SCORE_LOCAL 0x0001
--#define IPV6_SADDR_SCORE_PREFERRED 0x0004
--#define IPV6_SADDR_SCORE_HOA 0x0008
--#define IPV6_SADDR_SCORE_OIF 0x0010
--#define IPV6_SADDR_SCORE_LABEL 0x0020
--#define IPV6_SADDR_SCORE_PRIVACY 0x0040
--
--static inline int ipv6_saddr_preferred(int type)
--{
-- if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
-- IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
-- return 1;
-- return 0;
--}
--
--/* static matching label */
--static inline int ipv6_saddr_label(const struct in6_addr *addr, int type)
--{
-- /*
-- * prefix (longest match) label
-- * -----------------------------
-- * ::1/128 0
-- * ::/0 1
-- * 2002::/16 2
-- * ::/96 3
-- * ::ffff:0:0/96 4
-- * fc00::/7 5
-- * 2001::/32 6
-- */
-- if (type & IPV6_ADDR_LOOPBACK)
-- return 0;
-- else if (type & IPV6_ADDR_COMPATv4)
-- return 3;
-- else if (type & IPV6_ADDR_MAPPED)
-- return 4;
-- else if (addr->s6_addr32[0] == htonl(0x20010000))
-- return 6;
-- else if (addr->s6_addr16[0] == htons(0x2002))
-- return 2;
-- else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
-- return 5;
-- return 1;
--}
--
--int ipv6_dev_get_saddr(struct net_device *daddr_dev,
-- struct in6_addr *daddr, struct in6_addr *saddr)
--{
-- struct ipv6_saddr_score hiscore;
-- struct inet6_ifaddr *ifa_result = NULL;
-- int daddr_type = __ipv6_addr_type(daddr);
-- int daddr_scope = __ipv6_addr_src_scope(daddr_type);
-- u32 daddr_label = ipv6_saddr_label(daddr, daddr_type);
-- struct net_device *dev;
--
-- memset(&hiscore, 0, sizeof(hiscore));
--
-- read_lock(&dev_base_lock);
-- rcu_read_lock();
--
-- for_each_netdev(dev) {
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa;
--
-- /* Rule 0: Candidate Source Address (section 4)
-- * - multicast and link-local destination address,
-- * the set of candidate source address MUST only
-- * include addresses assigned to interfaces
-- * belonging to the same link as the outgoing
-- * interface.
-- * (- For site-local destination addresses, the
-- * set of candidate source addresses MUST only
-- * include addresses assigned to interfaces
-- * belonging to the same site as the outgoing
-- * interface.)
-- */
-- if ((daddr_type & IPV6_ADDR_MULTICAST ||
-- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
-- daddr_dev && dev != daddr_dev)
-- continue;
--
-- idev = __in6_dev_get(dev);
-- if (!idev)
-- continue;
--
-- read_lock_bh(&idev->lock);
-- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
-- struct ipv6_saddr_score score;
--
-- score.addr_type = __ipv6_addr_type(&ifa->addr);
--
-- /* Rule 0:
-- * - Tentative Address (RFC2462 section 5.4)
-- * - A tentative address is not considered
-- * "assigned to an interface" in the traditional
-- * sense, unless it is also flagged as optimistic.
-- * - Candidate Source Address (section 4)
-- * - In any case, anycast addresses, multicast
-- * addresses, and the unspecified address MUST
-- * NOT be included in a candidate set.
-- */
-- if ((ifa->flags & IFA_F_TENTATIVE) &&
-- (!(ifa->flags & IFA_F_OPTIMISTIC)))
-- continue;
-- if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
-- score.addr_type & IPV6_ADDR_MULTICAST)) {
-- LIMIT_NETDEBUG(KERN_DEBUG
-- "ADDRCONF: unspecified / multicast address"
-- "assigned as unicast address on %s",
-- dev->name);
-- continue;
-- }
--
-- score.attrs = 0;
-- score.matchlen = 0;
-- score.scope = 0;
-- score.rule = 0;
--
-- if (ifa_result == NULL) {
-- /* record it if the first available entry */
-- goto record_it;
-- }
--
-- /* Rule 1: Prefer same address */
-- if (hiscore.rule < 1) {
-- if (ipv6_addr_equal(&ifa_result->addr, daddr))
-- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL;
-- hiscore.rule++;
-- }
-- if (ipv6_addr_equal(&ifa->addr, daddr)) {
-- score.attrs |= IPV6_SADDR_SCORE_LOCAL;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) {
-- score.rule = 1;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)
-- continue;
-- }
--
-- /* Rule 2: Prefer appropriate scope */
-- if (hiscore.rule < 2) {
-- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type);
-- hiscore.rule++;
-- }
-- score.scope = __ipv6_addr_src_scope(score.addr_type);
-- if (hiscore.scope < score.scope) {
-- if (hiscore.scope < daddr_scope) {
-- score.rule = 2;
-- goto record_it;
-- } else
-- continue;
-- } else if (score.scope < hiscore.scope) {
-- if (score.scope < daddr_scope)
-- break; /* addresses sorted by scope */
-- else {
-- score.rule = 2;
-- goto record_it;
-- }
-- }
--
-- /* Rule 3: Avoid deprecated and optimistic addresses */
-- if (hiscore.rule < 3) {
-- if (ipv6_saddr_preferred(hiscore.addr_type) ||
-- (((ifa_result->flags &
-- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
-- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
-- hiscore.rule++;
-- }
-- if (ipv6_saddr_preferred(score.addr_type) ||
-- (((ifa->flags &
-- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
-- score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
-- score.rule = 3;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)
-- continue;
-- }
--
-- /* Rule 4: Prefer home address */
--#ifdef CONFIG_IPV6_MIP6
-- if (hiscore.rule < 4) {
-- if (ifa_result->flags & IFA_F_HOMEADDRESS)
-- hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
-- hiscore.rule++;
-- }
-- if (ifa->flags & IFA_F_HOMEADDRESS) {
-- score.attrs |= IPV6_SADDR_SCORE_HOA;
-- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) {
-- score.rule = 4;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA)
-- continue;
-- }
--#else
-- if (hiscore.rule < 4)
-- hiscore.rule++;
--#endif
--
-- /* Rule 5: Prefer outgoing interface */
-- if (hiscore.rule < 5) {
-- if (daddr_dev == NULL ||
-- daddr_dev == ifa_result->idev->dev)
-- hiscore.attrs |= IPV6_SADDR_SCORE_OIF;
-- hiscore.rule++;
-- }
-- if (daddr_dev == NULL ||
-- daddr_dev == ifa->idev->dev) {
-- score.attrs |= IPV6_SADDR_SCORE_OIF;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) {
-- score.rule = 5;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF)
-- continue;
-- }
--
-- /* Rule 6: Prefer matching label */
-- if (hiscore.rule < 6) {
-- if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label)
-- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL;
-- hiscore.rule++;
-- }
-- if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) {
-- score.attrs |= IPV6_SADDR_SCORE_LABEL;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) {
-- score.rule = 6;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL)
-- continue;
-- }
--
--#ifdef CONFIG_IPV6_PRIVACY
-- /* Rule 7: Prefer public address
-- * Note: prefer temprary address if use_tempaddr >= 2
-- */
-- if (hiscore.rule < 7) {
-- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^
-- (ifa_result->idev->cnf.use_tempaddr >= 2))
-- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY;
-- hiscore.rule++;
-- }
-- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^
-- (ifa->idev->cnf.use_tempaddr >= 2)) {
-- score.attrs |= IPV6_SADDR_SCORE_PRIVACY;
-- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) {
-- score.rule = 7;
-- goto record_it;
-- }
-- } else {
-- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
-- continue;
-- }
--#else
-- if (hiscore.rule < 7)
-- hiscore.rule++;
--#endif
-- /* Rule 8: Use longest matching prefix */
-- if (hiscore.rule < 8) {
-- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr);
-- hiscore.rule++;
-- }
-- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr);
-- if (score.matchlen > hiscore.matchlen) {
-- score.rule = 8;
-- goto record_it;
-- }
--#if 0
-- else if (score.matchlen < hiscore.matchlen)
-- continue;
--#endif
--
-- /* Final Rule: choose first available one */
-- continue;
--record_it:
-- if (ifa_result)
-- in6_ifa_put(ifa_result);
-- in6_ifa_hold(ifa);
-- ifa_result = ifa;
-- hiscore = score;
-- }
-- read_unlock_bh(&idev->lock);
-- }
-- rcu_read_unlock();
-- read_unlock(&dev_base_lock);
--
-- if (!ifa_result)
-- return -EADDRNOTAVAIL;
--
-- ipv6_addr_copy(saddr, &ifa_result->addr);
-- in6_ifa_put(ifa_result);
-- return 0;
--}
--
--
--int ipv6_get_saddr(struct dst_entry *dst,
-- struct in6_addr *daddr, struct in6_addr *saddr)
--{
-- return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
--}
--
--EXPORT_SYMBOL(ipv6_get_saddr);
--
--int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
-- unsigned char banned_flags)
--{
-- struct inet6_dev *idev;
-- int err = -EADDRNOTAVAIL;
--
-- rcu_read_lock();
-- if ((idev = __in6_dev_get(dev)) != NULL) {
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
-- ipv6_addr_copy(addr, &ifp->addr);
-- err = 0;
-- break;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- }
-- rcu_read_unlock();
-- return err;
--}
--
--static int ipv6_count_addresses(struct inet6_dev *idev)
--{
-- int cnt = 0;
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
-- cnt++;
-- read_unlock_bh(&idev->lock);
-- return cnt;
--}
--
--int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- read_lock_bh(&addrconf_hash_lock);
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr) &&
-- !(ifp->flags&IFA_F_TENTATIVE)) {
-- if (dev == NULL || ifp->idev->dev == dev ||
-- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
-- break;
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
-- return ifp != NULL;
--}
--
--EXPORT_SYMBOL(ipv6_chk_addr);
--
--static
--int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr)) {
-- if (dev == NULL || ifp->idev->dev == dev)
-- break;
-- }
-- }
-- return ifp != NULL;
--}
--
--struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
--{
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
--
-- read_lock_bh(&addrconf_hash_lock);
-- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
-- if (ipv6_addr_equal(&ifp->addr, addr)) {
-- if (dev == NULL || ifp->idev->dev == dev ||
-- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
-- in6_ifa_hold(ifp);
-- break;
-- }
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
--
-- return ifp;
--}
--
--int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
--{
-- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
-- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
-- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
-- int sk_ipv6only = ipv6_only_sock(sk);
-- int sk2_ipv6only = inet_v6_ipv6only(sk2);
-- int addr_type = ipv6_addr_type(sk_rcv_saddr6);
-- int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
--
-- if (!sk2_rcv_saddr && !sk_ipv6only)
-- return 1;
--
-- if (addr_type2 == IPV6_ADDR_ANY &&
-- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
-- return 1;
--
-- if (addr_type == IPV6_ADDR_ANY &&
-- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
-- return 1;
--
-- if (sk2_rcv_saddr6 &&
-- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
-- return 1;
--
-- if (addr_type == IPV6_ADDR_MAPPED &&
-- !sk2_ipv6only &&
-- (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
-- return 1;
--
-- return 0;
--}
--
--/* Gets referenced address, destroys ifaddr */
--
--static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
--{
-- if (ifp->flags&IFA_F_PERMANENT) {
-- spin_lock_bh(&ifp->lock);
-- addrconf_del_timer(ifp);
-- ifp->flags |= IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- in6_ifa_put(ifp);
--#ifdef CONFIG_IPV6_PRIVACY
-- } else if (ifp->flags&IFA_F_TEMPORARY) {
-- struct inet6_ifaddr *ifpub;
-- spin_lock_bh(&ifp->lock);
-- ifpub = ifp->ifpub;
-- if (ifpub) {
-- in6_ifa_hold(ifpub);
-- spin_unlock_bh(&ifp->lock);
-- ipv6_create_tempaddr(ifpub, ifp);
-- in6_ifa_put(ifpub);
-- } else {
-- spin_unlock_bh(&ifp->lock);
-- }
-- ipv6_del_addr(ifp);
--#endif
-- } else
-- ipv6_del_addr(ifp);
--}
--
--void addrconf_dad_failure(struct inet6_ifaddr *ifp)
--{
-- if (net_ratelimit())
-- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
-- addrconf_dad_stop(ifp);
--}
--
--/* Join to solicited addr multicast group. */
--
--void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
--{
-- struct in6_addr maddr;
--
-- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
-- return;
--
-- addrconf_addr_solict_mult(addr, &maddr);
-- ipv6_dev_mc_inc(dev, &maddr);
--}
--
--void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
--{
-- struct in6_addr maddr;
--
-- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
-- return;
--
-- addrconf_addr_solict_mult(addr, &maddr);
-- __ipv6_dev_mc_dec(idev, &maddr);
--}
--
--static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
--{
-- struct in6_addr addr;
-- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
-- if (ipv6_addr_any(&addr))
-- return;
-- ipv6_dev_ac_inc(ifp->idev->dev, &addr);
--}
--
--static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
--{
-- struct in6_addr addr;
-- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
-- if (ipv6_addr_any(&addr))
-- return;
-- __ipv6_dev_ac_dec(ifp->idev, &addr);
--}
--
--static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
--{
-- if (dev->addr_len != ETH_ALEN)
-- return -1;
-- memcpy(eui, dev->dev_addr, 3);
-- memcpy(eui + 5, dev->dev_addr + 3, 3);
--
-- /*
-- * The zSeries OSA network cards can be shared among various
-- * OS instances, but the OSA cards have only one MAC address.
-- * This leads to duplicate address conflicts in conjunction
-- * with IPv6 if more than one instance uses the same card.
-- *
-- * The driver for these cards can deliver a unique 16-bit
-- * identifier for each instance sharing the same card. It is
-- * placed instead of 0xFFFE in the interface identifier. The
-- * "u" bit of the interface identifier is not inverted in this
-- * case. Hence the resulting interface identifier has local
-- * scope according to RFC2373.
-- */
-- if (dev->dev_id) {
-- eui[3] = (dev->dev_id >> 8) & 0xFF;
-- eui[4] = dev->dev_id & 0xFF;
-- } else {
-- eui[3] = 0xFF;
-- eui[4] = 0xFE;
-- eui[0] ^= 2;
-- }
-- return 0;
--}
--
--static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
--{
-- /* XXX: inherit EUI-64 from other interface -- yoshfuji */
-- if (dev->addr_len != ARCNET_ALEN)
-- return -1;
-- memset(eui, 0, 7);
-- eui[7] = *(u8*)dev->dev_addr;
-- return 0;
--}
--
--static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
--{
-- if (dev->addr_len != INFINIBAND_ALEN)
-- return -1;
-- memcpy(eui, dev->dev_addr + 12, 8);
-- eui[0] |= 2;
-- return 0;
--}
--
--static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
--{
-- switch (dev->type) {
-- case ARPHRD_ETHER:
-- case ARPHRD_FDDI:
-- case ARPHRD_IEEE802_TR:
-- return addrconf_ifid_eui48(eui, dev);
-- case ARPHRD_ARCNET:
-- return addrconf_ifid_arcnet(eui, dev);
-- case ARPHRD_INFINIBAND:
-- return addrconf_ifid_infiniband(eui, dev);
-- }
-- return -1;
--}
--
--static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
--{
-- int err = -1;
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
-- memcpy(eui, ifp->addr.s6_addr+8, 8);
-- err = 0;
-- break;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- return err;
--}
--
--#ifdef CONFIG_IPV6_PRIVACY
--/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
--static int __ipv6_regen_rndid(struct inet6_dev *idev)
--{
--regen:
-- get_random_bytes(idev->rndid, sizeof(idev->rndid));
-- idev->rndid[0] &= ~0x02;
--
-- /*
-- * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
-- * check if generated address is not inappropriate
-- *
-- * - Reserved subnet anycast (RFC 2526)
-- * 11111101 11....11 1xxxxxxx
-- * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
-- * 00-00-5E-FE-xx-xx-xx-xx
-- * - value 0
-- * - XXX: already assigned to an address on the device
-- */
-- if (idev->rndid[0] == 0xfd &&
-- (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
-- (idev->rndid[7]&0x80))
-- goto regen;
-- if ((idev->rndid[0]|idev->rndid[1]) == 0) {
-- if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
-- goto regen;
-- if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
-- goto regen;
-- }
--
-- return 0;
--}
--
--static void ipv6_regen_rndid(unsigned long data)
--{
-- struct inet6_dev *idev = (struct inet6_dev *) data;
-- unsigned long expires;
--
-- rcu_read_lock_bh();
-- write_lock_bh(&idev->lock);
--
-- if (idev->dead)
-- goto out;
--
-- if (__ipv6_regen_rndid(idev) < 0)
-- goto out;
--
-- expires = jiffies +
-- idev->cnf.temp_prefered_lft * HZ -
-- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
-- if (time_before(expires, jiffies)) {
-- printk(KERN_WARNING
-- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
-- idev->dev->name);
-- goto out;
-- }
--
-- if (!mod_timer(&idev->regen_timer, expires))
-- in6_dev_hold(idev);
--
--out:
-- write_unlock_bh(&idev->lock);
-- rcu_read_unlock_bh();
-- in6_dev_put(idev);
--}
--
--static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
-- int ret = 0;
--
-- if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-- ret = __ipv6_regen_rndid(idev);
-- return ret;
--}
--#endif
--
--/*
-- * Add prefix route.
-- */
--
--static void
--addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-- unsigned long expires, u32 flags)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_PREFIX,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_expires = expires,
-- .fc_dst_len = plen,
-- .fc_flags = RTF_UP | flags,
-- };
--
-- ipv6_addr_copy(&cfg.fc_dst, pfx);
--
-- /* Prevent useless cloning on PtP SIT.
-- This thing is done here expecting that the whole
-- class of non-broadcast devices need not cloning.
-- */
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
-- cfg.fc_flags |= RTF_NONEXTHOP;
--#endif
--
-- ip6_route_add(&cfg);
--}
--
--/* Create "default" multicast route to the interface */
--
--static void addrconf_add_mroute(struct net_device *dev)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_LOCAL,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_dst_len = 8,
-- .fc_flags = RTF_UP,
-- };
--
-- ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
--
-- ip6_route_add(&cfg);
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void sit_route_add(struct net_device *dev)
--{
-- struct fib6_config cfg = {
-- .fc_table = RT6_TABLE_MAIN,
-- .fc_metric = IP6_RT_PRIO_ADDRCONF,
-- .fc_ifindex = dev->ifindex,
-- .fc_dst_len = 96,
-- .fc_flags = RTF_UP | RTF_NONEXTHOP,
-- };
--
-- /* prefix length - 96 bits "::d.d.d.d" */
-- ip6_route_add(&cfg);
--}
--#endif
--
--static void addrconf_add_lroute(struct net_device *dev)
--{
-- struct in6_addr addr;
--
-- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
-- addrconf_prefix_route(&addr, 64, dev, 0, 0);
--}
--
--static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = ipv6_find_idev(dev)) == NULL)
-- return NULL;
--
-- /* Add default multicast route */
-- addrconf_add_mroute(dev);
--
-- /* Add link local route */
-- addrconf_add_lroute(dev);
-- return idev;
--}
--
--void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
--{
-- struct prefix_info *pinfo;
-- __u32 valid_lft;
-- __u32 prefered_lft;
-- int addr_type;
-- unsigned long rt_expires;
-- struct inet6_dev *in6_dev;
--
-- pinfo = (struct prefix_info *) opt;
--
-- if (len < sizeof(struct prefix_info)) {
-- ADBG(("addrconf: prefix option too short\n"));
-- return;
-- }
--
-- /*
-- * Validation checks ([ADDRCONF], page 19)
-- */
--
-- addr_type = ipv6_addr_type(&pinfo->prefix);
--
-- if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
-- return;
--
-- valid_lft = ntohl(pinfo->valid);
-- prefered_lft = ntohl(pinfo->prefered);
--
-- if (prefered_lft > valid_lft) {
-- if (net_ratelimit())
-- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
-- return;
-- }
--
-- in6_dev = in6_dev_get(dev);
--
-- if (in6_dev == NULL) {
-- if (net_ratelimit())
-- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
-- return;
-- }
--
-- /*
-- * Two things going on here:
-- * 1) Add routes for on-link prefixes
-- * 2) Configure prefixes with the auto flag set
-- */
--
-- /* Avoid arithmetic overflow. Really, we could
-- save rt_expires in seconds, likely valid_lft,
-- but it would require division in fib gc, that it
-- not good.
-- */
-- if (valid_lft >= 0x7FFFFFFF/HZ)
-- rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-- else
-- rt_expires = valid_lft * HZ;
--
-- /*
-- * We convert this (in jiffies) to clock_t later.
-- * Avoid arithmetic overflow there as well.
-- * Overflow can happen only if HZ < USER_HZ.
-- */
-- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
-- rt_expires = 0x7FFFFFFF / USER_HZ;
--
-- if (pinfo->onlink) {
-- struct rt6_info *rt;
-- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
--
-- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-- if (rt->rt6i_flags&RTF_EXPIRES) {
-- if (valid_lft == 0) {
-- ip6_del_rt(rt);
-- rt = NULL;
-- } else {
-- rt->rt6i_expires = jiffies + rt_expires;
-- }
-- }
-- } else if (valid_lft) {
-- addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
-- }
-- if (rt)
-- dst_release(&rt->u.dst);
-- }
--
-- /* Try to figure out our local address for this prefix */
--
-- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
-- struct inet6_ifaddr * ifp;
-- struct in6_addr addr;
-- int create = 0, update_lft = 0;
--
-- if (pinfo->prefix_len == 64) {
-- memcpy(&addr, &pinfo->prefix, 8);
-- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
-- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
-- in6_dev_put(in6_dev);
-- return;
-- }
-- goto ok;
-- }
-- if (net_ratelimit())
-- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
-- pinfo->prefix_len);
-- in6_dev_put(in6_dev);
-- return;
--
--ok:
--
-- ifp = ipv6_get_ifaddr(&addr, dev, 1);
--
-- if (ifp == NULL && valid_lft) {
-- int max_addresses = in6_dev->cnf.max_addresses;
-- u32 addr_flags = 0;
--
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- if (in6_dev->cnf.optimistic_dad &&
-- !ipv6_devconf.forwarding)
-- addr_flags = IFA_F_OPTIMISTIC;
--#endif
--
-- /* Do not allow to create too much of autoconfigured
-- * addresses; this would be too easy way to crash kernel.
-- */
-- if (!max_addresses ||
-- ipv6_count_addresses(in6_dev) < max_addresses)
-- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
-- addr_type&IPV6_ADDR_SCOPE_MASK,
-- addr_flags);
--
-- if (!ifp || IS_ERR(ifp)) {
-- in6_dev_put(in6_dev);
-- return;
-- }
--
-- update_lft = create = 1;
-- ifp->cstamp = jiffies;
-- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
-- }
--
-- if (ifp) {
-- int flags;
-- unsigned long now;
--#ifdef CONFIG_IPV6_PRIVACY
-- struct inet6_ifaddr *ift;
--#endif
-- u32 stored_lft;
--
-- /* update lifetime (RFC2462 5.5.3 e) */
-- spin_lock(&ifp->lock);
-- now = jiffies;
-- if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
-- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
-- else
-- stored_lft = 0;
-- if (!update_lft && stored_lft) {
-- if (valid_lft > MIN_VALID_LIFETIME ||
-- valid_lft > stored_lft)
-- update_lft = 1;
-- else if (stored_lft <= MIN_VALID_LIFETIME) {
-- /* valid_lft <= stored_lft is always true */
-- /* XXX: IPsec */
-- update_lft = 0;
-- } else {
-- valid_lft = MIN_VALID_LIFETIME;
-- if (valid_lft < prefered_lft)
-- prefered_lft = valid_lft;
-- update_lft = 1;
-- }
-- }
--
-- if (update_lft) {
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
-- ifp->tstamp = now;
-- flags = ifp->flags;
-- ifp->flags &= ~IFA_F_DEPRECATED;
-- spin_unlock(&ifp->lock);
--
-- if (!(flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ifp);
-- } else
-- spin_unlock(&ifp->lock);
--
--#ifdef CONFIG_IPV6_PRIVACY
-- read_lock_bh(&in6_dev->lock);
-- /* update all temporary addresses in the list */
-- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
-- /*
-- * When adjusting the lifetimes of an existing
-- * temporary address, only lower the lifetimes.
-- * Implementations must not increase the
-- * lifetimes of an existing temporary address
-- * when processing a Prefix Information Option.
-- */
-- spin_lock(&ift->lock);
-- flags = ift->flags;
-- if (ift->valid_lft > valid_lft &&
-- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
-- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
-- if (ift->prefered_lft > prefered_lft &&
-- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
-- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
-- spin_unlock(&ift->lock);
-- if (!(flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ift);
-- }
--
-- if (create && in6_dev->cnf.use_tempaddr > 0) {
-- /*
-- * When a new public address is created as described in [ADDRCONF],
-- * also create a new temporary address.
-- */
-- read_unlock_bh(&in6_dev->lock);
-- ipv6_create_tempaddr(ifp, NULL);
-- } else {
-- read_unlock_bh(&in6_dev->lock);
-- }
--#endif
-- in6_ifa_put(ifp);
-- addrconf_verify(0);
-- }
-- }
-- inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
-- in6_dev_put(in6_dev);
--}
--
--/*
-- * Set destination address.
-- * Special case for SIT interfaces where we create a new "virtual"
-- * device.
-- */
--int addrconf_set_dstaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- struct net_device *dev;
-- int err = -EINVAL;
--
-- rtnl_lock();
--
-- err = -EFAULT;
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- goto err_exit;
--
-- dev = __dev_get_by_index(ireq.ifr6_ifindex);
--
-- err = -ENODEV;
-- if (dev == NULL)
-- goto err_exit;
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- if (dev->type == ARPHRD_SIT) {
-- struct ifreq ifr;
-- mm_segment_t oldfs;
-- struct ip_tunnel_parm p;
--
-- err = -EADDRNOTAVAIL;
-- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
-- goto err_exit;
--
-- memset(&p, 0, sizeof(p));
-- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
-- p.iph.saddr = 0;
-- p.iph.version = 4;
-- p.iph.ihl = 5;
-- p.iph.protocol = IPPROTO_IPV6;
-- p.iph.ttl = 64;
-- ifr.ifr_ifru.ifru_data = (void __user *)&p;
--
-- oldfs = get_fs(); set_fs(KERNEL_DS);
-- err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-- set_fs(oldfs);
--
-- if (err == 0) {
-- err = -ENOBUFS;
-- if ((dev = __dev_get_by_name(p.name)) == NULL)
-- goto err_exit;
-- err = dev_open(dev);
-- }
-- }
--#endif
--
--err_exit:
-- rtnl_unlock();
-- return err;
--}
--
--/*
-- * Manual configuration of address on an interface
-- */
--static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
-- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft)
--{
-- struct inet6_ifaddr *ifp;
-- struct inet6_dev *idev;
-- struct net_device *dev;
-- int scope;
-- u32 flags = RTF_EXPIRES;
--
-- ASSERT_RTNL();
--
-- /* check the lifetime */
-- if (!valid_lft || prefered_lft > valid_lft)
-- return -EINVAL;
--
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-- return -ENODEV;
--
-- if ((idev = addrconf_add_dev(dev)) == NULL)
-- return -ENOBUFS;
--
-- scope = ipv6_addr_scope(pfx);
--
-- if (valid_lft == INFINITY_LIFE_TIME) {
-- ifa_flags |= IFA_F_PERMANENT;
-- flags = 0;
-- } else if (valid_lft >= 0x7FFFFFFF/HZ)
-- valid_lft = 0x7FFFFFFF/HZ;
--
-- if (prefered_lft == 0)
-- ifa_flags |= IFA_F_DEPRECATED;
-- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-- (prefered_lft != INFINITY_LIFE_TIME))
-- prefered_lft = 0x7FFFFFFF/HZ;
--
-- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
--
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
-- ifp->tstamp = jiffies;
-- spin_unlock_bh(&ifp->lock);
--
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-- jiffies_to_clock_t(valid_lft * HZ), flags);
-- /*
-- * Note that section 3.1 of RFC 4429 indicates
-- * that the Optimistic flag should not be set for
-- * manually configured addresses
-- */
-- addrconf_dad_start(ifp, 0);
-- in6_ifa_put(ifp);
-- addrconf_verify(0);
-- return 0;
-- }
--
-- return PTR_ERR(ifp);
--}
--
--static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
--{
-- struct inet6_ifaddr *ifp;
-- struct inet6_dev *idev;
-- struct net_device *dev;
--
-- if ((dev = __dev_get_by_index(ifindex)) == NULL)
-- return -ENODEV;
--
-- if ((idev = __in6_dev_get(dev)) == NULL)
-- return -ENXIO;
--
-- read_lock_bh(&idev->lock);
-- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
-- if (ifp->prefix_len == plen &&
-- ipv6_addr_equal(pfx, &ifp->addr)) {
-- in6_ifa_hold(ifp);
-- read_unlock_bh(&idev->lock);
--
-- ipv6_del_addr(ifp);
--
-- /* If the last address is deleted administratively,
-- disable IPv6 on this interface.
-- */
-- if (idev->addr_list == NULL)
-- addrconf_ifdown(idev->dev, 1);
-- return 0;
-- }
-- }
-- read_unlock_bh(&idev->lock);
-- return -EADDRNOTAVAIL;
--}
--
--
--int addrconf_add_ifaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- int err;
--
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
--
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- return -EFAULT;
--
-- rtnl_lock();
-- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen,
-- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
-- rtnl_unlock();
-- return err;
--}
--
--int addrconf_del_ifaddr(void __user *arg)
--{
-- struct in6_ifreq ireq;
-- int err;
--
-- if (!capable(CAP_NET_ADMIN))
-- return -EPERM;
--
-- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-- return -EFAULT;
--
-- rtnl_lock();
-- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen);
-- rtnl_unlock();
-- return err;
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void sit_add_v4_addrs(struct inet6_dev *idev)
--{
-- struct inet6_ifaddr * ifp;
-- struct in6_addr addr;
-- struct net_device *dev;
-- int scope;
--
-- ASSERT_RTNL();
--
-- memset(&addr, 0, sizeof(struct in6_addr));
-- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
--
-- if (idev->dev->flags&IFF_POINTOPOINT) {
-- addr.s6_addr32[0] = htonl(0xfe800000);
-- scope = IFA_LINK;
-- } else {
-- scope = IPV6_ADDR_COMPATv4;
-- }
--
-- if (addr.s6_addr32[3]) {
-- ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
-- return;
-- }
--
-- for_each_netdev(dev) {
-- struct in_device * in_dev = __in_dev_get_rtnl(dev);
-- if (in_dev && (dev->flags & IFF_UP)) {
-- struct in_ifaddr * ifa;
--
-- int flag = scope;
--
-- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
-- int plen;
--
-- addr.s6_addr32[3] = ifa->ifa_local;
--
-- if (ifa->ifa_scope == RT_SCOPE_LINK)
-- continue;
-- if (ifa->ifa_scope >= RT_SCOPE_HOST) {
-- if (idev->dev->flags&IFF_POINTOPOINT)
-- continue;
-- flag |= IFA_HOST;
-- }
-- if (idev->dev->flags&IFF_POINTOPOINT)
-- plen = 64;
-- else
-- plen = 96;
--
-- ifp = ipv6_add_addr(idev, &addr, plen, flag,
-- IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
-- }
-- }
-- }
--}
--#endif
--
--static void init_loopback(struct net_device *dev)
--{
-- struct inet6_dev *idev;
-- struct inet6_ifaddr * ifp;
--
-- /* ::1 */
--
-- ASSERT_RTNL();
--
-- if ((idev = ipv6_find_idev(dev)) == NULL) {
-- printk(KERN_DEBUG "init loopback: add_dev failed\n");
-- return;
-- }
--
-- ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
-- if (!IS_ERR(ifp)) {
-- spin_lock_bh(&ifp->lock);
-- ifp->flags &= ~IFA_F_TENTATIVE;
-- spin_unlock_bh(&ifp->lock);
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
-- in6_ifa_put(ifp);
-- }
--}
--
--static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
--{
-- struct inet6_ifaddr * ifp;
-- u32 addr_flags = IFA_F_PERMANENT;
--
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- if (idev->cnf.optimistic_dad &&
-- !ipv6_devconf.forwarding)
-- addr_flags |= IFA_F_OPTIMISTIC;
--#endif
--
--
-- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
-- if (!IS_ERR(ifp)) {
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
-- addrconf_dad_start(ifp, 0);
-- in6_ifa_put(ifp);
-- }
--}
--
--static void addrconf_dev_config(struct net_device *dev)
--{
-- struct in6_addr addr;
-- struct inet6_dev * idev;
--
-- ASSERT_RTNL();
--
-- if ((dev->type != ARPHRD_ETHER) &&
-- (dev->type != ARPHRD_FDDI) &&
-- (dev->type != ARPHRD_IEEE802_TR) &&
-- (dev->type != ARPHRD_ARCNET) &&
-- (dev->type != ARPHRD_INFINIBAND)) {
-- /* Alas, we support only Ethernet autoconfiguration. */
-- return;
-- }
--
-- idev = addrconf_add_dev(dev);
-- if (idev == NULL)
-- return;
--
-- memset(&addr, 0, sizeof(struct in6_addr));
-- addr.s6_addr32[0] = htonl(0xFE800000);
--
-- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
-- addrconf_add_linklocal(idev, &addr);
--}
--
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
--static void addrconf_sit_config(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- /*
-- * Configure the tunnel with one of our IPv4
-- * addresses... we should configure all of
-- * our v4 addrs in the tunnel
-- */
--
-- if ((idev = ipv6_find_idev(dev)) == NULL) {
-- printk(KERN_DEBUG "init sit: add_dev failed\n");
-- return;
-- }
--
-- sit_add_v4_addrs(idev);
--
-- if (dev->flags&IFF_POINTOPOINT) {
-- addrconf_add_mroute(dev);
-- addrconf_add_lroute(dev);
-- } else
-- sit_route_add(dev);
--}
--#endif
--
--static inline int
--ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
--{
-- struct in6_addr lladdr;
--
-- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
-- addrconf_add_linklocal(idev, &lladdr);
-- return 0;
-- }
-- return -1;
--}
--
--static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
--{
-- struct net_device *link_dev;
--
-- /* first try to inherit the link-local address from the link device */
-- if (idev->dev->iflink &&
-- (link_dev = __dev_get_by_index(idev->dev->iflink))) {
-- if (!ipv6_inherit_linklocal(idev, link_dev))
-- return;
-- }
-- /* then try to inherit it from any device */
-- for_each_netdev(link_dev) {
-- if (!ipv6_inherit_linklocal(idev, link_dev))
-- return;
-- }
-- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
--}
--
--/*
-- * Autoconfigure tunnel with a link-local address so routing protocols,
-- * DHCPv6, MLD etc. can be run over the virtual link
-- */
--
--static void addrconf_ip6_tnl_config(struct net_device *dev)
--{
-- struct inet6_dev *idev;
--
-- ASSERT_RTNL();
--
-- if ((idev = addrconf_add_dev(dev)) == NULL) {
-- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
-- return;
-- }
-- ip6_tnl_add_linklocal(idev);
--}
--
--static int addrconf_notify(struct notifier_block *this, unsigned long event,
-- void * data)
--{
-- struct net_device *dev = (struct net_device *) data;
-- struct inet6_dev *idev = __in6_dev_get(dev);
-- int run_pending = 0;
--
-- switch(event) {
-- case NETDEV_REGISTER:
-- if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-- idev = ipv6_add_dev(dev);
-- if (!idev)
-- printk(KERN_WARNING "IPv6: add_dev failed for %s\n",
-- dev->name);
-- }
-- break;
-- case NETDEV_UP:
-- case NETDEV_CHANGE:
-- if (event == NETDEV_UP) {
-- if (!addrconf_qdisc_ok(dev)) {
-- /* device is not ready yet. */
-- printk(KERN_INFO
-- "ADDRCONF(NETDEV_UP): %s: "
-- "link is not ready\n",
-- dev->name);
-- break;
-- }
--
-- if (idev)
-- idev->if_flags |= IF_READY;
-- } else {
-- if (!addrconf_qdisc_ok(dev)) {
-- /* device is still not ready. */
-- break;
-- }
--
-- if (idev) {
-- if (idev->if_flags & IF_READY) {
-- /* device is already configured. */
-- break;
-- }
-- idev->if_flags |= IF_READY;
-- }
--
-- printk(KERN_INFO
-- "ADDRCONF(NETDEV_CHANGE): %s: "
-- "link becomes ready\n",
-- dev->name);
--
-- run_pending = 1;
-- }
--
-- switch(dev->type) {
--#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
-- case ARPHRD_SIT:
-- addrconf_sit_config(dev);
-- break;
--#endif
-- case ARPHRD_TUNNEL6:
-- addrconf_ip6_tnl_config(dev);
-- break;
-- case ARPHRD_LOOPBACK:
-- init_loopback(dev);
-- break;
--
-- default:
-- addrconf_dev_config(dev);
-- break;
-- }
-- if (idev) {
-- if (run_pending)
-- addrconf_dad_run(idev);
--
-- /* If the MTU changed during the interface down, when the
-- interface up, the changed MTU must be reflected in the
-- idev as well as routers.
-- */
-- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
-- rt6_mtu_change(dev, dev->mtu);
-- idev->cnf.mtu6 = dev->mtu;
-- }
-- idev->tstamp = jiffies;
-- inet6_ifinfo_notify(RTM_NEWLINK, idev);
-- /* If the changed mtu during down is lower than IPV6_MIN_MTU
-- stop IPv6 on this interface.
-- */
-- if (dev->mtu < IPV6_MIN_MTU)
-- addrconf_ifdown(dev, event != NETDEV_DOWN);
-- }
-- break;
--
-- case NETDEV_CHANGEMTU:
-- if ( idev && dev->mtu >= IPV6_MIN_MTU) {
-- rt6_mtu_change(dev, dev->mtu);
-- idev->cnf.mtu6 = dev->mtu;
-- break;
-- }
--
-- /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
--
-- case NETDEV_DOWN:
-- case NETDEV_UNREGISTER:
-- /*
-- * Remove all addresses from this interface.
-- */
-- addrconf_ifdown(dev, event != NETDEV_DOWN);
-- break;
--
-- case NETDEV_CHANGENAME:
-- if (idev) {
-- snmp6_unregister_dev(idev);
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&idev->cnf);
-- neigh_sysctl_unregister(idev->nd_parms);
-- neigh_sysctl_register(dev, idev->nd_parms,
-- NET_IPV6, NET_IPV6_NEIGH, "ipv6",
-- &ndisc_ifinfo_sysctl_change,
-- NULL);
-- addrconf_sysctl_register(idev, &idev->cnf);
--#endif
-- snmp6_register_dev(idev);
-- }
-- break;
-- }
--
-- return NOTIFY_OK;
--}
--
--/*
-- * addrconf module should be notified of a device going up
-- */
--static struct notifier_block ipv6_dev_notf = {
-- .notifier_call = addrconf_notify,
-- .priority = 0
--};
--
--static int addrconf_ifdown(struct net_device *dev, int how)
--{
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa, **bifa;
-- int i;
--
-- ASSERT_RTNL();
--
-- if (dev == &loopback_dev && how == 1)
-- how = 0;
--
-- rt6_ifdown(dev);
-- neigh_ifdown(&nd_tbl, dev);
--
-- idev = __in6_dev_get(dev);
-- if (idev == NULL)
-- return -ENODEV;
--
-- /* Step 1: remove reference to ipv6 device from parent device.
-- Do not dev_put!
-- */
-- if (how == 1) {
-- idev->dead = 1;
--
-- /* protected by rtnl_lock */
-- rcu_assign_pointer(dev->ip6_ptr, NULL);
--
-- /* Step 1.5: remove snmp6 entry */
-- snmp6_unregister_dev(idev);
--
-- }
--
-- /* Step 2: clear hash table */
-- for (i=0; i<IN6_ADDR_HSIZE; i++) {
-- bifa = &inet6_addr_lst[i];
--
-- write_lock_bh(&addrconf_hash_lock);
-- while ((ifa = *bifa) != NULL) {
-- if (ifa->idev == idev) {
-- *bifa = ifa->lst_next;
-- ifa->lst_next = NULL;
-- addrconf_del_timer(ifa);
-- in6_ifa_put(ifa);
-- continue;
-- }
-- bifa = &ifa->lst_next;
-- }
-- write_unlock_bh(&addrconf_hash_lock);
-- }
--
-- write_lock_bh(&idev->lock);
--
-- /* Step 3: clear flags for stateless addrconf */
-- if (how != 1)
-- idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
--
-- /* Step 4: clear address list */
--#ifdef CONFIG_IPV6_PRIVACY
-- if (how == 1 && del_timer(&idev->regen_timer))
-- in6_dev_put(idev);
--
-- /* clear tempaddr list */
-- while ((ifa = idev->tempaddr_list) != NULL) {
-- idev->tempaddr_list = ifa->tmp_next;
-- ifa->tmp_next = NULL;
-- ifa->dead = 1;
-- write_unlock_bh(&idev->lock);
-- spin_lock_bh(&ifa->lock);
--
-- if (ifa->ifpub) {
-- in6_ifa_put(ifa->ifpub);
-- ifa->ifpub = NULL;
-- }
-- spin_unlock_bh(&ifa->lock);
-- in6_ifa_put(ifa);
-- write_lock_bh(&idev->lock);
-- }
--#endif
-- while ((ifa = idev->addr_list) != NULL) {
-- idev->addr_list = ifa->if_next;
-- ifa->if_next = NULL;
-- ifa->dead = 1;
-- addrconf_del_timer(ifa);
-- write_unlock_bh(&idev->lock);
--
-- __ipv6_ifa_notify(RTM_DELADDR, ifa);
-- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
-- in6_ifa_put(ifa);
--
-- write_lock_bh(&idev->lock);
-- }
-- write_unlock_bh(&idev->lock);
--
-- /* Step 5: Discard multicast list */
--
-- if (how == 1)
-- ipv6_mc_destroy_dev(idev);
-- else
-- ipv6_mc_down(idev);
--
-- /* Step 5: netlink notification of this interface */
-- idev->tstamp = jiffies;
-- inet6_ifinfo_notify(RTM_DELLINK, idev);
--
-- /* Shot the device (if unregistered) */
--
-- if (how == 1) {
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&idev->cnf);
-- neigh_sysctl_unregister(idev->nd_parms);
--#endif
-- neigh_parms_release(&nd_tbl, idev->nd_parms);
-- neigh_ifdown(&nd_tbl, dev);
-- in6_dev_put(idev);
-- }
-- return 0;
--}
--
--static void addrconf_rs_timer(unsigned long data)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
--
-- if (ifp->idev->cnf.forwarding)
-- goto out;
--
-- if (ifp->idev->if_flags & IF_RA_RCVD) {
-- /*
-- * Announcement received after solicitation
-- * was sent
-- */
-- goto out;
-- }
--
-- spin_lock(&ifp->lock);
-- if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
-- struct in6_addr all_routers;
--
-- /* The wait after the last probe can be shorter */
-- addrconf_mod_timer(ifp, AC_RS,
-- (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
-- ifp->idev->cnf.rtr_solicit_delay :
-- ifp->idev->cnf.rtr_solicit_interval);
-- spin_unlock(&ifp->lock);
--
-- ipv6_addr_all_routers(&all_routers);
--
-- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
-- } else {
-- spin_unlock(&ifp->lock);
-- /*
-- * Note: we do not support deprecated "all on-link"
-- * assumption any longer.
-- */
-- printk(KERN_DEBUG "%s: no IPv6 routers present\n",
-- ifp->idev->dev->name);
-- }
--
--out:
-- in6_ifa_put(ifp);
--}
--
--/*
-- * Duplicate Address Detection
-- */
--static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
--{
-- unsigned long rand_num;
-- struct inet6_dev *idev = ifp->idev;
--
-- if (ifp->flags & IFA_F_OPTIMISTIC)
-- rand_num = 0;
-- else
-- rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
--
-- ifp->probes = idev->cnf.dad_transmits;
-- addrconf_mod_timer(ifp, AC_DAD, rand_num);
--}
--
--static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
--{
-- struct inet6_dev *idev = ifp->idev;
-- struct net_device *dev = idev->dev;
--
-- addrconf_join_solict(dev, &ifp->addr);
--
-- net_srandom(ifp->addr.s6_addr32[3]);
--
-- read_lock_bh(&idev->lock);
-- if (ifp->dead)
-- goto out;
-- spin_lock_bh(&ifp->lock);
--
-- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
-- !(ifp->flags&IFA_F_TENTATIVE) ||
-- ifp->flags & IFA_F_NODAD) {
-- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- addrconf_dad_completed(ifp);
-- return;
-- }
--
-- if (!(idev->if_flags & IF_READY)) {
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
-- /*
-- * If the defice is not ready:
-- * - keep it tentative if it is a permanent address.
-- * - otherwise, kill it.
-- */
-- in6_ifa_hold(ifp);
-- addrconf_dad_stop(ifp);
-- return;
-- }
--
-- /*
-- * Optimistic nodes can start receiving
-- * Frames right away
-- */
-- if(ifp->flags & IFA_F_OPTIMISTIC)
-- ip6_ins_rt(ifp->rt);
--
-- addrconf_dad_kick(ifp);
-- spin_unlock_bh(&ifp->lock);
--out:
-- read_unlock_bh(&idev->lock);
--}
--
--static void addrconf_dad_timer(unsigned long data)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
-- struct inet6_dev *idev = ifp->idev;
-- struct in6_addr unspec;
-- struct in6_addr mcaddr;
--
-- read_lock_bh(&idev->lock);
-- if (idev->dead) {
-- read_unlock_bh(&idev->lock);
-- goto out;
-- }
-- spin_lock_bh(&ifp->lock);
-- if (ifp->probes == 0) {
-- /*
-- * DAD was successful
-- */
--
-- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- addrconf_dad_completed(ifp);
--
-- goto out;
-- }
--
-- ifp->probes--;
-- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
-- spin_unlock_bh(&ifp->lock);
-- read_unlock_bh(&idev->lock);
--
-- /* send a neighbour solicitation for our addr */
-- memset(&unspec, 0, sizeof(unspec));
-- addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
--out:
-- in6_ifa_put(ifp);
--}
--
--static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
--{
-- struct net_device * dev = ifp->idev->dev;
--
-- /*
-- * Configure the address for reception. Now it is valid.
-- */
--
-- ipv6_ifa_notify(RTM_NEWADDR, ifp);
--
-- /* If added prefix is link local and forwarding is off,
-- start sending router solicitations.
-- */
--
-- if (ifp->idev->cnf.forwarding == 0 &&
-- ifp->idev->cnf.rtr_solicits > 0 &&
-- (dev->flags&IFF_LOOPBACK) == 0 &&
-- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
-- struct in6_addr all_routers;
--
-- ipv6_addr_all_routers(&all_routers);
--
-- /*
-- * If a host as already performed a random delay
-- * [...] as part of DAD [...] there is no need
-- * to delay again before sending the first RS
-- */
-- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
--
-- spin_lock_bh(&ifp->lock);
-- ifp->probes = 1;
-- ifp->idev->if_flags |= IF_RS_SENT;
-- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
-- spin_unlock_bh(&ifp->lock);
-- }
--}
--
--static void addrconf_dad_run(struct inet6_dev *idev) {
-- struct inet6_ifaddr *ifp;
--
-- read_lock_bh(&idev->lock);
-- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
-- spin_lock_bh(&ifp->lock);
-- if (!(ifp->flags & IFA_F_TENTATIVE)) {
-- spin_unlock_bh(&ifp->lock);
-- continue;
-- }
-- spin_unlock_bh(&ifp->lock);
-- addrconf_dad_kick(ifp);
-- }
-- read_unlock_bh(&idev->lock);
--}
--
--#ifdef CONFIG_PROC_FS
--struct if6_iter_state {
-- int bucket;
--};
--
--static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
--{
-- struct inet6_ifaddr *ifa = NULL;
-- struct if6_iter_state *state = seq->private;
--
-- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-- ifa = inet6_addr_lst[state->bucket];
-- if (ifa)
-- break;
-- }
-- return ifa;
--}
--
--static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
--{
-- struct if6_iter_state *state = seq->private;
--
-- ifa = ifa->lst_next;
--try_again:
-- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
-- ifa = inet6_addr_lst[state->bucket];
-- goto try_again;
-- }
-- return ifa;
--}
--
--static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
--{
-- struct inet6_ifaddr *ifa = if6_get_first(seq);
--
-- if (ifa)
-- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
-- --pos;
-- return pos ? NULL : ifa;
--}
--
--static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
--{
-- read_lock_bh(&addrconf_hash_lock);
-- return if6_get_idx(seq, *pos);
--}
--
--static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
--{
-- struct inet6_ifaddr *ifa;
--
-- ifa = if6_get_next(seq, v);
-- ++*pos;
-- return ifa;
--}
--
--static void if6_seq_stop(struct seq_file *seq, void *v)
--{
-- read_unlock_bh(&addrconf_hash_lock);
--}
--
--static int if6_seq_show(struct seq_file *seq, void *v)
--{
-- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v;
-- seq_printf(seq,
-- NIP6_SEQFMT " %02x %02x %02x %02x %8s\n",
-- NIP6(ifp->addr),
-- ifp->idev->dev->ifindex,
-- ifp->prefix_len,
-- ifp->scope,
-- ifp->flags,
-- ifp->idev->dev->name);
-- return 0;
--}
--
--static struct seq_operations if6_seq_ops = {
-- .start = if6_seq_start,
-- .next = if6_seq_next,
-- .show = if6_seq_show,
-- .stop = if6_seq_stop,
--};
--
--static int if6_seq_open(struct inode *inode, struct file *file)
--{
-- struct seq_file *seq;
-- int rc = -ENOMEM;
-- struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
--
-- if (!s)
-- goto out;
--
-- rc = seq_open(file, &if6_seq_ops);
-- if (rc)
-- goto out_kfree;
--
-- seq = file->private_data;
-- seq->private = s;
--out:
-- return rc;
--out_kfree:
-- kfree(s);
-- goto out;
--}
--
--static const struct file_operations if6_fops = {
-- .owner = THIS_MODULE,
-- .open = if6_seq_open,
-- .read = seq_read,
-- .llseek = seq_lseek,
-- .release = seq_release_private,
--};
--
--int __init if6_proc_init(void)
--{
-- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
-- return -ENOMEM;
-- return 0;
--}
--
--void if6_proc_exit(void)
--{
-- proc_net_remove("if_inet6");
--}
--#endif /* CONFIG_PROC_FS */
--
--#ifdef CONFIG_IPV6_MIP6
--/* Check if address is a home address configured on any interface. */
--int ipv6_chk_home_addr(struct in6_addr *addr)
--{
-- int ret = 0;
-- struct inet6_ifaddr * ifp;
-- u8 hash = ipv6_addr_hash(addr);
-- read_lock_bh(&addrconf_hash_lock);
-- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
-- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 &&
-- (ifp->flags & IFA_F_HOMEADDRESS)) {
-- ret = 1;
-- break;
-- }
-- }
-- read_unlock_bh(&addrconf_hash_lock);
-- return ret;
--}
--#endif
--
--/*
-- * Periodic address status verification
-- */
--
--static void addrconf_verify(unsigned long foo)
--{
-- struct inet6_ifaddr *ifp;
-- unsigned long now, next;
-- int i;
--
-- spin_lock_bh(&addrconf_verify_lock);
-- now = jiffies;
-- next = now + ADDR_CHECK_FREQUENCY;
--
-- del_timer(&addr_chk_timer);
--
-- for (i=0; i < IN6_ADDR_HSIZE; i++) {
--
--restart:
-- read_lock(&addrconf_hash_lock);
-- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
-- unsigned long age;
--#ifdef CONFIG_IPV6_PRIVACY
-- unsigned long regen_advance;
--#endif
--
-- if (ifp->flags & IFA_F_PERMANENT)
-- continue;
--
-- spin_lock(&ifp->lock);
-- age = (now - ifp->tstamp) / HZ;
--
--#ifdef CONFIG_IPV6_PRIVACY
-- regen_advance = ifp->idev->cnf.regen_max_retry *
-- ifp->idev->cnf.dad_transmits *
-- ifp->idev->nd_parms->retrans_time / HZ;
--#endif
--
-- if (ifp->valid_lft != INFINITY_LIFE_TIME &&
-- age >= ifp->valid_lft) {
-- spin_unlock(&ifp->lock);
-- in6_ifa_hold(ifp);
-- read_unlock(&addrconf_hash_lock);
-- ipv6_del_addr(ifp);
-- goto restart;
-- } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
-- spin_unlock(&ifp->lock);
-- continue;
-- } else if (age >= ifp->prefered_lft) {
-- /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */
-- int deprecate = 0;
--
-- if (!(ifp->flags&IFA_F_DEPRECATED)) {
-- deprecate = 1;
-- ifp->flags |= IFA_F_DEPRECATED;
-- }
--
-- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
-- next = ifp->tstamp + ifp->valid_lft * HZ;
--
-- spin_unlock(&ifp->lock);
--
-- if (deprecate) {
-- in6_ifa_hold(ifp);
-- read_unlock(&addrconf_hash_lock);
--
-- ipv6_ifa_notify(0, ifp);
-- in6_ifa_put(ifp);
-- goto restart;
-- }
--#ifdef CONFIG_IPV6_PRIVACY
-- } else if ((ifp->flags&IFA_F_TEMPORARY) &&
-- !(ifp->flags&IFA_F_TENTATIVE)) {
-- if (age >= ifp->prefered_lft - regen_advance) {
-- struct inet6_ifaddr *ifpub = ifp->ifpub;
-- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ;
-- if (!ifp->regen_count && ifpub) {
-- ifp->regen_count++;
-- in6_ifa_hold(ifp);
-- in6_ifa_hold(ifpub);
-- spin_unlock(&ifp->lock);
-- read_unlock(&addrconf_hash_lock);
-- spin_lock(&ifpub->lock);
-- ifpub->regen_count = 0;
-- spin_unlock(&ifpub->lock);
-- ipv6_create_tempaddr(ifpub, ifp);
-- in6_ifa_put(ifpub);
-- in6_ifa_put(ifp);
-- goto restart;
-- }
-- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
-- spin_unlock(&ifp->lock);
--#endif
-- } else {
-- /* ifp->prefered_lft <= ifp->valid_lft */
-- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
-- next = ifp->tstamp + ifp->prefered_lft * HZ;
-- spin_unlock(&ifp->lock);
-- }
-- }
-- read_unlock(&addrconf_hash_lock);
-- }
--
-- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
-- add_timer(&addr_chk_timer);
-- spin_unlock_bh(&addrconf_verify_lock);
--}
--
--static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
--{
-- struct in6_addr *pfx = NULL;
--
-- if (addr)
-- pfx = nla_data(addr);
--
-- if (local) {
-- if (pfx && nla_memcmp(local, pfx, sizeof(*pfx)))
-- pfx = NULL;
-- else
-- pfx = nla_data(local);
-- }
--
-- return pfx;
--}
--
--static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
-- [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) },
-- [IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
-- [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
--};
--
--static int
--inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *pfx;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- return err;
--
-- ifm = nlmsg_data(nlh);
-- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (pfx == NULL)
-- return -EINVAL;
--
-- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
--}
--
--static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
-- u32 prefered_lft, u32 valid_lft)
--{
-- u32 flags = RTF_EXPIRES;
--
-- if (!valid_lft || (prefered_lft > valid_lft))
-- return -EINVAL;
--
-- if (valid_lft == INFINITY_LIFE_TIME) {
-- ifa_flags |= IFA_F_PERMANENT;
-- flags = 0;
-- } else if (valid_lft >= 0x7FFFFFFF/HZ)
-- valid_lft = 0x7FFFFFFF/HZ;
--
-- if (prefered_lft == 0)
-- ifa_flags |= IFA_F_DEPRECATED;
-- else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-- (prefered_lft != INFINITY_LIFE_TIME))
-- prefered_lft = 0x7FFFFFFF/HZ;
--
-- spin_lock_bh(&ifp->lock);
-- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
-- ifp->tstamp = jiffies;
-- ifp->valid_lft = valid_lft;
-- ifp->prefered_lft = prefered_lft;
--
-- spin_unlock_bh(&ifp->lock);
-- if (!(ifp->flags&IFA_F_TENTATIVE))
-- ipv6_ifa_notify(0, ifp);
--
-- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
-- jiffies_to_clock_t(valid_lft * HZ), flags);
-- addrconf_verify(0);
--
-- return 0;
--}
--
--static int
--inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *pfx;
-- struct inet6_ifaddr *ifa;
-- struct net_device *dev;
-- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
-- u8 ifa_flags;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- return err;
--
-- ifm = nlmsg_data(nlh);
-- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (pfx == NULL)
-- return -EINVAL;
--
-- if (tb[IFA_CACHEINFO]) {
-- struct ifa_cacheinfo *ci;
--
-- ci = nla_data(tb[IFA_CACHEINFO]);
-- valid_lft = ci->ifa_valid;
-- preferred_lft = ci->ifa_prefered;
-- } else {
-- preferred_lft = INFINITY_LIFE_TIME;
-- valid_lft = INFINITY_LIFE_TIME;
-- }
--
-- dev = __dev_get_by_index(ifm->ifa_index);
-- if (dev == NULL)
-- return -ENODEV;
--
-- /* We ignore other flags so far. */
-- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
--
-- ifa = ipv6_get_ifaddr(pfx, dev, 1);
-- if (ifa == NULL) {
-- /*
-- * It would be best to check for !NLM_F_CREATE here but
-- * userspace alreay relies on not having to provide this.
-- */
-- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen,
-- ifa_flags, preferred_lft, valid_lft);
-- }
--
-- if (nlh->nlmsg_flags & NLM_F_EXCL ||
-- !(nlh->nlmsg_flags & NLM_F_REPLACE))
-- err = -EEXIST;
-- else
-- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
--
-- in6_ifa_put(ifa);
--
-- return err;
--}
--
--static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags,
-- u8 scope, int ifindex)
--{
-- struct ifaddrmsg *ifm;
--
-- ifm = nlmsg_data(nlh);
-- ifm->ifa_family = AF_INET6;
-- ifm->ifa_prefixlen = prefixlen;
-- ifm->ifa_flags = flags;
-- ifm->ifa_scope = scope;
-- ifm->ifa_index = ifindex;
--}
--
--static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
-- unsigned long tstamp, u32 preferred, u32 valid)
--{
-- struct ifa_cacheinfo ci;
--
-- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.ifa_prefered = preferred;
-- ci.ifa_valid = valid;
--
-- return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
--}
--
--static inline int rt_scope(int ifa_scope)
--{
-- if (ifa_scope & IFA_HOST)
-- return RT_SCOPE_HOST;
-- else if (ifa_scope & IFA_LINK)
-- return RT_SCOPE_LINK;
-- else if (ifa_scope & IFA_SITE)
-- return RT_SCOPE_SITE;
-- else
-- return RT_SCOPE_UNIVERSE;
--}
--
--static inline int inet6_ifaddr_msgsize(void)
--{
-- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
-- + nla_total_size(16) /* IFA_ADDRESS */
-- + nla_total_size(sizeof(struct ifa_cacheinfo));
--}
--
--static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct nlmsghdr *nlh;
-- u32 preferred, valid;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
-- ifa->idev->dev->ifindex);
--
-- if (!(ifa->flags&IFA_F_PERMANENT)) {
-- preferred = ifa->prefered_lft;
-- valid = ifa->valid_lft;
-- if (preferred != INFINITY_LIFE_TIME) {
-- long tval = (jiffies - ifa->tstamp)/HZ;
-- preferred -= tval;
-- if (valid != INFINITY_LIFE_TIME)
-- valid -= tval;
-- }
-- } else {
-- preferred = INFINITY_LIFE_TIME;
-- valid = INFINITY_LIFE_TIME;
-- }
--
-- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 ||
-- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-- u32 pid, u32 seq, int event, u16 flags)
--{
-- struct nlmsghdr *nlh;
-- u8 scope = RT_SCOPE_UNIVERSE;
-- int ifindex = ifmca->idev->dev->ifindex;
--
-- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
-- scope = RT_SCOPE_SITE;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-- if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 ||
-- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
-- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct nlmsghdr *nlh;
-- u8 scope = RT_SCOPE_UNIVERSE;
-- int ifindex = ifaca->aca_idev->dev->ifindex;
--
-- if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
-- scope = RT_SCOPE_SITE;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
-- if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 ||
-- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
-- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
-- }
--
-- return nlmsg_end(skb, nlh);
--}
--
--enum addr_type_t
--{
-- UNICAST_ADDR,
-- MULTICAST_ADDR,
-- ANYCAST_ADDR,
--};
--
--static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
-- enum addr_type_t type)
--{
-- int idx, ip_idx;
-- int s_idx, s_ip_idx;
-- int err = 1;
-- struct net_device *dev;
-- struct inet6_dev *idev = NULL;
-- struct inet6_ifaddr *ifa;
-- struct ifmcaddr6 *ifmca;
-- struct ifacaddr6 *ifaca;
--
-- s_idx = cb->args[0];
-- s_ip_idx = ip_idx = cb->args[1];
--
-- idx = 0;
-- for_each_netdev(dev) {
-- if (idx < s_idx)
-- goto cont;
-- if (idx > s_idx)
-- s_ip_idx = 0;
-- ip_idx = 0;
-- if ((idev = in6_dev_get(dev)) == NULL)
-- goto cont;
-- read_lock_bh(&idev->lock);
-- switch (type) {
-- case UNICAST_ADDR:
-- /* unicast address incl. temp addr */
-- for (ifa = idev->addr_list; ifa;
-- ifa = ifa->if_next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifaddr(skb, ifa,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_NEWADDR,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- case MULTICAST_ADDR:
-- /* multicast address */
-- for (ifmca = idev->mc_list; ifmca;
-- ifmca = ifmca->next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifmcaddr(skb, ifmca,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_GETMULTICAST,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- case ANYCAST_ADDR:
-- /* anycast address */
-- for (ifaca = idev->ac_list; ifaca;
-- ifaca = ifaca->aca_next, ip_idx++) {
-- if (ip_idx < s_ip_idx)
-- continue;
-- if ((err = inet6_fill_ifacaddr(skb, ifaca,
-- NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_GETANYCAST,
-- NLM_F_MULTI)) <= 0)
-- goto done;
-- }
-- break;
-- default:
-- break;
-- }
-- read_unlock_bh(&idev->lock);
-- in6_dev_put(idev);
--cont:
-- idx++;
-- }
--done:
-- if (err <= 0) {
-- read_unlock_bh(&idev->lock);
-- in6_dev_put(idev);
-- }
-- cb->args[0] = idx;
-- cb->args[1] = ip_idx;
-- return skb->len;
--}
--
--static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = UNICAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = MULTICAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--
--static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- enum addr_type_t type = ANYCAST_ADDR;
-- return inet6_dump_addr(skb, cb, type);
--}
--
--static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
-- void *arg)
--{
-- struct ifaddrmsg *ifm;
-- struct nlattr *tb[IFA_MAX+1];
-- struct in6_addr *addr = NULL;
-- struct net_device *dev = NULL;
-- struct inet6_ifaddr *ifa;
-- struct sk_buff *skb;
-- int err;
--
-- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
-- if (err < 0)
-- goto errout;
--
-- addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]);
-- if (addr == NULL) {
-- err = -EINVAL;
-- goto errout;
-- }
--
-- ifm = nlmsg_data(nlh);
-- if (ifm->ifa_index)
-- dev = __dev_get_by_index(ifm->ifa_index);
--
-- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
-- err = -EADDRNOTAVAIL;
-- goto errout;
-- }
--
-- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
-- err = -ENOBUFS;
-- goto errout_ifa;
-- }
--
-- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
-- nlh->nlmsg_seq, RTM_NEWADDR, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout_ifa;
-- }
-- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
--errout_ifa:
-- in6_ifa_put(ifa);
--errout:
-- return err;
--}
--
--static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
--}
--
--static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
-- __s32 *array, int bytes)
--{
-- BUG_ON(bytes < (DEVCONF_MAX * 4));
--
-- memset(array, 0, bytes);
-- array[DEVCONF_FORWARDING] = cnf->forwarding;
-- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
-- array[DEVCONF_MTU6] = cnf->mtu6;
-- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
-- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
-- array[DEVCONF_AUTOCONF] = cnf->autoconf;
-- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
-- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
-- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval;
-- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay;
-- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
--#ifdef CONFIG_IPV6_PRIVACY
-- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
-- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
-- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
-- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
-- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
--#endif
-- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
-- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
-- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
-- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval;
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
--#endif
--#endif
-- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
-- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
--#endif
--}
--
--static inline size_t inet6_if_nlmsg_size(void)
--{
-- return NLMSG_ALIGN(sizeof(struct ifinfomsg))
-- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
-- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
-- + nla_total_size(4) /* IFLA_MTU */
-- + nla_total_size(4) /* IFLA_LINK */
-- + nla_total_size( /* IFLA_PROTINFO */
-- nla_total_size(4) /* IFLA_INET6_FLAGS */
-- + nla_total_size(sizeof(struct ifla_cacheinfo))
-- + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
-- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-- );
--}
--
--static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
-- int bytes)
--{
-- int i;
-- int pad = bytes - sizeof(u64) * items;
-- BUG_ON(pad < 0);
--
-- /* Use put_unaligned() because stats may not be aligned for u64. */
-- put_unaligned(items, &stats[0]);
-- for (i = 1; i < items; i++)
-- put_unaligned(snmp_fold_field(mib, i), &stats[i]);
--
-- memset(&stats[items], 0, pad);
--}
--
--static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
-- int bytes)
--{
-- switch(attrtype) {
-- case IFLA_INET6_STATS:
-- __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
-- break;
-- case IFLA_INET6_ICMP6STATS:
-- __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
-- break;
-- }
--}
--
--static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
-- u32 pid, u32 seq, int event, unsigned int flags)
--{
-- struct net_device *dev = idev->dev;
-- struct nlattr *nla;
-- struct ifinfomsg *hdr;
-- struct nlmsghdr *nlh;
-- void *protoinfo;
-- struct ifla_cacheinfo ci;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- hdr = nlmsg_data(nlh);
-- hdr->ifi_family = AF_INET6;
-- hdr->__ifi_pad = 0;
-- hdr->ifi_type = dev->type;
-- hdr->ifi_index = dev->ifindex;
-- hdr->ifi_flags = dev_get_flags(dev);
-- hdr->ifi_change = 0;
--
-- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
--
-- if (dev->addr_len)
-- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
--
-- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
-- if (dev->ifindex != dev->iflink)
-- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
--
-- protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
-- if (protoinfo == NULL)
-- goto nla_put_failure;
--
-- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags);
--
-- ci.max_reasm_len = IPV6_MAXPLEN;
-- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
-- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
-- ci.reachable_time = idev->nd_parms->reachable_time;
-- ci.retrans_time = idev->nd_parms->retrans_time;
-- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
--
-- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-- if (nla == NULL)
-- goto nla_put_failure;
-- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
--
-- /* XXX - MC not implemented */
--
-- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
-- if (nla == NULL)
-- goto nla_put_failure;
-- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
--
-- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
-- if (nla == NULL)
-- goto nla_put_failure;
-- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
--
-- nla_nest_end(skb, protoinfo);
-- return nlmsg_end(skb, nlh);
--
--nla_put_failure:
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
--}
--
--static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
--{
-- int idx, err;
-- int s_idx = cb->args[0];
-- struct net_device *dev;
-- struct inet6_dev *idev;
--
-- read_lock(&dev_base_lock);
-- idx = 0;
-- for_each_netdev(dev) {
-- if (idx < s_idx)
-- goto cont;
-- if ((idev = in6_dev_get(dev)) == NULL)
-- goto cont;
-- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
-- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
-- in6_dev_put(idev);
-- if (err <= 0)
-- break;
--cont:
-- idx++;
-- }
-- read_unlock(&dev_base_lock);
-- cb->args[0] = idx;
--
-- return skb->len;
--}
--
--void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
--}
--
--static inline size_t inet6_prefix_nlmsg_size(void)
--{
-- return NLMSG_ALIGN(sizeof(struct prefixmsg))
-- + nla_total_size(sizeof(struct in6_addr))
-- + nla_total_size(sizeof(struct prefix_cacheinfo));
--}
--
--static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-- struct prefix_info *pinfo, u32 pid, u32 seq,
-- int event, unsigned int flags)
--{
-- struct prefixmsg *pmsg;
-- struct nlmsghdr *nlh;
-- struct prefix_cacheinfo ci;
--
-- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
-- if (nlh == NULL)
-- return -EMSGSIZE;
--
-- pmsg = nlmsg_data(nlh);
-- pmsg->prefix_family = AF_INET6;
-- pmsg->prefix_pad1 = 0;
-- pmsg->prefix_pad2 = 0;
-- pmsg->prefix_ifindex = idev->dev->ifindex;
-- pmsg->prefix_len = pinfo->prefix_len;
-- pmsg->prefix_type = pinfo->type;
-- pmsg->prefix_pad3 = 0;
-- pmsg->prefix_flags = 0;
-- if (pinfo->onlink)
-- pmsg->prefix_flags |= IF_PREFIX_ONLINK;
-- if (pinfo->autoconf)
-- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF;
--
-- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
--
-- ci.preferred_time = ntohl(pinfo->prefered);
-- ci.valid_time = ntohl(pinfo->valid);
-- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
--
-- return nlmsg_end(skb, nlh);
--
--nla_put_failure:
-- nlmsg_cancel(skb, nlh);
-- return -EMSGSIZE;
--}
--
--static void inet6_prefix_notify(int event, struct inet6_dev *idev,
-- struct prefix_info *pinfo)
--{
-- struct sk_buff *skb;
-- int err = -ENOBUFS;
--
-- skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC);
-- if (skb == NULL)
-- goto errout;
--
-- err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0);
-- if (err < 0) {
-- /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */
-- WARN_ON(err == -EMSGSIZE);
-- kfree_skb(skb);
-- goto errout;
-- }
-- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
--errout:
-- if (err < 0)
-- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
--}
--
--static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
--{
-- inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
--
-- switch (event) {
-- case RTM_NEWADDR:
-- /*
-- * If the address was optimistic
-- * we inserted the route at the start of
-- * our DAD process, so we don't need
-- * to do it again
-- */
-- if (!(ifp->rt->rt6i_node))
-- ip6_ins_rt(ifp->rt);
-- if (ifp->idev->cnf.forwarding)
-- addrconf_join_anycast(ifp);
-- break;
-- case RTM_DELADDR:
-- if (ifp->idev->cnf.forwarding)
-- addrconf_leave_anycast(ifp);
-- addrconf_leave_solict(ifp->idev, &ifp->addr);
-- dst_hold(&ifp->rt->u.dst);
-- if (ip6_del_rt(ifp->rt))
-- dst_free(&ifp->rt->u.dst);
-- break;
-- }
--}
--
--static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
--{
-- rcu_read_lock_bh();
-- if (likely(ifp->idev->dead == 0))
-- __ipv6_ifa_notify(event, ifp);
-- rcu_read_unlock_bh();
--}
--
--#ifdef CONFIG_SYSCTL
--
--static
--int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
-- void __user *buffer, size_t *lenp, loff_t *ppos)
--{
-- int *valp = ctl->data;
-- int val = *valp;
-- int ret;
--
-- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
--
-- if (write && valp != &ipv6_devconf_dflt.forwarding) {
-- if (valp != &ipv6_devconf.forwarding) {
-- if ((!*valp) ^ (!val)) {
-- struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
-- if (idev == NULL)
-- return ret;
-- dev_forward_change(idev);
-- }
-- } else {
-- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
-- addrconf_forward_change();
-- }
-- if (*valp)
-- rt6_purge_dflt_routers();
-- }
--
-- return ret;
--}
--
--static int addrconf_sysctl_forward_strategy(ctl_table *table,
-- int __user *name, int nlen,
-- void __user *oldval,
-- size_t __user *oldlenp,
-- void __user *newval, size_t newlen)
--{
-- int *valp = table->data;
-- int new;
--
-- if (!newval || !newlen)
-- return 0;
-- if (newlen != sizeof(int))
-- return -EINVAL;
-- if (get_user(new, (int __user *)newval))
-- return -EFAULT;
-- if (new == *valp)
-- return 0;
-- if (oldval && oldlenp) {
-- size_t len;
-- if (get_user(len, oldlenp))
-- return -EFAULT;
-- if (len) {
-- if (len > table->maxlen)
-- len = table->maxlen;
-- if (copy_to_user(oldval, valp, len))
-- return -EFAULT;
-- if (put_user(len, oldlenp))
-- return -EFAULT;
-- }
-- }
--
-- if (valp != &ipv6_devconf_dflt.forwarding) {
-- if (valp != &ipv6_devconf.forwarding) {
-- struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
-- int changed;
-- if (unlikely(idev == NULL))
-- return -ENODEV;
-- changed = (!*valp) ^ (!new);
-- *valp = new;
-- if (changed)
-- dev_forward_change(idev);
-- } else {
-- *valp = new;
-- addrconf_forward_change();
-- }
--
-- if (*valp)
-- rt6_purge_dflt_routers();
-- } else
-- *valp = new;
--
-- return 1;
--}
--
--static struct addrconf_sysctl_table
--{
-- struct ctl_table_header *sysctl_header;
-- ctl_table addrconf_vars[__NET_IPV6_MAX];
-- ctl_table addrconf_dev[2];
-- ctl_table addrconf_conf_dir[2];
-- ctl_table addrconf_proto_dir[2];
-- ctl_table addrconf_root_dir[2];
--} addrconf_sysctl __read_mostly = {
-- .sysctl_header = NULL,
-- .addrconf_vars = {
-- {
-- .ctl_name = NET_IPV6_FORWARDING,
-- .procname = "forwarding",
-- .data = &ipv6_devconf.forwarding,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &addrconf_sysctl_forward,
-- .strategy = &addrconf_sysctl_forward_strategy,
-- },
-- {
-- .ctl_name = NET_IPV6_HOP_LIMIT,
-- .procname = "hop_limit",
-- .data = &ipv6_devconf.hop_limit,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_MTU,
-- .procname = "mtu",
-- .data = &ipv6_devconf.mtu6,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA,
-- .procname = "accept_ra",
-- .data = &ipv6_devconf.accept_ra,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_REDIRECTS,
-- .procname = "accept_redirects",
-- .data = &ipv6_devconf.accept_redirects,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_AUTOCONF,
-- .procname = "autoconf",
-- .data = &ipv6_devconf.autoconf,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_DAD_TRANSMITS,
-- .procname = "dad_transmits",
-- .data = &ipv6_devconf.dad_transmits,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICITS,
-- .procname = "router_solicitations",
-- .data = &ipv6_devconf.rtr_solicits,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL,
-- .procname = "router_solicitation_interval",
-- .data = &ipv6_devconf.rtr_solicit_interval,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY,
-- .procname = "router_solicitation_delay",
-- .data = &ipv6_devconf.rtr_solicit_delay,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
-- {
-- .ctl_name = NET_IPV6_FORCE_MLD_VERSION,
-- .procname = "force_mld_version",
-- .data = &ipv6_devconf.force_mld_version,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_PRIVACY
-- {
-- .ctl_name = NET_IPV6_USE_TEMPADDR,
-- .procname = "use_tempaddr",
-- .data = &ipv6_devconf.use_tempaddr,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_TEMP_VALID_LFT,
-- .procname = "temp_valid_lft",
-- .data = &ipv6_devconf.temp_valid_lft,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_TEMP_PREFERED_LFT,
-- .procname = "temp_prefered_lft",
-- .data = &ipv6_devconf.temp_prefered_lft,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_REGEN_MAX_RETRY,
-- .procname = "regen_max_retry",
-- .data = &ipv6_devconf.regen_max_retry,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR,
-- .procname = "max_desync_factor",
-- .data = &ipv6_devconf.max_desync_factor,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#endif
-- {
-- .ctl_name = NET_IPV6_MAX_ADDRESSES,
-- .procname = "max_addresses",
-- .data = &ipv6_devconf.max_addresses,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR,
-- .procname = "accept_ra_defrtr",
-- .data = &ipv6_devconf.accept_ra_defrtr,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_PINFO,
-- .procname = "accept_ra_pinfo",
-- .data = &ipv6_devconf.accept_ra_pinfo,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_ROUTER_PREF
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF,
-- .procname = "accept_ra_rtr_pref",
-- .data = &ipv6_devconf.accept_ra_rtr_pref,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL,
-- .procname = "router_probe_interval",
-- .data = &ipv6_devconf.rtr_probe_interval,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec_jiffies,
-- .strategy = &sysctl_jiffies,
-- },
--#ifdef CONFIG_IPV6_ROUTE_INFO
-- {
-- .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN,
-- .procname = "accept_ra_rt_info_max_plen",
-- .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#endif
--#endif
-- {
-- .ctl_name = NET_IPV6_PROXY_NDP,
-- .procname = "proxy_ndp",
-- .data = &ipv6_devconf.proxy_ndp,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
-- {
-- .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE,
-- .procname = "accept_source_route",
-- .data = &ipv6_devconf.accept_source_route,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
-- },
--#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
-- {
-- .ctl_name = CTL_UNNUMBERED,
-- .procname = "optimistic_dad",
-- .data = &ipv6_devconf.optimistic_dad,
-- .maxlen = sizeof(int),
-- .mode = 0644,
-- .proc_handler = &proc_dointvec,
--
-- },
--#endif
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_dev = {
-- {
-- .ctl_name = NET_PROTO_CONF_ALL,
-- .procname = "all",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_vars,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_conf_dir = {
-- {
-- .ctl_name = NET_IPV6_CONF,
-- .procname = "conf",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_dev,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_proto_dir = {
-- {
-- .ctl_name = NET_IPV6,
-- .procname = "ipv6",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_conf_dir,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
-- .addrconf_root_dir = {
-- {
-- .ctl_name = CTL_NET,
-- .procname = "net",
-- .mode = 0555,
-- .child = addrconf_sysctl.addrconf_proto_dir,
-- },
-- {
-- .ctl_name = 0, /* sentinel */
-- }
-- },
--};
--
--static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
--{
-- int i;
-- struct net_device *dev = idev ? idev->dev : NULL;
-- struct addrconf_sysctl_table *t;
-- char *dev_name = NULL;
--
-- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL);
-- if (t == NULL)
-- return;
-- for (i=0; t->addrconf_vars[i].data; i++) {
-- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
-- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
-- }
-- if (dev) {
-- dev_name = dev->name;
-- t->addrconf_dev[0].ctl_name = dev->ifindex;
-- } else {
-- dev_name = "default";
-- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
-- }
--
-- /*
-- * Make a copy of dev_name, because '.procname' is regarded as const
-- * by sysctl and we wouldn't want anyone to change it under our feet
-- * (see SIOCSIFNAME).
-- */
-- dev_name = kstrdup(dev_name, GFP_KERNEL);
-- if (!dev_name)
-- goto free;
--
-- t->addrconf_dev[0].procname = dev_name;
--
-- t->addrconf_dev[0].child = t->addrconf_vars;
-- t->addrconf_conf_dir[0].child = t->addrconf_dev;
-- t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
-- t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
--
-- t->sysctl_header = register_sysctl_table(t->addrconf_root_dir);
-- if (t->sysctl_header == NULL)
-- goto free_procname;
-- else
-- p->sysctl = t;
-- return;
--
-- /* error path */
-- free_procname:
-- kfree(dev_name);
-- free:
-- kfree(t);
--
-- return;
--}
--
--static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
--{
-- if (p->sysctl) {
-- struct addrconf_sysctl_table *t = p->sysctl;
-- p->sysctl = NULL;
-- unregister_sysctl_table(t->sysctl_header);
-- kfree(t->addrconf_dev[0].procname);
-- kfree(t);
-- }
--}
--
--
--#endif
--
--/*
-- * Device notifier
-- */
--
--int register_inet6addr_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_register(&inet6addr_chain, nb);
--}
--
--EXPORT_SYMBOL(register_inet6addr_notifier);
--
--int unregister_inet6addr_notifier(struct notifier_block *nb)
--{
-- return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
--}
--
--EXPORT_SYMBOL(unregister_inet6addr_notifier);
--
--/*
-- * Init / cleanup code
-- */
--
--int __init addrconf_init(void)
--{
-- int err = 0;
--
-- /* The addrconf netdev notifier requires that loopback_dev
-- * has it's ipv6 private information allocated and setup
-- * before it can bring up and give link-local addresses
-- * to other devices which are up.
-- *
-- * Unfortunately, loopback_dev is not necessarily the first
-- * entry in the global dev_base list of net devices. In fact,
-- * it is likely to be the very last entry on that list.
-- * So this causes the notifier registry below to try and
-- * give link-local addresses to all devices besides loopback_dev
-- * first, then loopback_dev, which cases all the non-loopback_dev
-- * devices to fail to get a link-local address.
-- *
-- * So, as a temporary fix, allocate the ipv6 structure for
-- * loopback_dev first by hand.
-- * Longer term, all of the dependencies ipv6 has upon the loopback
-- * device and it being up should be removed.
-- */
-- rtnl_lock();
-- if (!ipv6_add_dev(&loopback_dev))
-- err = -ENOMEM;
-- rtnl_unlock();
-- if (err)
-- return err;
--
-- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
--#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
-- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
--#endif
--
-- register_netdevice_notifier(&ipv6_dev_notf);
--
-- addrconf_verify(0);
--
-- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
-- if (err < 0)
-- goto errout;
--
-- /* Only the first call to __rtnl_register can fail */
-- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
-- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
-- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
-- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
-- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
--
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl.sysctl_header =
-- register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
-- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
--#endif
--
-- return 0;
--errout:
-- unregister_netdevice_notifier(&ipv6_dev_notf);
--
-- return err;
--}
--
--void __exit addrconf_cleanup(void)
--{
-- struct net_device *dev;
-- struct inet6_dev *idev;
-- struct inet6_ifaddr *ifa;
-- int i;
--
-- unregister_netdevice_notifier(&ipv6_dev_notf);
--
--#ifdef CONFIG_SYSCTL
-- addrconf_sysctl_unregister(&ipv6_devconf_dflt);
-- addrconf_sysctl_unregister(&ipv6_devconf);
--#endif
--
-- rtnl_lock();
--
-- /*
-- * clean dev list.
-- */
--
-- for_each_netdev(dev) {
-- if ((idev = __in6_dev_get(dev)) == NULL)
-- continue;
-- addrconf_ifdown(dev, 1);
-- }
-- addrconf_ifdown(&loopback_dev, 2);
--
-- /*
-- * Check hash table.
-- */
--
-- write_lock_bh(&addrconf_hash_lock);
-- for (i=0; i < IN6_ADDR_HSIZE; i++) {
-- for (ifa=inet6_addr_lst[i]; ifa; ) {
-- struct inet6_ifaddr *bifa;
--
-- bifa = ifa;
-- ifa = ifa->lst_next;
-- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
-- /* Do not free it; something is wrong.
-- Now we can investigate it with debugger.
-- */
-- }
-- }
-- write_unlock_bh(&addrconf_hash_lock);
--
-- del_timer(&addr_chk_timer);
--
-- rtnl_unlock();
--
--#ifdef CONFIG_PROC_FS
-- proc_net_remove("if_inet6");
--#endif
--}
-diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-590/net/ipv6/af_inet6.c
---- linux-2.6.22-570/net/ipv6/af_inet6.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/af_inet6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -59,9 +59,6 @@
- #ifdef CONFIG_IPV6_TUNNEL
- #include <net/ip6_tunnel.h>
- #endif
--#ifdef CONFIG_IPV6_MIP6
--#include <net/mip6.h>
--#endif
-
- #include <asm/uaccess.h>
- #include <asm/system.h>
-@@ -85,7 +82,7 @@
- return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
- }
-
--static int inet6_create(struct socket *sock, int protocol)
-+static int inet6_create(struct net *net, struct socket *sock, int protocol)
- {
- struct inet_sock *inet;
- struct ipv6_pinfo *np;
-@@ -98,6 +95,9 @@
- int try_loading_module = 0;
- int err;
-
-+ if (net != &init_net)
-+ return -EAFNOSUPPORT;
-+
- if (sock->type != SOCK_RAW &&
- sock->type != SOCK_DGRAM &&
- !inet_ehash_secret)
-@@ -166,7 +166,7 @@
- BUG_TRAP(answer_prot->slab != NULL);
-
- err = -ENOBUFS;
-- sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
-+ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1);
- if (sk == NULL)
- goto out;
-
-@@ -209,7 +209,7 @@
- inet->mc_index = 0;
- inet->mc_list = NULL;
-
-- if (ipv4_config.no_pmtu_disc)
-+ if (init_net.sysctl_ipv4_no_pmtu_disc)
- inet->pmtudisc = IP_PMTUDISC_DONT;
- else
- inet->pmtudisc = IP_PMTUDISC_WANT;
-@@ -290,7 +290,7 @@
- /* Check if the address belongs to the host. */
- if (addr_type == IPV6_ADDR_MAPPED) {
- v4addr = addr->sin6_addr.s6_addr32[3];
-- if (inet_addr_type(v4addr) != RTN_LOCAL) {
-+ if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
- err = -EADDRNOTAVAIL;
- goto out;
- }
-@@ -316,7 +316,7 @@
- err = -EINVAL;
- goto out;
- }
-- dev = dev_get_by_index(sk->sk_bound_dev_if);
-+ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
- if (!dev) {
- err = -ENODEV;
- goto out;
-@@ -675,6 +675,7 @@
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-@@ -876,9 +877,6 @@
- ipv6_frag_init();
- ipv6_nodata_init();
- ipv6_destopt_init();
--#ifdef CONFIG_IPV6_MIP6
-- mip6_init();
--#endif
-
- /* Init v6 transport protocols. */
- udpv6_init();
-@@ -944,9 +942,7 @@
-
- /* Cleanup code parts. */
- ipv6_packet_cleanup();
--#ifdef CONFIG_IPV6_MIP6
-- mip6_fini();
--#endif
-+
- addrconf_cleanup();
- ip6_flowlabel_cleanup();
- ip6_route_cleanup();
-diff -Nurb linux-2.6.22-570/net/ipv6/ah6.c linux-2.6.22-590/net/ipv6/ah6.c
---- linux-2.6.22-570/net/ipv6/ah6.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/ah6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -74,7 +74,7 @@
- return 0;
- }
-
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- /**
- * ipv6_rearrange_destopt - rearrange IPv6 destination options header
- * @iph: IPv6 header
-@@ -132,6 +132,8 @@
- bad:
- return;
- }
-+#else
-+static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
- #endif
-
- /**
-@@ -189,10 +191,8 @@
- while (exthdr.raw < end) {
- switch (nexthdr) {
- case NEXTHDR_DEST:
--#ifdef CONFIG_IPV6_MIP6
- if (dir == XFRM_POLICY_OUT)
- ipv6_rearrange_destopt(iph, exthdr.opth);
--#endif
- case NEXTHDR_HOP:
- if (!zero_out_mutable_opts(exthdr.opth)) {
- LIMIT_NETDEBUG(
-@@ -228,7 +228,7 @@
- u8 nexthdr;
- char tmp_base[8];
- struct {
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- struct in6_addr saddr;
- #endif
- struct in6_addr daddr;
-@@ -255,7 +255,7 @@
- err = -ENOMEM;
- goto error;
- }
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- memcpy(tmp_ext, &top_iph->saddr, extlen);
- #else
- memcpy(tmp_ext, &top_iph->daddr, extlen);
-@@ -294,7 +294,7 @@
-
- memcpy(top_iph, tmp_base, sizeof(tmp_base));
- if (tmp_ext) {
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- memcpy(&top_iph->saddr, tmp_ext, extlen);
- #else
- memcpy(&top_iph->daddr, tmp_ext, extlen);
-@@ -554,3 +554,4 @@
- module_exit(ah6_fini);
-
- MODULE_LICENSE("GPL");
-+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
-diff -Nurb linux-2.6.22-570/net/ipv6/anycast.c linux-2.6.22-590/net/ipv6/anycast.c
---- linux-2.6.22-570/net/ipv6/anycast.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/anycast.c 2008-01-29 22:12:32.000000000 -0500
-@@ -32,6 +32,7 @@
-
- #include <net/sock.h>
- #include <net/snmp.h>
-+#include <net/net_namespace.h>
-
- #include <net/ipv6.h>
- #include <net/protocol.h>
-@@ -112,10 +113,10 @@
- } else {
- /* router, no matching interface: just pick one */
-
-- dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
-+ dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK);
- }
- } else
-- dev = dev_get_by_index(ifindex);
-+ dev = dev_get_by_index(&init_net, ifindex);
-
- if (dev == NULL) {
- err = -ENODEV;
-@@ -196,7 +197,7 @@
-
- write_unlock_bh(&ipv6_sk_ac_lock);
-
-- dev = dev_get_by_index(pac->acl_ifindex);
-+ dev = dev_get_by_index(&init_net, pac->acl_ifindex);
- if (dev) {
- ipv6_dev_ac_dec(dev, &pac->acl_addr);
- dev_put(dev);
-@@ -224,7 +225,7 @@
- if (pac->acl_ifindex != prev_index) {
- if (dev)
- dev_put(dev);
-- dev = dev_get_by_index(pac->acl_ifindex);
-+ dev = dev_get_by_index(&init_net, pac->acl_ifindex);
- prev_index = pac->acl_ifindex;
- }
- if (dev)
-@@ -429,7 +430,7 @@
- if (dev)
- return ipv6_chk_acast_dev(dev, addr);
- read_lock(&dev_base_lock);
-- for_each_netdev(dev)
-+ for_each_netdev(&init_net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
- found = 1;
- break;
-@@ -453,7 +454,7 @@
- struct ac6_iter_state *state = ac6_seq_private(seq);
-
- state->idev = NULL;
-- for_each_netdev(state->dev) {
-+ for_each_netdev(&init_net, state->dev) {
- struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
- if (!idev)
-@@ -579,7 +580,7 @@
-
- int __init ac6_proc_init(void)
- {
-- if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops))
- return -ENOMEM;
-
- return 0;
-@@ -587,7 +588,7 @@
-
- void ac6_proc_exit(void)
- {
-- proc_net_remove("anycast6");
-+ proc_net_remove(&init_net, "anycast6");
- }
- #endif
-
-diff -Nurb linux-2.6.22-570/net/ipv6/datagram.c linux-2.6.22-590/net/ipv6/datagram.c
---- linux-2.6.22-570/net/ipv6/datagram.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/datagram.c 2008-01-29 22:12:32.000000000 -0500
-@@ -60,6 +60,7 @@
- return -EAFNOSUPPORT;
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-@@ -544,7 +545,7 @@
- if (!src_info->ipi6_ifindex)
- return -EINVAL;
- else {
-- dev = dev_get_by_index(src_info->ipi6_ifindex);
-+ dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex);
- if (!dev)
- return -ENODEV;
- }
-@@ -658,7 +659,7 @@
-
- switch (rthdr->type) {
- case IPV6_SRCRT_TYPE_0:
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPV6_SRCRT_TYPE_2:
- #endif
- break;
-diff -Nurb linux-2.6.22-570/net/ipv6/esp6.c linux-2.6.22-590/net/ipv6/esp6.c
---- linux-2.6.22-570/net/ipv6/esp6.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/esp6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -421,3 +421,4 @@
- module_exit(esp6_fini);
-
- MODULE_LICENSE("GPL");
-+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
-diff -Nurb linux-2.6.22-570/net/ipv6/exthdrs.c linux-2.6.22-590/net/ipv6/exthdrs.c
---- linux-2.6.22-570/net/ipv6/exthdrs.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/exthdrs.c 2008-01-29 22:12:32.000000000 -0500
-@@ -42,7 +42,7 @@
- #include <net/ndisc.h>
- #include <net/ip6_route.h>
- #include <net/addrconf.h>
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- #include <net/xfrm.h>
- #endif
-
-@@ -90,6 +90,7 @@
- bad:
- return -1;
- }
-+EXPORT_SYMBOL_GPL(ipv6_find_tlv);
-
- /*
- * Parsing tlv encoded headers.
-@@ -196,7 +197,7 @@
- Destination options header.
- *****************************/
-
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
- {
- struct sk_buff *skb = *skbp;
-@@ -270,7 +271,7 @@
- #endif
-
- static struct tlvtype_proc tlvprocdestopt_lst[] = {
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- {
- .type = IPV6_TLV_HAO,
- .func = ipv6_dest_hao,
-@@ -283,7 +284,7 @@
- {
- struct sk_buff *skb = *skbp;
- struct inet6_skb_parm *opt = IP6CB(skb);
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- __u16 dstbuf;
- #endif
- struct dst_entry *dst;
-@@ -298,7 +299,7 @@
- }
-
- opt->lastopt = opt->dst1 = skb_network_header_len(skb);
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- dstbuf = opt->dst1;
- #endif
-
-@@ -308,7 +309,7 @@
- skb = *skbp;
- skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
- opt = IP6CB(skb);
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- opt->nhoff = dstbuf;
- #else
- opt->nhoff = opt->dst1;
-@@ -427,7 +428,7 @@
- looped_back:
- if (hdr->segments_left == 0) {
- switch (hdr->type) {
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPV6_SRCRT_TYPE_2:
- /* Silently discard type 2 header unless it was
- * processed by own
-@@ -463,7 +464,7 @@
- return -1;
- }
- break;
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPV6_SRCRT_TYPE_2:
- /* Silently discard invalid RTH type 2 */
- if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
-@@ -520,7 +521,7 @@
- addr += i - 1;
-
- switch (hdr->type) {
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPV6_SRCRT_TYPE_2:
- if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
- (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
-diff -Nurb linux-2.6.22-570/net/ipv6/fib6_rules.c linux-2.6.22-590/net/ipv6/fib6_rules.c
---- linux-2.6.22-570/net/ipv6/fib6_rules.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/fib6_rules.c 2008-01-29 22:12:32.000000000 -0500
-@@ -244,7 +244,7 @@
- return -ENOBUFS;
- }
-
--static u32 fib6_rule_default_pref(void)
-+static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
- {
- return 0x3FFF;
- }
-@@ -277,10 +277,10 @@
- list_add_tail(&local_rule.common.list, &fib6_rules);
- list_add_tail(&main_rule.common.list, &fib6_rules);
-
-- fib_rules_register(&fib6_rules_ops);
-+ fib_rules_register(&init_net, &fib6_rules_ops);
- }
-
- void fib6_rules_cleanup(void)
- {
-- fib_rules_unregister(&fib6_rules_ops);
-+ fib_rules_unregister(&init_net, &fib6_rules_ops);
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/icmp.c linux-2.6.22-590/net/ipv6/icmp.c
---- linux-2.6.22-570/net/ipv6/icmp.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/icmp.c 2008-01-29 22:12:32.000000000 -0500
-@@ -272,7 +272,7 @@
- return 0;
- }
-
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- static void mip6_addr_swap(struct sk_buff *skb)
- {
- struct ipv6hdr *iph = ipv6_hdr(skb);
-@@ -377,6 +377,7 @@
- mip6_addr_swap(skb);
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
- if (saddr)
-@@ -495,6 +496,7 @@
- tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.proto = IPPROTO_ICMPV6;
- ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
- if (saddr)
-diff -Nurb linux-2.6.22-570/net/ipv6/inet6_connection_sock.c linux-2.6.22-590/net/ipv6/inet6_connection_sock.c
---- linux-2.6.22-570/net/ipv6/inet6_connection_sock.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/inet6_connection_sock.c 2008-01-29 22:12:32.000000000 -0500
-@@ -149,6 +149,7 @@
- struct in6_addr *final_p = NULL, final;
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.proto = sk->sk_protocol;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-diff -Nurb linux-2.6.22-570/net/ipv6/inet6_hashtables.c linux-2.6.22-590/net/ipv6/inet6_hashtables.c
---- linux-2.6.22-570/net/ipv6/inet6_hashtables.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/inet6_hashtables.c 2008-01-29 22:12:32.000000000 -0500
-@@ -61,7 +61,7 @@
- const __be16 sport,
- const struct in6_addr *daddr,
- const u16 hnum,
-- const int dif)
-+ const int dif, struct net *net)
- {
- struct sock *sk;
- const struct hlist_node *node;
-@@ -105,7 +105,7 @@
-
- struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
- const struct in6_addr *daddr,
-- const unsigned short hnum, const int dif)
-+ const unsigned short hnum, const int dif, struct net *net)
- {
- struct sock *sk;
- const struct hlist_node *node;
-@@ -113,7 +113,7 @@
- int score, hiscore = 0;
-
- read_lock(&hashinfo->lhash_lock);
-- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
-+ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
- if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
-
-@@ -152,12 +152,12 @@
- struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
- const struct in6_addr *saddr, const __be16 sport,
- const struct in6_addr *daddr, const __be16 dport,
-- const int dif)
-+ const int dif, struct net *net)
- {
- struct sock *sk;
-
- local_bh_disable();
-- sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
-+ sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net);
- local_bh_enable();
-
- return sk;
-@@ -251,6 +251,7 @@
- int inet6_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk)
- {
-+ struct net *net = sk->sk_net;
- struct inet_hashinfo *hinfo = death_row->hashinfo;
- const unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
-@@ -258,8 +259,8 @@
- int ret;
-
- if (snum == 0) {
-- const int low = sysctl_local_port_range[0];
-- const int high = sysctl_local_port_range[1];
-+ const int low = sk->sk_net->sysctl_local_port_range[0];
-+ const int high = sk->sk_net->sysctl_local_port_range[1];
- const int range = high - low;
- int i, port;
- static u32 hint;
-@@ -270,7 +271,7 @@
- local_bh_disable();
- for (i = 1; i <= range; i++) {
- port = low + (i + offset) % range;
-- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
- spin_lock(&head->lock);
-
- /* Does not bother with rcv_saddr checks,
-@@ -278,7 +279,7 @@
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
-- if (tb->port == port) {
-+ if ((tb->port == port) && (tb->net == net)) {
- BUG_TRAP(!hlist_empty(&tb->owners));
- if (tb->fastreuse >= 0)
- goto next_port;
-@@ -291,7 +292,7 @@
- }
-
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-- head, port);
-+ head, net, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
-@@ -326,7 +327,7 @@
- goto out;
- }
-
-- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
-+ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
-
-diff -Nurb linux-2.6.22-570/net/ipv6/ip6_fib.c linux-2.6.22-590/net/ipv6/ip6_fib.c
---- linux-2.6.22-570/net/ipv6/ip6_fib.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/ip6_fib.c 2008-01-29 22:12:32.000000000 -0500
-@@ -361,6 +361,7 @@
-
- static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
- {
-+ struct net *net = skb->sk->sk_net;
- unsigned int h, s_h;
- unsigned int e = 0, s_e;
- struct rt6_rtnl_dump_arg arg;
-@@ -369,6 +370,9 @@
- struct hlist_node *node;
- int res = 0;
-
-+ if (net != &init_net)
-+ return 0;
-+
- s_h = cb->args[0];
- s_e = cb->args[1];
-
-@@ -1311,6 +1315,11 @@
-
- static int fib6_clean_node(struct fib6_walker_t *w)
- {
-+ struct nl_info info = {
-+ .nlh = NULL,
-+ .pid = 0,
-+ .net = &init_net,
-+ };
- int res;
- struct rt6_info *rt;
- struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
-@@ -1319,7 +1328,7 @@
- res = c->func(rt, c->arg);
- if (res < 0) {
- w->leaf = rt;
-- res = fib6_del(rt, NULL);
-+ res = fib6_del(rt, &info);
- if (res) {
- #if RT6_DEBUG >= 2
- printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
-diff -Nurb linux-2.6.22-570/net/ipv6/ip6_flowlabel.c linux-2.6.22-590/net/ipv6/ip6_flowlabel.c
---- linux-2.6.22-570/net/ipv6/ip6_flowlabel.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/ip6_flowlabel.c 2008-01-29 22:12:32.000000000 -0500
-@@ -22,6 +22,7 @@
- #include <linux/seq_file.h>
-
- #include <net/sock.h>
-+#include <net/net_namespace.h>
-
- #include <net/ipv6.h>
- #include <net/ndisc.h>
-@@ -309,6 +310,7 @@
-
- msg.msg_controllen = olen;
- msg.msg_control = (void*)(fl->opt+1);
-+ flowi.fl_net = &init_net;
- flowi.oif = 0;
-
- err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
-@@ -690,7 +692,7 @@
- void ip6_flowlabel_init(void)
- {
- #ifdef CONFIG_PROC_FS
-- proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
-+ proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
- #endif
- }
-
-@@ -698,6 +700,6 @@
- {
- del_timer(&ip6_fl_gc_timer);
- #ifdef CONFIG_PROC_FS
-- proc_net_remove("ip6_flowlabel");
-+ proc_net_remove(&init_net, "ip6_flowlabel");
- #endif
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/ip6_input.c linux-2.6.22-590/net/ipv6/ip6_input.c
---- linux-2.6.22-570/net/ipv6/ip6_input.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/ip6_input.c 2008-01-29 22:12:32.000000000 -0500
-@@ -61,6 +61,11 @@
- u32 pkt_len;
- struct inet6_dev *idev;
-
-+ if (dev->nd_net != &init_net) {
-+ kfree_skb(skb);
-+ return 0;
-+ }
-+
- if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb);
- return 0;
-diff -Nurb linux-2.6.22-570/net/ipv6/ip6_output.c linux-2.6.22-590/net/ipv6/ip6_output.c
---- linux-2.6.22-570/net/ipv6/ip6_output.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/ip6_output.c 2008-01-29 22:12:32.000000000 -0500
-@@ -423,7 +423,7 @@
-
- /* XXX: idev->cnf.proxy_ndp? */
- if (ipv6_devconf.proxy_ndp &&
-- pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
-+ pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
- int proxied = ip6_forward_proxy_check(skb);
- if (proxied > 0)
- return ip6_input(skb);
-@@ -543,7 +543,7 @@
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- break;
- #endif
-diff -Nurb linux-2.6.22-570/net/ipv6/ip6_tunnel.c linux-2.6.22-590/net/ipv6/ip6_tunnel.c
---- linux-2.6.22-570/net/ipv6/ip6_tunnel.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/ip6_tunnel.c 2008-01-29 22:12:32.000000000 -0500
-@@ -235,7 +235,7 @@
- int i;
- for (i = 1; i < IP6_TNL_MAX; i++) {
- sprintf(name, "ip6tnl%d", i);
-- if (__dev_get_by_name(name) == NULL)
-+ if (__dev_get_by_name(&init_net, name) == NULL)
- break;
- }
- if (i == IP6_TNL_MAX)
-@@ -651,7 +651,7 @@
- struct net_device *ldev = NULL;
-
- if (p->link)
-- ldev = dev_get_by_index(p->link);
-+ ldev = dev_get_by_index(&init_net, p->link);
-
- if ((ipv6_addr_is_multicast(&p->laddr) ||
- likely(ipv6_chk_addr(&p->laddr, ldev, 0))) &&
-@@ -787,7 +787,7 @@
- struct net_device *ldev = NULL;
-
- if (p->link)
-- ldev = dev_get_by_index(p->link);
-+ ldev = dev_get_by_index(&init_net, p->link);
-
- if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0)))
- printk(KERN_WARNING
-diff -Nurb linux-2.6.22-570/net/ipv6/ipcomp6.c linux-2.6.22-590/net/ipv6/ipcomp6.c
---- linux-2.6.22-570/net/ipv6/ipcomp6.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/ipcomp6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -501,4 +501,4 @@
- MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
- MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
-
--
-+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
-diff -Nurb linux-2.6.22-570/net/ipv6/ipv6_sockglue.c linux-2.6.22-590/net/ipv6/ipv6_sockglue.c
---- linux-2.6.22-570/net/ipv6/ipv6_sockglue.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/ipv6_sockglue.c 2008-01-29 22:12:32.000000000 -0500
-@@ -123,7 +123,7 @@
- struct ipv6hdr *ipv6h;
- struct inet6_protocol *ops;
-
-- if (!(features & NETIF_F_HW_CSUM))
-+ if (!(features & NETIF_F_V6_CSUM))
- features &= ~NETIF_F_SG;
-
- if (unlikely(skb_shinfo(skb)->gso_type &
-@@ -417,7 +417,7 @@
- struct ipv6_rt_hdr *rthdr = opt->srcrt;
- switch (rthdr->type) {
- case IPV6_SRCRT_TYPE_0:
--#ifdef CONFIG_IPV6_MIP6
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPV6_SRCRT_TYPE_2:
- #endif
- break;
-@@ -463,6 +463,7 @@
- struct flowi fl;
- int junk;
-
-+ fl.fl_net = &init_net;
- fl.fl6_flowlabel = 0;
- fl.oif = sk->sk_bound_dev_if;
-
-@@ -547,7 +548,7 @@
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
- goto e_inval;
-
-- if (__dev_get_by_index(val) == NULL) {
-+ if (__dev_get_by_index(&init_net, val) == NULL) {
- retv = -ENODEV;
- break;
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/mcast.c linux-2.6.22-590/net/ipv6/mcast.c
---- linux-2.6.22-570/net/ipv6/mcast.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/mcast.c 2008-01-29 22:12:32.000000000 -0500
-@@ -51,6 +51,7 @@
-
- #include <net/sock.h>
- #include <net/snmp.h>
-+#include <net/net_namespace.h>
-
- #include <net/ipv6.h>
- #include <net/protocol.h>
-@@ -214,7 +215,7 @@
- dst_release(&rt->u.dst);
- }
- } else
-- dev = dev_get_by_index(ifindex);
-+ dev = dev_get_by_index(&init_net, ifindex);
-
- if (dev == NULL) {
- sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
-@@ -265,7 +266,7 @@
- *lnk = mc_lst->next;
- write_unlock_bh(&ipv6_sk_mc_lock);
-
-- if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
-+ if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) {
- struct inet6_dev *idev = in6_dev_get(dev);
-
- (void) ip6_mc_leave_src(sk, mc_lst, idev);
-@@ -300,7 +301,7 @@
- dst_release(&rt->u.dst);
- }
- } else
-- dev = dev_get_by_index(ifindex);
-+ dev = dev_get_by_index(&init_net, ifindex);
-
- if (!dev)
- return NULL;
-@@ -331,7 +332,7 @@
- np->ipv6_mc_list = mc_lst->next;
- write_unlock_bh(&ipv6_sk_mc_lock);
-
-- dev = dev_get_by_index(mc_lst->ifindex);
-+ dev = dev_get_by_index(&init_net, mc_lst->ifindex);
- if (dev) {
- struct inet6_dev *idev = in6_dev_get(dev);
-
-@@ -2332,7 +2333,7 @@
- struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
-
- state->idev = NULL;
-- for_each_netdev(state->dev) {
-+ for_each_netdev(&init_net, state->dev) {
- struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
- if (!idev)
-@@ -2476,7 +2477,7 @@
-
- state->idev = NULL;
- state->im = NULL;
-- for_each_netdev(state->dev) {
-+ for_each_netdev(&init_net, state->dev) {
- struct inet6_dev *idev;
- idev = in6_dev_get(state->dev);
- if (unlikely(idev == NULL))
-@@ -2658,8 +2659,8 @@
- np->hop_limit = 1;
-
- #ifdef CONFIG_PROC_FS
-- proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-- proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
-+ proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops);
-+ proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
- #endif
-
- return 0;
-@@ -2671,7 +2672,7 @@
- igmp6_socket = NULL; /* for safety */
-
- #ifdef CONFIG_PROC_FS
-- proc_net_remove("mcfilter6");
-- proc_net_remove("igmp6");
-+ proc_net_remove(&init_net, "mcfilter6");
-+ proc_net_remove(&init_net, "igmp6");
- #endif
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/mip6.c linux-2.6.22-590/net/ipv6/mip6.c
---- linux-2.6.22-570/net/ipv6/mip6.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/mip6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -30,6 +30,7 @@
- #include <net/sock.h>
- #include <net/ipv6.h>
- #include <net/ip6_checksum.h>
-+#include <net/rawv6.h>
- #include <net/xfrm.h>
- #include <net/mip6.h>
-
-@@ -86,7 +87,7 @@
- return len;
- }
-
--int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
-+static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
- {
- struct ip6_mh *mh;
-
-@@ -471,7 +472,7 @@
- .remote_addr = mip6_xfrm_addr,
- };
-
--int __init mip6_init(void)
-+static int __init mip6_init(void)
- {
- printk(KERN_INFO "Mobile IPv6\n");
-
-@@ -483,18 +484,35 @@
- printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
- goto mip6_rthdr_xfrm_fail;
- }
-+ if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
-+ printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__);
-+ goto mip6_rawv6_mh_fail;
-+ }
-+
-+
- return 0;
-
-+ mip6_rawv6_mh_fail:
-+ xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
- mip6_rthdr_xfrm_fail:
- xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
- mip6_destopt_xfrm_fail:
- return -EAGAIN;
- }
-
--void __exit mip6_fini(void)
-+static void __exit mip6_fini(void)
- {
-+ if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
-+ printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__);
- if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
- if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
- printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
- }
-+
-+module_init(mip6_init);
-+module_exit(mip6_fini);
-+
-+MODULE_LICENSE("GPL");
-+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
-+MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
-diff -Nurb linux-2.6.22-570/net/ipv6/ndisc.c linux-2.6.22-590/net/ipv6/ndisc.c
---- linux-2.6.22-570/net/ipv6/ndisc.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/ndisc.c 2008-01-29 22:12:32.000000000 -0500
-@@ -418,6 +418,7 @@
- int oif)
- {
- memset(fl, 0, sizeof(*fl));
-+ fl->fl_net = &init_net;
- ipv6_addr_copy(&fl->fl6_src, saddr);
- ipv6_addr_copy(&fl->fl6_dst, daddr);
- fl->proto = IPPROTO_ICMPV6;
-@@ -760,7 +761,7 @@
- if (ipv6_chk_acast_addr(dev, &msg->target) ||
- (idev->cnf.forwarding &&
- (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
-- (pneigh = pneigh_lookup(&nd_tbl,
-+ (pneigh = pneigh_lookup(&nd_tbl, &init_net,
- &msg->target, dev, 0)) != NULL)) {
- if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
- skb->pkt_type != PACKET_HOST &&
-@@ -901,7 +902,7 @@
- */
- if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
- ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
-- pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
-+ pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
- /* XXX: idev->cnf.prixy_ndp */
- goto out;
- }
-@@ -1525,6 +1526,9 @@
- {
- struct net_device *dev = ptr;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- switch (event) {
- case NETDEV_CHANGEADDR:
- neigh_changeaddr(&nd_tbl, dev);
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-590/net/ipv6/netfilter/ip6_queue.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6_queue.c 2008-01-29 22:12:32.000000000 -0500
-@@ -24,6 +24,7 @@
- #include <linux/sysctl.h>
- #include <linux/proc_fs.h>
- #include <linux/mutex.h>
-+#include <net/net_namespace.h>
- #include <net/sock.h>
- #include <net/ipv6.h>
- #include <net/ip6_route.h>
-@@ -546,6 +547,9 @@
- {
- struct net_device *dev = ptr;
-
-+ if (dev->nd_net != &init_net)
-+ return NOTIFY_DONE;
-+
- /* Drop any packets associated with the downed device */
- if (event == NETDEV_DOWN)
- ipq_dev_drop(dev->ifindex);
-@@ -565,7 +569,7 @@
- if (event == NETLINK_URELEASE &&
- n->protocol == NETLINK_IP6_FW && n->pid) {
- write_lock_bh(&queue_lock);
-- if (n->pid == peer_pid)
-+ if ((n->net == &init_net) && (n->pid == peer_pid))
- __ipq_reset();
- write_unlock_bh(&queue_lock);
- }
-@@ -657,14 +661,14 @@
- struct proc_dir_entry *proc;
-
- netlink_register_notifier(&ipq_nl_notifier);
-- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
-- THIS_MODULE);
-+ ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk,
-+ NULL, THIS_MODULE);
- if (ipqnl == NULL) {
- printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
-
-- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
-+ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
- if (proc)
- proc->owner = THIS_MODULE;
- else {
-@@ -685,7 +689,7 @@
- cleanup_sysctl:
- unregister_sysctl_table(ipq_sysctl_header);
- unregister_netdevice_notifier(&ipq_dev_notifier);
-- proc_net_remove(IPQ_PROC_FS_NAME);
-+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
- cleanup_ipqnl:
- sock_release(ipqnl->sk_socket);
-@@ -705,7 +709,7 @@
-
- unregister_sysctl_table(ipq_sysctl_header);
- unregister_netdevice_notifier(&ipq_dev_notifier);
-- proc_net_remove(IPQ_PROC_FS_NAME);
-+ proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
- sock_release(ipqnl->sk_socket);
- mutex_lock(&ipqnl_mutex);
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-590/net/ipv6/netfilter/ip6_tables.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6_tables.c 2008-01-29 22:12:32.000000000 -0500
-@@ -906,7 +906,7 @@
- int ret;
- struct xt_table *t;
-
-- t = xt_find_table_lock(AF_INET6, entries->name);
-+ t = xt_find_table_lock(&init_net, AF_INET6, entries->name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n", private->number);
-@@ -972,7 +972,7 @@
-
- duprintf("ip_tables: Translated table\n");
-
-- t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
-+ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name),
- "ip6table_%s", tmp.name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
-@@ -1073,7 +1073,7 @@
- goto free;
- }
-
-- t = xt_find_table_lock(AF_INET6, tmp.name);
-+ t = xt_find_table_lock(&init_net, AF_INET6, tmp.name);
- if (!t || IS_ERR(t)) {
- ret = t ? PTR_ERR(t) : -ENOENT;
- goto free;
-@@ -1109,6 +1109,9 @@
- {
- int ret;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -1134,6 +1137,9 @@
- {
- int ret;
-
-+ if (sk->sk_net != &init_net)
-+ return -ENOPROTOOPT;
-+
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
-@@ -1155,7 +1161,7 @@
- }
- name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
-
-- t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
-+ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name),
- "ip6table_%s", name);
- if (t && !IS_ERR(t)) {
- struct ip6t_getinfo info;
-@@ -1259,7 +1265,7 @@
- return ret;
- }
-
-- ret = xt_register_table(table, &bootstrap, newinfo);
-+ ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-590/net/ipv6/netfilter/ip6t_REJECT.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6t_REJECT.c 2008-01-29 22:12:32.000000000 -0500
-@@ -92,6 +92,7 @@
- }
-
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
- fl.proto = IPPROTO_TCP;
- ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
- ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
-@@ -172,7 +173,7 @@
- send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
- {
- if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
-- skb_in->dev = &loopback_dev;
-+ skb_in->dev = &init_net.loopback_dev;
-
- icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
- }
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c linux-2.6.22-590/net/ipv6/netfilter/ip6table_filter.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6table_filter.c 2008-01-29 22:12:32.000000000 -0500
-@@ -65,6 +65,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return ip6t_do_table(pskb, hook, in, out, &packet_filter);
- }
-
-@@ -75,6 +79,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- #if 0
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-590/net/ipv6/netfilter/ip6table_mangle.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6table_mangle.c 2008-01-29 22:12:32.000000000 -0500
-@@ -79,6 +79,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
- }
-
-@@ -95,6 +99,10 @@
- u_int8_t hop_limit;
- u_int32_t flowlabel, mark;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- #if 0
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-590/net/ipv6/netfilter/ip6table_raw.c
---- linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/ip6table_raw.c 2008-01-29 22:12:32.000000000 -0500
-@@ -57,6 +57,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- return ip6t_do_table(pskb, hook, in, out, &packet_raw);
- }
-
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-590/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
---- linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2008-01-29 22:12:32.000000000 -0500
-@@ -167,6 +167,10 @@
- unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
-
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
-@@ -203,6 +207,10 @@
- {
- struct sk_buff *reasm;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* Previously seen (loopback)? */
- if ((*pskb)->nfct)
- return NF_ACCEPT;
-@@ -231,6 +239,10 @@
- {
- struct sk_buff *reasm = (*pskb)->nfct_reasm;
-
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* This packet is fragmented and has reassembled packet. */
- if (reasm) {
- /* Reassembled packet isn't parsed yet ? */
-@@ -256,6 +268,10 @@
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
-+ /* Only filter packets in the initial network namespace */
-+ if ((in?in:out)->nd_net != &init_net)
-+ return NF_ACCEPT;
-+
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct ipv6hdr)) {
- if (net_ratelimit())
-diff -Nurb linux-2.6.22-570/net/ipv6/netfilter.c linux-2.6.22-590/net/ipv6/netfilter.c
---- linux-2.6.22-570/net/ipv6/netfilter.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/netfilter.c 2008-01-29 22:12:32.000000000 -0500
-@@ -14,6 +14,7 @@
- struct ipv6hdr *iph = ipv6_hdr(skb);
- struct dst_entry *dst;
- struct flowi fl = {
-+ .fl_net = &init_net,
- .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
- .mark = skb->mark,
- .nl_u =
-diff -Nurb linux-2.6.22-570/net/ipv6/proc.c linux-2.6.22-590/net/ipv6/proc.c
---- linux-2.6.22-570/net/ipv6/proc.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/proc.c 2008-01-29 22:12:32.000000000 -0500
-@@ -28,6 +28,7 @@
- #include <net/tcp.h>
- #include <net/transp_v6.h>
- #include <net/ipv6.h>
-+#include <net/net_namespace.h>
-
- static struct proc_dir_entry *proc_net_devsnmp6;
-
-@@ -231,22 +232,22 @@
- {
- int rc = 0;
-
-- if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
- goto proc_snmp6_fail;
-
-- proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
-+ proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net);
- if (!proc_net_devsnmp6)
- goto proc_dev_snmp6_fail;
-
-- if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops))
- goto proc_sockstat6_fail;
- out:
- return rc;
-
- proc_sockstat6_fail:
-- proc_net_remove("dev_snmp6");
-+ proc_net_remove(&init_net, "dev_snmp6");
- proc_dev_snmp6_fail:
-- proc_net_remove("snmp6");
-+ proc_net_remove(&init_net, "snmp6");
- proc_snmp6_fail:
- rc = -ENOMEM;
- goto out;
-@@ -254,8 +255,8 @@
-
- void ipv6_misc_proc_exit(void)
- {
-- proc_net_remove("sockstat6");
-- proc_net_remove("dev_snmp6");
-- proc_net_remove("snmp6");
-+ proc_net_remove(&init_net, "sockstat6");
-+ proc_net_remove(&init_net, "dev_snmp6");
-+ proc_net_remove(&init_net, "snmp6");
- }
-
-diff -Nurb linux-2.6.22-570/net/ipv6/raw.c linux-2.6.22-590/net/ipv6/raw.c
---- linux-2.6.22-570/net/ipv6/raw.c 2008-01-29 22:12:18.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/raw.c 2008-01-29 22:12:32.000000000 -0500
-@@ -49,7 +49,8 @@
- #include <net/udp.h>
- #include <net/inet_common.h>
- #include <net/tcp_states.h>
--#ifdef CONFIG_IPV6_MIP6
-+#include <net/net_namespace.h>
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- #include <net/mip6.h>
- #endif
-
-@@ -137,6 +138,28 @@
- return 0;
- }
-
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-+static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
-+
-+int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
-+ struct sk_buff *skb))
-+{
-+ rcu_assign_pointer(mh_filter, filter);
-+ return 0;
-+}
-+EXPORT_SYMBOL(rawv6_mh_filter_register);
-+
-+int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
-+ struct sk_buff *skb))
-+{
-+ rcu_assign_pointer(mh_filter, NULL);
-+ synchronize_rcu();
-+ return 0;
-+}
-+EXPORT_SYMBOL(rawv6_mh_filter_unregister);
-+
-+#endif
-+
- /*
- * demultiplex raw sockets.
- * (should consider queueing the skb in the sock receive_queue
-@@ -178,16 +201,22 @@
- case IPPROTO_ICMPV6:
- filtered = icmpv6_filter(sk, skb);
- break;
--#ifdef CONFIG_IPV6_MIP6
-+
-+#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
- case IPPROTO_MH:
-+ {
- /* XXX: To validate MH only once for each packet,
- * this is placed here. It should be after checking
- * xfrm policy, however it doesn't. The checking xfrm
- * policy is placed in rawv6_rcv() because it is
- * required for each socket.
- */
-- filtered = mip6_mh_filter(sk, skb);
-+ int (*filter)(struct sock *sock, struct sk_buff *skb);
-+
-+ filter = rcu_dereference(mh_filter);
-+ filtered = filter ? filter(sk, skb) : 0;
- break;
-+ }
- #endif
- default:
- filtered = 0;
-@@ -254,7 +283,7 @@
- if (!sk->sk_bound_dev_if)
- goto out;
-
-- dev = dev_get_by_index(sk->sk_bound_dev_if);
-+ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
- if (!dev) {
- err = -ENODEV;
- goto out;
-@@ -611,9 +640,7 @@
- struct iovec *iov;
- u8 __user *type = NULL;
- u8 __user *code = NULL;
--#ifdef CONFIG_IPV6_MIP6
- u8 len = 0;
--#endif
- int probed = 0;
- int i;
-
-@@ -646,7 +673,6 @@
- probed = 1;
- }
- break;
--#ifdef CONFIG_IPV6_MIP6
- case IPPROTO_MH:
- if (iov->iov_base && iov->iov_len < 1)
- break;
-@@ -660,7 +686,6 @@
- len += iov->iov_len;
-
- break;
--#endif
- default:
- probed = 1;
- break;
-@@ -704,6 +729,7 @@
- * Get and verify the address.
- */
- memset(&fl, 0, sizeof(fl));
-+ fl.fl_net = &init_net;
-
- if (sin6) {
- if (addr_len < SIN6_LEN_RFC2133)
-@@ -1291,13 +1317,13 @@
-
- int __init raw6_proc_init(void)
- {
-- if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
-+ if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops))
- return -ENOMEM;
- return 0;
- }
-
- void raw6_proc_exit(void)
- {
-- proc_net_remove("raw6");
-+ proc_net_remove(&init_net, "raw6");
- }
- #endif /* CONFIG_PROC_FS */
-diff -Nurb linux-2.6.22-570/net/ipv6/reassembly.c linux-2.6.22-590/net/ipv6/reassembly.c
---- linux-2.6.22-570/net/ipv6/reassembly.c 2007-07-08 19:32:17.000000000 -0400
-+++ linux-2.6.22-590/net/ipv6/reassembly.c 2008-01-29 22:12:32.000000000 -0500
-@@ -301,7 +301,7 @@
-
- fq_kill(fq);
-
-- dev = dev_get_by_index(fq->iif);
-+ dev = dev_get_by_index(&init_net, fq->iif);
- if (!dev)
- goto out;
-
-diff -Nurb linux-2.6.22-570/net/ipv6/route.c linux-2.6.22-590/net/ipv6/route.c
---- linux-2.6.22-570/net/ipv6/route.c 2008-01-29 22:12:21.000000000 -0500
-+++ linux-2.6.22-590/net/ipv6/route.c 2008-01-29 22:12:32.000000000 -0500
-@@ -56,6 +56,7 @@
- #include <net/xfrm.h>
- #include <net/netevent.h>
- #include <net/netlink.h>
-+#include <net/net_namespace.h>
-
- #include <asm/uaccess.h>
-
-@@ -137,7 +138,7 @@
- .dst = {
- .__refcnt = ATOMIC_INIT(1),
- .__use = 1,
-- .dev = &loopback_dev,
-+ .dev = NULL,
- .obsolete = -1,
- .error = -ENETUNREACH,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-@@ -163,7 +164,7 @@
- .dst = {
- .__refcnt = ATOMIC_INIT(1),
- .__use = 1,
-- .dev = &loopback_dev,
-+ .dev = NULL,
- .obsolete = -1,
- .error = -EACCES,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-@@ -183,7 +184,7 @@
- .dst = {
- .__refcnt = ATOMIC_INIT(1),
- .__use = 1,
-- .dev = &loopback_dev,
-+ .dev = NULL,
- .obsolete = -1,
- .error = -EINVAL,
- .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-@@ -223,8 +224,8 @@
- struct rt6_info *rt = (struct rt6_info *)dst;
- struct inet6_dev *idev = rt->rt6i_idev;
-
-- if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
-- struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
-+ if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) {
-+ struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev);
- if (loopback_idev != NULL) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
-@@ -564,6 +565,7 @@
- int oif, int strict)
- {
- struct flowi fl = {
-+ .fl_net = &init_net,
- .oif = oif,
- .nl_u = {
- .ip6_u = {
-@@ -611,7 +613,12 @@
-
- int ip6_ins_rt(struct rt6_info *rt)
- {
-- return __ip6_ins_rt(rt, NULL);
-+ struct nl_info info = {
-+ .nlh = NULL,
-+ .pid = 0,
-+ .net = &init_net,
-+ };
-+ return __ip6_ins_rt(rt, &info);
- }
-
- static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
-@@ -742,6 +749,7 @@
- struct ipv6hdr *iph = ipv6_hdr(skb);
- int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct flowi fl = {
-+ .fl_net = &init_net,
- .iif = skb->dev->ifindex,
- .nl_u = {
- .ip6_u = {
-@@ -1129,7 +1137,7 @@
- #endif
- if (cfg->fc_ifindex) {
- err = -ENODEV;
-- dev = dev_get_by_index(cfg->fc_ifindex);
-+ dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
- if (!dev)
- goto out;
- idev = in6_dev_get(dev);
-@@ -1187,12 +1195,12 @@
- if ((cfg->fc_flags & RTF_REJECT) ||
- (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
- /* hold loopback dev/idev if we haven't done so. */
-- if (dev != &loopback_dev) {
-+ if (dev != &init_net.loopback_dev) {
- if (dev) {
- dev_put(dev);
- in6_dev_put(idev);
- }
-- dev = &loopback_dev;
-+ dev = &init_net.loopback_dev;
- dev_hold(dev);
- idev = in6_dev_get(dev);
- if (!idev) {
-@@ -1333,7 +1341,12 @@
-
- int ip6_del_rt(struct rt6_info *rt)
- {
-- return __ip6_del_rt(rt, NULL);
-+ struct nl_info info = {
-+ .nlh = NULL,
-+ .pid = 0,
-+ .net = &init_net,
-+ };
-+ return __ip6_del_rt(rt, &info);
- }
-
- static int ip6_route_del(struct fib6_config *cfg)
-@@ -1444,6 +1457,7 @@
- int flags = RT6_LOOKUP_F_HAS_SADDR;
- struct ip6rd_flowi rdfl = {
- .fl = {
-+ .fl_net = &init_net,
- .oif = dev->ifindex,
- .nl_u = {
- .ip6_u = {
-@@ -1896,13 +1910,13 @@
- if (rt == NULL)
- return ERR_PTR(-ENOMEM);
-
-- dev_hold(&loopback_dev);
-+ dev_hold(&init_net.loopback_dev);
- in6_dev_hold(idev);
-
- rt->u.dst.flags = DST_HOST;
- rt->u.dst.input = ip6_input;
- rt->u.dst.output = ip6_output;
-- rt->rt6i_dev = &loopback_dev;
-+ rt->rt6i_dev = &init_net.loopback_dev;
- rt->rt6i_idev = idev;
- rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
- rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
-@@ -2033,6 +2047,7 @@
-
- cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
- cfg->fc_nlinfo.nlh = nlh;
-+ cfg->fc_nlinfo.net = skb->sk->sk_net;
-
- if (tb[RTA_GATEWAY]) {
- nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
-@@ -2078,9 +2093,13 @@
-
- static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct fib6_config cfg;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;
-+
- err = rtm_to_fib6_config(skb, nlh, &cfg);
- if (err < 0)
- return err;
-@@ -2090,9 +2109,13 @@
-
- static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
- {
-+ struct net *net = skb->sk->sk_net;
- struct fib6_config cfg;
- int err;
-
-+ if (net != &init_net)
-+ return -EINVAL;