diff -Nurb linux-2.6.22-try2/arch/ia64/hp/sim/simeth.c linux-2.6.22-try2-netns/arch/ia64/hp/sim/simeth.c --- linux-2.6.22-try2/arch/ia64/hp/sim/simeth.c 2007-12-19 13:37:12.000000000 -0500 +++ linux-2.6.22-try2-netns/arch/ia64/hp/sim/simeth.c 2007-12-19 22:49:13.000000000 -0500 @@ -300,6 +300,9 @@ return NOTIFY_DONE; } + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; /* diff -Nurb linux-2.6.22-try2/arch/s390/appldata/appldata_net_sum.c linux-2.6.22-try2-netns/arch/s390/appldata/appldata_net_sum.c --- linux-2.6.22-try2/arch/s390/appldata/appldata_net_sum.c 2007-12-19 13:37:20.000000000 -0500 +++ linux-2.6.22-try2-netns/arch/s390/appldata/appldata_net_sum.c 2007-12-19 22:49:13.000000000 -0500 @@ -16,6 +16,7 @@ #include #include #include +#include #include "appldata.h" @@ -107,7 +108,7 @@ tx_dropped = 0; collisions = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { stats = dev->get_stats(dev); rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; diff -Nurb linux-2.6.22-try2/arch/sparc64/solaris/ioctl.c linux-2.6.22-try2-netns/arch/sparc64/solaris/ioctl.c --- linux-2.6.22-try2/arch/sparc64/solaris/ioctl.c 2007-12-19 13:37:22.000000000 -0500 +++ linux-2.6.22-try2-netns/arch/sparc64/solaris/ioctl.c 2007-12-19 22:49:13.000000000 -0500 @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -686,7 +687,7 @@ int i = 0; read_lock_bh(&dev_base_lock); - for_each_netdev(d) + for_each_netdev(&init_net, d) i++; read_unlock_bh(&dev_base_lock); diff -Nurb linux-2.6.22-try2/drivers/atm/idt77252.c linux-2.6.22-try2-netns/drivers/atm/idt77252.c --- linux-2.6.22-try2/drivers/atm/idt77252.c 2007-12-19 13:37:27.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/atm/idt77252.c 2007-12-19 22:49:13.000000000 -0500 @@ -3576,7 +3576,7 @@ * XXX: */ sprintf(tname, "eth%d", card->index); - tmp = dev_get_by_name(tname); /* jhs: was "tmp = dev_get(tname);" 
*/ + tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ if (tmp) { memcpy(card->atmdev->esi, tmp->dev_addr, 6); diff -Nurb linux-2.6.22-try2/drivers/base/class.c linux-2.6.22-try2-netns/drivers/base/class.c --- linux-2.6.22-try2/drivers/base/class.c 2007-12-19 15:29:22.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/base/class.c 2007-12-19 22:49:13.000000000 -0500 @@ -134,6 +134,17 @@ } } +static int class_setup_shadowing(struct class *cls) +{ + const struct shadow_dir_operations *shadow_ops; + + shadow_ops = cls->shadow_ops; + if (!shadow_ops) + return 0; + + return sysfs_enable_shadowing(&cls->subsys.kobj, shadow_ops); +} + int class_register(struct class * cls) { int error; @@ -152,11 +163,22 @@ subsys_set_kset(cls, class_subsys); error = subsystem_register(&cls->subsys); - if (!error) { - error = add_class_attrs(class_get(cls)); - class_put(cls); - } + if (error) + goto out; + + error = class_setup_shadowing(cls); + if (error) + goto out_unregister; + + error = add_class_attrs(cls); + if (error) + goto out_unregister; + +out: return error; +out_unregister: + subsystem_unregister(&cls->subsys); + goto out; } void class_unregister(struct class * cls) diff -Nurb linux-2.6.22-try2/drivers/base/core.c linux-2.6.22-try2-netns/drivers/base/core.c --- linux-2.6.22-try2/drivers/base/core.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/base/core.c 2007-12-19 22:49:13.000000000 -0500 @@ -622,8 +622,14 @@ return kobj; /* or create a new class-directory at the parent device */ - return kobject_kset_add_dir(&dev->class->class_dirs, + kobj = kobject_kset_add_dir(&dev->class->class_dirs, parent_kobj, dev->class->name); + + /* If we created a new class-directory setup shadowing */ + if (kobj && dev->class->shadow_ops) + sysfs_enable_shadowing(kobj, dev->class->shadow_ops); + + return kobj; } if (parent) @@ -913,8 +919,8 @@ /* If this is not a "fake" compatible device, remove the * symlink from the class to the device. 
*/ if (dev->kobj.parent != &dev->class->subsys.kobj) - sysfs_remove_link(&dev->class->subsys.kobj, - dev->bus_id); + sysfs_delete_link(&dev->class->subsys.kobj, + &dev->kobj, dev->bus_id); if (parent) { #ifdef CONFIG_SYSFS_DEPRECATED char *class_name = make_class_name(dev->class->name, @@ -1212,6 +1218,13 @@ strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); + if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) { + error = sysfs_rename_link(&dev->class->subsys.kobj, + &dev->kobj, old_device_name, new_name); + if (error) + goto out; + } + error = kobject_rename(&dev->kobj, new_name); if (error) { strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); @@ -1220,27 +1233,17 @@ #ifdef CONFIG_SYSFS_DEPRECATED if (old_class_name) { + error = -ENOMEM; new_class_name = make_class_name(dev->class->name, &dev->kobj); - if (new_class_name) { - error = sysfs_create_link(&dev->parent->kobj, - &dev->kobj, new_class_name); + if (!new_class_name) + goto out; + + error = sysfs_rename_link(&dev->parent->kobj, &dev->kobj, + old_class_name, new_class_name); if (error) goto out; - sysfs_remove_link(&dev->parent->kobj, old_class_name); - } } #endif - - if (dev->class) { - sysfs_remove_link(&dev->class->subsys.kobj, old_device_name); - error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, - dev->bus_id); - if (error) { - /* Uh... how to unravel this if restoring can fail? 
*/ - dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n", - __FUNCTION__, error); - } - } out: put_device(dev); diff -Nurb linux-2.6.22-try2/drivers/block/aoe/aoecmd.c linux-2.6.22-try2-netns/drivers/block/aoe/aoecmd.c --- linux-2.6.22-try2/drivers/block/aoe/aoecmd.c 2007-12-19 13:37:27.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/block/aoe/aoecmd.c 2007-12-19 22:49:13.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -194,7 +195,7 @@ sl = sl_tail = NULL; read_lock(&dev_base_lock); - for_each_netdev(ifp) { + for_each_netdev(&init_net, ifp) { dev_hold(ifp); if (!is_aoe_netif(ifp)) goto cont; diff -Nurb linux-2.6.22-try2/drivers/block/aoe/aoenet.c linux-2.6.22-try2-netns/drivers/block/aoe/aoenet.c --- linux-2.6.22-try2/drivers/block/aoe/aoenet.c 2007-12-19 13:37:27.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/block/aoe/aoenet.c 2007-12-19 22:49:13.000000000 -0500 @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -114,6 +115,9 @@ struct aoe_hdr *h; u32 n; + if (ifp->nd_net != &init_net) + goto exit; + skb = skb_share_check(skb, GFP_ATOMIC); if (skb == NULL) return 0; diff -Nurb linux-2.6.22-try2/drivers/connector/connector.c linux-2.6.22-try2-netns/drivers/connector/connector.c --- linux-2.6.22-try2/drivers/connector/connector.c 2007-12-19 13:37:28.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/connector/connector.c 2007-12-19 22:49:13.000000000 -0500 @@ -446,7 +446,7 @@ dev->id.idx = cn_idx; dev->id.val = cn_val; - dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, + dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, dev->input, NULL, THIS_MODULE); if (!dev->nls) diff -Nurb linux-2.6.22-try2/drivers/infiniband/core/addr.c linux-2.6.22-try2-netns/drivers/infiniband/core/addr.c --- linux-2.6.22-try2/drivers/infiniband/core/addr.c 2007-12-19 13:37:29.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/infiniband/core/addr.c 2007-12-19 
22:49:13.000000000 -0500 @@ -110,7 +110,7 @@ __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; int ret; - dev = ip_dev_find(ip); + dev = ip_dev_find(&init_net, ip); if (!dev) return -EADDRNOTAVAIL; @@ -157,6 +157,7 @@ u32 dst_ip = dst_in->sin_addr.s_addr; memset(&fl, 0, sizeof fl); + fl.fl_net = &init_net; fl.nl_u.ip4_u.daddr = dst_ip; if (ip_route_output_key(&rt, &fl)) return; @@ -178,6 +179,7 @@ int ret; memset(&fl, 0, sizeof fl); + fl.fl_net = &init_net; fl.nl_u.ip4_u.daddr = dst_ip; fl.nl_u.ip4_u.saddr = src_ip; ret = ip_route_output_key(&rt, &fl); @@ -262,7 +264,7 @@ __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(dst_ip); + dev = ip_dev_find(&init_net, dst_ip); if (!dev) return -EADDRNOTAVAIL; diff -Nurb linux-2.6.22-try2/drivers/infiniband/core/cma.c linux-2.6.22-try2-netns/drivers/infiniband/core/cma.c --- linux-2.6.22-try2/drivers/infiniband/core/cma.c 2007-12-19 13:37:29.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/infiniband/core/cma.c 2007-12-19 22:49:13.000000000 -0500 @@ -1267,7 +1267,7 @@ atomic_inc(&conn_id->dev_remove); conn_id->state = CMA_CONNECT; - dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; cma_enable_remove(conn_id); @@ -1880,18 +1880,18 @@ if (ret) goto err1; - if (port > sysctl_local_port_range[1]) { - if (next_port != sysctl_local_port_range[0]) { + if (port > init_net.sysctl_local_port_range[1]) { + if (next_port != init_net.sysctl_local_port_range[0]) { idr_remove(ps, port); - next_port = sysctl_local_port_range[0]; + next_port = init_net.sysctl_local_port_range[0]; goto retry; } ret = -EADDRNOTAVAIL; goto err2; } - if (port == sysctl_local_port_range[1]) - next_port = sysctl_local_port_range[0]; + if (port == init_net.sysctl_local_port_range[1]) + next_port = init_net.sysctl_local_port_range[0]; else next_port = port + 1; @@ -2774,8 +2774,9 @@ get_random_bytes(&next_port, sizeof 
next_port); next_port = ((unsigned int) next_port % - (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + - sysctl_local_port_range[0]; + (init_net.sysctl_local_port_range[1] - + init_net.sysctl_local_port_range[0])) + + init_net.sysctl_local_port_range[0]; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; diff -Nurb linux-2.6.22-try2/drivers/isdn/divert/divert_procfs.c linux-2.6.22-try2-netns/drivers/isdn/divert/divert_procfs.c --- linux-2.6.22-try2/drivers/isdn/divert/divert_procfs.c 2007-12-19 13:37:29.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/isdn/divert/divert_procfs.c 2007-12-19 22:49:13.000000000 -0500 @@ -17,6 +17,7 @@ #include #endif #include +#include #include "isdn_divert.h" @@ -284,12 +285,12 @@ init_waitqueue_head(&rd_queue); #ifdef CONFIG_PROC_FS - isdn_proc_entry = proc_mkdir("net/isdn", NULL); + isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net); if (!isdn_proc_entry) return (-1); isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); if (!isdn_divert_entry) { - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", init_net.proc_net); return (-1); } isdn_divert_entry->proc_fops = &isdn_fops; @@ -309,7 +310,7 @@ #ifdef CONFIG_PROC_FS remove_proc_entry("divert", isdn_proc_entry); - remove_proc_entry("net/isdn", NULL); + remove_proc_entry("isdn", init_net.proc_net); #endif /* CONFIG_PROC_FS */ return (0); diff -Nurb linux-2.6.22-try2/drivers/isdn/hardware/eicon/diva_didd.c linux-2.6.22-try2-netns/drivers/isdn/hardware/eicon/diva_didd.c --- linux-2.6.22-try2/drivers/isdn/hardware/eicon/diva_didd.c 2007-12-19 13:37:29.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/isdn/hardware/eicon/diva_didd.c 2007-12-19 22:49:13.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include "platform.h" #include "di_defs.h" @@ -86,7 +87,7 @@ static int DIVA_INIT_FUNCTION create_proc(void) { - proc_net_eicon = proc_mkdir("net/eicon", NULL); + 
proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); if (proc_net_eicon) { if ((proc_didd = @@ -102,7 +103,7 @@ static void remove_proc(void) { remove_proc_entry(DRIVERLNAME, proc_net_eicon); - remove_proc_entry("net/eicon", NULL); + remove_proc_entry("eicon", init_net.proc_net); } static int DIVA_INIT_FUNCTION divadidd_init(void) diff -Nurb linux-2.6.22-try2/drivers/isdn/hysdn/hysdn_procconf.c linux-2.6.22-try2-netns/drivers/isdn/hysdn/hysdn_procconf.c --- linux-2.6.22-try2/drivers/isdn/hysdn/hysdn_procconf.c 2007-12-19 13:37:29.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/isdn/hysdn/hysdn_procconf.c 2007-12-19 22:49:13.000000000 -0500 @@ -392,7 +392,7 @@ hysdn_card *card; unsigned char conf_name[20]; - hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net); + hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net); if (!hysdn_proc_entry) { printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n"); return (-1); @@ -437,5 +437,5 @@ card = card->next; /* point to next card */ } - remove_proc_entry(PROC_SUBDIR_NAME, proc_net); + remove_proc_entry(PROC_SUBDIR_NAME, init_net.proc_net); } diff -Nurb linux-2.6.22-try2/drivers/net/Kconfig linux-2.6.22-try2-netns/drivers/net/Kconfig --- linux-2.6.22-try2/drivers/net/Kconfig 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/Kconfig 2007-12-19 22:49:13.000000000 -0500 @@ -119,6 +119,20 @@ If you don't know what to use this for, you don't need it. +config ETUN + tristate "Ethernet tunnel device driver support" + depends on SYSFS + ---help--- + ETUN provides a pair of network devices that can be used for + configuring interesting topologies. What one device transmits + the other receives and vice versa. The link level framing + is ethernet for wide compatibility with network stacks. + + To compile this driver as a module, choose M here: the module + will be called etun. + + If you don't know what to use this for, you don't need it. 
+ config NET_SB1000 tristate "General Instruments Surfboard 1000" depends on PNP diff -Nurb linux-2.6.22-try2/drivers/net/Makefile linux-2.6.22-try2-netns/drivers/net/Makefile --- linux-2.6.22-try2/drivers/net/Makefile 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/Makefile 2007-12-19 22:49:13.000000000 -0500 @@ -186,6 +186,7 @@ obj-$(CONFIG_MACMACE) += macmace.o obj-$(CONFIG_MAC89x0) += mac89x0.o obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_ETUN) += etun.o obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_DL2K) += dl2k.o obj-$(CONFIG_R8169) += r8169.o diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_3ad.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_3ad.c --- linux-2.6.22-try2/drivers/net/bonding/bond_3ad.c 2007-12-19 13:37:30.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_3ad.c 2007-12-19 22:49:13.000000000 -0500 @@ -29,6 +29,7 @@ #include #include #include +#include #include "bonding.h" #include "bond_3ad.h" @@ -2448,6 +2449,9 @@ struct slave *slave = NULL; int ret = NET_RX_DROP; + if (dev->nd_net != &init_net) + goto out; + if (!(dev->flags & IFF_MASTER)) goto out; diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_alb.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_alb.c --- linux-2.6.22-try2/drivers/net/bonding/bond_alb.c 2007-12-19 13:37:30.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_alb.c 2007-12-19 22:49:13.000000000 -0500 @@ -345,6 +345,9 @@ struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; + if (bond_dev->nd_net != &init_net) + goto out; + if (!(bond_dev->flags & IFF_MASTER)) goto out; diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_main.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_main.c --- linux-2.6.22-try2/drivers/net/bonding/bond_main.c 2007-12-19 13:37:30.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_main.c 2007-12-19 22:49:13.000000000 -0500 @@ -75,6 +75,7 @@ #include #include #include +#include #include 
"bonding.h" #include "bond_3ad.h" #include "bond_alb.h" @@ -2376,6 +2377,7 @@ * can tag the ARP with the proper VLAN tag. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_dst = targets[i]; fl.fl4_tos = RTO_ONLINK; @@ -2485,6 +2487,9 @@ unsigned char *arp_ptr; u32 sip, tip; + if (dev->nd_net != &init_net) + goto out; + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) goto out; @@ -3172,7 +3177,7 @@ { int len = strlen(DRV_NAME); - for (bond_proc_dir = proc_net->subdir; bond_proc_dir; + for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; bond_proc_dir = bond_proc_dir->next) { if ((bond_proc_dir->namelen == len) && !memcmp(bond_proc_dir->name, DRV_NAME, len)) { @@ -3181,7 +3186,7 @@ } if (!bond_proc_dir) { - bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); + bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); if (bond_proc_dir) { bond_proc_dir->owner = THIS_MODULE; } else { @@ -3216,7 +3221,7 @@ bond_proc_dir->owner = NULL; } } else { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); bond_proc_dir = NULL; } } @@ -3323,6 +3328,9 @@ { struct net_device *event_dev = (struct net_device *)ptr; + if (event_dev->nd_net != &init_net) + return NOTIFY_DONE; + dprintk("event_dev: %s, event: %lx\n", (event_dev ? 
event_dev->name : "None"), event); @@ -3740,7 +3748,7 @@ } down_write(&(bonding_rwsem)); - slave_dev = dev_get_by_name(ifr->ifr_slave); + slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); dprintk("slave_dev=%p: \n", slave_dev); diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_sysfs.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_sysfs.c --- linux-2.6.22-try2/drivers/net/bonding/bond_sysfs.c 2007-12-19 13:37:31.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_sysfs.c 2007-12-19 22:49:13.000000000 -0500 @@ -35,6 +35,7 @@ #include #include #include +#include /* #define BONDING_DEBUG 1 */ #include "bonding.h" @@ -299,7 +300,7 @@ read_unlock_bh(&bond->lock); printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n", bond->dev->name, ifname); - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(&init_net, ifname); if (!dev) { printk(KERN_INFO DRV_NAME ": %s: Interface %s does not exist!\n", diff -Nurb linux-2.6.22-try2/drivers/net/eql.c linux-2.6.22-try2-netns/drivers/net/eql.c --- linux-2.6.22-try2/drivers/net/eql.c 2007-12-19 13:37:31.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/eql.c 2007-12-19 22:49:13.000000000 -0500 @@ -116,6 +116,7 @@ #include #include #include +#include #include #include @@ -412,7 +413,7 @@ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(&init_net, srq.slave_name); if (slave_dev) { if ((master_dev->flags & IFF_UP) == IFF_UP) { /* slave is not a master & not already a slave: */ @@ -460,7 +461,7 @@ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t))) return -EFAULT; - slave_dev = dev_get_by_name(srq.slave_name); + slave_dev = dev_get_by_name(&init_net, srq.slave_name); ret = -EINVAL; if (slave_dev) { spin_lock_bh(&eql->queue.lock); @@ -493,7 +494,7 @@ if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = 
dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; @@ -528,7 +529,7 @@ if (copy_from_user(&sc, scp, sizeof (slave_config_t))) return -EFAULT; - slave_dev = dev_get_by_name(sc.slave_name); + slave_dev = dev_get_by_name(&init_net, sc.slave_name); if (!slave_dev) return -ENODEV; diff -Nurb linux-2.6.22-try2/drivers/net/etun.c linux-2.6.22-try2-netns/drivers/net/etun.c --- linux-2.6.22-try2/drivers/net/etun.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/etun.c 2007-12-19 22:49:13.000000000 -0500 @@ -0,0 +1,489 @@ +/* + * ETUN - Universal ETUN device driver. + * Copyright (C) 2006 Linux Networx + * + */ + +#define DRV_NAME "etun" +#define DRV_VERSION "1.0" +#define DRV_DESCRIPTION "Ethernet pseudo tunnel device driver" +#define DRV_COPYRIGHT "(C) 2007 Linux Networx" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Device checksum strategy. + * + * etun is designed to be a pair of virtual devices + * connecting two network stack instances. + * + * Typically it will either be used with ethernet bridging or + * it will be used to route packets between the two stacks. + * + * The only checksum offloading I can do is to completely + * skip the checksumming step all together. + * + * When used for ethernet bridging I don't believe any + * checksum off loading is safe. + * - If my source is an external interface the checksum may be + * invalid so I don't want to report I have already checked it. + * - If my destination is an external interface I don't want to put + * a packet on the wire without someone computing the checksum. + * + * When used for routing between two stacks checksums should + * be as unnecessary as they are on the loopback device. + * + * So by default I am safe and disable checksumming and + * other advanced features like SG and TSO. 
+ * + * However because I think these features could be useful + * I provide the ethtool functions to enable/disable + * them at runtime. + * + * If you think you can correctly enable these go ahead. + * For checksums both the transmitter and the receiver must + * agree before they are actually disabled. + */ + +#define ETUN_NUM_STATS 1 +static struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[ETUN_NUM_STATS] = { + { "partner_ifindex" }, +}; + +struct etun_info { + struct net_device *rx_dev; + unsigned ip_summed; + struct net_device_stats stats; + struct list_head list; + struct net_device *dev; +}; + +/* + * I have to hold the rtnl_lock during device delete. + * So I use the rtnl_lock to protect my list manipulations + * as well. Crude but simple. + */ +static LIST_HEAD(etun_list); + +/* + * The higher levels take care of making this non-reentrant (it's + * called with bh's disabled). + */ +static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + struct etun_info *rx_info = rx_dev->priv; + + tx_info->stats.tx_packets++; + tx_info->stats.tx_bytes += skb->len; + + /* Drop the skb state that was needed to get here */ + skb_orphan(skb); + if (skb->dst) + skb->dst = dst_pop(skb->dst); /* Allow for smart routing */ + + /* Switch to the receiving device */ + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, rx_dev); + skb->dev = rx_dev; + skb->ip_summed = CHECKSUM_NONE; + + /* If both halves agree no checksum is needed */ + if (tx_dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = rx_info->ip_summed; + + rx_dev->last_rx = jiffies; + rx_info->stats.rx_packets++; + rx_info->stats.rx_bytes += skb->len; + netif_rx(skb); + + return 0; +} + +static struct net_device_stats *etun_get_stats(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return &info->stats; +} + +/* ethtool interface */ +static int 
etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10000; /* Memory is fast! */ + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); +} + +static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch(stringset) { + case ETH_SS_STATS: + memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + case ETH_SS_TEST: + default: + break; + } +} + +static int etun_get_stats_count(struct net_device *dev) +{ + return ETUN_NUM_STATS; +} + +static void etun_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct etun_info *info = dev->priv; + + data[0] = info->rx_dev->ifindex; +} + +static u32 etun_get_rx_csum(struct net_device *dev) +{ + struct etun_info *info = dev->priv; + return info->ip_summed == CHECKSUM_UNNECESSARY; +} + +static int etun_set_rx_csum(struct net_device *dev, u32 data) +{ + struct etun_info *info = dev->priv; + + info->ip_summed = data ? 
CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + + return 0; +} + +static u32 etun_get_tx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_NO_CSUM) != 0; +} + +static int etun_set_tx_csum(struct net_device *dev, u32 data) +{ + dev->features &= ~NETIF_F_NO_CSUM; + if (data) + dev->features |= NETIF_F_NO_CSUM; + + return 0; +} + +static struct ethtool_ops etun_ethtool_ops = { + .get_settings = etun_get_settings, + .get_drvinfo = etun_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_rx_csum = etun_get_rx_csum, + .set_rx_csum = etun_set_rx_csum, + .get_tx_csum = etun_get_tx_csum, + .set_tx_csum = etun_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#if 0 /* Does just setting the bit successfully emulate tso? */ + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +#endif + .get_strings = etun_get_strings, + .get_stats_count = etun_get_stats_count, + .get_ethtool_stats = etun_get_ethtool_stats, + .get_perm_addr = ethtool_op_get_perm_addr, +}; + +static int etun_open(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + /* If we attempt to bring up etun in the small window before + * it is connected to its partner, return an error. 
+ */ + if (!rx_dev) + return -ENOTCONN; + if (rx_dev->flags & IFF_UP) { + netif_carrier_on(tx_dev); + netif_carrier_on(rx_dev); + } + netif_start_queue(tx_dev); + return 0; +} + +static int etun_stop(struct net_device *tx_dev) +{ + struct etun_info *tx_info = tx_dev->priv; + struct net_device *rx_dev = tx_info->rx_dev; + netif_stop_queue(tx_dev); + if (netif_carrier_ok(tx_dev)) { + netif_carrier_off(tx_dev); + netif_carrier_off(rx_dev); + } + return 0; +} + +static int etun_change_mtu(struct net_device *dev, int new_mtu) +{ + /* Don't allow ridiculously small mtus */ + if (new_mtu < (ETH_ZLEN - ETH_HLEN)) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static void etun_set_multicast_list(struct net_device *dev) +{ + /* Nothing sane I can do here */ + return; +} + +static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + return -EOPNOTSUPP; +} + +/* Only allow letters and numbers in an etun device name */ +static int is_valid_name(const char *name) +{ + const char *ptr; + for (ptr = name; *ptr; ptr++) { + if (!isalnum(*ptr)) + return 0; + } + return 1; +} + +static struct net_device *etun_alloc(struct net *net, const char *name) +{ + struct net_device *dev; + struct etun_info *info; + int err; + + if (!name || !is_valid_name(name)) + return ERR_PTR(-EINVAL); + + dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup); + if (!dev) + return ERR_PTR(-ENOMEM); + + info = dev->priv; + info->dev = dev; + dev->nd_net = net; + + random_ether_addr(dev->dev_addr); + dev->tx_queue_len = 0; /* A queue is silly for a loopback device */ + dev->hard_start_xmit = etun_xmit; + dev->get_stats = etun_get_stats; + dev->open = etun_open; + dev->stop = etun_stop; + dev->set_multicast_list = etun_set_multicast_list; + dev->do_ioctl = etun_ioctl; + dev->features = NETIF_F_FRAGLIST + | NETIF_F_HIGHDMA + | NETIF_F_LLTX; + dev->flags = IFF_BROADCAST | IFF_MULTICAST |IFF_PROMISC; + dev->ethtool_ops = &etun_ethtool_ops; + dev->destructor = free_netdev; + 
dev->change_mtu = etun_change_mtu; + err = register_netdev(dev); + if (err) { + free_netdev(dev); + dev = ERR_PTR(err); + goto out; + } + netif_carrier_off(dev); +out: + return dev; +} + +static int etun_alloc_pair(struct net *net, const char *name0, const char *name1) +{ + struct net_device *dev0, *dev1; + struct etun_info *info0, *info1; + + dev0 = etun_alloc(net, name0); + if (IS_ERR(dev0)) { + return PTR_ERR(dev0); + } + info0 = dev0->priv; + + dev1 = etun_alloc(net, name1); + if (IS_ERR(dev1)) { + unregister_netdev(dev0); + return PTR_ERR(dev1); + } + info1 = dev1->priv; + + dev_hold(dev0); + dev_hold(dev1); + info0->rx_dev = dev1; + info1->rx_dev = dev0; + + /* Only place one member of the pair on the list + * so I don't confuse list_for_each_entry_safe, + * by deleting two list entries at once. + */ + rtnl_lock(); + list_add(&info0->list, &etun_list); + INIT_LIST_HEAD(&info1->list); + rtnl_unlock(); + + return 0; +} + +static int etun_unregister_pair(struct net_device *dev0) +{ + struct etun_info *info0, *info1; + struct net_device *dev1; + + ASSERT_RTNL(); + + if (!dev0) + return -ENODEV; + + /* Ensure my network devices are not passing packets */ + dev_close(dev0); + info0 = dev0->priv; + dev1 = info0->rx_dev; + info1 = dev1->priv; + dev_close(dev1); + + /* Drop the cross device references */ + dev_put(dev0); + dev_put(dev1); + + /* Remove from the etun list */ + if (!list_empty(&info0->list)) + list_del_init(&info0->list); + if (!list_empty(&info1->list)) + list_del_init(&info1->list); + + unregister_netdevice(dev0); + unregister_netdevice(dev1); + return 0; +} + +static int etun_noget(char *buffer, struct kernel_param *kp) +{ + return 0; +} + +static int etun_newif(const char *val, struct kernel_param *kp) +{ + char name0[IFNAMSIZ], name1[IFNAMSIZ]; + const char *mid; + int len, len0, len1; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace (empty input stays at len 0) */ + len = strlen(val); + while (len > 0 && isspace(val[len - 1])) 
len--; + + /* Split the string into 2 names */ + mid = memchr(val, ',', len); + if (!mid) + return -EINVAL; + + /* Get the first device name, truncated to fit name0 */ + len0 = mid - val; + if (len0 > sizeof(name0) - 1) + len0 = sizeof(name0) - 1; + strncpy(name0, val, len0); + name0[len0] = '\0'; + + /* And the second device name, measured from the comma not from len0 */ + len1 = len - (mid + 1 - val); + if (len1 > sizeof(name1) - 1) + len1 = sizeof(name1) - 1; + strncpy(name1, mid + 1, len1); + name1[len1] = '\0'; + + return etun_alloc_pair(current->nsproxy->net_ns, name0, name1); +} + +static int etun_delif(const char *val, struct kernel_param *kp) +{ + char name[IFNAMSIZ]; + int len; + struct net_device *dev; + int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Avoid frustration by removing trailing whitespace (empty input stays at len 0) */ + len = strlen(val); + while (len > 0 && isspace(val[len - 1])) + len--; + + /* Get the device name */ + if (len > sizeof(name) - 1) + return -EINVAL; + strncpy(name, val, len); + name[len] = '\0'; + + /* Double check I don't have strange characters in my device name */ + if (!is_valid_name(name)) + return -EINVAL; + + rtnl_lock(); + dev = __dev_get_by_name(current->nsproxy->net_ns, name); + if (dev && dev->hard_start_xmit != etun_xmit) dev = NULL; /* priv is only an etun_info for etun devices */ + err = etun_unregister_pair(dev); + rtnl_unlock(); + return err; +} + +static int __init etun_init(void) +{ + printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); + printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT); + + return 0; +} + +static void etun_cleanup(void) +{ + struct etun_info *info, *tmp; + rtnl_lock(); + list_for_each_entry_safe(info, tmp, &etun_list, list) { + etun_unregister_pair(info->dev); + } + rtnl_unlock(); +} + +module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR); +module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR); +module_init(etun_init); +module_exit(etun_cleanup); +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("Eric Biederman "); +MODULE_LICENSE("GPL"); diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/bpqether.c 
linux-2.6.22-try2-netns/drivers/net/hamradio/bpqether.c --- linux-2.6.22-try2/drivers/net/hamradio/bpqether.c 2007-12-19 13:37:31.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/hamradio/bpqether.c 2007-12-19 22:49:13.000000000 -0500 @@ -83,6 +83,7 @@ #include #include +#include #include @@ -172,6 +173,9 @@ struct ethhdr *eth; struct bpqdev *bpq; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -559,6 +563,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; @@ -594,7 +601,7 @@ static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS - if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) { + if (!proc_net_fops_create(&init_net, "bpqether", S_IRUGO, &bpq_info_fops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return -ENOENT; @@ -618,7 +625,7 @@ unregister_netdevice_notifier(&bpq_dev_notifier); - proc_net_remove("bpqether"); + proc_net_remove(&init_net, "bpqether"); rtnl_lock(); while (!list_empty(&bpq_devices)) { diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/scc.c linux-2.6.22-try2-netns/drivers/net/hamradio/scc.c --- linux-2.6.22-try2/drivers/net/hamradio/scc.c 2007-12-19 13:37:31.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/hamradio/scc.c 2007-12-19 22:49:13.000000000 -0500 @@ -174,6 +174,7 @@ #include #include +#include #include #include @@ -2114,7 +2115,7 @@ } rtnl_unlock(); - proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops); + proc_net_fops_create(&init_net, "z8530drv", 0, &scc_net_seq_fops); return 0; } @@ -2169,7 +2170,7 @@ if (Vector_Latch) release_region(Vector_Latch, 1); - proc_net_remove("z8530drv"); + proc_net_remove(&init_net, "z8530drv"); } MODULE_AUTHOR("Joerg Reuter "); diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/yam.c linux-2.6.22-try2-netns/drivers/net/hamradio/yam.c --- 
linux-2.6.22-try2/drivers/net/hamradio/yam.c 2007-12-19 13:37:31.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/hamradio/yam.c 2007-12-19 22:49:13.000000000 -0500 @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1142,7 +1143,7 @@ yam_timer.expires = jiffies + HZ / 100; add_timer(&yam_timer); - proc_net_fops_create("yam", S_IRUGO, &yam_info_fops); + proc_net_fops_create(&init_net, "yam", S_IRUGO, &yam_info_fops); return 0; error: while (--i >= 0) { @@ -1174,7 +1175,7 @@ kfree(p); } - proc_net_remove("yam"); + proc_net_remove(&init_net, "yam"); } /* --------------------------------------------------------------------- */ diff -Nurb linux-2.6.22-try2/drivers/net/ibmveth.c linux-2.6.22-try2-netns/drivers/net/ibmveth.c --- linux-2.6.22-try2/drivers/net/ibmveth.c 2007-12-19 15:29:22.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/ibmveth.c 2007-12-19 22:49:13.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -97,7 +98,7 @@ static struct kobj_type ktype_veth_pool; #ifdef CONFIG_PROC_FS -#define IBMVETH_PROC_DIR "net/ibmveth" +#define IBMVETH_PROC_DIR "ibmveth" static struct proc_dir_entry *ibmveth_proc_dir; #endif @@ -1093,7 +1094,7 @@ #ifdef CONFIG_PROC_FS static void ibmveth_proc_register_driver(void) { - ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL); + ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, init_net.proc_net); if (ibmveth_proc_dir) { SET_MODULE_OWNER(ibmveth_proc_dir); } @@ -1101,7 +1102,7 @@ static void ibmveth_proc_unregister_driver(void) { - remove_proc_entry(IBMVETH_PROC_DIR, NULL); + remove_proc_entry(IBMVETH_PROC_DIR, init_net.proc_net); } static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos) diff -Nurb linux-2.6.22-try2/drivers/net/loopback.c linux-2.6.22-try2-netns/drivers/net/loopback.c --- linux-2.6.22-try2/drivers/net/loopback.c 2007-12-19 13:37:32.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/loopback.c 2007-12-19 
22:49:13.000000000 -0500 @@ -57,6 +57,7 @@ #include #include #include +#include struct pcpu_lstats { unsigned long packets; @@ -199,39 +200,52 @@ .get_rx_csum = always_on, }; +static int loopback_net_init(struct net *net) +{ + struct net_device *lo = &net->loopback_dev; /* * The loopback device is special. There is only one instance and * it is statically allocated. Don't do this for other devices. */ -struct net_device loopback_dev = { - .name = "lo", - .get_stats = &get_stats, - .mtu = (16 * 1024) + 20 + 20 + 12, - .hard_start_xmit = loopback_xmit, - .hard_header = eth_header, - .hard_header_cache = eth_header_cache, - .header_cache_update = eth_header_cache_update, - .hard_header_len = ETH_HLEN, /* 14 */ - .addr_len = ETH_ALEN, /* 6 */ - .tx_queue_len = 0, - .type = ARPHRD_LOOPBACK, /* 0x0001*/ - .rebuild_header = eth_rebuild_header, - .flags = IFF_LOOPBACK, - .features = NETIF_F_SG | NETIF_F_FRAGLIST + strcpy(lo->name, "lo"); + lo->get_stats = &get_stats; + lo->mtu = (16 * 1024) + 20 + 20 + 12; + lo->hard_start_xmit = loopback_xmit; + lo->hard_header = eth_header; + lo->hard_header_cache = eth_header_cache; + lo->header_cache_update = eth_header_cache_update; + lo->hard_header_len = ETH_HLEN; /* 14 */ + lo->addr_len = ETH_ALEN; /* 6 */ + lo->tx_queue_len = 0; + lo->type = ARPHRD_LOOPBACK; /* 0x0001*/ + lo->rebuild_header = eth_rebuild_header; + lo->flags = IFF_LOOPBACK; + lo->features = NETIF_F_SG | NETIF_F_FRAGLIST #ifdef LOOPBACK_TSO | NETIF_F_TSO #endif | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA - | NETIF_F_LLTX, - .ethtool_ops = &loopback_ethtool_ops, + | NETIF_F_LLTX + | NETIF_F_NETNS_LOCAL; + lo->ethtool_ops = &loopback_ethtool_ops; + lo->nd_net = net; /* tie this namespace's embedded device back to the namespace */ + return register_netdev(lo); +} + +static void loopback_net_exit(struct net *net) +{ + unregister_netdev(&net->loopback_dev); +} + +static struct pernet_operations loopback_net_ops = { + .init = loopback_net_init, + .exit = loopback_net_exit, }; /* Setup and register the loopback device. 
*/ static int __init loopback_init(void) { - return register_netdev(&loopback_dev); + return register_pernet_device(&loopback_net_ops); }; module_init(loopback_init); - -EXPORT_SYMBOL(loopback_dev); diff -Nurb linux-2.6.22-try2/drivers/net/pppoe.c linux-2.6.22-try2-netns/drivers/net/pppoe.c --- linux-2.6.22-try2/drivers/net/pppoe.c 2007-12-19 13:37:34.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/pppoe.c 2007-12-19 22:49:13.000000000 -0500 @@ -78,6 +78,7 @@ #include #include +#include #include #include @@ -210,7 +211,7 @@ struct net_device *dev; int ifindex; - dev = dev_get_by_name(sp->sa_addr.pppoe.dev); + dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev); if(!dev) return NULL; ifindex = dev->ifindex; @@ -295,6 +296,9 @@ { struct net_device *dev = (struct net_device *) ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEMTU: @@ -380,6 +384,9 @@ struct pppoe_hdr *ph; struct pppox_sock *po; + if (dev->nd_net != &init_net) + goto drop; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; @@ -412,6 +419,9 @@ struct pppoe_hdr *ph; struct pppox_sock *po; + if (dev->nd_net != &init_net) + goto abort; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; @@ -471,12 +481,12 @@ * Initialize a new struct sock. 
* **********************************************************************/ -static int pppoe_create(struct socket *sock) +static int pppoe_create(struct net *net, struct socket *sock) { int error = -ENOMEM; struct sock *sk; - sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); if (!sk) goto out; @@ -588,7 +598,7 @@ /* Don't re-bind if sid==0 */ if (sp->sa_addr.pppoe.sid != 0) { - dev = dev_get_by_name(sp->sa_addr.pppoe.dev); + dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev); error = -ENODEV; if (!dev) @@ -1064,7 +1074,7 @@ { struct proc_dir_entry *p; - p = create_proc_entry("net/pppoe", S_IRUGO, NULL); + p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net); if (!p) return -ENOMEM; @@ -1135,7 +1145,7 @@ dev_remove_pack(&pppoes_ptype); dev_remove_pack(&pppoed_ptype); unregister_netdevice_notifier(&pppoe_notifier); - remove_proc_entry("net/pppoe", NULL); + remove_proc_entry("pppoe", init_net.proc_net); proto_unregister(&pppoe_sk_proto); } diff -Nurb linux-2.6.22-try2/drivers/net/pppox.c linux-2.6.22-try2-netns/drivers/net/pppox.c --- linux-2.6.22-try2/drivers/net/pppox.c 2007-12-19 13:37:34.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/pppox.c 2007-12-19 22:49:13.000000000 -0500 @@ -107,10 +107,13 @@ EXPORT_SYMBOL(pppox_ioctl); -static int pppox_create(struct socket *sock, int protocol) +static int pppox_create(struct net *net, struct socket *sock, int protocol) { int rc = -EPROTOTYPE; + if (net != &init_net) + return -EAFNOSUPPORT; + if (protocol < 0 || protocol > PX_MAX_PROTO) goto out; @@ -126,7 +129,7 @@ !try_module_get(pppox_protos[protocol]->owner)) goto out; - rc = pppox_protos[protocol]->create(sock); + rc = pppox_protos[protocol]->create(net, sock); module_put(pppox_protos[protocol]->owner); out: diff -Nurb linux-2.6.22-try2/drivers/net/shaper.c linux-2.6.22-try2-netns/drivers/net/shaper.c --- linux-2.6.22-try2/drivers/net/shaper.c 2007-12-19 13:37:34.000000000 -0500 
+++ linux-2.6.22-try2-netns/drivers/net/shaper.c 2007-12-19 22:49:13.000000000 -0500 @@ -86,6 +86,7 @@ #include #include +#include struct shaper_cb { unsigned long shapeclock; /* Time it should go out */ @@ -488,7 +489,7 @@ { case SHAPER_SET_DEV: { - struct net_device *them=__dev_get_by_name(ss->ss_name); + struct net_device *them=__dev_get_by_name(&init_net, ss->ss_name); if(them==NULL) return -ENODEV; if(sh->dev) diff -Nurb linux-2.6.22-try2/drivers/net/tokenring/lanstreamer.c linux-2.6.22-try2-netns/drivers/net/tokenring/lanstreamer.c --- linux-2.6.22-try2/drivers/net/tokenring/lanstreamer.c 2007-12-19 13:37:37.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/tokenring/lanstreamer.c 2007-12-19 22:49:13.000000000 -0500 @@ -250,7 +250,7 @@ #if STREAMER_NETWORK_MONITOR #ifdef CONFIG_PROC_FS if (!dev_streamer) - create_proc_read_entry("net/streamer_tr", 0, 0, + create_proc_read_entry("streamer_tr", 0, init_net.proc_net, streamer_proc_info, NULL); streamer_priv->next = dev_streamer; dev_streamer = streamer_priv; @@ -423,7 +423,7 @@ } } if (!dev_streamer) - remove_proc_entry("net/streamer_tr", NULL); + remove_proc_entry("streamer_tr", init_net.proc_net); } #endif #endif diff -Nurb linux-2.6.22-try2/drivers/net/tokenring/olympic.c linux-2.6.22-try2-netns/drivers/net/tokenring/olympic.c --- linux-2.6.22-try2/drivers/net/tokenring/olympic.c 2007-12-19 13:37:37.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/tokenring/olympic.c 2007-12-19 22:49:13.000000000 -0500 @@ -101,6 +101,7 @@ #include #include +#include #include #include @@ -268,9 +269,9 @@ printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name); if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */ char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ; + 
create_proc_read_entry(proc_name,0,init_net.proc_net,olympic_proc_info,(void *)dev) ; printk("Olympic: Network Monitor information: /proc/%s\n",proc_name); } return 0 ; @@ -1752,9 +1753,9 @@ if (olympic_priv->olympic_network_monitor) { char proc_name[20] ; - strcpy(proc_name,"net/olympic_") ; + strcpy(proc_name,"olympic_") ; strcat(proc_name,dev->name) ; - remove_proc_entry(proc_name,NULL); + remove_proc_entry(proc_name,init_net.proc_net); } unregister_netdev(dev) ; iounmap(olympic_priv->olympic_mmio) ; diff -Nurb linux-2.6.22-try2/drivers/net/tun.c linux-2.6.22-try2-netns/drivers/net/tun.c --- linux-2.6.22-try2/drivers/net/tun.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/tun.c 2007-12-19 22:49:13.000000000 -0500 @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -475,7 +476,7 @@ !capable(CAP_NET_ADMIN)) return -EPERM; } - else if (__dev_get_by_name(ifr->ifr_name)) + else if (__dev_get_by_name(&init_net, ifr->ifr_name)) return -EINVAL; else { char *name; diff -Nurb linux-2.6.22-try2/drivers/net/wan/dlci.c linux-2.6.22-try2-netns/drivers/net/wan/dlci.c --- linux-2.6.22-try2/drivers/net/wan/dlci.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wan/dlci.c 2007-12-19 22:49:13.000000000 -0500 @@ -361,7 +361,7 @@ /* validate slave device */ - slave = dev_get_by_name(dlci->devname); + slave = dev_get_by_name(&init_net, dlci->devname); if (!slave) return -ENODEV; @@ -427,7 +427,7 @@ int err; /* validate slave device */ - master = __dev_get_by_name(dlci->devname); + master = __dev_get_by_name(&init_net, dlci->devname); if (!master) return(-ENODEV); @@ -513,6 +513,9 @@ { struct net_device *dev = (struct net_device *) ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { struct dlci_local *dlp; diff -Nurb linux-2.6.22-try2/drivers/net/wan/hdlc.c linux-2.6.22-try2-netns/drivers/net/wan/hdlc.c --- linux-2.6.22-try2/drivers/net/wan/hdlc.c 2007-12-19 
13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wan/hdlc.c 2007-12-19 22:49:13.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include static const char* version = "HDLC support module revision 1.21"; @@ -66,6 +67,12 @@ struct packet_type *p, struct net_device *orig_dev) { struct hdlc_device_desc *desc = dev_to_desc(dev); + + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (desc->netif_rx) return desc->netif_rx(skb); @@ -102,6 +109,9 @@ unsigned long flags; int on; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->get_stats != hdlc_get_stats) return NOTIFY_DONE; /* not an HDLC device */ diff -Nurb linux-2.6.22-try2/drivers/net/wan/lapbether.c linux-2.6.22-try2-netns/drivers/net/wan/lapbether.c --- linux-2.6.22-try2/drivers/net/wan/lapbether.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wan/lapbether.c 2007-12-19 22:49:13.000000000 -0500 @@ -91,6 +91,9 @@ int len, err; struct lapbethdev *lapbeth; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -391,6 +394,9 @@ struct lapbethdev *lapbeth; struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!dev_is_ethdev(dev)) return NOTIFY_DONE; diff -Nurb linux-2.6.22-try2/drivers/net/wan/sbni.c linux-2.6.22-try2-netns/drivers/net/wan/sbni.c --- linux-2.6.22-try2/drivers/net/wan/sbni.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wan/sbni.c 2007-12-19 22:49:13.000000000 -0500 @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1362,7 +1363,7 @@ if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name )) return -EFAULT; - slave_dev = dev_get_by_name( slave_name ); + slave_dev = dev_get_by_name(&init_net, slave_name ); if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { printk( KERN_ERR "%s: trying to enslave non-active " "device %s\n", dev->name, slave_name ); diff 
-Nurb linux-2.6.22-try2/drivers/net/wan/syncppp.c linux-2.6.22-try2-netns/drivers/net/wan/syncppp.c --- linux-2.6.22-try2/drivers/net/wan/syncppp.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wan/syncppp.c 2007-12-19 22:49:13.000000000 -0500 @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -1445,6 +1446,11 @@ static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; sppp_input(dev,skb); diff -Nurb linux-2.6.22-try2/drivers/net/wireless/hostap/hostap_main.c linux-2.6.22-try2-netns/drivers/net/wireless/hostap/hostap_main.c --- linux-2.6.22-try2/drivers/net/wireless/hostap/hostap_main.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wireless/hostap/hostap_main.c 2007-12-19 22:49:13.000000000 -0500 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1094,8 +1095,8 @@ static int __init hostap_init(void) { - if (proc_net != NULL) { - hostap_proc = proc_mkdir("hostap", proc_net); + if (init_net.proc_net != NULL) { + hostap_proc = proc_mkdir("hostap", init_net.proc_net); if (!hostap_proc) printk(KERN_WARNING "Failed to mkdir " "/proc/net/hostap\n"); @@ -1110,7 +1111,7 @@ { if (hostap_proc != NULL) { hostap_proc = NULL; - remove_proc_entry("hostap", proc_net); + remove_proc_entry("hostap", init_net.proc_net); } } diff -Nurb linux-2.6.22-try2/drivers/net/wireless/strip.c linux-2.6.22-try2-netns/drivers/net/wireless/strip.c --- linux-2.6.22-try2/drivers/net/wireless/strip.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/net/wireless/strip.c 2007-12-19 22:49:13.000000000 -0500 @@ -107,6 +107,7 @@ #include #include #include +#include #include #include @@ -1971,7 +1972,7 @@ sizeof(zero_address))) { struct net_device *dev; 
read_lock_bh(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->type == strip_info->dev->type && !memcmp(dev->dev_addr, &strip_info->true_dev_addr, @@ -2787,7 +2788,7 @@ /* * Register the status file with /proc */ - proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops); + proc_net_fops_create(&init_net, "strip", S_IFREG | S_IRUGO, &strip_seq_fops); return status; } @@ -2809,7 +2810,7 @@ } /* Unregister with the /proc/net file here. */ - proc_net_remove("strip"); + proc_net_remove(&init_net, "strip"); if ((i = tty_unregister_ldisc(N_STRIP))) printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i); diff -Nurb linux-2.6.22-try2/drivers/parisc/led.c linux-2.6.22-try2-netns/drivers/parisc/led.c --- linux-2.6.22-try2/drivers/parisc/led.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/parisc/led.c 2007-12-19 22:49:13.000000000 -0500 @@ -359,7 +359,7 @@ * for reading should be OK */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) diff -Nurb linux-2.6.22-try2/drivers/s390/net/qeth_main.c linux-2.6.22-try2-netns/drivers/s390/net/qeth_main.c --- linux-2.6.22-try2/drivers/s390/net/qeth_main.c 2007-12-19 13:37:38.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/s390/net/qeth_main.c 2007-12-19 22:49:13.000000000 -0500 @@ -8127,7 +8127,7 @@ neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); - neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key); + neigh->type = inet_addr_type(&init_net, *(__be32 *) neigh->primary_key); neigh->nud_state = NUD_NOARP; neigh->ops = arp_direct_ops; neigh->output = neigh->ops->queue_xmit; diff -Nurb linux-2.6.22-try2/drivers/scsi/scsi_netlink.c linux-2.6.22-try2-netns/drivers/scsi/scsi_netlink.c --- linux-2.6.22-try2/drivers/scsi/scsi_netlink.c 2007-12-19 
13:37:39.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/scsi/scsi_netlink.c 2007-12-19 22:49:13.000000000 -0500 @@ -167,7 +167,7 @@ return; } - scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, + scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, THIS_MODULE); if (!scsi_nl_sock) { diff -Nurb linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-try2-netns/drivers/scsi/scsi_transport_iscsi.c --- linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/drivers/scsi/scsi_transport_iscsi.c 2007-12-19 22:49:13.000000000 -0500 @@ -1523,7 +1523,7 @@ if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL, + nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff -Nurb linux-2.6.22-try2/edit linux-2.6.22-try2-netns/edit --- linux-2.6.22-try2/edit 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-try2-netns/edit 2007-12-19 22:57:06.000000000 -0500 @@ -0,0 +1,19 @@ +vi -o ./fs/proc/root.c ./fs/proc/root.c.rej +vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej +vi -o ./include/linux/sched.h ./include/linux/sched.h.rej +vi -o ./include/net/inet_timewait_sock.h ./include/net/inet_timewait_sock.h.rej +vi -o ./include/net/route.h ./include/net/route.h.rej +vi -o ./include/net/sock.h ./include/net/sock.h.rej +vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej +vi -o ./lib/Makefile ./lib/Makefile.rej +vi -o ./net/core/dev.c ./net/core/dev.c.rej +vi -o ./net/core/rtnetlink.c ./net/core/rtnetlink.c.rej +vi -o ./net/core/sock.c ./net/core/sock.c.rej +vi -o ./net/ipv4/af_inet.c ./net/ipv4/af_inet.c.rej +vi -o ./net/ipv4/inet_connection_sock.c ./net/ipv4/inet_connection_sock.c.rej +vi -o ./net/ipv4/inet_hashtables.c ./net/ipv4/inet_hashtables.c.rej +vi -o ./net/ipv4/raw.c ./net/ipv4/raw.c.rej +vi -o ./net/ipv4/tcp_ipv4.c 
./net/ipv4/tcp_ipv4.c.rej +vi -o ./net/ipv4/udp.c ./net/ipv4/udp.c.rej +vi -o ./net/ipv6/addrconf.c ./net/ipv6/addrconf.c.rej +vi -o ./net/unix/af_unix.c ./net/unix/af_unix.c.rej diff -Nurb linux-2.6.22-try2/fs/afs/netdevices.c linux-2.6.22-try2-netns/fs/afs/netdevices.c --- linux-2.6.22-try2/fs/afs/netdevices.c 2007-12-19 13:37:40.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/afs/netdevices.c 2007-12-19 22:49:13.000000000 -0500 @@ -8,6 +8,7 @@ #include #include #include +#include #include "internal.h" /* @@ -23,7 +24,7 @@ BUG(); rtnl_lock(); - dev = __dev_getfirstbyhwtype(ARPHRD_ETHER); + dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); if (dev) { memcpy(mac, dev->dev_addr, maclen); ret = 0; @@ -47,7 +48,7 @@ ASSERT(maxbufs > 0); rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->type == ARPHRD_LOOPBACK && !wantloopback) continue; idev = __in_dev_get_rtnl(dev); diff -Nurb linux-2.6.22-try2/fs/compat_ioctl.c linux-2.6.22-try2-netns/fs/compat_ioctl.c --- linux-2.6.22-try2/fs/compat_ioctl.c 2007-12-19 13:37:40.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/compat_ioctl.c 2007-12-19 22:49:13.000000000 -0500 @@ -319,22 +319,21 @@ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct net_device *dev; - struct ifreq32 ifr32; + struct ifreq __user *uifr; int err; - if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32))) + uifr = compat_alloc_user_space(sizeof(struct ifreq)); /* native-sized ifreq handed to sys_ioctl() below */ + if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32))) return -EFAULT; - dev = dev_get_by_index(ifr32.ifr_ifindex); - if (!dev) - return -ENODEV; + err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr); + if (err) + return err; - strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name)); - dev_put(dev); + if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32))) + return -EFAULT; - err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32)); - return (err ? 
-EFAULT : 0); + return 0; } static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) diff -Nurb linux-2.6.22-try2/fs/proc/Makefile linux-2.6.22-try2-netns/fs/proc/Makefile --- linux-2.6.22-try2/fs/proc/Makefile 2007-12-19 13:37:46.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/proc/Makefile 2007-12-19 22:49:13.000000000 -0500 @@ -11,6 +11,7 @@ proc_tty.o proc_misc.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o +proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff -Nurb linux-2.6.22-try2/fs/proc/internal.h linux-2.6.22-try2-netns/fs/proc/internal.h --- linux-2.6.22-try2/fs/proc/internal.h 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/proc/internal.h 2007-12-19 22:49:13.000000000 -0500 @@ -17,6 +17,11 @@ #else static inline void proc_sys_init(void) { } #endif +#ifdef CONFIG_NET +extern int proc_net_init(void); +#else +static inline int proc_net_init(void) { return 0; } +#endif struct vmalloc_info { unsigned long used; diff -Nurb linux-2.6.22-try2/fs/proc/proc_net.c linux-2.6.22-try2-netns/fs/proc/proc_net.c --- linux-2.6.22-try2/fs/proc/proc_net.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/proc/proc_net.c 2007-12-19 22:49:13.000000000 -0500 @@ -0,0 +1,154 @@ +/* + * linux/fs/proc/net.c + * + * Copyright (C) 2007 + * + * Author: Eric Biederman + * + * proc net directory handling functions + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +static struct proc_dir_entry *proc_net_shadow; + +static struct dentry *proc_net_shadow_dentry(struct dentry *parent, + struct proc_dir_entry *de) +{ + struct dentry *shadow = NULL; + struct inode *inode; + if (!de) + goto out; + de_get(de); + inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); + if (!inode) + goto out_de_put; + shadow = 
d_alloc_name(parent, de->name); + if (!shadow) + goto out_iput; + shadow->d_op = parent->d_op; /* proc_dentry_operations */ + d_instantiate(shadow, inode); +out: + return shadow; +out_iput: + iput(inode); +out_de_put: + de_put(de); + goto out; +} + +static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + shadow = proc_net_shadow_dentry(parent, net->proc_net); + if (!shadow) + return ERR_PTR(-ENOENT); + + dput(nd->dentry); + /* My dentry count is 1 and that should be enough as the + * shadow dentry is thrown away immediately. + */ + nd->dentry = shadow; + return NULL; +} + +static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + + shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net); + if (!shadow) + return ERR_PTR(-ENOENT); + + dput(nd->dentry); + nd->dentry = shadow; + + return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd); +} + +static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct net *net = current->nsproxy->net_ns; + struct dentry *shadow; + int ret; + + shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net); + if (!shadow) + return -ENOENT; + ret = shadow->d_inode->i_op->setattr(shadow, iattr); + dput(shadow); + return ret; +} + +static const struct file_operations proc_net_dir_operations = { + .read = generic_read_dir, +}; + +static struct inode_operations proc_net_dir_inode_operations = { + .follow_link = proc_net_follow_link, + .lookup = proc_net_lookup, + .setattr = proc_net_setattr, +}; + + +static int proc_net_ns_init(struct net *net) +{ + struct proc_dir_entry *netd, *net_statd; + + netd = proc_mkdir("net", &net->proc_net_root); + if (!netd) + return -EEXIST; + + net_statd = proc_mkdir("stat", netd); + if (!net_statd) { + remove_proc_entry("net", &net->proc_net_root); + 
return -EEXIST; + } + + netd->data = net; + net_statd->data = net; + net->proc_net_root.data = net; + net->proc_net = netd; + net->proc_net_stat = net_statd; + + return 0; +} + +static void proc_net_ns_exit(struct net *net) +{ + remove_proc_entry("stat", net->proc_net); + remove_proc_entry("net", &net->proc_net_root); +} + +struct pernet_operations proc_net_ns_ops = { + .init = proc_net_ns_init, + .exit = proc_net_ns_exit, +}; + +int proc_net_init(void) +{ + proc_net_shadow = proc_mkdir("net", NULL); + if (!proc_net_shadow) + return -ENOMEM; /* proc_mkdir() failed: nothing to hang the shadow ops on */ + proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; + proc_net_shadow->proc_fops = &proc_net_dir_operations; + return register_pernet_subsys(&proc_net_ns_ops); +} diff -Nurb linux-2.6.22-try2/fs/proc/root.c linux-2.6.22-try2-netns/fs/proc/root.c --- linux-2.6.22-try2/fs/proc/root.c 2007-12-19 13:37:46.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/proc/root.c 2007-12-19 22:57:39.000000000 -0500 @@ -21,11 +21,11 @@ #include "internal.h" -struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; struct proc_dir_entry *proc_virtual; extern void proc_vx_init(void); +struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -64,8 +64,8 @@ return; } proc_misc_init(); - proc_net = proc_mkdir("net", NULL); - proc_net_stat = proc_mkdir("net/stat", NULL); + + proc_net_init(); #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); @@ -163,7 +163,5 @@ EXPORT_SYMBOL(remove_proc_entry); EXPORT_SYMBOL(proc_root); EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_net); -EXPORT_SYMBOL(proc_net_stat); EXPORT_SYMBOL(proc_bus); EXPORT_SYMBOL(proc_root_driver); diff -Nurb linux-2.6.22-try2/fs/sysfs/bin.c linux-2.6.22-try2-netns/fs/sysfs/bin.c --- linux-2.6.22-try2/fs/sysfs/bin.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/bin.c 2007-12-19 22:49:13.000000000 -0500 @@ -248,7 +248,7 
@@ void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) { + if (sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name) < 0) { printk(KERN_ERR "%s: " "bad dentry or inode or no such file: \"%s\"\n", __FUNCTION__, attr->attr.name); diff -Nurb linux-2.6.22-try2/fs/sysfs/dir.c linux-2.6.22-try2-netns/fs/sysfs/dir.c --- linux-2.6.22-try2/fs/sysfs/dir.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/dir.c 2007-12-19 22:49:13.000000000 -0500 @@ -14,12 +14,33 @@ #include #include "sysfs.h" +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd); + DEFINE_MUTEX(sysfs_mutex); spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; static DEFINE_IDA(sysfs_ino_ida); +static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target) +{ + /* Find the shadow directory for the specified tag */ + struct sysfs_dirent *sd; + + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { + if (sd->s_name != target) + continue; + break; + } + return sd; +} + +static const void *find_shadow_tag(struct kobject *kobj) +{ + /* Find the tag the current kobj is cached with */ + return kobj->sd->s_parent->s_name; +} + /** * sysfs_link_sibling - link sysfs_dirent into sibling list * @sd: sysfs_dirent of interest @@ -323,6 +344,7 @@ if (sysfs_type(sd) & SYSFS_COPY_NAME) kfree(sd->s_name); kfree(sd->s_iattr); + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) sysfs_free_ino(sd->s_ino); kmem_cache_free(sysfs_dir_cachep, sd); @@ -413,6 +435,7 @@ sd->s_dentry = dentry; spin_unlock(&sysfs_assoc_lock); + if (dentry->d_flags & DCACHE_UNHASHED) d_rehash(dentry); } @@ -568,8 +591,9 @@ spin_unlock(&dcache_lock); spin_unlock(&sysfs_assoc_lock); - /* dentries for shadowed inodes are pinned, unpin */ - if (dentry && sysfs_is_shadowed_inode(dentry->d_inode)) + /* dentries for shadowed directories are pinned, unpin */ + if 
((sysfs_type(sd) == SYSFS_SHADOW_DIR) || + (sd->s_flags & SYSFS_FLAG_SHADOWED)) dput(dentry); dput(dentry); @@ -624,6 +648,7 @@ acxt->removed = sd->s_sibling; sd->s_sibling = NULL; + sysfs_prune_shadow_sd(sd->s_parent); sysfs_drop_dentry(sd); sysfs_deactivate(sd); sysfs_put(sd); @@ -689,6 +714,7 @@ umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; + int err; /* allocate */ sd = sysfs_new_dirent(name, mode, SYSFS_DIR); @@ -698,17 +724,24 @@ /* link in */ sysfs_addrm_start(&acxt, parent_sd); - if (!sysfs_find_dirent(parent_sd, name)) { + err = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; + + err = -EEXIST; + if (!sysfs_find_dirent(acxt.parent_sd, name)) { sysfs_add_one(&acxt, sd); sysfs_link_sibling(sd); + err = 0; } +addrm_finish: if (sysfs_addrm_finish(&acxt)) { *p_sd = sd; return 0; } sysfs_put(sd); - return -EEXIST; + return err; } int sysfs_create_subdir(struct kobject *kobj, const char *name, @@ -720,19 +753,15 @@ /** * sysfs_create_dir - create a directory for an object. * @kobj: object we're creating directory for. - * @shadow_parent: parent object. 
*/ -int sysfs_create_dir(struct kobject *kobj, - struct sysfs_dirent *shadow_parent_sd) +int sysfs_create_dir(struct kobject * kobj) { struct sysfs_dirent *parent_sd, *sd; int error = 0; BUG_ON(!kobj); - if (shadow_parent_sd) - parent_sd = shadow_parent_sd; - else if (kobj->parent) + if (kobj->parent) parent_sd = kobj->parent->sd; else if (sysfs_mount && sysfs_mount->mnt_sb) parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; @@ -817,18 +846,56 @@ return NULL; } +static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct sysfs_dirent *sd; + struct dentry *dest; + + sd = dentry->d_fsdata; + dest = NULL; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const struct shadow_dir_operations *shadow_ops; + const void *tag; + + mutex_lock(&sysfs_mutex); + + shadow_ops = dentry->d_inode->i_private; + tag = shadow_ops->current_tag(); + + sd = find_shadow_sd(sd, tag); + if (sd) + dest = sd->s_dentry; + dget(dest); + + mutex_unlock(&sysfs_mutex); + } + if (!dest) + dest = dget(dentry); + dput(nd->dentry); + nd->dentry = dest; + + return NULL; +} + + const struct inode_operations sysfs_dir_inode_operations = { .lookup = sysfs_lookup, .setattr = sysfs_setattr, + .follow_link = sysfs_shadow_follow_link, }; +static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +{ + sysfs_unlink_sibling(sd); + sysfs_remove_one(acxt, sd); +} + static void remove_dir(struct sysfs_dirent *sd) { struct sysfs_addrm_cxt acxt; sysfs_addrm_start(&acxt, sd->s_parent); - sysfs_unlink_sibling(sd); - sysfs_remove_one(&acxt, sd); + __remove_dir(&acxt, sd); sysfs_addrm_finish(&acxt); } @@ -837,17 +904,11 @@ remove_dir(sd); } - -static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) +static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *dir_sd) { - struct sysfs_addrm_cxt acxt; struct sysfs_dirent **pos; - if (!dir_sd) - return; - - pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); - sysfs_addrm_start(&acxt, dir_sd); pos 
= &dir_sd->s_children; while (*pos) { struct sysfs_dirent *sd = *pos; @@ -855,10 +916,39 @@ if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { *pos = sd->s_sibling; sd->s_sibling = NULL; - sysfs_remove_one(&acxt, sd); + sysfs_remove_one(acxt, sd); } else pos = &(*pos)->s_sibling; } +} + +static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt, + struct sysfs_dirent *dir_sd) +{ + struct sysfs_dirent **pos; + + pos = &dir_sd->s_children; + while (*pos) { + struct sysfs_dirent *sd = *pos; + + sysfs_empty_dir(acxt, sd); + __remove_dir(acxt, sd); + } +} + +static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) +{ + struct sysfs_addrm_cxt acxt; + + if (!dir_sd) + return; + + pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); + sysfs_addrm_start(&acxt, dir_sd); + if (sysfs_type(dir_sd) == SYSFS_DIR) + sysfs_empty_dir(&acxt, dir_sd); + else + sysfs_remove_shadows(&acxt, dir_sd); sysfs_addrm_finish(&acxt); remove_dir(dir_sd); @@ -884,89 +974,77 @@ __sysfs_remove_dir(sd); } -int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, - const char *new_name) +int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { - struct sysfs_dirent *sd = kobj->sd; - struct dentry *new_parent = NULL; - struct dentry *old_dentry = NULL, *new_dentry = NULL; - const char *dup_name = NULL; + struct dentry *old_dentry, *new_dentry, *parent; + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + const char *dup_name; int error; - /* get dentries */ - old_dentry = sysfs_get_dentry(sd); - if (IS_ERR(old_dentry)) { - error = PTR_ERR(old_dentry); - goto out_dput; - } + dup_name = NULL; + new_dentry = NULL; - new_parent = sysfs_get_dentry(new_parent_sd); - if (IS_ERR(new_parent)) { - error = PTR_ERR(new_parent); - goto out_dput; - } + sd = kobj->sd; + sysfs_addrm_start(&acxt, sd->s_parent); + error = -ENOENT; + if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) + goto addrm_finish; - /* lock new_parent and get dentry for new name */ - 
mutex_lock(&new_parent->d_inode->i_mutex); + error = -EEXIST; + if (sysfs_find_dirent(acxt.parent_sd, new_name)) + goto addrm_finish; - new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); + error = -EINVAL; + if ((sd->s_parent == acxt.parent_sd) && + (strcmp(new_name, sd->s_name) == 0)) + goto addrm_finish; + + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + if (old_dentry) { + old_dentry = sd->s_dentry; + parent = acxt.parent_sd->s_dentry; + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (IS_ERR(new_dentry)) { error = PTR_ERR(new_dentry); - goto out_unlock; + goto addrm_finish; } - /* By allowing two different directories with the same - * d_parent we allow this routine to move between different - * shadows of the same directory - */ error = -EINVAL; - if (old_dentry->d_parent->d_inode != new_parent->d_inode || - new_dentry->d_parent->d_inode != new_parent->d_inode || - old_dentry == new_dentry) - goto out_unlock; - - error = -EEXIST; - if (new_dentry->d_inode) - goto out_unlock; + if (old_dentry == new_dentry) + goto addrm_finish; + } /* rename kobject and sysfs_dirent */ error = -ENOMEM; new_name = dup_name = kstrdup(new_name, GFP_KERNEL); if (!new_name) - goto out_drop; + goto addrm_finish; error = kobject_set_name(kobj, "%s", new_name); if (error) - goto out_drop; + goto addrm_finish; dup_name = sd->s_name; sd->s_name = new_name; /* move under the new parent */ - d_add(new_dentry, NULL); - d_move(sd->s_dentry, new_dentry); - - mutex_lock(&sysfs_mutex); - sysfs_unlink_sibling(sd); - sysfs_get(new_parent_sd); + sysfs_get(acxt.parent_sd); sysfs_put(sd->s_parent); - sd->s_parent = new_parent_sd; + sd->s_parent = acxt.parent_sd; sysfs_link_sibling(sd); - mutex_unlock(&sysfs_mutex); - + if (new_dentry) { + d_add(new_dentry, NULL); + d_move(old_dentry, new_dentry); + } error = 0; - goto out_unlock; +addrm_finish: + sysfs_addrm_finish(&acxt); - out_drop: - d_drop(new_dentry); - out_unlock: - 
mutex_unlock(&new_parent->d_inode->i_mutex); - out_dput: kfree(dup_name); - dput(new_parent); - dput(old_dentry); dput(new_dentry); return error; } @@ -1103,8 +1181,11 @@ i++; /* fallthrough */ default: - mutex_lock(&sysfs_mutex); + /* If I am the shadow master return nothing. */ + if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED) + return 0; + mutex_lock(&sysfs_mutex); pos = &parent_sd->s_children; while (*pos != cursor) pos = &(*pos)->s_sibling; @@ -1186,125 +1267,192 @@ return offset; } +const struct file_operations sysfs_dir_operations = { + .open = sysfs_dir_open, + .release = sysfs_dir_close, + .llseek = sysfs_dir_lseek, + .read = generic_read_dir, + .readdir = sysfs_readdir, +}; -/** - * sysfs_make_shadowed_dir - Setup so a directory can be shadowed - * @kobj: object we're creating shadow of. - */ -int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry *, struct nameidata *)) +static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd) { - struct dentry *dentry; - struct inode *inode; - struct inode_operations *i_op; + struct sysfs_addrm_cxt acxt; - /* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */ - dentry = sysfs_get_dentry(kobj->sd); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + /* If a shadow directory goes empty remove it. */ + if (sysfs_type(sd) != SYSFS_SHADOW_DIR) + return; - inode = dentry->d_inode; - if (inode->i_op != &sysfs_dir_inode_operations) { - dput(dentry); - return -EINVAL; - } + if (sd->s_children) + return; - i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); - if (!i_op) - return -ENOMEM; + sysfs_addrm_start(&acxt, sd->s_parent); - memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op)); - i_op->follow_link = follow_link; + if (sd->s_flags & SYSFS_FLAG_REMOVED) + goto addrm_finish; - /* Locking of inode->i_op? - * Since setting i_op is a single word write and they - * are atomic we should be ok here. 
- */ - inode->i_op = i_op; - return 0; -} + if (sd->s_children) + goto addrm_finish; -/** - * sysfs_create_shadow_dir - create a shadow directory for an object. - * @kobj: object we're creating directory for. - * - * sysfs_make_shadowed_dir must already have been called on this - * directory. - */ + __remove_dir(&acxt, sd); +addrm_finish: + sysfs_addrm_finish(&acxt); +} -struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj) +static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag) { - struct sysfs_dirent *parent_sd = kobj->sd->s_parent; - struct dentry *dir, *parent, *shadow; + struct sysfs_dirent *sd = NULL; + struct dentry *dir, *shadow; struct inode *inode; - struct sysfs_dirent *sd; - struct sysfs_addrm_cxt acxt; - - dir = sysfs_get_dentry(kobj->sd); - if (IS_ERR(dir)) { - sd = (void *)dir; - goto out; - } - parent = dir->d_parent; + dir = parent_sd->s_dentry; inode = dir->d_inode; - sd = ERR_PTR(-EINVAL); - if (!sysfs_is_shadowed_inode(inode)) - goto out_dput; - shadow = d_alloc(parent, &dir->d_name); + shadow = d_alloc(dir->d_parent, &dir->d_name); if (!shadow) - goto nomem; + goto out; - sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR); + /* Since the shadow directory is reachable make it look + * like it is actually hashed. 
+ */ + shadow->d_hash.pprev = &shadow->d_hash.next; + shadow->d_hash.next = NULL; + shadow->d_flags &= ~DCACHE_UNHASHED; + + sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR); if (!sd) - goto nomem; - sd->s_elem.dir.kobj = kobj; + goto error; - sysfs_addrm_start(&acxt, parent_sd); + sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj; + sd->s_parent = sysfs_get(parent_sd); - /* add but don't link into children list */ - sysfs_add_one(&acxt, sd); + /* Use the inode number of the parent we are shadowing */ + sysfs_free_ino(sd->s_ino); + sd->s_ino = parent_sd->s_ino; + + inc_nlink(inode); + inc_nlink(dir->d_parent->d_inode); - /* attach and instantiate dentry */ + sysfs_link_sibling(sd); + __iget(inode); + sysfs_instantiate(shadow, inode); sysfs_attach_dentry(sd, shadow); - d_instantiate(shadow, igrab(inode)); - inc_nlink(inode); /* tj: synchronization? */ +out: + return sd; +error: + dput(shadow); + goto out; +} - sysfs_addrm_finish(&acxt); +int sysfs_resolve_for_create(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + const struct shadow_dir_operations *shadow_ops; + struct sysfs_dirent *sd, *shadow_sd; + + sd = *parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; + + shadow_ops = sd->s_dentry->d_inode->i_private; + tag = shadow_ops->kobject_tag(kobj); + + shadow_sd = find_shadow_sd(sd, tag); + if (!shadow_sd) + shadow_sd = add_shadow_sd(sd, tag); + sd = shadow_sd; + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; +} - dget(shadow); /* Extra count - pin the dentry in core */ +int sysfs_resolve_for_remove(struct kobject *kobj, + struct sysfs_dirent **parent_sd) +{ + struct sysfs_dirent *sd; + /* If dentry is a shadow directory find the shadow that is + * stored under the same tag as kobj. 
This allows removal + * of dirents to function properly even if the value of + * kobject_tag() has changed since we initially created + * the dirents associated with kobj. + */ - goto out_dput; + sd = *parent_sd; + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + sd = sd->s_parent; + if (sd->s_flags & SYSFS_FLAG_SHADOWED) { + const void *tag; - nomem: - dput(shadow); - sd = ERR_PTR(-ENOMEM); - out_dput: - dput(dir); - out: - return sd; + tag = find_shadow_tag(kobj); + sd = find_shadow_sd(sd, tag); + } + if (sd) { + *parent_sd = sd; + return 1; + } + return 0; } /** - * sysfs_remove_shadow_dir - remove an object's directory. - * @shadow_sd: sysfs_dirent of shadow directory + * sysfs_enable_shadowing - Automatically create shadows of a directory + * @kobj: object to automatically shadow * - * The only thing special about this is that we remove any files in - * the directory before we remove the directory, and we've inlined - * what used to be sysfs_rmdir() below, instead of calling separately. + * Once shadowing has been enabled on a directory the contents + * of the directory become dependent upon context. + * + * shadow_ops->current_tag() returns the context for the current + * process. + * + * shadow_ops->kobject_tag() returns the context that a given kobj + * resides in. + * + * Using those methods the sysfs code on shadowed directories + * carefully stores the files so that when we lookup files + * we get the proper answer for our context. + * + * If the context of a kobject is changed it is expected that + * the kobject will be renamed so the appropriate sysfs data structures + * can be updated. */ - -void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd) +int sysfs_enable_shadowing(struct kobject *kobj, + const struct shadow_dir_operations *shadow_ops) { - __sysfs_remove_dir(shadow_sd); + struct sysfs_dirent *sd; + struct dentry *dentry; + int err; + + /* Find the dentry for the shadowed directory and + * increase its count.
+ */ + err = -ENOENT; + sd = kobj->sd; + dentry = sysfs_get_dentry(sd); + if (!dentry) + goto out; + + mutex_lock(&sysfs_mutex); + err = -EINVAL; + /* We can only enable shadowing on empty directories + * where shadowing is not already enabled. + */ + if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) && + !(sd->s_flags & SYSFS_FLAG_REMOVED) && + !(sd->s_flags & SYSFS_FLAG_SHADOWED)) { + sd->s_flags |= SYSFS_FLAG_SHADOWED; + dentry->d_inode->i_private = (void *)shadow_ops; + err = 0; + } + mutex_unlock(&sysfs_mutex); +out: + if (err) + dput(dentry); + return err; } -const struct file_operations sysfs_dir_operations = { - .open = sysfs_dir_open, - .release = sysfs_dir_close, - .llseek = sysfs_dir_lseek, - .read = generic_read_dir, - .readdir = sysfs_readdir, -}; diff -Nurb linux-2.6.22-try2/fs/sysfs/file.c linux-2.6.22-try2-netns/fs/sysfs/file.c --- linux-2.6.22-try2/fs/sysfs/file.c 2007-12-19 15:46:06.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/file.c 2007-12-19 22:49:13.000000000 -0500 @@ -556,7 +556,7 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) { - sysfs_hash_and_remove(kobj->sd, attr->name); + sysfs_hash_and_remove(kobj, kobj->sd, attr->name); } @@ -573,7 +573,7 @@ dir_sd = sysfs_get_dirent(kobj->sd, group); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, attr->name); + sysfs_hash_and_remove(kobj, dir_sd, attr->name); sysfs_put(dir_sd); } } diff -Nurb linux-2.6.22-try2/fs/sysfs/group.c linux-2.6.22-try2-netns/fs/sysfs/group.c --- linux-2.6.22-try2/fs/sysfs/group.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/group.c 2007-12-19 22:49:13.000000000 -0500 @@ -13,21 +13,20 @@ #include #include #include -#include #include #include "sysfs.h" -static void remove_files(struct sysfs_dirent *dir_sd, +static void remove_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, const struct attribute_group *grp) { struct attribute *const* attr; for (attr = grp->attrs; *attr; attr++) - 
sysfs_hash_and_remove(dir_sd, (*attr)->name); + sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name); } -static int create_files(struct sysfs_dirent *dir_sd, +static int create_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, const struct attribute_group *grp) { struct attribute *const* attr; @@ -36,7 +35,7 @@ for (attr = grp->attrs; *attr && !error; attr++) error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) - remove_files(dir_sd, grp); + remove_files(kobj, dir_sd, grp); return error; } @@ -56,7 +55,7 @@ } else sd = kobj->sd; sysfs_get(sd); - error = create_files(sd, grp); + error = create_files(kobj, sd, grp); if (error) { if (grp->name) sysfs_remove_subdir(sd); @@ -77,7 +76,7 @@ } else sd = sysfs_get(dir_sd); - remove_files(sd, grp); + remove_files(kobj, sd, grp); if (grp->name) sysfs_remove_subdir(sd); diff -Nurb linux-2.6.22-try2/fs/sysfs/inode.c linux-2.6.22-try2-netns/fs/sysfs/inode.c --- linux-2.6.22-try2/fs/sysfs/inode.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/inode.c 2007-12-19 22:49:13.000000000 -0500 @@ -34,16 +34,6 @@ .setattr = sysfs_setattr, }; -void sysfs_delete_inode(struct inode *inode) -{ - /* Free the shadowed directory inode operations */ - if (sysfs_is_shadowed_inode(inode)) { - kfree(inode->i_op); - inode->i_op = NULL; - } - return generic_delete_inode(inode); -} - int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) { struct inode * inode = dentry->d_inode; @@ -197,17 +187,16 @@ d_instantiate(dentry, inode); } -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) +int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name) { struct sysfs_addrm_cxt acxt; struct sysfs_dirent **pos, *sd; - if (!dir_sd) - return -ENOENT; - sysfs_addrm_start(&acxt, dir_sd); + if (!sysfs_resolve_for_remove(kobj, &acxt.parent_sd)) + goto addrm_finish; - for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) { + for (pos = 
&acxt.parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) { sd = *pos; if (!sysfs_type(sd)) @@ -219,7 +208,7 @@ break; } } - +addrm_finish: if (sysfs_addrm_finish(&acxt)) return 0; return -ENOENT; diff -Nurb linux-2.6.22-try2/fs/sysfs/mount.c linux-2.6.22-try2-netns/fs/sysfs/mount.c --- linux-2.6.22-try2/fs/sysfs/mount.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/mount.c 2007-12-19 22:49:13.000000000 -0500 @@ -19,7 +19,7 @@ static const struct super_operations sysfs_ops = { .statfs = simple_statfs, - .drop_inode = sysfs_delete_inode, + .drop_inode = generic_delete_inode, }; struct sysfs_dirent sysfs_root = { diff -Nurb linux-2.6.22-try2/fs/sysfs/symlink.c linux-2.6.22-try2-netns/fs/sysfs/symlink.c --- linux-2.6.22-try2/fs/sysfs/symlink.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/symlink.c 2007-12-19 22:49:13.000000000 -0500 @@ -15,8 +15,11 @@ { int depth = 0; - for (; sd->s_parent; sd = sd->s_parent) + for (; sd->s_parent; sd = sd->s_parent) { + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; depth++; + } return depth; } @@ -25,17 +28,24 @@ { int length = 1; - for (; sd->s_parent; sd = sd->s_parent) + for (; sd->s_parent; sd = sd->s_parent) { + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; length += strlen(sd->s_name) + 1; + } return length; } static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) { + int cur; --length; for (; sd->s_parent; sd = sd->s_parent) { - int cur = strlen(sd->s_name); + if (sysfs_type(sd) == SYSFS_SHADOW_DIR) + continue; + + cur = strlen(sd->s_name); /* back up enough to print this bus id with '/' */ length -= cur; @@ -89,12 +99,15 @@ sd->s_elem.symlink.target_sd = target_sd; sysfs_addrm_start(&acxt, parent_sd); + if (!sysfs_resolve_for_create(target, &acxt.parent_sd)) + goto addrm_finish; - if (!sysfs_find_dirent(parent_sd, name)) { + if (!sysfs_find_dirent(acxt.parent_sd, name)) { sysfs_add_one(&acxt, sd); sysfs_link_sibling(sd); } 
+addrm_finish: if (sysfs_addrm_finish(&acxt)) return 0; @@ -108,6 +121,21 @@ /** + * sysfs_delete_link - remove symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. + * @name: name of the symlink to remove. + * + * Unlike sysfs_remove_link sysfs_delete_link has enough information + * to successfully delete symlinks in shadow directories. + */ +void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, + const char *name) +{ + sysfs_hash_and_remove(targ, kobj->sd, name); +} + +/** * sysfs_remove_link - remove symlink in object's directory. * @kobj: object we're acting for. * @name: name of the symlink to remove. @@ -115,7 +143,23 @@ void sysfs_remove_link(struct kobject * kobj, const char * name) { - sysfs_hash_and_remove(kobj->sd, name); + sysfs_hash_and_remove(kobj, kobj->sd, name); +} + +/** + * sysfs_rename_link - rename symlink in object's directory. + * @kobj: object we're acting for. + * @targ: object we're pointing to. + * @old: previous name of the symlink. + * @new: new name of the symlink. + * + * A helper function for the common rename symlink idiom. 
+ */ +int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new) +{ + sysfs_delete_link(kobj, targ, old); + return sysfs_create_link(kobj, targ, new); } static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, diff -Nurb linux-2.6.22-try2/fs/sysfs/sysfs.h linux-2.6.22-try2-netns/fs/sysfs/sysfs.h --- linux-2.6.22-try2/fs/sysfs/sysfs.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/fs/sysfs/sysfs.h 2007-12-19 22:49:13.000000000 -0500 @@ -58,6 +58,12 @@ extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); extern void sysfs_link_sibling(struct sysfs_dirent *sd); extern void sysfs_unlink_sibling(struct sysfs_dirent *sd); + +extern int sysfs_resolve_for_create(struct kobject *kobj, + struct sysfs_dirent **parent_sd); +extern int sysfs_resolve_for_remove(struct kobject *kobj, + struct sysfs_dirent **parent_sd); + extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); extern void sysfs_put_active(struct sysfs_dirent *sd); extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); @@ -70,7 +76,6 @@ struct sysfs_dirent *sd); extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); -extern void sysfs_delete_inode(struct inode *inode); extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode); extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd); extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode); @@ -85,7 +90,8 @@ extern int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type); -extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); +extern int sysfs_hash_and_remove(struct kobject *kobj, + struct sysfs_dirent *dir_sd, const char *name); extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); extern int sysfs_create_subdir(struct kobject *kobj, const char *name, @@ -122,8 +128,3 @@ if (sd && atomic_dec_and_test(&sd->s_count)) 
release_sysfs_dirent(sd); } - -static inline int sysfs_is_shadowed_inode(struct inode *inode) -{ - return S_ISDIR(inode->i_mode) && inode->i_op->follow_link; -} diff -Nurb linux-2.6.22-try2/include/linux/device.h linux-2.6.22-try2-netns/include/linux/device.h --- linux-2.6.22-try2/include/linux/device.h 2007-12-19 15:29:22.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/device.h 2007-12-19 22:49:13.000000000 -0500 @@ -200,6 +200,8 @@ int (*suspend)(struct device *, pm_message_t state); int (*resume)(struct device *); + + const struct shadow_dir_operations *shadow_ops; }; extern int __must_check class_register(struct class *); diff -Nurb linux-2.6.22-try2/include/linux/if_bridge.h linux-2.6.22-try2-netns/include/linux/if_bridge.h --- linux-2.6.22-try2/include/linux/if_bridge.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/if_bridge.h 2007-12-19 22:49:13.000000000 -0500 @@ -104,7 +104,7 @@ #include -extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *)); +extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb); extern int (*br_should_route_hook)(struct sk_buff **pskb); diff -Nurb linux-2.6.22-try2/include/linux/if_pppox.h linux-2.6.22-try2-netns/include/linux/if_pppox.h --- linux-2.6.22-try2/include/linux/if_pppox.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/if_pppox.h 2007-12-19 22:49:13.000000000 -0500 @@ -160,7 +160,7 @@ struct module; struct pppox_proto { - int (*create)(struct socket *sock); + int (*create)(struct net *net, struct socket *sock); int (*ioctl)(struct socket *sock, unsigned int cmd, unsigned long arg); struct module *owner; diff -Nurb linux-2.6.22-try2/include/linux/if_vlan.h linux-2.6.22-try2-netns/include/linux/if_vlan.h --- linux-2.6.22-try2/include/linux/if_vlan.h 2007-12-19 15:29:23.000000000 -0500 +++ 
linux-2.6.22-try2-netns/include/linux/if_vlan.h 2007-12-19 22:49:13.000000000 -0500 @@ -62,7 +62,7 @@ #define VLAN_VID_MASK 0xfff /* found in socket.c */ -extern void vlan_ioctl_set(int (*hook)(void __user *)); +extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); #define VLAN_NAME "vlan" diff -Nurb linux-2.6.22-try2/include/linux/inetdevice.h linux-2.6.22-try2-netns/include/linux/inetdevice.h --- linux-2.6.22-try2/include/linux/inetdevice.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/inetdevice.h 2007-12-19 22:49:13.000000000 -0500 @@ -17,8 +17,6 @@ DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); }; -extern struct ipv4_devconf ipv4_devconf; - struct in_device { struct net_device *dev; @@ -44,7 +42,7 @@ }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1]) -#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr) +#define IPV4_DEVCONF_ALL(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf), attr) static inline int ipv4_devconf_get(struct in_device *in_dev, int index) { @@ -71,14 +69,14 @@ ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr)) + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) && IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr)) + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) || IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr))) + (max(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr), IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) -#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL(MC_FORWARDING) && \ +#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, MC_FORWARDING) && \ IPV4_DEVCONF((in_dev)->cnf, \ MC_FORWARDING)) #define 
IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) @@ -127,15 +125,15 @@ extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); -extern struct net_device *ip_dev_find(__be32 addr); +extern struct net_device *ip_dev_find(struct net *net, __be32 addr); extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); -extern int devinet_ioctl(unsigned int cmd, void __user *); +extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); extern void devinet_init(void); -extern struct in_device *inetdev_by_index(int); +extern struct in_device *inetdev_by_index(struct net *, int); extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); -extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope); +extern __be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope); extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); -extern void inet_forward_change(void); +extern void inet_forward_change(struct net *net); static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { diff -Nurb linux-2.6.22-try2/include/linux/init_task.h linux-2.6.22-try2-netns/include/linux/init_task.h --- linux-2.6.22-try2/include/linux/init_task.h 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/init_task.h 2007-12-19 22:49:13.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #define INIT_FDTABLE \ { \ @@ -78,6 +79,7 @@ .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ + .net_ns = &init_net, \ INIT_IPC_NS(ipc_ns) \ .user_ns = &init_user_ns, \ } diff -Nurb linux-2.6.22-try2/include/linux/kobject.h linux-2.6.22-try2-netns/include/linux/kobject.h --- linux-2.6.22-try2/include/linux/kobject.h 2007-12-19 15:29:23.000000000 -0500 +++ 
linux-2.6.22-try2-netns/include/linux/kobject.h 2007-12-19 22:49:13.000000000 -0500 @@ -71,14 +71,9 @@ extern void kobject_cleanup(struct kobject *); extern int __must_check kobject_add(struct kobject *); -extern int __must_check kobject_shadow_add(struct kobject *kobj, - struct sysfs_dirent *shadow_parent); extern void kobject_del(struct kobject *); extern int __must_check kobject_rename(struct kobject *, const char *new_name); -extern int __must_check kobject_shadow_rename(struct kobject *kobj, - struct sysfs_dirent *new_parent, - const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern int __must_check kobject_register(struct kobject *); diff -Nurb linux-2.6.22-try2/include/linux/net.h linux-2.6.22-try2-netns/include/linux/net.h --- linux-2.6.22-try2/include/linux/net.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/net.h 2007-12-19 22:49:13.000000000 -0500 @@ -23,6 +23,7 @@ struct poll_table_struct; struct inode; +struct net; #define NPROTO 34 /* should be enough for now.. */ @@ -170,7 +171,7 @@ struct net_proto_family { int family; - int (*create)(struct socket *sock, int protocol); + int (*create)(struct net *net, struct socket *sock, int protocol); struct module *owner; }; diff -Nurb linux-2.6.22-try2/include/linux/netdevice.h linux-2.6.22-try2-netns/include/linux/netdevice.h --- linux-2.6.22-try2/include/linux/netdevice.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netdevice.h 2007-12-19 22:49:13.000000000 -0500 @@ -39,6 +39,7 @@ #include #include +struct net; struct vlan_group; struct ethtool_ops; struct netpoll_info; @@ -326,6 +327,7 @@ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. 
*/ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -537,6 +539,9 @@ void (*poll_controller)(struct net_device *dev); #endif + /* Network namespace this network device is inside */ + struct net *nd_net; + /* bridge stuff */ struct net_bridge_port *br_port; @@ -583,45 +588,48 @@ #include #include -extern struct net_device loopback_dev; /* The loopback */ -extern struct list_head dev_base_head; /* All devices */ extern rwlock_t dev_base_lock; /* Device list lock */ -#define for_each_netdev(d) \ - list_for_each_entry(d, &dev_base_head, dev_list) -#define for_each_netdev_safe(d, n) \ - list_for_each_entry_safe(d, n, &dev_base_head, dev_list) -#define for_each_netdev_continue(d) \ - list_for_each_entry_continue(d, &dev_base_head, dev_list) -#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) - -static inline struct net_device *next_net_device(struct net_device *dev) -{ - struct list_head *lh; - lh = dev->dev_list.next; - return lh == &dev_base_head ? NULL : net_device_entry(lh); -} +#define for_each_netdev(net, d) \ + list_for_each_entry(d, &(net)->dev_base_head, dev_list) +#define for_each_netdev_safe(net, d, n) \ + list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) +#define for_each_netdev_continue(net, d) \ + list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) +#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) -static inline struct net_device *first_net_device(void) -{ - return list_empty(&dev_base_head) ? NULL : - net_device_entry(dev_base_head.next); -} +#define next_net_device(d) \ +({ \ + struct net_device *__dev = (d); /* evaluate d once; __ names cannot capture a caller's 'dev' */ \ + struct list_head *__lh; \ + struct net *__net; \ + \ + __net = __dev->nd_net; \ + __lh = __dev->dev_list.next; \ + __lh == &__net->dev_base_head ?
NULL : net_device_entry(__lh); \ +}) + +#define first_net_device(N) \ +({ \ + struct net *__net = (N); \ + list_empty(&__net->dev_base_head) ? NULL : \ + net_device_entry(__net->dev_base_head.next); \ +}) extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); -extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); -extern struct net_device *dev_getfirstbyhwtype(unsigned short type); -extern struct net_device *__dev_getfirstbyhwtype(unsigned short type); +extern struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr); +extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); +extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); extern void dev_add_pack(struct packet_type *pt); extern void dev_remove_pack(struct packet_type *pt); extern void __dev_remove_pack(struct packet_type *pt); -extern struct net_device *dev_get_by_flags(unsigned short flags, +extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); -extern struct net_device *dev_get_by_name(const char *name); -extern struct net_device *__dev_get_by_name(const char *name); +extern struct net_device *dev_get_by_name(struct net *net, const char *name); +extern struct net_device *__dev_get_by_name(struct net *net, const char *name); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); extern int dev_close(struct net_device *dev); @@ -632,9 +640,9 @@ extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); -extern int call_netdevice_notifiers(unsigned long val, void *v); -extern struct net_device *dev_get_by_index(int ifindex); -extern struct net_device *__dev_get_by_index(int ifindex); +extern int
call_netdevice_notifiers(unsigned long val, struct net_device *dev); +extern struct net_device *dev_get_by_index(struct net *net, int ifindex); +extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); extern int dev_restart(struct net_device *dev); #ifdef CONFIG_NETPOLL_TRAP extern int netpoll_trap(void); @@ -739,11 +747,13 @@ #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); extern int dev_valid_name(const char *name); -extern int dev_ioctl(unsigned int cmd, void __user *); -extern int dev_ethtool(struct ifreq *); +extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); +extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); extern int dev_change_flags(struct net_device *, unsigned); extern int dev_change_name(struct net_device *, char *); +extern int dev_change_net_namespace(struct net_device *, + struct net *, const char *); extern int dev_set_mtu(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); @@ -1013,7 +1023,7 @@ extern void netdev_state_change(struct net_device *dev); extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ -extern void dev_load(const char *name); +extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; diff -Nurb linux-2.6.22-try2/include/linux/netfilter/x_tables.h linux-2.6.22-try2-netns/include/linux/netfilter/x_tables.h --- linux-2.6.22-try2/include/linux/netfilter/x_tables.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netfilter/x_tables.h 2007-12-19 22:49:13.000000000 -0500 @@ -289,7 +289,7 @@ unsigned int size, const char *table, unsigned int hook, unsigned short proto, int inv_proto); -extern int xt_register_table(struct xt_table *table, +extern int xt_register_table(struct net *net, struct xt_table 
*table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); @@ -306,7 +306,7 @@ extern int xt_find_revision(int af, const char *name, u8 revision, int target, int *err); -extern struct xt_table *xt_find_table_lock(int af, const char *name); +extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name); extern void xt_table_unlock(struct xt_table *t); extern int xt_proto_init(int af); diff -Nurb linux-2.6.22-try2/include/linux/netfilter.h linux-2.6.22-try2-netns/include/linux/netfilter.h --- linux-2.6.22-try2/include/linux/netfilter.h 2007-12-19 13:37:51.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netfilter.h 2007-12-19 22:49:13.000000000 -0500 @@ -362,11 +362,6 @@ #endif } -#ifdef CONFIG_PROC_FS -#include -extern struct proc_dir_entry *proc_net_netfilter; -#endif - #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) diff -Nurb linux-2.6.22-try2/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.22-try2-netns/include/linux/netfilter_ipv4/ip_tables.h --- linux-2.6.22-try2/include/linux/netfilter_ipv4/ip_tables.h 2007-12-19 13:37:52.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netfilter_ipv4/ip_tables.h 2007-12-19 22:49:13.000000000 -0500 @@ -292,7 +292,7 @@ #include extern void ipt_init(void) __init; -extern int ipt_register_table(struct xt_table *table, +extern int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); diff -Nurb linux-2.6.22-try2/include/linux/netfilter_ipv4.h linux-2.6.22-try2-netns/include/linux/netfilter_ipv4.h --- linux-2.6.22-try2/include/linux/netfilter_ipv4.h 2007-12-19 13:37:52.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netfilter_ipv4.h 2007-12-19 22:49:13.000000000 -0500 @@ -75,7 +75,7 @@ 
#define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); +extern int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type); extern int ip_xfrm_me_harder(struct sk_buff **pskb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff -Nurb linux-2.6.22-try2/include/linux/netlink.h linux-2.6.22-try2-netns/include/linux/netlink.h --- linux-2.6.22-try2/include/linux/netlink.h 2007-12-19 15:29:22.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/netlink.h 2007-12-19 22:49:13.000000000 -0500 @@ -27,6 +27,8 @@ #define MAX_LINKS 32 +struct net; + struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ @@ -157,7 +159,8 @@ #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, unsigned int groups, +extern struct sock *netlink_kernel_create(struct net *net, + int unit,unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module); @@ -204,6 +207,7 @@ struct netlink_notify { + struct net *net; int pid; int protocol; }; diff -Nurb linux-2.6.22-try2/include/linux/nsproxy.h linux-2.6.22-try2-netns/include/linux/nsproxy.h --- linux-2.6.22-try2/include/linux/nsproxy.h 2007-12-19 15:50:41.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/nsproxy.h 2007-12-19 22:57:59.000000000 -0500 @@ -36,6 +36,7 @@ struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; struct user_namespace *user_ns; + struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff -Nurb linux-2.6.22-try2/include/linux/proc_fs.h linux-2.6.22-try2-netns/include/linux/proc_fs.h --- linux-2.6.22-try2/include/linux/proc_fs.h 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/proc_fs.h 2007-12-19 22:49:13.000000000 -0500 @@ -86,8 +86,6 @@ extern struct proc_dir_entry proc_root; extern struct proc_dir_entry *proc_root_fs; 
-extern struct proc_dir_entry *proc_net; -extern struct proc_dir_entry *proc_net_stat; extern struct proc_dir_entry *proc_bus; extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; @@ -112,6 +110,10 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); +static inline void remove_proc_pde(struct proc_dir_entry *pde) +{ + remove_proc_entry(pde->name, pde->parent); +} extern struct vfsmount *proc_mnt; extern int proc_fill_super(struct super_block *,void *,int); @@ -181,42 +183,18 @@ return res; } -static inline struct proc_dir_entry *proc_net_create(const char *name, - mode_t mode, get_info_t *get_info) -{ - return create_proc_info_entry(name,mode,proc_net,get_info); -} - -static inline struct proc_dir_entry *proc_net_fops_create(const char *name, - mode_t mode, const struct file_operations *fops) -{ - struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); - if (res) - res->proc_fops = fops; - return res; -} - -static inline void proc_net_remove(const char *name) -{ - remove_proc_entry(name,proc_net); -} - #else #define proc_root_driver NULL -#define proc_net NULL #define proc_bus NULL -#define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) -#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) -static inline void proc_net_remove(const char *name) {} - static inline void proc_flush_task(struct task_struct *task) { } static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } #define remove_proc_entry(name, parent) do {} while (0) +#define remove_proc_pde(PDE) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} diff -Nurb linux-2.6.22-try2/include/linux/rtnetlink.h
linux-2.6.22-try2-netns/include/linux/rtnetlink.h --- linux-2.6.22-try2/include/linux/rtnetlink.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/rtnetlink.h 2007-12-19 22:49:13.000000000 -0500 @@ -580,11 +580,11 @@ ({ data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \ __rtattr_parse_nested_compat(tb, max, rta, len); }) -extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); -extern int rtnl_unicast(struct sk_buff *skb, u32 pid); -extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); +extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -extern void rtnl_set_sk_err(u32 group, int error); +extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, u32 ts, u32 tsage, long expires, diff -Nurb linux-2.6.22-try2/include/linux/sched.h linux-2.6.22-try2-netns/include/linux/sched.h --- linux-2.6.22-try2/include/linux/sched.h 2007-12-19 15:50:06.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/sched.h 2007-12-19 22:58:22.000000000 -0500 @@ -28,6 +28,7 @@ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ #define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ +#define CLONE_NEWNET 0x40000000 /* New network namespace */ /* * Scheduling policies diff -Nurb linux-2.6.22-try2/include/linux/socket.h linux-2.6.22-try2-netns/include/linux/socket.h --- linux-2.6.22-try2/include/linux/socket.h 2007-12-19 13:37:52.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/socket.h 2007-12-19 22:49:13.000000000 -0500 @@ -24,7 +24,6 @@ #include /* pid_t */ #include 
/* __user */ -extern int sysctl_somaxconn; #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff -Nurb linux-2.6.22-try2/include/linux/sysctl.h linux-2.6.22-try2-netns/include/linux/sysctl.h --- linux-2.6.22-try2/include/linux/sysctl.h 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/sysctl.h 2007-12-19 22:49:13.000000000 -0500 @@ -31,6 +31,7 @@ struct file; struct completion; +struct net; #define CTL_MAXNAME 10 /* how many path components do we allow in a call to sysctl? In other words, what is @@ -985,6 +986,7 @@ void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); +extern ctl_handler sysctl_data; extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; extern ctl_handler sysctl_jiffies; @@ -1061,6 +1063,12 @@ void unregister_sysctl_table(struct ctl_table_header * table); +#ifdef CONFIG_NET +extern struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table); +extern void unregister_net_sysctl_table(struct ctl_table_header *header); +extern ctl_table net_root_table[]; +#endif + #else /* __KERNEL__ */ #endif /* __KERNEL__ */ diff -Nurb linux-2.6.22-try2/include/linux/sysfs.h linux-2.6.22-try2-netns/include/linux/sysfs.h --- linux-2.6.22-try2/include/linux/sysfs.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/linux/sysfs.h 2007-12-19 22:49:13.000000000 -0500 @@ -19,9 +19,6 @@ struct kobject; struct module; -struct nameidata; -struct dentry; -struct sysfs_dirent; /* FIXME * The *owner field is no longer used, but leave around @@ -79,16 +76,23 @@ ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); }; +struct shadow_dir_operations { + const void *(*current_tag)(void); + const void *(*kobject_tag)(struct kobject *kobj); +}; + #define SYSFS_TYPE_MASK 0x00ff #define SYSFS_ROOT 0x0001 #define SYSFS_DIR 0x0002 #define SYSFS_KOBJ_ATTR 0x0004 #define SYSFS_KOBJ_BIN_ATTR 0x0008 #define 
SYSFS_KOBJ_LINK 0x0020 +#define SYSFS_SHADOW_DIR 0x0040 #define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) #define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK #define SYSFS_FLAG_REMOVED 0x0100 +#define SYSFS_FLAG_SHADOWED 0x0200 #ifdef CONFIG_SYSFS @@ -96,14 +100,13 @@ void (*func)(void *), void *data, struct module *owner); extern int __must_check -sysfs_create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent_sd); +sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); extern int __must_check -sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, - const char *new_name); +sysfs_rename_dir(struct kobject *kobj, const char *new_name); extern int __must_check sysfs_move_dir(struct kobject *, struct kobject *); @@ -126,6 +129,13 @@ extern void sysfs_remove_link(struct kobject *, const char * name); +extern int +sysfs_rename_link(struct kobject *kobj, struct kobject *target, + const char *old_name, const char *new_name); + +extern void +sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); + int __must_check sysfs_create_bin_file(struct kobject *kobj, struct bin_attribute *attr); void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); @@ -140,11 +150,7 @@ void sysfs_notify(struct kobject * k, char *dir, char *attr); - -extern int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry *, struct nameidata *)); -extern struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj); -extern void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd); +int sysfs_enable_shadowing(struct kobject *, const struct shadow_dir_operations *); extern int __must_check sysfs_init(void); @@ -156,8 +162,7 @@ return -ENOSYS; } -static inline int sysfs_create_dir(struct kobject *kobj, - struct sysfs_dirent *shadow_parent_sd) +static inline int sysfs_create_dir(struct kobject * kobj) { return 0; } @@ -167,9 +172,7 @@ ; } -static inline int 
sysfs_rename_dir(struct kobject *kobj, - struct sysfs_dirent *new_parent_sd, - const char *new_name) +static inline int sysfs_rename_dir(struct kobject * kobj, const char *new_name) { return 0; } @@ -208,6 +211,17 @@ ; } +static inline int +sysfs_rename_link(struct kobject * k, struct kobject *t, + const char *old_name, const char * new_name) +{ + return 0; +} + +static inline void +sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name) +{ +} static inline int sysfs_create_bin_file(struct kobject * k, struct bin_attribute * a) { @@ -244,8 +258,8 @@ { } -static inline int sysfs_make_shadowed_dir(struct kobject *kobj, - void * (*follow_link)(struct dentry *, struct nameidata *)) +static inline int sysfs_enable_shadowing(struct kobject *kobj, + const struct shadow_dir_operations *shadow_ops) { return 0; } diff -Nurb linux-2.6.22-try2/include/net/af_unix.h linux-2.6.22-try2-netns/include/net/af_unix.h --- linux-2.6.22-try2/include/net/af_unix.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/af_unix.h 2007-12-19 22:49:13.000000000 -0500 @@ -91,12 +91,11 @@ #define unix_sk(__sk) ((struct unix_sock *)__sk) #ifdef CONFIG_SYSCTL -extern int sysctl_unix_max_dgram_qlen; -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); +extern void unix_sysctl_register(struct net *net); +extern void unix_sysctl_unregister(struct net *net); #else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} +static inline void unix_sysctl_register(struct net *net) {} +static inline void unix_sysctl_unregister(struct net *net) {} #endif #endif #endif diff -Nurb linux-2.6.22-try2/include/net/arp.h linux-2.6.22-try2-netns/include/net/arp.h --- linux-2.6.22-try2/include/net/arp.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/arp.h 2007-12-19 22:49:13.000000000 -0500 @@ -11,7 +11,7 @@ extern void arp_init(void); extern int arp_find(unsigned char 
*haddr, struct sk_buff *skb); -extern int arp_ioctl(unsigned int cmd, void __user *arg); +extern int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th); diff -Nurb linux-2.6.22-try2/include/net/fib_rules.h linux-2.6.22-try2-netns/include/net/fib_rules.h --- linux-2.6.22-try2/include/net/fib_rules.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/fib_rules.h 2007-12-19 22:49:13.000000000 -0500 @@ -56,12 +56,12 @@ int (*fill)(struct fib_rule *, struct sk_buff *, struct nlmsghdr *, struct fib_rule_hdr *); - u32 (*default_pref)(void); + u32 (*default_pref)(struct fib_rules_ops *ops); size_t (*nlmsg_payload)(struct fib_rule *); /* Called after modifications to the rules set, must flush * the route cache if one exists. */ - void (*flush_cache)(void); + void (*flush_cache)(struct fib_rules_ops *ops); int nlgroup; const struct nla_policy *policy; @@ -101,8 +101,8 @@ return frh->table; } -extern int fib_rules_register(struct fib_rules_ops *); -extern int fib_rules_unregister(struct fib_rules_ops *); +extern int fib_rules_register(struct net *net, struct fib_rules_ops *); +extern int fib_rules_unregister(struct net *net, struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, diff -Nurb linux-2.6.22-try2/include/net/flow.h linux-2.6.22-try2-netns/include/net/flow.h --- linux-2.6.22-try2/include/net/flow.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/flow.h 2007-12-19 22:49:13.000000000 -0500 @@ -8,9 +8,11 @@ #define _NET_FLOW_H #include +#include #include struct flowi { + struct net *fl_net; int oif; int iif; __u32 mark; diff -Nurb linux-2.6.22-try2/include/net/inet6_hashtables.h linux-2.6.22-try2-netns/include/net/inet6_hashtables.h --- linux-2.6.22-try2/include/net/inet6_hashtables.h 2007-12-19 
13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/inet6_hashtables.h 2007-12-19 22:49:13.000000000 -0500 @@ -62,31 +62,31 @@ const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif); + const int dif, struct net *net); extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, - const int dif); + const int dif, struct net *net); static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, - daddr, hnum, dif); + daddr, hnum, dif, net); if (sk) return sk; - return inet6_lookup_listener(hashinfo, daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif, net); } extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, - const int dif); + const int dif, struct net *net); #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ diff -Nurb linux-2.6.22-try2/include/net/inet_hashtables.h linux-2.6.22-try2-netns/include/net/inet_hashtables.h --- linux-2.6.22-try2/include/net/inet_hashtables.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/inet_hashtables.h 2007-12-19 22:49:13.000000000 -0500 @@ -75,6 +75,7 @@ * ports are created in O(1) time? I thought so. 
;-) -DaveM */ struct inet_bind_bucket { + struct net *net; unsigned short port; signed short fastreuse; struct hlist_node node; @@ -138,34 +139,35 @@ extern struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, + struct net *net, const unsigned short snum); extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline int inet_bhashfn(const __u16 lport, const int bhash_size) +static inline int inet_bhashfn(struct net *net, const __u16 lport, const int bhash_size) { - return lport & (bhash_size - 1); + return (((unsigned long)net) ^ lport) & (bhash_size - 1); } extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum); /* These can have wildcards, don't try too hard. */ -static inline int inet_lhashfn(const unsigned short num) +static inline int inet_lhashfn(struct net *net, const unsigned short num) { - return num & (INET_LHTABLE_SIZE - 1); + return (((unsigned long)net) ^ num) & (INET_LHTABLE_SIZE - 1); } static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return inet_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(sk->sk_net, inet_sk(sk)->num); } /* Caller must disable local BH processing. 
*/ static inline void __inet_inherit_port(struct inet_hashinfo *table, struct sock *sk, struct sock *child) { - const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + const int bhash = inet_bhashfn(sk->sk_net, inet_sk(child)->num, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -274,12 +276,13 @@ extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, - const int dif); + const int dif, struct net *net); static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - __be32 daddr, __be16 dport, int dif) + __be32 daddr, __be16 dport, + int dif, struct net *net) { - return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); + return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif, net); } /* Socket demux engine toys. */ @@ -313,30 +316,34 @@ (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == (__net))) +#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ (((__sk)->sk_hash == (__hash)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) ||
((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == (__net))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) -#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ +#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net) \ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == (__net))) +#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __net) \ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ + ((__sk)->sk_net == (__net))) #endif /* 64-bit arch */ /* @@ -349,7 +356,7 @@ __inet_lookup_established(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); @@ -358,19 +365,19 @@ /* Optimize here for direct hit, only listening connections can * have wildcards anyways.
*/ - unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); + unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); prefetch(head->chain.first); read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &head->twchain) { - if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) goto hit; } sk = NULL; @@ -386,32 +393,32 @@ inet_lookup_established(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { return __inet_lookup_established(hashinfo, saddr, sport, daddr, - ntohs(dport), dif); + ntohs(dport), dif, net); } static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { u16 hnum = ntohs(dport); struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, - hnum, dif); - return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); + hnum, dif, net); + return sk ? 
: __inet_lookup_listener(hashinfo, daddr, hnum, dif, net); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif, net); local_bh_enable(); return sk; diff -Nurb linux-2.6.22-try2/include/net/inet_sock.h linux-2.6.22-try2-netns/include/net/inet_sock.h --- linux-2.6.22-try2/include/net/inet_sock.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/inet_sock.h 2007-12-19 22:49:13.000000000 -0500 @@ -171,10 +171,12 @@ extern u32 inet_ehash_secret; extern void build_ehash_secret(void); -static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, +static inline unsigned int inet_ehashfn(struct net *net, + const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr, + return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr ^ + (__force __u32) ((unsigned long)net), ((__u32) lport) << 16 | (__force __u32)fport, inet_ehash_secret); } @@ -187,7 +189,7 @@ const __be32 faddr = inet->daddr; const __be16 fport = inet->dport; - return inet_ehashfn(laddr, lport, faddr, fport); + return inet_ehashfn(sk->sk_net, laddr, lport, faddr, fport); } #endif /* _INET_SOCK_H */ diff -Nurb linux-2.6.22-try2/include/net/inet_timewait_sock.h linux-2.6.22-try2-netns/include/net/inet_timewait_sock.h --- linux-2.6.22-try2/include/net/inet_timewait_sock.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/inet_timewait_sock.h 2007-12-19 22:58:33.000000000 -0500 @@ -115,6 +115,7 @@ #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot +#define tw_net 
__tw_common.skc_net #define tw_xid __tw_common.skc_xid #define tw_vx_info __tw_common.skc_vx_info #define tw_nid __tw_common.skc_nid diff -Nurb linux-2.6.22-try2/include/net/inetpeer.h linux-2.6.22-try2-netns/include/net/inetpeer.h --- linux-2.6.22-try2/include/net/inetpeer.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/inetpeer.h 2007-12-19 22:49:13.000000000 -0500 @@ -15,6 +15,8 @@ #include #include +struct net; + struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ @@ -22,7 +24,11 @@ __be32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ - struct inet_peer *unused_next, **unused_prevp; + union { + struct inet_peer *unused_next; + struct net *net; + } u; + struct inet_peer **unused_prevp; __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; @@ -34,7 +40,7 @@ void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(__be32 daddr, int create); +struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create); /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); diff -Nurb linux-2.6.22-try2/include/net/ip.h linux-2.6.22-try2-netns/include/net/ip.h --- linux-2.6.22-try2/include/net/ip.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/ip.h 2007-12-19 22:49:13.000000000 -0500 @@ -149,13 +149,6 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -struct ipv4_config -{ - int log_martians; - int no_pmtu_disc; -}; - -extern struct ipv4_config ipv4_config; DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) @@ -171,27 +164,6 @@ extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign); 
extern void snmp_mib_free(void *ptr[2]); -extern int sysctl_local_port_range[2]; -extern int sysctl_ip_default_ttl; -extern int sysctl_ip_nonlocal_bind; - -/* From ip_fragment.c */ -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; -extern int sysctl_ipfrag_max_dist; - -/* From inetpeer.c */ -extern int inet_peer_threshold; -extern int inet_peer_minttl; -extern int inet_peer_maxttl; -extern int inet_peer_gc_mintime; -extern int inet_peer_gc_maxtime; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - extern void ipfrag_init(void); #ifdef CONFIG_INET @@ -332,8 +304,6 @@ }; struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); -extern int ip_frag_nqueues; -extern atomic_t ip_frag_mem; /* * Functions provided by ip_forward.c @@ -392,5 +362,6 @@ #endif extern struct ctl_table ipv4_table[]; +extern struct ctl_table multi_ipv4_table[]; #endif /* _IP_H */ diff -Nurb linux-2.6.22-try2/include/net/ip_fib.h linux-2.6.22-try2-netns/include/net/ip_fib.h --- linux-2.6.22-try2/include/net/ip_fib.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/ip_fib.h 2007-12-19 22:49:13.000000000 -0500 @@ -85,6 +85,10 @@ #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif +#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED + u32 fib_mp_alg; +#endif + struct net * fib_net; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -155,43 +159,43 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -extern struct fib_table *ip_fib_local_table; -extern struct fib_table *ip_fib_main_table; - -static inline struct fib_table *fib_get_table(u32 id) +static inline struct fib_table *fib_get_table(struct net *net, u32 id) { if (id != RT_TABLE_LOCAL) - return ip_fib_main_table; - return ip_fib_local_table; + return net->ip_fib_main_table; + return net->ip_fib_local_table; } -static inline struct fib_table *fib_new_table(u32 id) +static inline struct fib_table *fib_new_table(struct 
net *net, u32 id) { - return fib_get_table(id); + return fib_get_table(net, id); } static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) { - if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) && - ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res)) + struct net *net = flp->fl_net; + struct fib_table *local_table = net->ip_fib_local_table; + struct fib_table *main_table = net->ip_fib_main_table; + if (local_table->tb_lookup(local_table, flp, res) && + main_table->tb_lookup(main_table, flp, res)) return -ENETUNREACH; return 0; } static inline void fib_select_default(const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; + struct fib_table *main_table = net->ip_fib_main_table; if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res); + main_table->tb_select_default(main_table, flp, res); } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) -#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *fib_new_table(u32 id); -extern struct fib_table *fib_get_table(u32 id); +extern struct fib_table *fib_new_table(struct net *net, u32 id); +extern struct fib_table *fib_get_table(struct net *net, u32 id); extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -207,15 +211,17 @@ /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(__be32 local, struct net_device *dev, int force); +extern int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(u32 id); 
+extern void fib_hash_exit(struct fib_table *tb); #ifdef CONFIG_IP_MULTIPLE_TABLES -extern void __init fib4_rules_init(void); +extern void fib4_rules_init(struct net * net); +extern void fib4_rules_exit(struct net * net); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); @@ -258,8 +264,11 @@ } #ifdef CONFIG_PROC_FS -extern int fib_proc_init(void); -extern void fib_proc_exit(void); +extern int fib_proc_init(struct net * net); +extern void fib_proc_exit(struct net * net); #endif +extern int fib_info_init(struct net *net); +extern void fib_info_exit(struct net *net); + #endif /* _NET_FIB_H */ diff -Nurb linux-2.6.22-try2/include/net/llc_conn.h linux-2.6.22-try2-netns/include/net/llc_conn.h --- linux-2.6.22-try2/include/net/llc_conn.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/llc_conn.h 2007-12-19 22:49:13.000000000 -0500 @@ -93,7 +93,7 @@ return skb->cb[sizeof(skb->cb) - 1]; } -extern struct sock *llc_sk_alloc(int family, gfp_t priority, +extern struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot); extern void llc_sk_free(struct sock *sk); diff -Nurb linux-2.6.22-try2/include/net/neighbour.h linux-2.6.22-try2-netns/include/net/neighbour.h --- linux-2.6.22-try2/include/net/neighbour.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/neighbour.h 2007-12-19 22:49:13.000000000 -0500 @@ -34,6 +34,7 @@ struct neigh_parms { + struct net *net; struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); @@ -126,6 +127,7 @@ struct pneigh_entry { struct pneigh_entry *next; + struct net *net; struct net_device *dev; u8 flags; u8 key[0]; @@ -187,6 +189,7 @@ const void *pkey, struct net_device *dev); extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, + struct net *net, const void *pkey); extern struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, @@ -205,21 +208,24 @@ struct 
net_device *dev); extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); +extern struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net); extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); extern void neigh_parms_destroy(struct neigh_parms *parms); extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb); -extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat); -extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev); +extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); +extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); extern void neigh_app_ns(struct neighbour *n); extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); -struct neigh_seq_state { +struct neigh_seq_state +{ + struct net *net; struct neigh_table *tbl; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); diff -Nurb linux-2.6.22-try2/include/net/net_namespace.h linux-2.6.22-try2-netns/include/net/net_namespace.h --- linux-2.6.22-try2/include/net/net_namespace.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/net_namespace.h 2007-12-19 22:49:13.000000000 -0500 @@ -0,0 +1,236 @@ +/* + * Operations on the network namespace + */ +#ifndef __NET_NET_NAMESPACE_H +#define __NET_NET_NAMESPACE_H + +#include +#include +#include +#include +#include +#include +#include + +struct 
sock; +struct xt_af_pernet; +struct ipv4_devconf; +struct neigh_parms; +struct inet_peer; +struct xt_table; +struct net { + atomic_t count; /* To decided when the network namespace + * should go + */ + atomic_t use_count; /* For references we destroy on demand */ + struct list_head list; /* list of network namespace structures */ + struct work_struct work; /* work struct for freeing */ + +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_net; + struct proc_dir_entry *proc_net_stat; + struct proc_dir_entry proc_net_root; +# ifdef CONFIG_NETFILTER + struct proc_dir_entry *proc_net_netfilter; +# endif +#endif +#ifdef CONFIG_SYSCTL + struct ctl_table_header net_table_header; +#endif + struct net_device loopback_dev; /* The loopback */ + struct list_head dev_base_head; /* All devices */ + + struct hlist_head *dev_name_head; + struct hlist_head *dev_index_head; + + struct sock * rtnl; /* rtnetlink socket */ + + + /* core netfilter */ + struct xt_af_pernet * xtn; + + /* core fib_rules */ + struct list_head rules_ops; + spinlock_t rules_mod_lock; + +#ifdef CONFIG_XFRM + u32 sysctl_xfrm_aevent_etime; + u32 sysctl_xfrm_aevent_rseqth; + int sysctl_xfrm_larval_drop; + u32 sysctl_xfrm_acq_expires; +#endif /* CONFIG_XFRM */ + + int sysctl_somaxconn; + +#ifdef CONFIG_PACKET + /* List of all packet sockets. 
*/ + rwlock_t packet_sklist_lock; + struct hlist_head packet_sklist; +#endif /* CONFIG_PACKET */ +#ifdef CONFIG_UNIX + int sysctl_unix_max_dgram_qlen; + void * unix_sysctl; +#endif /* CONFIG_UNIX */ +#ifdef CONFIG_IP_MULTIPLE_TABLES + void * fib4_table; +#endif /* CONFIG_IP_MULTIPLE_TABLES */ +#ifdef CONFIG_IP_FIB_HASH + int fn_hash_last_dflt; +#endif +#ifdef CONFIG_IP_FIB_TRIE + int trie_last_dflt; +#endif +#ifndef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *ip_fib_local_table; + struct fib_table *ip_fib_main_table; +#endif + struct hlist_head *ip_fib_table_hash; + struct sock *nlfl; + + /* fib_semantics */ + struct hlist_head *fib_info_hash; + struct hlist_head *fib_info_laddrhash; + unsigned int fib_info_hash_size; + unsigned int fib_info_cnt; + struct hlist_head *fib_info_devhash; + + /* af_inet.c */ + int sysctl_ip_nonlocal_bind; /* __read_mostly */ + int sysctl_ip_default_ttl; /* __read_mostly */ + int sysctl_ipfrag_high_thresh; + int sysctl_ipfrag_low_thresh; + int sysctl_ipfrag_time; + int sysctl_ipfrag_secret_interval; + int sysctl_ipfrag_max_dist; + int sysctl_ipv4_no_pmtu_disc; + int sysctl_local_port_range[2]; + int sysctl_ip_dynaddr; + int sysctl_tcp_timestamps; /* __read_mostly */ + int sysctl_tcp_window_scaling; /* __read_mostly */ + /* inetpeer.c */ + int inet_peer_threshold; + int inet_peer_minttl; + int inet_peer_maxttl; + int inet_peer_gc_mintime; + int inet_peer_gc_maxtime; + + /* devinet */ + struct ipv4_devconf *ipv4_devconf; + struct ipv4_devconf *ipv4_devconf_dflt; + + /* arp.c */ + struct neigh_parms *arp_neigh_parms_default; + + /* icmp.c */ + struct socket **__icmp_socket; + + /* inetpeer.c */ + struct inet_peer *peer_root; + int peer_total; + struct inet_peer *inet_peer_unused_head; + struct inet_peer **inet_peer_unused_tailp; + struct timer_list peer_periodic_timer; + + /* ip_fragment.c */ + struct hlist_head *ipq_hash; + u32 ipfrag_hash_rnd; + struct list_head ipq_lru_list; + int ip_frag_nqueues; + atomic_t ip_frag_mem; + struct 
timer_list ipfrag_secret_timer; + + /* udp.c */ + int udp_port_rover; + + /* iptable_filter.c */ + struct xt_table *ip_packet_filter; +}; + +extern struct net init_net; +extern struct list_head net_namespace_list; + +extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); +extern void __put_net(struct net *net); + +static inline struct net *get_net(struct net *net) +{ + atomic_inc(&net->count); + return net; +} + +static inline void put_net(struct net *net) +{ + if (atomic_dec_and_test(&net->count)) + __put_net(net); +} + +static inline struct net *hold_net(struct net *net) +{ + atomic_inc(&net->use_count); + return net; +} + +static inline void release_net(struct net *net) +{ + atomic_dec(&net->use_count); +} + +extern void net_lock(void); +extern void net_unlock(void); + +#define for_each_net(VAR) \ + list_for_each_entry(VAR, &net_namespace_list, list) + + +struct pernet_operations { + struct list_head list; + int (*init)(struct net *net); + void (*exit)(struct net *net); +}; + +extern int register_pernet_subsys(struct pernet_operations *); +extern void unregister_pernet_subsys(struct pernet_operations *); +extern int register_pernet_device(struct pernet_operations *); +extern void unregister_pernet_device(struct pernet_operations *); + +#ifdef CONFIG_PROC_FS +static inline struct net *PDE_NET(struct proc_dir_entry *pde) +{ + return pde->parent->data; +} + +static inline struct net *PROC_NET(const struct inode *inode) +{ + return PDE_NET(PDE(inode)); +} + +static inline struct proc_dir_entry *proc_net_create(struct net *net, + const char *name, mode_t mode, get_info_t *get_info) +{ + return create_proc_info_entry(name,mode, net->proc_net, get_info); +} + +static inline struct proc_dir_entry *proc_net_fops_create(struct net *net, + const char *name, mode_t mode, const struct file_operations *fops) +{ + struct proc_dir_entry *res = + create_proc_entry(name, mode, net->proc_net); + if (res) + res->proc_fops = fops; + return res; +} + +static inline 
void proc_net_remove(struct net *net, const char *name) +{ + remove_proc_entry(name, net->proc_net); +} + +#else + +#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) +#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) +static inline void proc_net_remove(struct net *net, const char *name) {} + +#endif /* CONFIG_PROC_FS */ + +#endif /* __NET_NET_NAMESPACE_H */ diff -Nurb linux-2.6.22-try2/include/net/netlink.h linux-2.6.22-try2-netns/include/net/netlink.h --- linux-2.6.22-try2/include/net/netlink.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/netlink.h 2007-12-19 22:49:13.000000000 -0500 @@ -218,6 +218,7 @@ struct nl_info { struct nlmsghdr *nlh; u32 pid; + struct net *net; }; extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, diff -Nurb linux-2.6.22-try2/include/net/pkt_cls.h linux-2.6.22-try2-netns/include/net/pkt_cls.h --- linux-2.6.22-try2/include/net/pkt_cls.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/pkt_cls.h 2007-12-19 22:49:13.000000000 -0500 @@ -2,6 +2,7 @@ #define __NET_PKT_CLS_H #include +#include #include #include @@ -357,7 +358,7 @@ if (indev[0]) { if (!skb->iif) return 0; - dev = __dev_get_by_index(skb->iif); + dev = __dev_get_by_index(&init_net, skb->iif); if (!dev || strcmp(indev, dev->name)) return 0; } diff -Nurb linux-2.6.22-try2/include/net/protocol.h linux-2.6.22-try2-netns/include/net/protocol.h --- linux-2.6.22-try2/include/net/protocol.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/protocol.h 2007-12-19 22:49:13.000000000 -0500 @@ -86,6 +86,7 @@ #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ +#define INET_PROTOSW_NETNS 0x08 /* Multiple namespaces support? 
*/ extern struct net_protocol *inet_protocol_base; extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; diff -Nurb linux-2.6.22-try2/include/net/raw.h linux-2.6.22-try2-netns/include/net/raw.h --- linux-2.6.22-try2/include/net/raw.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/raw.h 2007-12-19 22:49:13.000000000 -0500 @@ -34,7 +34,7 @@ extern rwlock_t raw_v4_lock; -extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, +extern struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int tag); diff -Nurb linux-2.6.22-try2/include/net/route.h linux-2.6.22-try2-netns/include/net/route.h --- linux-2.6.22-try2/include/net/route.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/route.h 2007-12-19 22:58:46.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -122,9 +123,9 @@ extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); -extern unsigned inet_addr_type(__be32 addr); +extern unsigned inet_addr_type(struct net *net, __be32 addr); extern void ip_rt_multicast_event(struct in_device *); -extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); +extern int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg); extern void ip_rt_get_source(u8 *src, struct rtable *rt); extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); @@ -153,7 +154,8 @@ __be16 sport, __be16 dport, struct sock *sk, int flags) { - struct flowi fl = { .oif = oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = oif, .nl_u = { .ip4_u = { .daddr = dst, .saddr = src, .tos = tos } }, @@ -198,6 +200,7 @@ struct flowi fl; memcpy(&fl, &(*rp)->fl, sizeof(fl)); + fl.fl_net = sk->sk_net; fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; diff -Nurb linux-2.6.22-try2/include/net/sock.h 
linux-2.6.22-try2-netns/include/net/sock.h --- linux-2.6.22-try2/include/net/sock.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/sock.h 2007-12-19 22:59:14.000000000 -0500 @@ -55,6 +55,7 @@ #include #include #include +#include /* * This structure really needs to be cleaned up. @@ -105,6 +106,7 @@ * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family + * @skc_net: reference to the network namespace of this socket * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. @@ -119,6 +121,7 @@ atomic_t skc_refcnt; unsigned int skc_hash; struct proto *skc_prot; + struct net *skc_net; xid_t skc_xid; struct vx_info *skc_vx_info; nid_t skc_nid; @@ -199,6 +202,7 @@ #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot +#define sk_net __sk_common.skc_net #define sk_xid __sk_common.skc_xid #define sk_vx_info __sk_common.skc_vx_info #define sk_nid __sk_common.skc_nid @@ -781,7 +785,7 @@ SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) -extern struct sock *sk_alloc(int family, +extern struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); @@ -1010,6 +1014,7 @@ #endif memcpy(nsk, osk, osk->sk_prot->obj_size); + get_net(nsk->sk_net); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -1373,6 +1378,7 @@ #ifdef CONFIG_SYSCTL extern struct ctl_table core_table[]; +extern struct ctl_table multi_core_table[]; #endif extern int sysctl_optmem_max; diff -Nurb linux-2.6.22-try2/include/net/tcp.h linux-2.6.22-try2-netns/include/net/tcp.h --- linux-2.6.22-try2/include/net/tcp.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/tcp.h 2007-12-19 
22:49:13.000000000 -0500 @@ -191,8 +191,6 @@ extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ -extern int sysctl_tcp_timestamps; -extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_keepalive_time; @@ -1293,6 +1291,7 @@ }; struct tcp_iter_state { + struct net *net; sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; @@ -1300,8 +1299,8 @@ struct seq_operations seq_ops; }; -extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); -extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); +extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); extern struct request_sock_ops tcp_request_sock_ops; diff -Nurb linux-2.6.22-try2/include/net/udp.h linux-2.6.22-try2-netns/include/net/udp.h --- linux-2.6.22-try2/include/net/udp.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/udp.h 2007-12-19 22:49:13.000000000 -0500 @@ -160,6 +160,7 @@ }; struct udp_iter_state { + struct net *net; sa_family_t family; struct hlist_head *hashtable; int bucket; @@ -167,8 +168,8 @@ }; #ifdef CONFIG_PROC_FS -extern int udp_proc_register(struct udp_seq_afinfo *afinfo); -extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); +extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo); +extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo); extern int udp4_proc_init(void); extern void udp4_proc_exit(void); diff -Nurb linux-2.6.22-try2/include/net/wext.h linux-2.6.22-try2-netns/include/net/wext.h --- linux-2.6.22-try2/include/net/wext.h 2007-12-19 13:37:54.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/wext.h 2007-12-19 22:49:13.000000000 -0500 @@ -5,16 +5,23 @@ * wireless extensions interface to the core code */ +struct net; + #ifdef CONFIG_WIRELESS_EXT -extern int 
wext_proc_init(void); -extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +extern int wext_proc_init(struct net *net); +extern void wext_proc_exit(struct net *net); +extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg); #else -static inline int wext_proc_init(void) +static inline int wext_proc_init(struct net *net) { return 0; } -static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +static inline void wext_proc_exit(struct net *net) +{ + return; +} +static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { return -EINVAL; diff -Nurb linux-2.6.22-try2/include/net/xfrm.h linux-2.6.22-try2-netns/include/net/xfrm.h --- linux-2.6.22-try2/include/net/xfrm.h 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/include/net/xfrm.h 2007-12-19 22:49:13.000000000 -0500 @@ -34,8 +34,6 @@ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) extern struct sock *xfrm_nl; -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; extern struct mutex xfrm_cfg_mutex; diff -Nurb linux-2.6.22-try2/kernel/audit.c linux-2.6.22-try2-netns/kernel/audit.c --- linux-2.6.22-try2/kernel/audit.c 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/kernel/audit.c 2007-12-19 22:49:13.000000000 -0500 @@ -795,8 +795,8 @@ printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? 
"enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - NULL, THIS_MODULE); + audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, + audit_receive, NULL, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff -Nurb linux-2.6.22-try2/kernel/nsproxy.c linux-2.6.22-try2-netns/kernel/nsproxy.c --- linux-2.6.22-try2/kernel/nsproxy.c 2007-12-19 21:24:51.000000000 -0500 +++ linux-2.6.22-try2-netns/kernel/nsproxy.c 2007-12-19 23:01:55.000000000 -0500 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -89,8 +90,17 @@ if (IS_ERR(new_nsp->user_ns)) goto out_user; + new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); + if (IS_ERR(new_nsp->net_ns)) + goto out_net; + return new_nsp; +out_net: + if (new_nsp->user_ns) + put_user_ns(new_nsp->user_ns); + if (ns->net_ns) + put_net(ns->net_ns); out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); @@ -153,9 +163,17 @@ get_nsproxy(old_ns); - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) + if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) return 0; + #ifndef CONFIG_NET_NS + if (flags & CLONE_NEWNET) { + err = -EINVAL; + goto out; + } + #endif + + if (!capable(CAP_SYS_ADMIN)) { err = -EPERM; goto out; @@ -211,9 +229,13 @@ unshare_flags, current->nsproxy); if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER))) + CLONE_NEWUSER | CLONE_NEWNET))) return 0; +#ifndef CONFIG_NET_NS + if (unshare_flags & CLONE_NEWNET) + return -EINVAL; +#endif if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff -Nurb linux-2.6.22-try2/kernel/sysctl.c linux-2.6.22-try2-netns/kernel/sysctl.c --- linux-2.6.22-try2/kernel/sysctl.c 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/kernel/sysctl.c 2007-12-19 22:49:13.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,10 @@ void
__user *buffer, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_NET +static void sysctl_net_init(struct net *net); +#endif + static ctl_table root_table[]; static struct ctl_table_header root_table_header = { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; @@ -1151,6 +1156,11 @@ { struct ctl_table_header *head; struct list_head *tmp; + struct net *net = current->nsproxy->net_ns; + + if (!net->net_table_header.ctl_table) + sysctl_net_init(net); + spin_lock(&sysctl_lock); if (prev) { tmp = &prev->ctl_entry; @@ -1168,6 +1178,10 @@ next: tmp = tmp->next; if (tmp == &root_table_header.ctl_entry) +#ifdef CONFIG_NET + tmp = &net->net_table_header.ctl_entry; + else if (tmp == &net->net_table_header.ctl_entry) +#endif break; } spin_unlock(&sysctl_lock); @@ -1283,7 +1297,6 @@ void __user *newval, size_t newlen) { int op = 0, rc; - size_t len; if (oldval) op |= 004; @@ -1304,25 +1317,10 @@ /* If there is no strategy routine, or if the strategy returns * zero, proceed with automatic r/w */ if (table->data && table->maxlen) { - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } + rc = sysctl_data(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; } return 0; } @@ -1413,7 +1411,8 @@ * This routine returns %NULL on a failure to register, and a pointer * to the table header on success. 
*/ -struct ctl_table_header *register_sysctl_table(ctl_table * table) +static struct ctl_table_header *__register_sysctl_table( + struct ctl_table_header *root, ctl_table * table) { struct ctl_table_header *tmp; tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); @@ -1425,11 +1424,16 @@ tmp->unregistering = NULL; sysctl_set_parent(NULL, table); spin_lock(&sysctl_lock); - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + list_add_tail(&tmp->ctl_entry, &root->ctl_entry); spin_unlock(&sysctl_lock); return tmp; } +struct ctl_table_header *register_sysctl_table(ctl_table *table) +{ + return __register_sysctl_table(&root_table_header, table); +} + /** * unregister_sysctl_table - unregister a sysctl table hierarchy * @header: the header returned from register_sysctl_table @@ -1446,6 +1450,92 @@ kfree(header); } +#ifdef CONFIG_NET + +static void *fixup_table_addr(void *addr, + const char *start, size_t size, const char *new) +{ + char *ptr = addr; + if ((ptr >= start) && (ptr < (start + size))) + ptr += new - start; + return ptr; +} + +static void table_fixup(struct ctl_table *table, + const void *start, size_t size, const void *new) +{ + for (; table->ctl_name || table->procname; table++) { + table->data = fixup_table_addr(table->data, start, size, new); + table->extra1 = fixup_table_addr(table->extra1, start, size, new); + table->extra2 = fixup_table_addr(table->extra2, start, size, new); + + /* Whee recursive functions on the kernel stack */ + if (table->child) + table_fixup(table->child, start, size, new); + } +} + +static unsigned count_table_entries(struct ctl_table *table) +{ + unsigned entries = 0; + for (; table->ctl_name || table->procname; table++) { + entries += 1; + + if (table->child) + entries += count_table_entries(table->child); + } + entries += 1; /* Null terminating entry */ + return entries; +} + +static struct ctl_table *copy_table_entries( + struct ctl_table *dest, struct ctl_table *src) +{ + struct ctl_table *table = dest; + for 
(; src->ctl_name || src->procname; src++) { + *dest++ = *src; + } + dest++; /* Null terminating entry */ + for (; table->ctl_name || table->procname; table++) { + if ((src = table->child) != NULL) + dest = copy_table_entries(table->child = dest, src); /* repoint the copy at its own child table */ + } + return dest; +} + +static void sysctl_net_init(struct net *net) +{ + unsigned entries; + struct ctl_table *table; + + entries = count_table_entries(net_root_table); + table = kzalloc(sizeof(*table)*entries, GFP_KERNEL); + /* FIXME: handle allocation failure; free table on namespace exit */ + + copy_table_entries(table, net_root_table); + table_fixup(table, &init_net, sizeof(init_net), net); + + net->net_table_header.ctl_table = table; + INIT_LIST_HEAD(&net->net_table_header.ctl_entry); +} + +struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table) +{ + if (!net->net_table_header.ctl_table) + sysctl_net_init(net); + table_fixup(table, &init_net, sizeof(init_net), net); + return __register_sysctl_table(&net->net_table_header, table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_table); + +void unregister_net_sysctl_table(struct ctl_table_header *header) +{ + unregister_sysctl_table(header); +} +EXPORT_SYMBOL_GPL(unregister_net_sysctl_table); +#endif + + #else /* !CONFIG_SYSCTL */ struct ctl_table_header *register_sysctl_table(ctl_table * table) { @@ -2221,6 +2311,40 @@ * General sysctl support routines */ +/* The generic sysctl data routine (used if no strategy routine supplied) */ +int sysctl_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + size_t len; + + /* Get out if I don't have a variable */ + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if (get_user(len, oldlenp)) + return -EFAULT; + if (len) { + if (len > table->maxlen) + len = table->maxlen; + if (copy_to_user(oldval, table->data, len)) + return -EFAULT; + if (put_user(len, oldlenp)) + return -EFAULT; + } + } + + if (newval && newlen) { + if
(newlen > table->maxlen) + newlen = table->maxlen; + + if (copy_from_user(table->data, newval, newlen)) + return -EFAULT; + } + return 1; +} + /* The generic string strategy routine: */ int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, @@ -2409,6 +2533,13 @@ return -ENOSYS; } +int sysctl_data(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + return -ENOSYS; +} + int sysctl_string(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -2456,4 +2587,5 @@ EXPORT_SYMBOL(sysctl_jiffies); EXPORT_SYMBOL(sysctl_ms_jiffies); EXPORT_SYMBOL(sysctl_string); +EXPORT_SYMBOL(sysctl_data); EXPORT_SYMBOL(unregister_sysctl_table); diff -Nurb linux-2.6.22-try2/lib/kobject.c linux-2.6.22-try2-netns/lib/kobject.c --- linux-2.6.22-try2/lib/kobject.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/lib/kobject.c 2007-12-19 22:49:18.000000000 -0500 @@ -44,11 +44,11 @@ return error; } -static int create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent) +static int create_dir(struct kobject * kobj) { int error = 0; if (kobject_name(kobj)) { - error = sysfs_create_dir(kobj, shadow_parent); + error = sysfs_create_dir(kobj); if (!error) { if ((error = populate_dir(kobj))) sysfs_remove_dir(kobj); @@ -157,12 +157,11 @@ } /** - * kobject_shadow_add - add an object to the hierarchy. + * kobject_add - add an object to the hierarchy. * @kobj: object. - * @shadow_parent: sysfs directory to add to. 
*/ -int kobject_shadow_add(struct kobject *kobj, struct sysfs_dirent *shadow_parent) +int kobject_add(struct kobject * kobj) { int error = 0; struct kobject * parent; @@ -194,7 +193,7 @@ kobj->parent = parent; } - error = create_dir(kobj, shadow_parent); + error = create_dir(kobj); if (error) { /* unlink does the kobject_put() for us */ unlink(kobj); @@ -216,16 +215,6 @@ } /** - * kobject_add - add an object to the hierarchy. - * @kobj: object. - */ -int kobject_add(struct kobject * kobj) -{ - return kobject_shadow_add(kobj, NULL); -} - - -/** * kobject_register - initialize and add an object. * @kobj: object in question. */ @@ -338,7 +327,7 @@ /* Note : if we want to send the new name alone, not the full path, * we could probably use kobject_name(kobj); */ - error = sysfs_rename_dir(kobj, kobj->parent->sd, new_name); + error = sysfs_rename_dir(kobj, new_name); /* This function is mostly/only used for network interface. * Some hotplug package track interfaces by their name and @@ -355,27 +344,6 @@ } /** - * kobject_rename - change the name of an object - * @kobj: object in question. - * @new_parent: object's new parent - * @new_name: object's new name - */ - -int kobject_shadow_rename(struct kobject *kobj, - struct sysfs_dirent *new_parent, const char *new_name) -{ - int error = 0; - - kobj = kobject_get(kobj); - if (!kobj) - return -EINVAL; - error = sysfs_rename_dir(kobj, new_parent, new_name); - kobject_put(kobj); - - return error; -} - -/** * kobject_move - move object to another parent * @kobj: object in question. 
* @new_parent: object's new parent (can be NULL) diff -Nurb linux-2.6.22-try2/lib/kobject_uevent.c linux-2.6.22-try2-netns/lib/kobject_uevent.c --- linux-2.6.22-try2/lib/kobject_uevent.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/lib/kobject_uevent.c 2007-12-19 22:49:18.000000000 -0500 @@ -290,9 +290,8 @@ #if defined(CONFIG_NET) static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, - NULL, THIS_MODULE); - + uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR "kobject_uevent: unable to create netlink socket!\n"); diff -Nurb linux-2.6.22-try2/net/802/tr.c linux-2.6.22-try2-netns/net/802/tr.c --- linux-2.6.22-try2/net/802/tr.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/802/tr.c 2007-12-19 22:49:18.000000000 -0500 @@ -36,6 +36,7 @@ #include #include #include +#include static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev); static void rif_check_expire(unsigned long dummy); @@ -532,7 +533,7 @@ seq_puts(seq, "if TR address TTL rcf routing segments\n"); else { - struct net_device *dev = dev_get_by_index(entry->iface); + struct net_device *dev = dev_get_by_index(&init_net, entry->iface); long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout) - (long) jiffies; @@ -639,7 +640,7 @@ rif_timer.function = rif_check_expire; add_timer(&rif_timer); - proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops); + proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops); return 0; } diff -Nurb linux-2.6.22-try2/net/8021q/vlan.c linux-2.6.22-try2-netns/net/8021q/vlan.c --- linux-2.6.22-try2/net/8021q/vlan.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/8021q/vlan.c 2007-12-19 22:49:18.000000000 -0500 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "vlan.h" @@ -50,7 +51,7 @@ static char vlan_buggyright[] = "David S. 
Miller "; static int vlan_device_event(struct notifier_block *, unsigned long, void *); -static int vlan_ioctl_handler(void __user *); +static int vlan_ioctl_handler(struct net *net, void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); static struct notifier_block vlan_notifier_block = { @@ -124,7 +125,7 @@ struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) { + for_each_netdev_safe(&init_net, dev, nxt) { if (dev->priv_flags & IFF_802_1Q_VLAN) { unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, VLAN_DEV_INFO(dev)->vlan_id); @@ -599,6 +600,9 @@ int i, flgs; struct net_device *vlandev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (!grp) goto out; @@ -678,7 +682,7 @@ * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. */ -static int vlan_ioctl_handler(void __user *arg) +static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; unsigned short vid = 0; @@ -707,7 +711,7 @@ case GET_VLAN_REALDEV_NAME_CMD: case GET_VLAN_VID_CMD: err = -ENODEV; - dev = __dev_get_by_name(args.device1); + dev = __dev_get_by_name(&init_net, args.device1); if (!dev) goto out; diff -Nurb linux-2.6.22-try2/net/8021q/vlan_dev.c linux-2.6.22-try2-netns/net/8021q/vlan_dev.c --- linux-2.6.22-try2/net/8021q/vlan_dev.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/8021q/vlan_dev.c 2007-12-19 22:49:18.000000000 -0500 @@ -132,6 +132,11 @@ vhdr = (struct vlan_hdr *)(skb->data); + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); @@ -776,7 +781,7 @@ break; case SIOCETHTOOL: - err = dev_ethtool(&ifrr); + err = dev_ethtool(real_dev->nd_net, &ifrr); } if (!err) diff -Nurb linux-2.6.22-try2/net/8021q/vlan_netlink.c linux-2.6.22-try2-netns/net/8021q/vlan_netlink.c --- linux-2.6.22-try2/net/8021q/vlan_netlink.c 2007-12-19 
15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/8021q/vlan_netlink.c 2007-12-19 22:49:18.000000000 -0500 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "vlan.h" @@ -105,7 +106,7 @@ if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff -Nurb linux-2.6.22-try2/net/8021q/vlanproc.c linux-2.6.22-try2-netns/net/8021q/vlanproc.c --- linux-2.6.22-try2/net/8021q/vlanproc.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/8021q/vlanproc.c 2007-12-19 22:49:18.000000000 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #include "vlanproc.h" #include "vlan.h" @@ -143,7 +144,7 @@ remove_proc_entry(name_conf, proc_vlan_dir); if (proc_vlan_dir) - proc_net_remove(name_root); + proc_net_remove(&init_net, name_root); /* Dynamically added entries should be cleaned up as their vlan_device * is removed, so we should not have to take care of it here... 
@@ -156,7 +157,7 @@ int __init vlan_proc_init(void) { - proc_vlan_dir = proc_mkdir(name_root, proc_net); + proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); if (proc_vlan_dir) { proc_vlan_conf = create_proc_entry(name_conf, S_IFREG|S_IRUSR|S_IWUSR, @@ -253,7 +254,7 @@ if (*pos == 0) return SEQ_START_TOKEN; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_vlan_dev(dev)) continue; @@ -272,9 +273,9 @@ dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_vlan_dev(dev)) continue; diff -Nurb linux-2.6.22-try2/net/Kconfig linux-2.6.22-try2-netns/net/Kconfig --- linux-2.6.22-try2/net/Kconfig 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/Kconfig 2007-12-19 22:49:18.000000000 -0500 @@ -27,6 +27,13 @@ menu "Networking options" +config NET_NS + bool "Network namespace support" + depends on EXPERIMENTAL + help + Support what appear to user space as multiple instances of the + network stack. 
+ source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff -Nurb linux-2.6.22-try2/net/appletalk/aarp.c linux-2.6.22-try2-netns/net/appletalk/aarp.c --- linux-2.6.22-try2/net/appletalk/aarp.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/appletalk/aarp.c 2007-12-19 22:49:18.000000000 -0500 @@ -330,15 +330,19 @@ static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; int ct; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { write_lock_bh(&aarp_lock); for (ct = 0; ct < AARP_HASH_SIZE; ct++) { - __aarp_expire_device(&resolved[ct], ptr); - __aarp_expire_device(&unresolved[ct], ptr); - __aarp_expire_device(&proxies[ct], ptr); + __aarp_expire_device(&resolved[ct], dev); + __aarp_expire_device(&unresolved[ct], dev); + __aarp_expire_device(&proxies[ct], dev); } write_unlock_bh(&aarp_lock); @@ -712,6 +716,9 @@ struct atalk_addr sa, *ma, da; struct atalk_iface *ifa; + if (dev->nd_net != &init_net) + goto out0; + /* We only do Ethernet SNAP AARP. 
*/ if (dev->type != ARPHRD_ETHER) goto out0; diff -Nurb linux-2.6.22-try2/net/appletalk/atalk_proc.c linux-2.6.22-try2-netns/net/appletalk/atalk_proc.c --- linux-2.6.22-try2/net/appletalk/atalk_proc.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/appletalk/atalk_proc.c 2007-12-19 22:49:18.000000000 -0500 @@ -13,6 +13,7 @@ #include #include #include +#include static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) @@ -271,7 +272,7 @@ struct proc_dir_entry *p; int rc = -ENOMEM; - atalk_proc_dir = proc_mkdir("atalk", proc_net); + atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); if (!atalk_proc_dir) goto out; atalk_proc_dir->owner = THIS_MODULE; @@ -306,7 +307,7 @@ out_route: remove_proc_entry("interface", atalk_proc_dir); out_interface: - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); goto out; } @@ -316,5 +317,5 @@ remove_proc_entry("route", atalk_proc_dir); remove_proc_entry("socket", atalk_proc_dir); remove_proc_entry("arp", atalk_proc_dir); - remove_proc_entry("atalk", proc_net); + remove_proc_entry("atalk", init_net.proc_net); } diff -Nurb linux-2.6.22-try2/net/appletalk/ddp.c linux-2.6.22-try2-netns/net/appletalk/ddp.c --- linux-2.6.22-try2/net/appletalk/ddp.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/appletalk/ddp.c 2007-12-19 22:49:18.000000000 -0500 @@ -647,9 +647,14 @@ static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) /* Discard any use of this */ - atalk_dev_down(ptr); + atalk_dev_down(dev); return NOTIFY_DONE; } @@ -672,7 +677,7 @@ if (copy_from_user(&atreq, arg, sizeof(atreq))) return -EFAULT; - dev = __dev_get_by_name(atreq.ifr_name); + dev = __dev_get_by_name(&init_net, atreq.ifr_name); if (!dev) return -ENODEV; @@ -896,7 +901,7 @@ if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) return 
-EFAULT; name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (!dev) return -ENODEV; } @@ -1024,11 +1029,14 @@ * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do * and gives you the full ELAP frame. Should be handy for CAP 8) @@ -1036,7 +1044,7 @@ if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; rc = -ENOMEM; - sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); if (!sk) goto out; rc = 0; @@ -1265,7 +1273,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb) { - struct net_device *dev = __dev_get_by_name("ipddp0"); + struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); struct net_device_stats *stats; /* This needs to be able to handle ipddp"N" devices */ @@ -1398,6 +1406,9 @@ int origlen; __u16 len_hops; + if (dev->nd_net != &init_net) + goto freeit; + /* Don't mangle buffer if shared */ if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; @@ -1483,6 +1494,9 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + if (dev->nd_net != &init_net) + goto freeit; + /* Expand any short form frames */ if (skb_mac_header(skb)[2] == 1) { struct ddpehdr *ddp; diff -Nurb linux-2.6.22-try2/net/atm/clip.c linux-2.6.22-try2-netns/net/atm/clip.c --- linux-2.6.22-try2/net/atm/clip.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/clip.c 2007-12-19 22:49:18.000000000 -0500 @@ -293,7 +293,7 @@ struct neigh_parms *parms; DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); - neigh->type = 
inet_addr_type(entry->ip); + neigh->type = inet_addr_type(&init_net, entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -525,7 +525,10 @@ struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} + }; struct rtable *rt; if (vcc->push != clip_push) { @@ -620,6 +623,9 @@ { struct net_device *dev = arg; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { neigh_ifdown(&clip_tbl, dev); return NOTIFY_DONE; @@ -954,6 +960,7 @@ seq = file->private_data; seq->private = state; + state->ns.net = get_net(PROC_NET(inode)); out: return rc; @@ -962,11 +969,19 @@ goto out; } +static int arp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct clip_seq_state *state = seq->private; + put_net(state->ns.net); + return seq_release_private(inode, file); +} + static const struct file_operations arp_seq_fops = { .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = arp_seq_release, .owner = THIS_MODULE }; #endif diff -Nurb linux-2.6.22-try2/net/atm/common.c linux-2.6.22-try2-netns/net/atm/common.c --- linux-2.6.22-try2/net/atm/common.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/common.c 2007-12-19 22:49:18.000000000 -0500 @@ -132,7 +132,7 @@ .obj_size = sizeof(struct atm_vcc), }; -int vcc_create(struct socket *sock, int protocol, int family) +int vcc_create(struct net *net, struct socket *sock, int protocol, int family) { struct sock *sk; struct atm_vcc *vcc; @@ -140,7 +140,7 @@ sock->sk = NULL; if (sock->type == SOCK_STREAM) return -EINVAL; - sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); + sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); if (!sk) return -ENOMEM; sock_init_data(sock, sk); diff -Nurb 
linux-2.6.22-try2/net/atm/common.h linux-2.6.22-try2-netns/net/atm/common.h --- linux-2.6.22-try2/net/atm/common.h 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/common.h 2007-12-19 22:49:18.000000000 -0500 @@ -10,7 +10,7 @@ #include /* for poll_table */ -int vcc_create(struct socket *sock, int protocol, int family); +int vcc_create(struct net *net, struct socket *sock, int protocol, int family); int vcc_release(struct socket *sock); int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, diff -Nurb linux-2.6.22-try2/net/atm/mpc.c linux-2.6.22-try2-netns/net/atm/mpc.c --- linux-2.6.22-try2/net/atm/mpc.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/mpc.c 2007-12-19 22:49:18.000000000 -0500 @@ -244,7 +244,7 @@ char name[IFNAMSIZ]; sprintf(name, "lec%d", itf); - dev = dev_get_by_name(name); + dev = dev_get_by_name(&init_net, name); return dev; } @@ -956,6 +956,10 @@ struct lec_priv *priv; dev = (struct net_device *)dev_ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ diff -Nurb linux-2.6.22-try2/net/atm/proc.c linux-2.6.22-try2-netns/net/atm/proc.c --- linux-2.6.22-try2/net/atm/proc.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/proc.c 2007-12-19 22:49:18.000000000 -0500 @@ -22,6 +22,7 @@ #include #include #include /* for __init */ +#include #include #include #include @@ -475,7 +476,7 @@ if (e->dirent) remove_proc_entry(e->name, atm_proc_root); } - remove_proc_entry("net/atm", NULL); + remove_proc_entry("atm", init_net.proc_net); } int __init atm_proc_init(void) @@ -483,7 +484,7 @@ static struct atm_proc_entry *e; int ret; - atm_proc_root = proc_mkdir("net/atm",NULL); + atm_proc_root = proc_mkdir("atm", init_net.proc_net); if (!atm_proc_root) goto err_out; for (e = atm_proc_ents; 
e->name; e++) { diff -Nurb linux-2.6.22-try2/net/atm/pvc.c linux-2.6.22-try2-netns/net/atm/pvc.c --- linux-2.6.22-try2/net/atm/pvc.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/pvc.c 2007-12-19 22:49:18.000000000 -0500 @@ -124,10 +124,13 @@ }; -static int pvc_create(struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock,int protocol) { + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &pvc_proto_ops; - return vcc_create(sock, protocol, PF_ATMPVC); + return vcc_create(net, sock, protocol, PF_ATMPVC); } diff -Nurb linux-2.6.22-try2/net/atm/svc.c linux-2.6.22-try2-netns/net/atm/svc.c --- linux-2.6.22-try2/net/atm/svc.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/atm/svc.c 2007-12-19 22:49:18.000000000 -0500 @@ -33,7 +33,7 @@ #endif -static int svc_create(struct socket *sock,int protocol); +static int svc_create(struct net *net, struct socket *sock,int protocol); /* @@ -335,7 +335,7 @@ lock_sock(sk); - error = svc_create(newsock,0); + error = svc_create(sk->sk_net, newsock,0); if (error) goto out; @@ -636,12 +636,15 @@ }; -static int svc_create(struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock,int protocol) { int error; + if (net != &init_net) + return -EAFNOSUPPORT; + sock->ops = &svc_proto_ops; - error = vcc_create(sock, protocol, AF_ATMSVC); + error = vcc_create(net, sock, protocol, AF_ATMSVC); if (error) return error; ATM_SD(sock)->local.sas_family = AF_ATMSVC; ATM_SD(sock)->remote.sas_family = AF_ATMSVC; diff -Nurb linux-2.6.22-try2/net/ax25/af_ax25.c linux-2.6.22-try2-netns/net/ax25/af_ax25.c --- linux-2.6.22-try2/net/ax25/af_ax25.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ax25/af_ax25.c 2007-12-19 22:49:18.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include @@ -103,6 +104,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return 
NOTIFY_DONE; + /* Reject non AX.25 devices */ if (dev->type != ARPHRD_AX25) return NOTIFY_DONE; @@ -627,7 +631,7 @@ break; } - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) { res = -ENODEV; break; @@ -779,11 +783,14 @@ .obj_size = sizeof(struct sock), }; -static int ax25_create(struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; ax25_cb *ax25; + if (net != &init_net) + return -EAFNOSUPPORT; + switch (sock->type) { case SOCK_DGRAM: if (protocol == 0 || protocol == PF_AX25) @@ -829,7 +836,7 @@ return -ESOCKTNOSUPPORT; } - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) return -ENOMEM; ax25 = sk->sk_protinfo = ax25_create_cb(); @@ -854,7 +861,7 @@ struct sock *sk; ax25_cb *ax25, *oax25; - if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; if ((ax25 = ax25_create_cb()) == NULL) { @@ -1998,9 +2005,9 @@ register_netdevice_notifier(&ax25_dev_notifier); ax25_register_sysctl(); - proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); - proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); - proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); + proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); + proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); + proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); out: return rc; } @@ -2014,9 +2021,9 @@ static void __exit ax25_exit(void) { - proc_net_remove("ax25_route"); - proc_net_remove("ax25"); - proc_net_remove("ax25_calls"); + proc_net_remove(&init_net, "ax25_route"); + proc_net_remove(&init_net, "ax25"); + proc_net_remove(&init_net, "ax25_calls"); ax25_rt_free(); ax25_uid_free(); ax25_dev_free(); diff -Nurb 
linux-2.6.22-try2/net/ax25/ax25_in.c linux-2.6.22-try2-netns/net/ax25/ax25_in.c --- linux-2.6.22-try2/net/ax25/ax25_in.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ax25/ax25_in.c 2007-12-19 22:49:18.000000000 -0500 @@ -451,6 +451,11 @@ skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if ((*skb->data & 0x0F) != 0) { kfree_skb(skb); /* Not a KISS data frame */ return 0; diff -Nurb linux-2.6.22-try2/net/bluetooth/af_bluetooth.c linux-2.6.22-try2-netns/net/bluetooth/af_bluetooth.c --- linux-2.6.22-try2/net/bluetooth/af_bluetooth.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/af_bluetooth.c 2007-12-19 22:49:18.000000000 -0500 @@ -95,10 +95,13 @@ } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto) { int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; @@ -113,7 +116,7 @@ read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(sock, proto); + err = bt_proto[proto]->create(net, sock, proto); module_put(bt_proto[proto]->owner); } diff -Nurb linux-2.6.22-try2/net/bluetooth/bnep/sock.c linux-2.6.22-try2-netns/net/bluetooth/bnep/sock.c --- linux-2.6.22-try2/net/bluetooth/bnep/sock.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/bnep/sock.c 2007-12-19 22:49:18.000000000 -0500 @@ -204,7 +204,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -213,7 +213,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, 
&bnep_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/bluetooth/cmtp/sock.c linux-2.6.22-try2-netns/net/bluetooth/cmtp/sock.c --- linux-2.6.22-try2/net/bluetooth/cmtp/sock.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/cmtp/sock.c 2007-12-19 22:49:18.000000000 -0500 @@ -195,7 +195,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -204,7 +204,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/bluetooth/hci_sock.c linux-2.6.22-try2-netns/net/bluetooth/hci_sock.c --- linux-2.6.22-try2/net/bluetooth/hci_sock.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/hci_sock.c 2007-12-19 22:49:18.000000000 -0500 @@ -618,7 +618,7 @@ .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -629,7 +629,7 @@ sock->ops = &hci_sock_ops; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/bluetooth/hidp/sock.c linux-2.6.22-try2-netns/net/bluetooth/hidp/sock.c --- linux-2.6.22-try2/net/bluetooth/hidp/sock.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/hidp/sock.c 2007-12-19 22:49:18.000000000 -0500 @@ -246,7 +246,7 @@ .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int 
protocol) { struct sock *sk; @@ -255,7 +255,7 @@ if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); if (!sk) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/bluetooth/l2cap.c linux-2.6.22-try2-netns/net/bluetooth/l2cap.c --- linux-2.6.22-try2/net/bluetooth/l2cap.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/l2cap.c 2007-12-19 22:49:18.000000000 -0500 @@ -518,11 +518,11 @@ .obj_size = sizeof(struct l2cap_pinfo) }; -static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); if (!sk) return NULL; @@ -543,7 +543,7 @@ return sk; } -static int l2cap_sock_create(struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -560,7 +560,7 @@ sock->ops = &l2cap_sock_ops; - sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -1425,7 +1425,7 @@ goto response; } - sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); + sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); if (!sk) goto response; diff -Nurb linux-2.6.22-try2/net/bluetooth/rfcomm/sock.c linux-2.6.22-try2-netns/net/bluetooth/rfcomm/sock.c --- linux-2.6.22-try2/net/bluetooth/rfcomm/sock.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/rfcomm/sock.c 2007-12-19 22:49:18.000000000 -0500 @@ -282,12 +282,12 @@ .obj_size = sizeof(struct rfcomm_pinfo) }; -static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *rfcomm_sock_alloc(struct net *net, struct socket 
*sock, int proto, gfp_t prio) { struct rfcomm_dlc *d; struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); if (!sk) return NULL; @@ -323,7 +323,7 @@ return sk; } -static int rfcomm_sock_create(struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -336,7 +336,7 @@ sock->ops = &rfcomm_sock_ops; - sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -868,7 +868,7 @@ goto done; } - sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); if (!sk) goto done; diff -Nurb linux-2.6.22-try2/net/bluetooth/sco.c linux-2.6.22-try2-netns/net/bluetooth/sco.c --- linux-2.6.22-try2/net/bluetooth/sco.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bluetooth/sco.c 2007-12-19 22:49:18.000000000 -0500 @@ -414,11 +414,11 @@ .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) { struct sock *sk; - sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); + sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); if (!sk) return NULL; @@ -439,7 +439,7 @@ return sk; } -static int sco_sock_create(struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; @@ -452,7 +452,7 @@ sock->ops = &sco_sock_ops; - sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); + sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); if (!sk) return -ENOMEM; @@ -807,7 +807,7 @@ bh_lock_sock(parent); - sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); + sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); if (!sk) { 
bh_unlock_sock(parent); goto done; diff -Nurb linux-2.6.22-try2/net/bridge/br_if.c linux-2.6.22-try2-netns/net/bridge/br_if.c --- linux-2.6.22-try2/net/bridge/br_if.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_if.c 2007-12-19 22:49:18.000000000 -0500 @@ -45,7 +45,7 @@ old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ethtool(&ifr); + err = dev_ethtool(dev->nd_net, &ifr); set_fs(old_fs); if (!err) { @@ -314,7 +314,7 @@ int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(&init_net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -455,7 +455,7 @@ struct net_device *dev, *nxt; rtnl_lock(); - for_each_netdev_safe(dev, nxt) + for_each_netdev_safe(&init_net, dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); rtnl_unlock(); diff -Nurb linux-2.6.22-try2/net/bridge/br_ioctl.c linux-2.6.22-try2-netns/net/bridge/br_ioctl.c --- linux-2.6.22-try2/net/bridge/br_ioctl.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_ioctl.c 2007-12-19 22:49:18.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "br_private.h" @@ -27,7 +28,7 @@ struct net_device *dev; int i = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -90,7 +91,7 @@ if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) return -EINVAL; @@ -364,7 +365,7 @@ return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: diff -Nurb linux-2.6.22-try2/net/bridge/br_netfilter.c linux-2.6.22-try2-netns/net/bridge/br_netfilter.c --- linux-2.6.22-try2/net/bridge/br_netfilter.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_netfilter.c 
2007-12-19 22:49:18.000000000 -0500 @@ -310,6 +310,7 @@ if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct rtable *rt; struct flowi fl = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .daddr = iph->daddr, @@ -518,6 +519,10 @@ if (unlikely(!pskb_may_pull(skb, len))) goto out; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { #ifdef CONFIG_SYSCTL @@ -591,6 +596,10 @@ { struct sk_buff *skb = *pskb; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -635,6 +644,10 @@ struct net_device *parent; int pf; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (!skb->nf_bridge) return NF_ACCEPT; @@ -674,6 +687,10 @@ struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #ifdef CONFIG_SYSCTL if (!brnf_call_arptables) return NF_ACCEPT; @@ -718,6 +735,10 @@ struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (!skb->nf_bridge) return NF_ACCEPT; @@ -762,6 +783,10 @@ struct net_device *realoutdev = bridge_parent(skb->dev); int pf; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's * keep the check just to be sure... 
*/ @@ -833,6 +858,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if ((*pskb)->nf_bridge && !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; diff -Nurb linux-2.6.22-try2/net/bridge/br_netlink.c linux-2.6.22-try2-netns/net/bridge/br_netlink.c --- linux-2.6.22-try2/net/bridge/br_netlink.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_netlink.c 2007-12-19 22:49:18.000000000 -0500 @@ -12,6 +12,8 @@ #include #include +#include +#include #include "br_private.h" static inline size_t br_nlmsg_size(void) @@ -95,10 +97,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); } /* @@ -106,11 +108,15 @@ */ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct net_device *dev; int idx; + if (net != &init_net) + return 0; + idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -134,12 +140,16 @@ */ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_device *dev; struct net_bridge_port *p; u8 new_state; + if (net != &init_net) + return -EINVAL; + if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -155,7 +165,7 @@ if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(&init_net, ifm->ifi_index); if (!dev) return -ENODEV; diff -Nurb linux-2.6.22-try2/net/bridge/br_notify.c 
linux-2.6.22-try2-netns/net/bridge/br_notify.c --- linux-2.6.22-try2/net/bridge/br_notify.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_notify.c 2007-12-19 22:49:18.000000000 -0500 @@ -15,6 +15,7 @@ #include #include +#include #include "br_private.h" @@ -36,6 +37,9 @@ struct net_bridge_port *p = dev->br_port; struct net_bridge *br; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff -Nurb linux-2.6.22-try2/net/bridge/br_private.h linux-2.6.22-try2-netns/net/bridge/br_private.h --- linux-2.6.22-try2/net/bridge/br_private.h 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_private.h 2007-12-19 22:49:18.000000000 -0500 @@ -196,7 +196,7 @@ /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); /* br_netfilter.c */ #ifdef CONFIG_BRIDGE_NETFILTER diff -Nurb linux-2.6.22-try2/net/bridge/br_stp_bpdu.c linux-2.6.22-try2-netns/net/bridge/br_stp_bpdu.c --- linux-2.6.22-try2/net/bridge/br_stp_bpdu.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/br_stp_bpdu.c 2007-12-19 22:49:18.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -141,6 +142,9 @@ struct net_bridge *br; const unsigned char *buf; + if (dev->nd_net != &init_net) + goto err; + if (!p) goto err; diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebt_ulog.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebt_ulog.c --- linux-2.6.22-try2/net/bridge/netfilter/ebt_ulog.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebt_ulog.c 2007-12-19 22:49:18.000000000 -0500 @@ -301,8 +301,9 @@ spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, 
EBT_ULOG_MAXNLGROUPS, - NULL, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, + EBT_ULOG_MAXNLGROUPS, NULL, NULL, + THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtable_filter.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_filter.c --- linux-2.6.22-try2/net/bridge/netfilter/ebtable_filter.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_filter.c 2007-12-19 22:49:18.000000000 -0500 @@ -64,6 +64,10 @@ ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_filter); } diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtable_nat.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_nat.c --- linux-2.6.22-try2/net/bridge/netfilter/ebtable_nat.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_nat.c 2007-12-19 22:49:18.000000000 -0500 @@ -64,6 +64,10 @@ ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_nat); } @@ -71,6 +75,10 @@ ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ebt_do_table(hook, pskb, in, out, &frame_nat); } diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtables.c 
linux-2.6.22-try2-netns/net/bridge/netfilter/ebtables.c --- linux-2.6.22-try2/net/bridge/netfilter/ebtables.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtables.c 2007-12-19 22:49:18.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -1438,6 +1439,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + switch(cmd) { case EBT_SO_SET_ENTRIES: ret = do_replace(user, len); @@ -1457,6 +1461,9 @@ struct ebt_replace tmp; struct ebt_table *t; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; diff -Nurb linux-2.6.22-try2/net/core/Makefile linux-2.6.22-try2-netns/net/core/Makefile --- linux-2.6.22-try2/net/core/Makefile 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/Makefile 2007-12-19 22:49:18.000000000 -0500 @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o + gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff -Nurb linux-2.6.22-try2/net/core/dev.c linux-2.6.22-try2-netns/net/core/dev.c --- linux-2.6.22-try2/net/core/dev.c 2007-12-19 15:29:25.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/dev.c 2007-12-19 23:03:34.000000000 -0500 @@ -116,6 +116,7 @@ #include #include #include +#include #include #include @@ -189,25 +190,50 @@ * unregister_netdevice(), which must be called with the rtnl * semaphore held. 
*/ -LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 -static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; -static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) -static inline struct hlist_head *dev_name_hash(const char *name) +static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; } -static inline struct hlist_head *dev_index_hash(int ifindex) +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; +} + +/* Device list insertion */ +static int list_netdevice(struct net_device *dev) +{ + struct net *net = dev->nd_net; + + ASSERT_RTNL(); + + write_lock_bh(&dev_base_lock); + list_add_tail(&dev->dev_list, &net->dev_base_head); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + write_unlock_bh(&dev_base_lock); + return 0; +} + +/* Device list removal */ +static void unlist_netdevice(struct net_device *dev) +{ + ASSERT_RTNL(); + + /* Unlink dev from the device chain */ + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); } /* @@ -490,7 +516,7 @@ * If device already registered then return base of 1 * to indicate not to probe for this interface */ - if (__dev_get_by_name(name)) + if (__dev_get_by_name(&init_net, name)) return 1; for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) @@ -545,11 +571,11 @@ * careful with locks. */ -struct net_device *__dev_get_by_name(const char *name) +struct net_device *__dev_get_by_name(struct net *net, const char *name) { struct hlist_node *p; - hlist_for_each(p, dev_name_hash(name)) { + hlist_for_each(p, dev_name_hash(net, name)) { struct net_device *dev = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(dev->name, name, IFNAMSIZ)) @@ -569,12 +595,12 @@ * matching device is found.
*/ -struct net_device *dev_get_by_name(const char *name) +struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -592,11 +618,11 @@ * or @dev_base_lock. */ -struct net_device *__dev_get_by_index(int ifindex) +struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; - hlist_for_each(p, dev_index_hash(ifindex)) { + hlist_for_each(p, dev_index_hash(net, ifindex)) { struct net_device *dev = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) @@ -616,12 +642,12 @@ * dev_put to indicate they have finished with it. */ -struct net_device *dev_get_by_index(int ifindex) +struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) dev_hold(dev); read_unlock(&dev_base_lock); @@ -642,13 +668,13 @@ * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -658,12 +684,12 @@ EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *__dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev; ASSERT_RTNL(); - for_each_netdev(dev) + for_each_netdev(net, dev) if (dev->type == type) return dev; @@ -672,12 +698,12 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *dev_getfirstbyhwtype(struct net *net,
unsigned short type) { struct net_device *dev; rtnl_lock(); - dev = __dev_getfirstbyhwtype(type); + dev = __dev_getfirstbyhwtype(net, type); if (dev) dev_hold(dev); rtnl_unlock(); @@ -697,13 +723,13 @@ * dev_put to indicate they have finished with it. */ -struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) { struct net_device *dev, *ret; ret = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; @@ -740,9 +766,10 @@ } /** - * dev_alloc_name - allocate a name for a device - * @dev: device + * __dev_alloc_name - allocate a name for a device + * @net: network namespace to allocate the device name in * @name: name format string + * @buf: scratch buffer and result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -753,10 +780,9 @@ * Returns the number of the unit assigned or a negative errno code. 
*/ -int dev_alloc_name(struct net_device *dev, const char *name) +static int __dev_alloc_name(struct net *net, const char *name, char *buf) { int i = 0; - char buf[IFNAMSIZ]; const char *p; const int max_netdevices = 8*PAGE_SIZE; long *inuse; @@ -777,14 +803,14 @@ if (!inuse) return -ENOMEM; - for_each_netdev(d) { + for_each_netdev(net, d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, sizeof(buf), name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!strncmp(buf, d->name, IFNAMSIZ)) set_bit(i, inuse); } @@ -793,11 +819,9 @@ free_page((unsigned long) inuse); } - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strlcpy(dev->name, buf, IFNAMSIZ); + snprintf(buf, IFNAMSIZ, name, i); + if (!__dev_get_by_name(net, buf)) return i; - } /* It is possible to run out of possible slots * when the name is long and there isn't enough space left @@ -806,6 +830,34 @@ return -ENFILE; } +/** + * dev_alloc_name - allocate a name for a device + * @dev: device + * @name: name format string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. 
+ */ + +int dev_alloc_name(struct net_device *dev, const char *name) +{ + char buf[IFNAMSIZ]; + struct net *net; + int ret; + + BUG_ON(!dev->nd_net); + net = dev->nd_net; + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; +} + /** * dev_change_name - change name of a device @@ -818,9 +870,12 @@ int dev_change_name(struct net_device *dev, char *newname) { int err = 0; + struct net *net; ASSERT_RTNL(); + BUG_ON(!dev->nd_net); + net = dev->nd_net; if (dev->flags & IFF_UP) return -EBUSY; @@ -833,7 +888,7 @@ return err; strcpy(newname, dev->name); } - else if (__dev_get_by_name(newname)) + else if (__dev_get_by_name(net, newname)) return -EEXIST; else { if (strncmp(newname, dev->name, IFNAMSIZ)) @@ -844,7 +899,7 @@ device_rename(&dev->dev, dev->name); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); return err; @@ -888,12 +943,12 @@ * available in this kernel then it becomes a nop. */ -void dev_load(const char *name) +void dev_load(struct net *net, const char *name) { struct net_device *dev; read_lock(&dev_base_lock); - dev = __dev_get_by_name(name); + dev = __dev_get_by_name(net, name); read_unlock(&dev_base_lock); if (!dev && capable(CAP_SYS_MODULE)) @@ -1036,6 +1091,8 @@ } +static int dev_boot_phase = 1; + /* * Device change register/unregister. These are not inline or static * as we export them to the world. 
@@ -1062,14 +1119,17 @@ rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); - if (!err) { - for_each_netdev(dev) { + if (!err && !dev_boot_phase) { + struct net *net; + for_each_net(net) { + for_each_netdev(net, dev) { nb->notifier_call(nb, NETDEV_REGISTER, dev); if (dev->flags & IFF_UP) nb->notifier_call(nb, NETDEV_UP, dev); } } + } rtnl_unlock(); return err; } @@ -1103,9 +1163,9 @@ * are as for raw_notifier_call_chain(). */ -int call_netdevice_notifiers(unsigned long val, void *v) +int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - return raw_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, dev); } /* When > 0 there are consumers of rx skb time stamps */ @@ -2083,7 +2143,7 @@ * match. --pb */ -static int dev_ifname(struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -2096,7 +2156,7 @@ return -EFAULT; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifr.ifr_ifindex); + dev = __dev_get_by_index(net, ifr.ifr_ifindex); if (!dev) { read_unlock(&dev_base_lock); return -ENODEV; @@ -2116,7 +2176,7 @@ * Thus we will need a 'compatibility mode'. 
*/ -static int dev_ifconf(char __user *arg) +static int dev_ifconf(struct net *net, char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2140,7 +2200,7 @@ */ total = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (!nx_dev_visible(current->nx_info, dev)) continue; for (i = 0; i < NPROTO; i++) { @@ -2176,6 +2236,7 @@ */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; loff_t off; struct net_device *dev; @@ -2184,7 +2245,7 @@ return SEQ_START_TOKEN; off = 1; - for_each_netdev(dev) + for_each_netdev(net, dev) if (off++ == *pos) return dev; @@ -2193,9 +2254,10 @@ void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return v == SEQ_START_TOKEN ? - first_net_device() : next_net_device((struct net_device *)v); + first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -2294,7 +2356,22 @@ static int dev_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int dev_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_seq_fops = { @@ -2302,7 +2379,7 @@ .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_seq_release, }; static const struct seq_operations softnet_seq_ops = { @@ -2454,30 +2531,49 @@ }; -static int __init dev_proc_init(void) +static int dev_proc_net_init(struct net *net) { int rc = -ENOMEM; - if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) goto out; - if 
(!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) goto out_dev; - if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) - goto out_dev2; - - if (wext_proc_init()) + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) goto out_softnet; + + if (wext_proc_init(net)) + goto out_ptype; rc = 0; out: return rc; +out_ptype: + proc_net_remove(net, "ptype"); out_softnet: - proc_net_remove("ptype"); -out_dev2: - proc_net_remove("softnet_stat"); + proc_net_remove(net, "softnet_stat"); out_dev: - proc_net_remove("dev"); + proc_net_remove(net, "dev"); goto out; } + +static void dev_proc_net_exit(struct net *net) +{ + wext_proc_exit(net); + + proc_net_remove(net, "ptype"); + proc_net_remove(net, "softnet_stat"); + proc_net_remove(net, "dev"); +} + +static struct pernet_operations dev_proc_ops = { + .init = dev_proc_net_init, + .exit = dev_proc_net_exit, +}; + +static int __init dev_proc_init(void) +{ + return register_pernet_subsys(&dev_proc_ops); +} #else #define dev_proc_init() 0 #endif /* CONFIG_PROC_FS */ @@ -2711,10 +2807,10 @@ /* * Perform the SIOCxIFxxx calls. */ -static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -2867,7 +2963,7 @@ * positive or a negative errno code on error. 
*/ -int dev_ioctl(unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -2880,12 +2976,12 @@ if (cmd == SIOCGIFCONF) { rtnl_lock(); - ret = dev_ifconf((char __user *) arg); + ret = dev_ifconf(net, (char __user *) arg); rtnl_unlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq __user *)arg); + return dev_ifname(net, (struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; @@ -2915,9 +3011,9 @@ case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); read_lock(&dev_base_lock); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); read_unlock(&dev_base_lock); if (!ret) { if (colon) @@ -2929,9 +3025,9 @@ return ret; case SIOCETHTOOL: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(&ifr); + ret = dev_ethtool(net, &ifr); rtnl_unlock(); if (!ret) { if (colon) @@ -2953,9 +3049,9 @@ case SIOCSIFNAME: if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret) { if (colon) @@ -2994,9 +3090,9 @@ /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); return ret; @@ -3016,9 +3112,9 @@ if (cmd == SIOCWANDEV || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(&ifr, cmd); + ret = dev_ifsioc(net, &ifr, cmd); rtnl_unlock(); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) @@ -3027,7 +3123,7 @@ } /* Take care of Wireless Extensions */ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) - return wext_handle_ioctl(&ifr, 
cmd, arg); + return wext_handle_ioctl(net, &ifr, cmd, arg); return -EINVAL; } } @@ -3040,19 +3136,17 @@ * number. The caller must hold the rtnl semaphore or the * dev_base_lock to be sure it remains unique. */ -static int dev_new_index(void) +static int dev_new_index(struct net *net) { static int ifindex; for (;;) { if (++ifindex <= 0) ifindex = 1; - if (!__dev_get_by_index(ifindex)) + if (!__dev_get_by_index(net, ifindex)) return ifindex; } } -static int dev_boot_phase = 1; - /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); @@ -3086,6 +3180,7 @@ struct hlist_head *head; struct hlist_node *p; int ret; + struct net *net; BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3094,6 +3189,8 @@ /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); + BUG_ON(!dev->nd_net); + net = dev->nd_net; spin_lock_init(&dev->queue_lock); spin_lock_init(&dev->_xmit_lock); @@ -3118,12 +3215,12 @@ goto out; } - dev->ifindex = dev_new_index(); + dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; /* Check for existence of name */ - head = dev_name_hash(dev->name); + head = dev_name_hash(net, dev->name); hlist_for_each(p, head) { struct net_device *d = hlist_entry(p, struct net_device, name_hlist); @@ -3200,12 +3297,8 @@ set_bit(__LINK_STATE_PRESENT, &dev->state); dev_init_scheduler(dev); - write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &dev_base_head); - hlist_add_head(&dev->name_hlist, head); - hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - write_unlock_bh(&dev_base_lock); + list_netdevice(dev); /* Notify protocols, that a new device appeared. 
*/ raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); @@ -3415,6 +3508,7 @@ dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); dev->padded = (char *)dev - (char *)p; + dev->nd_net = &init_net; if (sizeof_priv) dev->priv = netdev_priv(dev); @@ -3493,11 +3587,7 @@ dev_close(dev); /* And unlink it from device chain. */ - write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - write_unlock_bh(&dev_base_lock); + unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; @@ -3555,6 +3645,122 @@ EXPORT_SYMBOL(unregister_netdev); +/** + * dev_change_net_namespace - move device to different network namespace + * @dev: device + * @net: network namespace + * @pat: If not NULL name pattern to try if the current device name + * is already taken in the destination network namespace. + * + * This function shuts down a device interface and moves it + * to a new network namespace. On success 0 is returned, on + * failure a negative errno code is returned. + * + * Callers must hold the rtnl semaphore. + */ + +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) +{ + char buf[IFNAMSIZ]; + const char *destname; + int err; + + ASSERT_RTNL(); + + /* Don't allow namespace local devices to be moved. */ + err = -EINVAL; + if (dev->features & NETIF_F_NETNS_LOCAL) + goto out; + + /* Ensure the device has been registered */ + err = -EINVAL; + if (dev->reg_state != NETREG_REGISTERED) + goto out; + + /* Get out if there is nothing to do */ + err = 0; + if (dev->nd_net == net) + goto out; + + /* Pick the destination device name, and ensure + * we can use it in the destination network namespace.
+ */ + err = -EEXIST; + destname = dev->name; + if (__dev_get_by_name(net, destname)) { + /* We get here if we can't use the current device name */ + if (!pat) + goto out; + if (!dev_valid_name(pat)) + goto out; + if (strchr(pat, '%')) { + if (__dev_alloc_name(net, pat, buf) < 0) + goto out; + destname = buf; + } else + destname = pat; + if (__dev_get_by_name(net, destname)) + goto out; + } + + /* + * And now a mini version of register_netdevice unregister_netdevice. + */ + + /* If device is running close it first. */ + if (dev->flags & IFF_UP) + dev_close(dev); + + /* And unlink it from device chain */ + err = -ENODEV; + unlist_netdevice(dev); + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the multicast chain + */ + dev_mc_discard(dev); + + /* Actually switch the network namespace */ + dev->nd_net = net; + + /* Assign the new device name */ + if (destname != dev->name) + strcpy(dev->name, destname); + + /* If there is an ifindex conflict assign a new one */ + if (__dev_get_by_index(net, dev->ifindex)) { + int iflink = (dev->iflink == dev->ifindex); + dev->ifindex = dev_new_index(net); + if (iflink) + dev->iflink = dev->ifindex; + } + + /* Fixup sysfs */ + err = device_rename(&dev->dev, dev->name); + BUG_ON(err); + + /* Add the device back in the hashes */ + list_netdevice(dev); + + /* Notify protocols, that a new device appeared. 
*/ + call_netdevice_notifiers(NETDEV_REGISTER, dev); + + synchronize_net(); + err = 0; +out: + return err; +} + static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3745,6 +3951,75 @@ } EXPORT_SYMBOL(netdev_compute_features); +/* Initialize per network namespace state */ +static int netdev_init(struct net *net) +{ + int i; + INIT_LIST_HEAD(&net->dev_base_head); + /* dev_base_lock is global (DEFINE_RWLOCK); never re-initialise it per namespace */ + + net->dev_name_head = kmalloc( + sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL); + if (!net->dev_name_head) + return -ENOMEM; + + net->dev_index_head = kmalloc( + sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL); + if (!net->dev_index_head) { + kfree(net->dev_name_head); + return -ENOMEM; + } + + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&net->dev_name_head[i]); + + for (i = 0; i < NETDEV_HASHENTRIES; i++) + INIT_HLIST_HEAD(&net->dev_index_head[i]); + + return 0; +} + +static void netdev_exit(struct net *net) +{ + kfree(net->dev_name_head); + kfree(net->dev_index_head); +} + +static struct pernet_operations netdev_net_ops = { + .init = netdev_init, + .exit = netdev_exit, +}; + +static void default_device_exit(struct net *net) +{ + struct net_device *dev, *next; + /* + * Push all migratable network devices back to the + * initial network namespace + */ + rtnl_lock(); + for_each_netdev_safe(net, dev, next) { + int err; + + /* Ignore unmoveable devices (i.e. loopback) */ + if (dev->features & NETIF_F_NETNS_LOCAL) + continue; + + /* Push remaining network devices to init_net */ + err = dev_change_net_namespace(dev, &init_net, "dev%d"); + if (err) { + printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + __func__, dev->name, err); + unregister_netdevice(dev); + } + } + rtnl_unlock(); +} + +static struct pernet_operations default_device_ops = { + .exit = default_device_exit, +}; + /* * Initialize the DEV module.
At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -3772,11 +4047,11 @@ for (i = 0; i < 16; i++) INIT_LIST_HEAD(&ptype_base[i]); - for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) - INIT_HLIST_HEAD(&dev_name_head[i]); + if (register_pernet_subsys(&netdev_net_ops)) + goto out; - for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) - INIT_HLIST_HEAD(&dev_index_head[i]); + if (register_pernet_device(&default_device_ops)) + goto out; /* * Initialise the packet receive queues. diff -Nurb linux-2.6.22-try2/net/core/dev_mcast.c linux-2.6.22-try2-netns/net/core/dev_mcast.c --- linux-2.6.22-try2/net/core/dev_mcast.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/dev_mcast.c 2007-12-19 22:49:18.000000000 -0500 @@ -46,6 +46,7 @@ #include #include #include +#include /* @@ -219,11 +220,12 @@ #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq->private; struct net_device *dev; loff_t off = 0; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (off++ == *pos) return dev; } @@ -272,7 +274,22 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &dev_mc_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &dev_mc_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int dev_mc_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations dev_mc_seq_fops = { @@ -280,14 +297,31 @@ .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = dev_mc_seq_release, }; #endif +static int dev_mc_net_init(struct net *net) +{ + if (!proc_net_fops_create(net, "dev_mcast", 0, 
&dev_mc_seq_fops)) + return -ENOMEM; + return 0; +} + +static void dev_mc_net_exit(struct net *net) +{ + proc_net_remove(net, "dev_mcast"); +} + +static struct pernet_operations dev_mc_net_ops = { + .init = dev_mc_net_init, + .exit = dev_mc_net_exit, +}; + void __init dev_mcast_init(void) { - proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); + register_pernet_subsys(&dev_mc_net_ops); } EXPORT_SYMBOL(dev_mc_add); diff -Nurb linux-2.6.22-try2/net/core/dst.c linux-2.6.22-try2-netns/net/core/dst.c --- linux-2.6.22-try2/net/core/dst.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/dst.c 2007-12-19 22:49:18.000000000 -0500 @@ -15,7 +15,9 @@ #include #include #include +#include +#include #include /* Locking strategy: @@ -236,13 +238,14 @@ if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + struct net *net = dev->nd_net; + dst->dev = &net->loopback_dev; + dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = &loopback_dev; + dst->neighbour->dev = &net->loopback_dev; dev_put(dev); - dev_hold(&loopback_dev); + dev_hold(dst->neighbour->dev); } } } @@ -252,6 +255,9 @@ struct net_device *dev = ptr; struct dst_entry *dst; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: case NETDEV_DOWN: diff -Nurb linux-2.6.22-try2/net/core/ethtool.c linux-2.6.22-try2-netns/net/core/ethtool.c --- linux-2.6.22-try2/net/core/ethtool.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/ethtool.c 2007-12-19 22:49:18.000000000 -0500 @@ -798,9 +798,9 @@ /* The main entry point in this file. 
Called from net/core/dev.c */ -int dev_ethtool(struct ifreq *ifr) +int dev_ethtool(struct net *net, struct ifreq *ifr) { - struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; diff -Nurb linux-2.6.22-try2/net/core/fib_rules.c linux-2.6.22-try2-netns/net/core/fib_rules.c --- linux-2.6.22-try2/net/core/fib_rules.c 2007-12-19 13:37:56.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/fib_rules.c 2007-12-19 22:49:18.000000000 -0500 @@ -11,21 +11,20 @@ #include #include #include +#include +#include #include -static LIST_HEAD(rules_ops); -static DEFINE_SPINLOCK(rules_mod_lock); - -static void notify_rule_change(int event, struct fib_rule *rule, +static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); -static struct fib_rules_ops *lookup_rules_ops(int family) +static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; @@ -47,10 +46,10 @@ static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } -int fib_rules_register(struct fib_rules_ops *ops) +int fib_rules_register(struct net *net, struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; @@ -63,15 +62,16 @@ ops->action == NULL) return -EINVAL; - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) + spin_lock(&net->rules_mod_lock); + list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; - list_add_tail_rcu(&ops->list, &rules_ops); + hold_net(net); + list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: - 
spin_unlock(&rules_mod_lock); + spin_unlock(&net->rules_mod_lock); return err; } @@ -88,13 +88,13 @@ } } -int fib_rules_unregister(struct fib_rules_ops *ops) +int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops) { int err = 0; struct fib_rules_ops *o; - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) { + spin_lock(&net->rules_mod_lock); + list_for_each_entry(o, &net->rules_ops, list) { if (o == ops) { list_del_rcu(&o->list); cleanup_ops(ops); @@ -104,9 +104,11 @@ err = -ENOENT; out: - spin_unlock(&rules_mod_lock); + spin_unlock(&net->rules_mod_lock); synchronize_rcu(); + if (!err) + release_net(net); return err; } @@ -197,6 +199,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r, *last = NULL; @@ -206,7 +209,7 @@ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -234,7 +237,7 @@ rule->ifindex = -1; nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); + dev = __dev_get_by_name(net, rule->ifname); if (dev) rule->ifindex = dev->ifindex; } @@ -256,7 +259,7 @@ rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) - rule->pref = ops->default_pref(); + rule->pref = ops->default_pref(ops); err = -EINVAL; if (tb[FRA_GOTO]) { @@ -319,7 +322,7 @@ else list_add_rcu(&rule->list, ops->rules_list); - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -333,6 +336,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr 
*frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; @@ -342,7 +346,7 @@ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -408,7 +412,7 @@ } synchronize_rcu(); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, + notify_rule_change(net, RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).pid); fib_rule_put(rule); flush_route_cache(ops); @@ -514,13 +518,17 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct fib_rules_ops *ops; int idx = 0, family; + if (net != &init_net) + return -EINVAL; + family = rtnl_msg_family(cb->nlh); if (family != AF_UNSPEC) { /* Protocol specific dump request */ - ops = lookup_rules_ops(family); + ops = lookup_rules_ops(net, family); if (ops == NULL) return -EAFNOSUPPORT; @@ -528,7 +536,7 @@ } rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (idx < cb->args[0] || !try_module_get(ops->owner)) goto skip; @@ -545,7 +553,7 @@ return skb->len; } -static void notify_rule_change(int event, struct fib_rule *rule, +static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid) { @@ -563,10 +571,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(ops->nlgroup, err); + rtnl_set_sk_err(net, ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) @@ -594,19 +602,23 @@ void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; struct fib_rules_ops *ops; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + ASSERT_RTNL(); rcu_read_lock(); switch 
(event) { case NETDEV_REGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) attach_rules(ops->rules_list, dev); break; case NETDEV_UNREGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) detach_rules(ops->rules_list, dev); break; } @@ -620,13 +632,28 @@ .notifier_call = fib_rules_event, }; +static int fib_rules_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->rules_ops); + spin_lock_init(&net->rules_mod_lock); + return 0; +} + +static struct pernet_operations fib_rules_net_ops = { + .init = fib_rules_net_init, +}; + static int __init fib_rules_init(void) { + int ret; rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - return register_netdevice_notifier(&fib_rules_notifier); + ret = register_pernet_subsys(&fib_rules_net_ops); + if (!ret) + ret = register_netdevice_notifier(&fib_rules_notifier); + return ret; } subsys_initcall(fib_rules_init); diff -Nurb linux-2.6.22-try2/net/core/neighbour.c linux-2.6.22-try2-netns/net/core/neighbour.c --- linux-2.6.22-try2/net/core/neighbour.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/neighbour.c 2007-12-19 22:49:18.000000000 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include #define NEIGH_DEBUG 1 @@ -361,7 +362,7 @@ return n; } -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net * net, const void *pkey) { struct neighbour *n; int key_len = tbl->key_len; @@ -371,7 +372,8 @@ read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { - if (!memcmp(n->primary_key, pkey, key_len)) { + if (!memcmp(n->primary_key, pkey, key_len) && + (net == n->dev->nd_net)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ 
-449,7 +451,8 @@ goto out; } -struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, + struct net * net, const void *pkey, struct net_device *dev, int creat) { struct pneigh_entry *n; @@ -465,6 +468,7 @@ for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && (n->dev == dev || !n->dev)) { read_unlock_bh(&tbl->lock); goto out; @@ -479,6 +483,7 @@ if (!n) goto out; + n->net = hold_net(net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -501,7 +506,7 @@ } -int pneigh_delete(struct neigh_table *tbl, const void *pkey, +int pneigh_delete(struct neigh_table *tbl, struct net * net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; @@ -516,13 +521,15 @@ write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { - if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { + if (!memcmp(n->key, pkey, key_len) && n->dev == dev && + (n->net == net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); return 0; } @@ -545,6 +552,7 @@ tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); continue; } @@ -1266,12 +1274,37 @@ spin_unlock(&tbl->proxy_queue.lock); } +static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, + struct net * net, int ifindex) +{ + struct neigh_parms *p; + + for (p = &tbl->parms; p; p = p->next) { + if (p->net != net) + continue; + if ((p->dev && p->dev->ifindex == ifindex) || + (!p->dev && !ifindex)) + return p; + } + + return NULL; +} struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); + struct neigh_parms *p, *ref; + struct net * net; + + net = &init_net; + if (dev) + net = 
dev->nd_net; + + ref = lookup_neigh_params(tbl, net, 0); + if (!ref) + return NULL; + p = kmemdup(ref, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); @@ -1287,6 +1320,7 @@ dev_hold(dev); p->dev = dev; } + p->net = hold_net(net); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1296,6 +1330,20 @@ return p; } +struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, + struct net *net) +{ + struct neigh_parms *parms; + if (net != &init_net) { + parms = neigh_parms_alloc(NULL, tbl); + release_net(parms->net); + parms->net = hold_net(net); + } + else + parms = neigh_parms_clone(&tbl->parms); + return parms; +} + static void neigh_rcu_free_parms(struct rcu_head *head) { struct neigh_parms *parms = @@ -1328,6 +1376,7 @@ void neigh_parms_destroy(struct neigh_parms *parms) { + release_net(parms->net); kfree(parms); } @@ -1338,6 +1387,7 @@ unsigned long now = jiffies; unsigned long phsize; + tbl->parms.net = &init_net; atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = @@ -1353,7 +1403,7 @@ panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); + tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); tbl->pde->proc_fops = &neigh_stat_seq_fops; @@ -1443,6 +1493,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; @@ -1458,7 +1509,7 @@ ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1477,7 +1528,7 @@ goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, nla_data(dst_attr), dev); + err = pneigh_delete(tbl, 
net, nla_data(dst_attr), dev); goto out_dev_put; } @@ -1508,6 +1559,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; @@ -1524,7 +1576,7 @@ ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); + dev = dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1553,7 +1605,7 @@ struct pneigh_entry *pn; err = -ENOBUFS; - pn = pneigh_lookup(tbl, dst, dev, 1); + pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm->ndm_flags; err = 0; @@ -1748,19 +1800,6 @@ return -EMSGSIZE; } -static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, - int ifindex) -{ - struct neigh_parms *p; - - for (p = &tbl->parms; p; p = p->next) - if ((p->dev && p->dev->ifindex == ifindex) || - (!p->dev && !ifindex)) - return p; - - return NULL; -} - static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, @@ -1788,6 +1827,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1837,7 +1877,7 @@ if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); - p = lookup_neigh_params(tbl, ifindex); + p = lookup_neigh_params(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; @@ -1912,6 +1952,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -1931,8 +1972,11 @@ NLM_F_MULTI) <= 0) break; - for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { - if (nidx < neigh_skip) + for (nidx = 0, p = tbl->parms.next; p; p = p->next) { + if (net 
!= p->net) + continue; + + if (nidx++ < neigh_skip) continue; if (neightbl_fill_param_info(skb, tbl, p, @@ -2003,6 +2047,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { + struct net * net = skb->sk->sk_net; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; @@ -2013,8 +2058,12 @@ continue; if (h > s_h) s_idx = 0; - for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { - if (idx < s_idx) + for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { + int lidx; + if (n->dev->nd_net != net) + continue; + lidx = idx++; + if (lidx < s_idx) continue; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -2109,6 +2158,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; struct neighbour *n = NULL; int bucket = state->bucket; @@ -2118,6 +2168,8 @@ n = tbl->hash_buckets[bucket]; while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; @@ -2147,6 +2199,7 @@ loff_t *pos) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; if (state->neigh_sub_iter) { @@ -2158,6 +2211,8 @@ while (1) { while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) @@ -2204,6 +2259,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2211,6 +2267,8 @@ state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ 
-2224,6 +2282,7 @@ loff_t *pos) { struct neigh_seq_state *state = seq->private; + struct net * net = state->net; struct neigh_table *tbl = state->tbl; pn = pn->next; @@ -2231,6 +2290,8 @@ if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ -2433,6 +2494,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) { + struct net * net = n->dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -2447,10 +2509,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_NEIGH, err); + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } void neigh_app_ns(struct neighbour *n) @@ -2648,6 +2710,7 @@ if (!t) return -ENOBUFS; + t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; @@ -2716,7 +2779,7 @@ t->neigh_proto_dir[0].child = t->neigh_neigh_dir; t->neigh_root_dir[0].child = t->neigh_proto_dir; - t->sysctl_header = register_sysctl_table(t->neigh_root_dir); + t->sysctl_header = register_net_sysctl_table(p->net, t->neigh_root_dir); if (!t->sysctl_header) { err = -ENOBUFS; goto free_procname; @@ -2738,7 +2801,7 @@ if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->neigh_dev[0].procname); kfree(t); } @@ -2771,6 +2834,7 @@ EXPORT_SYMBOL(neigh_lookup); EXPORT_SYMBOL(neigh_lookup_nodev); EXPORT_SYMBOL(neigh_parms_alloc); +EXPORT_SYMBOL(neigh_parms_alloc_default); EXPORT_SYMBOL(neigh_parms_release); EXPORT_SYMBOL(neigh_rand_reach_time); EXPORT_SYMBOL(neigh_resolve_output); diff -Nurb linux-2.6.22-try2/net/core/net-sysfs.c linux-2.6.22-try2-netns/net/core/net-sysfs.c --- 
linux-2.6.22-try2/net/core/net-sysfs.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/net-sysfs.c 2007-12-19 22:49:18.000000000 -0500 @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -29,16 +31,16 @@ } /* use same locking rules as GIF* ioctl's */ -static ssize_t netdev_show(const struct device *dev, +static ssize_t netdev_show(const struct device *device, struct device_attribute *attr, char *buf, ssize_t (*format)(const struct net_device *, char *)) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = (*format)(net, buf); + if (dev_isalive(dev)) + ret = (*format)(dev, buf); read_unlock(&dev_base_lock); return ret; @@ -46,9 +48,9 @@ /* generate a show function for simple field */ #define NETDEVICE_SHOW(field, format_string) \ -static ssize_t format_##field(const struct net_device *net, char *buf) \ +static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ - return sprintf(buf, format_string, net->field); \ + return sprintf(buf, format_string, dev->field); \ } \ static ssize_t show_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -58,11 +60,11 @@ /* use same locking and permission rules as SIF* ioctl's */ -static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, +static ssize_t netdev_store(struct device *device, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); char *endp; unsigned long new; int ret = -EINVAL; @@ -75,8 +77,8 @@ goto err; rtnl_lock(); - if (dev_isalive(net)) { - if ((ret = (*set)(net, new)) == 0) + if (dev_isalive(dev)) { + if ((ret = (*set)(dev, new)) == 0) ret = len; } rtnl_unlock(); @@ -103,45 +105,45 @@ return cp - buf; } -static 
ssize_t show_address(struct device *dev, struct device_attribute *attr, +static ssize_t show_address(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (dev_isalive(net)) - ret = format_addr(buf, net->dev_addr, net->addr_len); + if (dev_isalive(dev)) + ret = format_addr(buf, dev->dev_addr, dev->addr_len); read_unlock(&dev_base_lock); return ret; } -static ssize_t show_broadcast(struct device *dev, +static ssize_t show_broadcast(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *net = to_net_dev(dev); - if (dev_isalive(net)) - return format_addr(buf, net->broadcast, net->addr_len); + struct net_device *dev = to_net_dev(device); + if (dev_isalive(dev)) + return format_addr(buf, dev->broadcast, dev->addr_len); return -EINVAL; } -static ssize_t show_carrier(struct device *dev, +static ssize_t show_carrier(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *netdev = to_net_dev(dev); - if (netif_running(netdev)) { - return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); + struct net_device *dev = to_net_dev(device); + if (netif_running(dev)) { + return sprintf(buf, fmt_dec, !!netif_carrier_ok(dev)); } return -EINVAL; } -static ssize_t show_dormant(struct device *dev, +static ssize_t show_dormant(struct device *device, struct device_attribute *attr, char *buf) { - struct net_device *netdev = to_net_dev(dev); + struct net_device *dev = to_net_dev(device); - if (netif_running(netdev)) - return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); + if (netif_running(dev)) + return sprintf(buf, fmt_dec, !!netif_dormant(dev)); return -EINVAL; } @@ -156,15 +158,15 @@ "up" }; -static ssize_t show_operstate(struct device *dev, +static ssize_t show_operstate(struct device *device, struct device_attribute *attr, char *buf) { - const struct net_device 
*netdev = to_net_dev(dev); + const struct net_device *dev = to_net_dev(device); unsigned char operstate; read_lock(&dev_base_lock); - operstate = netdev->operstate; - if (!netif_running(netdev)) + operstate = dev->operstate; + if (!netif_running(dev)) operstate = IF_OPER_DOWN; read_unlock(&dev_base_lock); @@ -177,57 +179,57 @@ /* read-write attributes */ NETDEVICE_SHOW(mtu, fmt_dec); -static int change_mtu(struct net_device *net, unsigned long new_mtu) +static int change_mtu(struct net_device *dev, unsigned long new_mtu) { - return dev_set_mtu(net, (int) new_mtu); + return dev_set_mtu(dev, (int) new_mtu); } -static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, +static ssize_t store_mtu(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_mtu); + return netdev_store(device, attr, buf, len, change_mtu); } NETDEVICE_SHOW(flags, fmt_hex); -static int change_flags(struct net_device *net, unsigned long new_flags) +static int change_flags(struct net_device *dev, unsigned long new_flags) { - return dev_change_flags(net, (unsigned) new_flags); + return dev_change_flags(dev, (unsigned) new_flags); } -static ssize_t store_flags(struct device *dev, struct device_attribute *attr, +static ssize_t store_flags(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_flags); + return netdev_store(device, attr, buf, len, change_flags); } NETDEVICE_SHOW(tx_queue_len, fmt_ulong); -static int change_tx_queue_len(struct net_device *net, unsigned long new_len) +static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - net->tx_queue_len = new_len; + dev->tx_queue_len = new_len; return 0; } -static ssize_t store_tx_queue_len(struct device *dev, +static ssize_t store_tx_queue_len(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return 
netdev_store(dev, attr, buf, len, change_tx_queue_len); + return netdev_store(device, attr, buf, len, change_tx_queue_len); } NETDEVICE_SHOW(weight, fmt_dec); -static int change_weight(struct net_device *net, unsigned long new_weight) +static int change_weight(struct net_device *dev, unsigned long new_weight) { - net->weight = new_weight; + dev->weight = new_weight; return 0; } -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, +static ssize_t store_weight(struct device *device, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, change_weight); + return netdev_store(device, attr, buf, len, change_weight); } static struct device_attribute net_class_attributes[] = { @@ -447,6 +449,23 @@ kfree((char *)dev - dev->padded); } +static const void *net_current_tag(void) +{ + return current->nsproxy->net_ns; +} + +static const void *net_kobject_tag(struct kobject *kobj) +{ + struct net_device *dev; + dev = container_of(kobj, struct net_device, dev.kobj); + return dev->nd_net; +} + +static const struct shadow_dir_operations net_shadow_dir_operations = { + .current_tag = net_current_tag, + .kobject_tag = net_kobject_tag, +}; + static struct class net_class = { .name = "net", .dev_release = netdev_release, @@ -454,42 +473,43 @@ #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif + .shadow_ops = &net_shadow_dir_operations, }; /* Delete sysfs entries but hold kobject reference until after all * netdev references are gone. */ -void netdev_unregister_sysfs(struct net_device * net) +void netdev_unregister_sysfs(struct net_device * dev) { - struct device *dev = &(net->dev); + struct device *device = &(dev->dev); - kobject_get(&dev->kobj); - device_del(dev); + kobject_get(&device->kobj); + device_del(device); } /* Create sysfs entries for network device. 
*/ -int netdev_register_sysfs(struct net_device *net) +int netdev_register_sysfs(struct net_device *dev) { - struct device *dev = &(net->dev); - struct attribute_group **groups = net->sysfs_groups; + struct device *device = &(dev->dev); + struct attribute_group **groups = dev->sysfs_groups; - device_initialize(dev); - dev->class = &net_class; - dev->platform_data = net; - dev->groups = groups; + device_initialize(device); + device->class = &net_class; + device->platform_data = dev; + device->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + strlcpy(device->bus_id, dev->name, BUS_ID_SIZE); - if (net->get_stats) + if (dev->get_stats) *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT - if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) + if (dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) *groups++ = &wireless_group; #endif - return device_add(dev); + return device_add(device); } int netdev_sysfs_init(void) diff -Nurb linux-2.6.22-try2/net/core/net_namespace.c linux-2.6.22-try2-netns/net/core/net_namespace.c --- linux-2.6.22-try2/net/core/net_namespace.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/net_namespace.c 2007-12-19 22:49:18.000000000 -0500 @@ -0,0 +1,332 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Our network namespace constructor/destructor lists + */ + +static LIST_HEAD(pernet_list); +static struct list_head *first_device = &pernet_list; +static DEFINE_MUTEX(net_mutex); + +static DEFINE_MUTEX(net_list_mutex); +LIST_HEAD(net_namespace_list); + +static struct kmem_cache *net_cachep; + +struct net init_net; +EXPORT_SYMBOL_GPL(init_net); + +void net_lock(void) +{ + mutex_lock(&net_list_mutex); +} + +void net_unlock(void) +{ + mutex_unlock(&net_list_mutex); +} + +static struct net *net_alloc(void) +{ + return kmem_cache_alloc(net_cachep, GFP_KERNEL); +} + +static void net_free(struct net 
*net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct list_head *ptr; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + net_lock(); + list_del(&net->list); + net_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_prev(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +/* + * setup_net runs the initializers for the network namespace object. + */ +static int setup_net(struct net *net) +{ + /* Must be called with net_mutex held */ + struct pernet_operations *ops; + struct list_head *ptr; + int error; + + memset(net, 0, sizeof(struct net)); + atomic_set(&net->count, 1); + atomic_set(&net->use_count, 0); + + error = 0; + list_for_each(ptr, &pernet_list) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->init) { + error = ops->init(net); + if (error < 0) + goto out_undo; + } + } +out: + return error; +out_undo: + /* Walk through the list backwards calling the exit functions + * for the pernet modules whose init functions did not fail. 
+ */ + for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) { + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->exit) + ops->exit(net); + } + goto out; +} + +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + struct net *new_net = NULL; + int err; + + get_net(old_net); + + if (!(flags & CLONE_NEWNET)) + return old_net; + + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + err = -ENOMEM; + new_net = net_alloc(); + if (!new_net) + goto out; + + mutex_lock(&net_mutex); + err = setup_net(new_net); + if (err) + goto out_unlock; + + net_lock(); + list_add_tail(&new_net->list, &net_namespace_list); + net_unlock(); + + +out_unlock: + mutex_unlock(&net_mutex); +out: + put_net(old_net); + if (err) { + net_free(new_net); + new_net = ERR_PTR(err); + } + return new_net; +} + +static int __init net_ns_init(void) +{ + int err; + + printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); + net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), + SMP_CACHE_BYTES, + SLAB_PANIC, NULL, NULL); + mutex_lock(&net_mutex); + err = setup_net(&init_net); + + net_lock(); + list_add_tail(&init_net.list, &net_namespace_list); + net_unlock(); + + mutex_unlock(&net_mutex); + if (err) + panic("Could not setup the initial network namespace"); + + return 0; +} + +pure_initcall(net_ns_init); + +static int register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + struct net *net, *undo_net; + int error; + + error = 0; + list_add_tail(&ops->list, list); + for_each_net(net) { + if (ops->init) { + error = ops->init(net); + if (error) + goto out_undo; + } + } +out: + return error; + +out_undo: + /* If I have an error cleanup all namespaces I initialized */ + list_del(&ops->list); + for_each_net(undo_net) { + if (undo_net == net) + goto undone; + if (ops->exit) + ops->exit(undo_net); + } +undone: + goto out; +} + +static void unregister_pernet_operations(struct pernet_operations *ops) +{ + struct 
net *net; + + list_del(&ops->list); + for_each_net(net) + if (ops->exit) + ops->exit(net); +} + +/** + * register_pernet_subsys - register a network namespace subsystem + * @ops: pernet operations structure for the subsystem + * + * Register a subsystem which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_subsys(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(first_device, ops); + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_subsys); + +/** + * unregister_pernet_subsys - unregister a network namespace subsystem + * @module: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destroyed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. 
+ * + * When registered all network namespace init functions are + * called for every existing network namespace, allowing kernel + * modules to have a race-free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destroyed. In + * addition run the exit method for all existing network + * namespaces. 
+ */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff -Nurb linux-2.6.22-try2/net/core/netpoll.c linux-2.6.22-try2-netns/net/core/netpoll.c --- linux-2.6.22-try2/net/core/netpoll.c 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/netpoll.c 2007-12-19 22:49:18.000000000 -0500 @@ -634,7 +634,7 @@ int err; if (np->dev_name) - ndev = dev_get_by_name(np->dev_name); + ndev = dev_get_by_name(&init_net, np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); diff -Nurb linux-2.6.22-try2/net/core/pktgen.c linux-2.6.22-try2-netns/net/core/pktgen.c --- linux-2.6.22-try2/net/core/pktgen.c 2007-12-19 15:29:24.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/pktgen.c 2007-12-19 22:49:18.000000000 -0500 @@ -155,6 +155,7 @@ #include #include #include +#include #include #include #include @@ -1903,6 +1904,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. 
*/ @@ -1933,7 +1937,7 @@ pkt_dev->odev = NULL; } - odev = dev_get_by_name(ifname); + odev = dev_get_by_name(&init_net, ifname); if (!odev) { printk("pktgen: no such netdevice: \"%s\"\n", ifname); return -ENODEV; @@ -3570,7 +3574,7 @@ printk(version); - pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); + pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; pg_proc_dir->owner = THIS_MODULE; @@ -3579,7 +3583,7 @@ if (pe == NULL) { printk("pktgen: ERROR: cannot create %s procfs entry.\n", PGCTRL); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -EINVAL; } @@ -3602,7 +3606,7 @@ printk("pktgen: ERROR: Initialization failed for all threads\n"); unregister_netdevice_notifier(&pktgen_notifier_block); remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); return -ENODEV; } @@ -3629,7 +3633,7 @@ /* Clean up proc file system */ remove_proc_entry(PGCTRL, pg_proc_dir); - proc_net_remove(PG_PROC_DIR); + proc_net_remove(&init_net, PG_PROC_DIR); } module_init(pg_init); diff -Nurb linux-2.6.22-try2/net/core/rtnetlink.c linux-2.6.22-try2-netns/net/core/rtnetlink.c --- linux-2.6.22-try2/net/core/rtnetlink.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/rtnetlink.c 2007-12-19 23:04:00.000000000 -0500 @@ -59,7 +59,6 @@ }; static DEFINE_MUTEX(rtnl_mutex); -static struct sock *rtnl; void rtnl_lock(void) { @@ -73,9 +72,17 @@ void rtnl_unlock(void) { + struct net *net; mutex_unlock(&rtnl_mutex); + + net_lock(); + for_each_net(net) { + struct sock *rtnl = net->rtnl; if (rtnl && rtnl->sk_receive_queue.qlen) rtnl->sk_data_ready(rtnl, 0); + } + net_unlock(); + netdev_run_todo(); } @@ -446,8 +453,9 @@ return ret; } -int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) { + struct sock *rtnl = net->rtnl; int err = 0; 
NETLINK_CB(skb).dst_group = group; @@ -459,14 +467,17 @@ return err; } -int rtnl_unicast(struct sk_buff *skb, u32 pid) +int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) { + struct sock *rtnl = net->rtnl; + return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { + struct sock *rtnl = net->rtnl; int report = 0; if (nlh) @@ -475,8 +486,10 @@ return nlmsg_notify(rtnl, skb, pid, group, report, flags); } -void rtnl_set_sk_err(u32 group, int error) +void rtnl_set_sk_err(struct net *net, u32 group, int error) { + struct sock *rtnl = net->rtnl; + netlink_set_err(rtnl, 0, group, error); } @@ -687,12 +700,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx; int s_idx = cb->args[0]; struct net_device *dev; idx = 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (!nx_dev_visible(skb->sk->sk_nx_info, dev)) continue; if (idx < s_idx) @@ -857,6 +871,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct net_device *dev; int err; @@ -875,9 +890,9 @@ err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(ifname); + dev = dev_get_by_name(net, ifname); else goto errout; @@ -903,6 +918,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -919,9 +935,9 @@ ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = 
__dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else return -EINVAL; @@ -938,6 +954,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; const struct rtnl_link_ops *ops; struct net_device *dev; struct ifinfomsg *ifm; @@ -959,9 +976,9 @@ ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (ifname[0]) - dev = __dev_get_by_name(ifname); + dev = __dev_get_by_name(net, ifname); else dev = NULL; @@ -1079,6 +1096,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifinfomsg *ifm; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; @@ -1091,7 +1109,7 @@ ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { - dev = dev_get_by_index(ifm->ifi_index); + dev = dev_get_by_index(net, ifm->ifi_index); if (dev == NULL) return -ENODEV; } else @@ -1111,7 +1129,7 @@ kfree_skb(nskb); goto errout; } - err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); errout: dev_put(dev); @@ -1144,6 +1162,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { + struct net *net = dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -1161,10 +1180,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. 
*/ @@ -1175,6 +1194,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = skb->sk->sk_net; rtnl_doit_func doit; int sz_idx, kind; int min_len; @@ -1203,6 +1223,7 @@ return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + struct sock *rtnl; rtnl_dumpit_func dumpit; dumpit = rtnl_get_dumpit(family, type); @@ -1210,6 +1231,7 @@ return -EOPNOTSUPP; __rtnl_unlock(); + rtnl = net->rtnl; err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); rtnl_lock(); return err; @@ -1259,6 +1281,10 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -1284,6 +1310,36 @@ .notifier_call = rtnetlink_event, }; + +static int rtnetlink_net_init(struct net *net) +{ + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + if (!sk) + return -ENOMEM; + + /* Don't hold an extra reference on the namespace */ + put_net(sk->sk_net); + net->rtnl = sk; + return 0; +} + +static void rtnetlink_net_exit(struct net *net) +{ + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to + * free. 
+ */ + net->rtnl->sk_net = get_net(&init_net); + sock_put(net->rtnl); +} + +static struct pernet_operations rtnetlink_net_ops = { + .init = rtnetlink_net_init, + .exit = rtnetlink_net_exit, +}; + void __init rtnetlink_init(void) { int i; @@ -1296,10 +1352,9 @@ if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, - &rtnl_mutex, THIS_MODULE); - if (rtnl == NULL) + if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); + netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); diff -Nurb linux-2.6.22-try2/net/core/sock.c linux-2.6.22-try2-netns/net/core/sock.c --- linux-2.6.22-try2/net/core/sock.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/sock.c 2007-12-19 23:04:11.000000000 -0500 @@ -123,6 +123,7 @@ #include #include #include +#include #include #include @@ -360,6 +361,7 @@ char __user *optval, int optlen) { struct sock *sk=sock->sk; + struct net *net = sk->sk_net; struct sk_filter *filter; int val; int valbool; @@ -614,7 +616,7 @@ if (devname[0] == '\0') { sk->sk_bound_dev_if = 0; } else { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(net, devname); if (!dev) { ret = -ENODEV; break; @@ -867,7 +869,7 @@ * @prot: struct proto associated with this new sock instance * @zero_it: if we should zero the newly allocated sock */ -struct sock *sk_alloc(int family, gfp_t priority, +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; @@ -888,6 +890,7 @@ */ sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); + sk->sk_net = get_net(net); } sock_vx_init(sk); sock_nx_init(sk); @@ -929,6 +932,7 @@ __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); + put_net(sk->sk_net); vx_sock_dec(sk); clr_vx_info(&sk->sk_vx_info); 
sk->sk_xid = -1; @@ -943,7 +947,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); if (newsk != NULL) { struct sk_filter *filter; @@ -2017,7 +2021,7 @@ static int __init proto_init(void) { /* register /proc/net/protocols */ - return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; + return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; } subsys_initcall(proto_init); diff -Nurb linux-2.6.22-try2/net/core/sysctl_net_core.c linux-2.6.22-try2-netns/net/core/sysctl_net_core.c --- linux-2.6.22-try2/net/core/sysctl_net_core.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/core/sysctl_net_core.c 2007-12-19 22:49:18.000000000 -0500 @@ -9,25 +9,10 @@ #include #include #include +#include +#include #include -#ifdef CONFIG_SYSCTL - -extern int netdev_max_backlog; -extern int weight_p; - -extern __u32 sysctl_wmem_max; -extern __u32 sysctl_rmem_max; - -extern int sysctl_core_destroy_delay; - -#ifdef CONFIG_XFRM -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; -#endif - ctl_table core_table[] = { #ifdef CONFIG_NET { @@ -103,11 +88,32 @@ .mode = 0644, .proc_handler = &proc_dointvec }, +#endif /* CONFIG_NET */ + { + .ctl_name = NET_CORE_BUDGET, + .procname = "netdev_budget", + .data = &netdev_budget, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_CORE_WARNINGS, + .procname = "warnings", + .data = &net_msg_warn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + +struct ctl_table multi_core_table[] = { #ifdef CONFIG_XFRM { .ctl_name = NET_CORE_AEVENT_ETIME, .procname = "xfrm_aevent_etime", - 
.data = &sysctl_xfrm_aevent_etime, + .data = &init_net.sysctl_xfrm_aevent_etime, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = &proc_dointvec @@ -115,7 +121,7 @@ { .ctl_name = NET_CORE_AEVENT_RSEQTH, .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, + .data = &init_net.sysctl_xfrm_aevent_rseqth, .maxlen = sizeof(u32), .mode = 0644, .proc_handler = &proc_dointvec @@ -123,7 +129,7 @@ { .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, + .data = &init_net.sysctl_xfrm_larval_drop, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -131,38 +137,19 @@ { .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, + .data = &init_net.sysctl_xfrm_acq_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, #endif /* CONFIG_XFRM */ -#endif /* CONFIG_NET */ { .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", - .data = &sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_CORE_BUDGET, - .procname = "netdev_budget", - .data = &netdev_budget, + .data = &init_net.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, - { - .ctl_name = NET_CORE_WARNINGS, - .procname = "warnings", - .data = &net_msg_warn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = 0 } + {} }; - -#endif diff -Nurb linux-2.6.22-try2/net/dccp/ipv4.c linux-2.6.22-try2-netns/net/dccp/ipv4.c --- linux-2.6.22-try2/net/dccp/ipv4.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/dccp/ipv4.c 2007-12-19 22:49:18.000000000 -0500 @@ -202,6 +202,7 @@ */ static void dccp_v4_err(struct sk_buff *skb, u32 info) { + struct net *net = skb->dev->nd_net; const struct iphdr *iph = (struct iphdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); @@ -213,13 +214,16 @@ __u64 
seq; int err; + if (skb->dev->nd_net != &init_net) + return; + if (skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); + iph->saddr, dh->dccph_sport, inet_iif(skb), net); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -441,7 +445,7 @@ nsk = inet_lookup_established(&dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, - inet_iif(skb)); + inet_iif(skb), sk->sk_net); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -458,7 +462,8 @@ struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + struct flowi fl = { .fl_net = &init_net, + .oif = ((struct rtable *)skb->dst)->rt_iif, .nl_u = { .ip4_u = { .daddr = ip_hdr(skb)->saddr, .saddr = ip_hdr(skb)->daddr, @@ -809,11 +814,16 @@ /* this is called when real data arrives */ static int dccp_v4_rcv(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; const struct dccp_hdr *dh; const struct iphdr *iph; struct sock *sk; int min_cov; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) @@ -852,7 +862,7 @@ * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + iph->daddr, dh->dccph_dport, inet_iif(skb), net); /* * Step 2: * If no socket ... 
diff -Nurb linux-2.6.22-try2/net/dccp/ipv6.c linux-2.6.22-try2-netns/net/dccp/ipv6.c --- linux-2.6.22-try2/net/dccp/ipv6.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/dccp/ipv6.c 2007-12-19 22:49:18.000000000 -0500 @@ -94,6 +94,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = skb->dev->nd_net; struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct ipv6_pinfo *np; @@ -102,7 +103,7 @@ __u64 seq; sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, - &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); + &hdr->saddr, dh->dccph_sport, inet6_iif(skb), net); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -142,6 +143,7 @@ for now. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -242,6 +244,7 @@ int err = -1; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); @@ -358,6 +361,7 @@ &rxip6h->daddr); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); @@ -407,7 +411,7 @@ nsk = __inet6_lookup_established(&dccp_hashinfo, &iph->saddr, dh->dccph_sport, &iph->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + inet6_iif(skb), sk->sk_net); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -584,6 +588,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); if (opt != NULL && opt->srcrt != NULL) { @@ -819,6 +824,7 @@ { const struct dccp_hdr *dh; struct sk_buff *skb = *pskb; + struct net *net = skb->dev->nd_net; 
struct sock *sk; int min_cov; @@ -849,7 +855,7 @@ sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, dh->dccph_sport, &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + inet6_iif(skb), net); /* * Step 2: * If no socket ... @@ -937,6 +943,7 @@ return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; diff -Nurb linux-2.6.22-try2/net/dccp/probe.c linux-2.6.22-try2-netns/net/dccp/probe.c --- linux-2.6.22-try2/net/dccp/probe.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/dccp/probe.c 2007-12-19 22:49:18.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include #include "dccp.h" #include "ccid.h" @@ -168,7 +169,7 @@ if (IS_ERR(dccpw.fifo)) return PTR_ERR(dccpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) goto err0; ret = register_jprobe(&dccp_send_probe); @@ -178,7 +179,7 @@ pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(dccpw.fifo); return ret; @@ -188,7 +189,7 @@ static __exit void dccpprobe_exit(void) { kfifo_free(dccpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&dccp_send_probe); } diff -Nurb linux-2.6.22-try2/net/decnet/af_decnet.c linux-2.6.22-try2-netns/net/decnet/af_decnet.c --- linux-2.6.22-try2/net/decnet/af_decnet.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/af_decnet.c 2007-12-19 22:49:18.000000000 -0500 @@ -131,6 +131,7 @@ #include #include #include +#include #include #include #include @@ -470,10 +471,10 @@ .obj_size = sizeof(struct dn_sock), }; -static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) +static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) { struct dn_scp *scp; 
- struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); + struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); if (!sk) goto out; @@ -674,10 +675,13 @@ -static int dn_create(struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + switch(sock->type) { case SOCK_SEQPACKET: if (protocol != DNPROTO_NSP) @@ -690,7 +694,7 @@ } - if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) + if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) return -ENOBUFS; sk->sk_protocol = protocol; @@ -747,7 +751,7 @@ if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -943,6 +947,7 @@ err = -EHOSTUNREACH; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.oif = sk->sk_bound_dev_if; fl.fld_dst = dn_saddr2dn(&scp->peer); fl.fld_src = dn_saddr2dn(&scp->addr); @@ -1090,7 +1095,7 @@ cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(newsock, sk->sk_allocation); + newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); @@ -2085,6 +2090,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_UP: dn_dev_up(dev); @@ -2399,7 +2407,7 @@ dev_add_pack(&dn_dix_packet_type); register_netdevice_notifier(&dn_dev_notifier); - proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); + proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); dn_register_sysctl(); out: return rc; @@ -2428,7 +2436,7 @@ dn_neigh_cleanup(); dn_fib_cleanup(); - proc_net_remove("decnet"); + proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); } diff -Nurb linux-2.6.22-try2/net/decnet/dn_dev.c 
linux-2.6.22-try2-netns/net/decnet/dn_dev.c --- linux-2.6.22-try2/net/decnet/dn_dev.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_dev.c 2007-12-19 22:49:18.000000000 -0500 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -513,7 +514,7 @@ ifr->ifr_name[IFNAMSIZ-1] = 0; #ifdef CONFIG_KMOD - dev_load(ifr->ifr_name); + dev_load(&init_net, ifr->ifr_name); #endif switch(cmd) { @@ -531,7 +532,7 @@ rtnl_lock(); - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) { + if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { ret = -ENODEV; goto done; } @@ -629,7 +630,7 @@ { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev) { dn_dev = dev->dn_ptr; dev_put(dev); @@ -647,12 +648,16 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct dn_dev *dn_db; struct ifaddrmsg *ifm; struct dn_ifaddr *ifa, **ifap; int err = -EADDRNOTAVAIL; + if (net != &init_net) + goto errout; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) goto errout; @@ -679,6 +684,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct net_device *dev; struct dn_dev *dn_db; @@ -686,6 +692,9 @@ struct dn_ifaddr *ifa; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); if (err < 0) return err; @@ -694,7 +703,7 @@ return -EINVAL; ifm = nlmsg_data(nlh); - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { @@ -783,24 +792,28 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, 
GFP_KERNEL); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); } static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, dn_idx = 0, skip_ndevs, skip_naddr; struct net_device *dev; struct dn_dev *dn_db; struct dn_ifaddr *ifa; + if (net != &init_net) + return 0; + skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < skip_ndevs) goto cont; else if (idx > skip_ndevs) { @@ -869,10 +882,10 @@ rv = dn_dev_get_first(dev, addr); read_unlock(&dev_base_lock); dev_put(dev); - if (rv == 0 || dev == &loopback_dev) + if (rv == 0 || dev == &init_net.loopback_dev) return rv; } - dev = &loopback_dev; + dev = &init_net.loopback_dev; dev_hold(dev); goto last_chance; } @@ -1299,7 +1312,7 @@ struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) dn_dev_down(dev); rtnl_unlock(); @@ -1310,7 +1323,7 @@ struct net_device *dev; rtnl_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1344,7 +1357,7 @@ return SEQ_START_TOKEN; i = 1; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1363,9 +1376,9 @@ dev = (struct net_device *)v; if (v == SEQ_START_TOKEN) - dev = net_device_entry(&dev_base_head); + dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(dev) { + for_each_netdev_continue(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1465,7 +1478,7 @@ rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); - proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); + proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); #ifdef 
CONFIG_SYSCTL { @@ -1486,7 +1499,7 @@ } #endif /* CONFIG_SYSCTL */ - proc_net_remove("decnet_dev"); + proc_net_remove(&init_net, "decnet_dev"); dn_dev_devices_off(); } diff -Nurb linux-2.6.22-try2/net/decnet/dn_fib.c linux-2.6.22-try2-netns/net/decnet/dn_fib.c --- linux-2.6.22-try2/net/decnet/dn_fib.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_fib.c 2007-12-19 22:49:18.000000000 -0500 @@ -203,8 +203,6 @@ struct flowi fl; struct dn_fib_res res; - memset(&fl, 0, sizeof(fl)); - if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; @@ -212,7 +210,7 @@ return -EINVAL; if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -223,6 +221,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fld_dst = nh->nh_gw; fl.oif = nh->nh_oif; fl.fld_scope = r->rtm_scope + 1; @@ -255,7 +254,7 @@ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - dev = __dev_get_by_index(nh->nh_oif); + dev = __dev_get_by_index(&init_net, nh->nh_oif); if (dev == NULL || dev->dn_ptr == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) @@ -355,7 +354,7 @@ if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -506,10 +505,14 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (net != &init_net) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; @@ -522,10 +525,14 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; 
struct dn_fib_table *tb; struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); + if (net != &init_net) + return -EINVAL; + if (dn_fib_check_attr(r, rta)) return -EINVAL; @@ -602,7 +609,7 @@ /* Scan device list */ read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; diff -Nurb linux-2.6.22-try2/net/decnet/dn_neigh.c linux-2.6.22-try2-netns/net/decnet/dn_neigh.c --- linux-2.6.22-try2/net/decnet/dn_neigh.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_neigh.c 2007-12-19 22:49:18.000000000 -0500 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -591,6 +592,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -598,12 +600,20 @@ goto out; } +static int dn_neigh_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct neigh_seq_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + static const struct file_operations dn_neigh_seq_fops = { .owner = THIS_MODULE, .open = dn_neigh_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = dn_neigh_seq_release, }; #endif @@ -611,11 +621,11 @@ void __init dn_neigh_init(void) { neigh_table_init(&dn_neigh_table); - proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); } void __exit dn_neigh_cleanup(void) { - proc_net_remove("decnet_neigh"); + proc_net_remove(&init_net, "decnet_neigh"); neigh_table_clear(&dn_neigh_table); } diff -Nurb linux-2.6.22-try2/net/decnet/dn_nsp_out.c linux-2.6.22-try2-netns/net/decnet/dn_nsp_out.c --- linux-2.6.22-try2/net/decnet/dn_nsp_out.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_nsp_out.c 2007-12-19 22:49:18.000000000 -0500 
@@ -91,6 +91,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.oif = sk->sk_bound_dev_if; fl.fld_src = dn_saddr2dn(&scp->addr); fl.fld_dst = dn_saddr2dn(&scp->peer); diff -Nurb linux-2.6.22-try2/net/decnet/dn_route.c linux-2.6.22-try2-netns/net/decnet/dn_route.c --- linux-2.6.22-try2/net/decnet/dn_route.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_route.c 2007-12-19 22:49:18.000000000 -0500 @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -583,6 +584,9 @@ struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; + if (dev->nd_net != &init_net) + goto dump_it; + if (dn == NULL) goto dump_it; @@ -877,13 +881,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .nl_u = { .dn_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = oldflp->fld_dst, .saddr = oldflp->fld_src, .scope = RT_SCOPE_UNIVERSE, } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = init_net.loopback_dev.ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; @@ -900,11 +905,11 @@ "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), dn_ntohs(oldflp->fld_src), - oldflp->mark, loopback_dev.ifindex, oldflp->oif); + oldflp->mark, init_net.loopback_dev.ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(&init_net, oldflp->oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -925,7 +930,7 @@ goto out; } read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -953,7 +958,7 @@ err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); - dev_out = 
&loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); if (!fl.fld_dst) { fl.fld_dst = @@ -962,7 +967,7 @@ if (!fl.fld_dst) goto out; } - fl.oif = loopback_dev.ifindex; + fl.oif = init_net.loopback_dev.ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -995,7 +1000,7 @@ * here */ if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); + neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); if (neigh) { if ((oldflp->oif && (neigh->dev->ifindex != oldflp->oif)) || @@ -1008,7 +1013,7 @@ if (dev_out) dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; res.type = RTN_LOCAL; } else { dev_out = neigh->dev; @@ -1029,7 +1034,7 @@ /* Possible improvement - check all devices for local addr */ if (dn_dev_islocal(dev_out, fl.fld_dst)) { dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); res.type = RTN_LOCAL; goto select_source; @@ -1065,7 +1070,7 @@ fl.fld_src = fl.fld_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &init_net.loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -1103,6 +1108,7 @@ atomic_set(&rt->u.dst.__refcnt, 1); rt->u.dst.flags = DST_HOST; + rt->fl.fl_net = &init_net; rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; rt->fl.oif = oldflp->oif; @@ -1226,7 +1232,8 @@ int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .nl_u = { .dn_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = cb->dst, .saddr = cb->src, .scope = RT_SCOPE_UNIVERSE, @@ -1374,6 +1381,7 @@ rt->rt_dst_map = fl.fld_dst; rt->rt_src_map = fl.fld_src; + rt->fl.fl_net = &init_net; rt->fl.fld_src = cb->src; rt->fl.fld_dst = cb->dst; rt->fl.oif = 0; @@ -1526,6 +1534,7 @@ */ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct 
rtattr **rta = arg; struct rtmsg *rtm = NLMSG_DATA(nlh); struct dn_route *rt = NULL; @@ -1534,7 +1543,11 @@ struct sk_buff *skb; struct flowi fl; + if (net != &init_net) + return -EINVAL; + memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); @@ -1552,7 +1565,7 @@ if (fl.iif) { struct net_device *dev; - if ((dev = dev_get_by_index(fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1598,7 +1611,7 @@ goto out_free; } - return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); out_free: kfree_skb(skb); @@ -1611,10 +1624,14 @@ */ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct dn_route *rt; int h, s_h; int idx, s_idx; + if (net != &init_net) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) return -EINVAL; if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) @@ -1814,7 +1831,7 @@ dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); - proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); @@ -1829,6 +1846,6 @@ del_timer(&dn_route_timer); dn_run_flush(0); - proc_net_remove("decnet_cache"); + proc_net_remove(&init_net, "decnet_cache"); } diff -Nurb linux-2.6.22-try2/net/decnet/dn_rules.c linux-2.6.22-try2-netns/net/decnet/dn_rules.c --- linux-2.6.22-try2/net/decnet/dn_rules.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_rules.c 2007-12-19 22:49:18.000000000 -0500 @@ -186,7 +186,10 @@ unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .dn_u = { .daddr = addr } 
} + }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); @@ -223,7 +226,7 @@ return -ENOBUFS; } -static u32 dn_fib_rule_default_pref(void) +static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) { struct list_head *pos; struct fib_rule *rule; @@ -240,7 +243,7 @@ return 0; } -static void dn_fib_rule_flush_cache(void) +static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); } @@ -265,12 +268,12 @@ void __init dn_fib_rules_init(void) { list_add_tail(&default_rule.common.list, &dn_fib_rules); - fib_rules_register(&dn_fib_rules_ops); + fib_rules_register(&init_net, &dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - fib_rules_unregister(&dn_fib_rules_ops); + fib_rules_unregister(&init_net, &dn_fib_rules_ops); } diff -Nurb linux-2.6.22-try2/net/decnet/dn_table.c linux-2.6.22-try2-netns/net/decnet/dn_table.c --- linux-2.6.22-try2/net/decnet/dn_table.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/dn_table.c 2007-12-19 22:49:18.000000000 -0500 @@ -375,10 +375,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, @@ -463,12 +463,16 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct dn_fib_table *tb; struct hlist_node *node; int dumped = 0; + if (net != &init_net) + return 0; + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); diff -Nurb linux-2.6.22-try2/net/decnet/netfilter/dn_rtmsg.c 
linux-2.6.22-try2-netns/net/decnet/netfilter/dn_rtmsg.c --- linux-2.6.22-try2/net/decnet/netfilter/dn_rtmsg.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/netfilter/dn_rtmsg.c 2007-12-19 22:49:18.000000000 -0500 @@ -93,6 +93,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + dnrmg_send_peer(*pskb); return NF_ACCEPT; } @@ -137,7 +141,8 @@ { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg = netlink_kernel_create(&init_net, + NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, dnrmg_receive_user_sk, NULL, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); diff -Nurb linux-2.6.22-try2/net/decnet/sysctl_net_decnet.c linux-2.6.22-try2-netns/net/decnet/sysctl_net_decnet.c --- linux-2.6.22-try2/net/decnet/sysctl_net_decnet.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/decnet/sysctl_net_decnet.c 2007-12-19 22:49:18.000000000 -0500 @@ -259,7 +259,7 @@ devname[newlen] = 0; - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; @@ -299,7 +299,7 @@ devname[*lenp] = 0; strip_it(devname); - dev = dev_get_by_name(devname); + dev = dev_get_by_name(&init_net, devname); if (dev == NULL) return -ENODEV; diff -Nurb linux-2.6.22-try2/net/econet/af_econet.c linux-2.6.22-try2-netns/net/econet/af_econet.c --- linux-2.6.22-try2/net/econet/af_econet.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/econet/af_econet.c 2007-12-19 22:49:18.000000000 -0500 @@ -608,12 +608,15 @@ * Create an Econet socket */ -static int econet_create(struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct econet_sock *eo; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + /* Econet only provides 
datagram services. */ if (sock->type != SOCK_DGRAM) return -ESOCKTNOSUPPORT; @@ -621,7 +624,7 @@ sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); + sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); if (sk == NULL) goto out; @@ -659,7 +662,7 @@ if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) return -ENODEV; sec = (struct sockaddr_ec *)&ifr.ifr_addr; @@ -1062,6 +1065,9 @@ struct sock *sk; struct ec_device *edev = dev->ec_ptr; + if (dev->nd_net != &init_net) + goto drop; + if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1116,6 +1122,9 @@ struct net_device *dev = (struct net_device *)data; struct ec_device *edev; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (msg) { case NETDEV_UNREGISTER: /* A device has gone down - kill any data we hold for it. */ diff -Nurb linux-2.6.22-try2/net/ieee80211/ieee80211_module.c linux-2.6.22-try2-netns/net/ieee80211/ieee80211_module.c --- linux-2.6.22-try2/net/ieee80211/ieee80211_module.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ieee80211/ieee80211_module.c 2007-12-19 22:49:18.000000000 -0500 @@ -264,7 +264,7 @@ struct proc_dir_entry *e; ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); + ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); if (ieee80211_proc == NULL) { IEEE80211_ERROR("Unable to create " DRV_NAME " proc directory\n"); @@ -273,7 +273,7 @@ e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, ieee80211_proc); if (!e) { - remove_proc_entry(DRV_NAME, proc_net); + remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; return -EIO; } @@ -293,7 +293,7 @@ #ifdef CONFIG_IEEE80211_DEBUG if (ieee80211_proc) { remove_proc_entry("debug_level", ieee80211_proc); - remove_proc_entry(DRV_NAME, proc_net); + 
remove_proc_entry(DRV_NAME, init_net.proc_net); ieee80211_proc = NULL; } #endif /* CONFIG_IEEE80211_DEBUG */ diff -Nurb linux-2.6.22-try2/net/ipv4/af_inet.c linux-2.6.22-try2-netns/net/ipv4/af_inet.c --- linux-2.6.22-try2/net/ipv4/af_inet.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/af_inet.c 2007-12-19 23:20:19.000000000 -0500 @@ -244,7 +244,7 @@ * Create an inet socket. */ -static int inet_create(struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct list_head *p; @@ -310,6 +310,10 @@ goto out_rcu_unlock; } + err = -EPROTONOSUPPORT; + if (!(answer->flags & INET_PROTOSW_NETNS) && (net != &init_net)) + goto out_rcu_unlock; + err = -EPERM; if ((protocol == IPPROTO_ICMP) && nx_capable(answer->capability, NXC_RAW_ICMP)) @@ -326,7 +330,7 @@ BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -344,7 +348,7 @@ inet->hdrincl = 1; } - if (ipv4_config.no_pmtu_disc) + if (net->sysctl_ipv4_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -423,12 +427,12 @@ } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind __read_mostly; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); struct nx_v4_sock_addr nsa; unsigned short snum; @@ -448,7 +452,7 @@ if (err) goto out; - chk_addr_ret = inet_addr_type(nsa.saddr); + chk_addr_ret = inet_addr_type(net, nsa.saddr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. 
It is unfortunate since @@ -458,7 +462,7 @@ * is temporarily down) */ err = -EADDRNOTAVAIL; - if (!sysctl_ip_nonlocal_bind && + if (!net->sysctl_ip_nonlocal_bind && !inet->freebind && nsa.saddr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && @@ -787,6 +791,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; int err = 0; switch (cmd) { @@ -799,12 +804,12 @@ case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -817,7 +822,7 @@ case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk->sk_prot->ioctl) @@ -927,7 +932,8 @@ .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | - INET_PROTOSW_ICSK, + INET_PROTOSW_ICSK | + INET_PROTOSW_NETNS, }, { @@ -937,7 +943,8 @@ .ops = &inet_dgram_ops, .capability = -1, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_NETNS, }, @@ -948,7 +955,8 @@ .ops = &inet_sockraw_ops, .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, + .flags = INET_PROTOSW_REUSE | + INET_PROTOSW_NETNS, } }; @@ -1029,8 +1037,6 @@ * Shall we try to damage output packets if routing dev changes? 
*/ -int sysctl_ip_dynaddr __read_mostly; - static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -1059,7 +1065,7 @@ if (new_saddr == old_saddr) return 0; - if (sysctl_ip_dynaddr > 1) { + if (sk->sk_net->sysctl_ip_dynaddr > 1) { printk(KERN_INFO "%s(): shifting inet->" "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", __FUNCTION__, @@ -1098,6 +1104,7 @@ daddr = inet->opt->faddr; { struct flowi fl = { + .fl_net = sk->sk_net, .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { @@ -1127,7 +1134,7 @@ * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sysctl_ip_dynaddr || + if (!sk->sk_net->sysctl_ip_dynaddr || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) @@ -1356,6 +1363,24 @@ .gso_segment = inet_gso_segment, }; + +static int inet_net_init(struct net *net) +{ + net->sysctl_ip_default_ttl = IPDEFTTL; + net->sysctl_ip_dynaddr = 0; + + return 0; +} + +static void inet_net_exit(struct net *net) +{ +} + +static struct pernet_operations inet_net_ops = { + .init = inet_net_init, + .exit = inet_net_exit, +}; + static int __init inet_init(void) { struct sk_buff *dummy_skb; @@ -1377,6 +1402,10 @@ if (rc) goto out_unregister_udp_proto; + rc = register_pernet_subsys(&inet_net_ops); + if (rc) + goto out_unregister_raw_proto; + /* * Tell SOCKET that we are alive... 
*/ @@ -1453,6 +1482,8 @@ rc = 0; out: return rc; +out_unregister_raw_proto: + proto_unregister(&raw_prot); out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: @@ -1475,15 +1506,11 @@ goto out_tcp; if (udp4_proc_init()) goto out_udp; - if (fib_proc_init()) - goto out_fib; if (ip_misc_proc_init()) goto out_misc; out: return rc; out_misc: - fib_proc_exit(); -out_fib: udp4_proc_exit(); out_udp: tcp4_proc_exit(); @@ -1519,4 +1546,3 @@ EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); EXPORT_SYMBOL(net_statistics); -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); diff -Nurb linux-2.6.22-try2/net/ipv4/ah4.c linux-2.6.22-try2-netns/net/ipv4/ah4.c --- linux-2.6.22-try2/net/ipv4/ah4.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ah4.c 2007-12-19 22:49:18.000000000 -0500 @@ -198,6 +198,9 @@ struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; diff -Nurb linux-2.6.22-try2/net/ipv4/arp.c linux-2.6.22-try2-netns/net/ipv4/arp.c --- linux-2.6.22-try2/net/ipv4/arp.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/arp.c 2007-12-19 22:49:18.000000000 -0500 @@ -109,6 +109,7 @@ #include #include #include +#include #include #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) #include @@ -235,10 +236,11 @@ { __be32 addr = *(__be32*)neigh->primary_key; struct net_device *dev = neigh->dev; + struct net *net = dev->nd_net; struct in_device *in_dev; struct neigh_parms *parms; - neigh->type = inet_addr_type(addr); + neigh->type = inet_addr_type(net, addr); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -332,6 +334,7 @@ __be32 saddr = 0; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; + struct net *net = dev->nd_net; __be32 target = *(__be32*)neigh->primary_key; int probes = 
atomic_read(&neigh->probes); struct in_device *in_dev = in_dev_get(dev); @@ -342,14 +345,14 @@ switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL) + if (skb && inet_addr_type(net, ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(saddr) == RTN_LOCAL) { + if (inet_addr_type(net, saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -386,6 +389,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev, __be32 sip, __be32 tip) { + struct net *net = dev->nd_net; int scope; switch (IN_DEV_ARP_IGNORE(in_dev)) { @@ -416,13 +420,15 @@ default: return 0; } - return !inet_confirm_addr(dev, sip, tip, scope); + return !inet_confirm_addr(net, dev, sip, tip, scope); } static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, - .saddr = tip } } }; + struct flowi fl = { + .fl_net = dev->nd_net, + .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } + }; struct rtable *rt; int flag = 0; /*unsigned long now; */ @@ -469,6 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; __be32 paddr; struct neighbour *n; @@ -480,7 +487,7 @@ paddr = ((struct rtable*)skb->dst)->rt_gateway; - if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) + if (arp_set_predefined(inet_addr_type(net, paddr), haddr, paddr, dev)) return 0; n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); @@ -704,6 +711,7 @@ static int arp_process(struct sk_buff *skb) { struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; struct in_device *in_dev = in_dev_get(dev); struct arphdr *arp; unsigned char *arp_ptr; @@ -824,7 +832,7 @@ /* 
Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && - inet_addr_type(tip) == RTN_LOCAL && + inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev,dev,sip,tip)) arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr); goto out; @@ -854,7 +862,7 @@ } else if (IN_DEV_FORWARD(in_dev)) { if ((rt->rt_flags&RTCF_DNAT) || (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) { + (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); @@ -877,14 +885,14 @@ n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { + if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && - inet_addr_type(sip) == RTN_UNICAST) + inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, -1); } @@ -966,7 +974,7 @@ * Set (create) an ARP cache entry. 
*/ -static int arp_req_set(struct arpreq *r, struct net_device * dev) +static int arp_req_set(struct net *net, struct arpreq *r, struct net_device * dev) { __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; @@ -977,17 +985,17 @@ if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); + dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; } if (mask) { - if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL) + if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL) return -ENOBUFS; return 0; } if (dev == NULL) { - IPV4_DEVCONF_ALL(PROXY_ARP) = 1; + IPV4_DEVCONF_ALL(net, PROXY_ARP) = 1; return 0; } if (__in_dev_get_rtnl(dev)) { @@ -1000,8 +1008,10 @@ if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } + }; struct rtable * rt; if ((err = ip_route_output_key(&rt, &fl)) != 0) return err; @@ -1080,7 +1090,7 @@ return err; } -static int arp_req_delete(struct arpreq *r, struct net_device * dev) +static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev) { int err; __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; @@ -1090,10 +1100,10 @@ __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (mask == htonl(0xFFFFFFFF)) - return pneigh_delete(&arp_tbl, &ip, dev); + return pneigh_delete(&arp_tbl, net, &ip, dev); if (mask == 0) { if (dev == NULL) { - IPV4_DEVCONF_ALL(PROXY_ARP) = 0; + IPV4_DEVCONF_ALL(net, PROXY_ARP) = 0; return 0; } if (__in_dev_get_rtnl(dev)) { @@ -1107,8 +1117,10 @@ } if (dev == NULL) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { 
.ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } + }; struct rtable * rt; if ((err = ip_route_output_key(&rt, &fl)) != 0) return err; @@ -1133,7 +1145,7 @@ * Handle an ARP layer I/O control request. */ -int arp_ioctl(unsigned int cmd, void __user *arg) +int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) { int err; struct arpreq r; @@ -1165,7 +1177,7 @@ rtnl_lock(); if (r.arp_dev[0]) { err = -ENODEV; - if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) + if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) goto out; /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ @@ -1181,10 +1193,10 @@ switch (cmd) { case SIOCDARP: - err = arp_req_delete(&r, dev); + err = arp_req_delete(net, &r, dev); break; case SIOCSARP: - err = arp_req_set(&r, dev); + err = arp_req_set(net, &r, dev); break; case SIOCGARP: err = arp_req_get(&r, dev); @@ -1201,6 +1213,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1227,6 +1242,54 @@ } +static int arp_proc_init(struct net *net); +static void arp_proc_exit(struct net *net); + + +static int arp_net_init(struct net *net) +{ + int error; + if ((error = arp_proc_init(net))) + goto out_proc; + + error = -ENOMEM; + net->arp_neigh_parms_default = neigh_parms_alloc_default(&arp_tbl, net); + if (!net->arp_neigh_parms_default) + goto out_parm; + +#ifdef CONFIG_SYSCTL + if ((error = neigh_sysctl_register( + NULL, net->arp_neigh_parms_default, + NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL))) + goto out_sysctl; +#endif + error = 0; /* success; without CONFIG_SYSCTL error would still hold -ENOMEM here */ +out: + return error; + +#ifdef CONFIG_SYSCTL +out_sysctl: + neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); +#endif +out_parm: + arp_proc_exit(net); +out_proc: + goto out; +} + +static void arp_net_exit(struct net *net) +{ +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(net->arp_neigh_parms_default); +#endif + neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); + arp_proc_exit(net); +} + 
+static struct pernet_operations arp_net_ops = { + .init = arp_net_init, + .exit = arp_net_exit, +}; /* * Called once on startup. */ @@ -1236,18 +1299,12 @@ .func = arp_rcv, }; -static int arp_proc_init(void); - void __init arp_init(void) { neigh_table_init(&arp_tbl); dev_add_pack(&arp_packet_type); - arp_proc_init(); -#ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); -#endif + register_pernet_subsys(&arp_net_ops); register_netdevice_notifier(&arp_netdev_notifier); } @@ -1383,6 +1440,8 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); + out: return rc; out_kfree: @@ -1390,28 +1449,46 @@ goto out; } +static int arp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct neigh_seq_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + static const struct file_operations arp_seq_fops = { .owner = THIS_MODULE, .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = arp_seq_release, }; -static int __init arp_proc_init(void) +static int arp_proc_init(struct net *net) { - if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } +static void arp_proc_exit(struct net *net) +{ + proc_net_remove(net, "arp"); +} + #else /* CONFIG_PROC_FS */ -static int __init arp_proc_init(void) +static int arp_proc_init(struct net *net) { return 0; } +static void arp_proc_exit(struct net *net) +{ + return; +} + #endif /* CONFIG_PROC_FS */ EXPORT_SYMBOL(arp_broken_ops); diff -Nurb linux-2.6.22-try2/net/ipv4/devinet.c linux-2.6.22-try2-netns/net/ipv4/devinet.c --- linux-2.6.22-try2/net/ipv4/devinet.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/devinet.c 2007-12-19 22:49:18.000000000 -0500 @@ -63,7 +63,7 @@ #include #include 
-struct ipv4_devconf ipv4_devconf = { +static struct ipv4_devconf ipv4_devconf_template = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, @@ -72,7 +72,7 @@ }, }; -static struct ipv4_devconf ipv4_devconf_dflt = { +static struct ipv4_devconf ipv4_devconf_dflt_template = { .data = { [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, @@ -82,7 +82,7 @@ }, }; -#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) +#define IPV4_DEVCONF_DFLT(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf_dflt), attr) static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, @@ -98,7 +98,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy); #ifdef CONFIG_SYSCTL -static void devinet_sysctl_register(struct in_device *in_dev, +static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, struct ipv4_devconf *p); static void devinet_sysctl_unregister(struct ipv4_devconf *p); #endif @@ -149,6 +149,7 @@ static struct in_device *inetdev_init(struct net_device *dev) { + struct net *net = dev->nd_net; struct in_device *in_dev; ASSERT_RTNL(); @@ -157,7 +158,7 @@ if (!in_dev) goto out; INIT_RCU_HEAD(&in_dev->rcu_head); - memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); + memcpy(&in_dev->cnf, net->ipv4_devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) @@ -173,7 +174,7 @@ in_dev_hold(in_dev); #ifdef CONFIG_SYSCTL - devinet_sysctl_register(in_dev, &in_dev->cnf); + devinet_sysctl_register(net, in_dev, &in_dev->cnf); #endif ip_mc_init_dev(in_dev); if (dev->flags & IFF_UP) @@ -203,8 +204,6 @@ ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) - return; in_dev->dead = 1; @@ -415,12 +414,12 @@ return inet_insert_ifa(ifa); } -struct in_device *inetdev_by_index(int ifindex) +struct in_device
*inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; read_lock(&dev_base_lock); - dev = __dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev) in_dev = in_dev_get(dev); read_unlock(&dev_base_lock); @@ -444,6 +443,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; @@ -457,7 +457,7 @@ goto errout; ifm = nlmsg_data(nlh); - in_dev = inetdev_by_index(ifm->ifa_index); + in_dev = inetdev_by_index(net, ifm->ifa_index); if (in_dev == NULL) { err = -ENODEV; goto errout; @@ -488,7 +488,7 @@ return err; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) { struct nlattr *tb[IFA_MAX+1]; struct in_ifaddr *ifa; @@ -507,7 +507,7 @@ goto errout; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(net, ifm->ifa_index); if (dev == NULL) { err = -ENODEV; goto errout; @@ -564,11 +564,12 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct in_ifaddr *ifa; ASSERT_RTNL(); - ifa = rtm_to_ifaddr(nlh); + ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -600,7 +601,7 @@ } -int devinet_ioctl(unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; struct sockaddr_in sin_orig; @@ -629,7 +630,7 @@ *colon = 0; #ifdef CONFIG_KMOD - dev_load(ifr.ifr_name); + dev_load(net, ifr.ifr_name); #endif switch (cmd) { @@ -670,7 +671,7 @@ rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) goto done; if (colon) @@ -889,6 +890,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { + 
struct net *net = dev->nd_net; __be32 addr = 0; struct in_device *in_dev; @@ -919,7 +921,7 @@ */ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -982,7 +984,7 @@ * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) +__be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope) { __be32 addr = 0; struct in_device *in_dev; @@ -998,7 +1000,7 @@ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(net, dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1059,6 +1061,7 @@ void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1066,7 +1069,7 @@ if (!in_dev) { if (event == NETDEV_REGISTER) { in_dev = inetdev_init(dev); - if (dev == &loopback_dev) { + if (dev == &net->loopback_dev) { if (!in_dev) panic("devinet: " "Failed to create loopback\n"); @@ -1085,7 +1088,7 @@ case NETDEV_UP: if (dev->mtu < 68) break; - if (dev == &loopback_dev) { + if (dev == &net->loopback_dev) { struct in_ifaddr *ifa; if ((ifa = inet_alloc_ifa()) != NULL) { ifa->ifa_local = @@ -1122,7 +1125,7 @@ neigh_sysctl_unregister(in_dev->arp_parms); neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL); - devinet_sysctl_register(in_dev, &in_dev->cnf); + devinet_sysctl_register(net, in_dev, &in_dev->cnf); #endif break; } @@ -1185,6 +1188,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, ip_idx; struct net_device *dev; struct in_device *in_dev; @@ -1194,7 +1198,7 @@ s_ip_idx = ip_idx = cb->args[1]; idx 
= 0; - for_each_netdev(dev) { + for_each_netdev(net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -1228,6 +1232,7 @@ u32 pid) { struct sk_buff *skb; + struct net *net = ifa->ifa_dev->dev->nd_net; u32 seq = nlh ? nlh->nlmsg_seq : 0; int err = -ENOBUFS; @@ -1242,25 +1247,25 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } #ifdef CONFIG_SYSCTL -static void devinet_copy_dflt_conf(int i) +static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) - in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; + in_dev->cnf.data[i] = net->ipv4_devconf_dflt->data[i]; rcu_read_unlock(); } read_unlock(&dev_base_lock); @@ -1274,12 +1279,13 @@ if (write) { struct ipv4_devconf *cnf = ctl->extra1; + struct net *net = ctl->extra2; int i = (int *)ctl->data - cnf->data; set_bit(i, cnf->state); - if (cnf == &ipv4_devconf_dflt) - devinet_copy_dflt_conf(i); + if (cnf == net->ipv4_devconf_dflt) + devinet_copy_dflt_conf(net, i); } return ret; @@ -1291,6 +1297,7 @@ { struct ipv4_devconf *cnf; int *valp = table->data; + struct net *net; int new; int i; @@ -1325,26 +1332,27 @@ *valp = new; cnf = table->extra1; + net = table->extra2; i = (int *)table->data - cnf->data; set_bit(i, cnf->state); - if (cnf == &ipv4_devconf_dflt) - devinet_copy_dflt_conf(i); + if (cnf == net->ipv4_devconf_dflt) + devinet_copy_dflt_conf(net, i); return 1; } -void inet_forward_change(void) +void inet_forward_change(struct net *net) { struct net_device *dev; - int on = IPV4_DEVCONF_ALL(FORWARDING); + int on = IPV4_DEVCONF_ALL(net, FORWARDING); - 
IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on; - IPV4_DEVCONF_DFLT(FORWARDING) = on; + IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; + IPV4_DEVCONF_DFLT(net, FORWARDING) = on; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(net, dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -1364,11 +1372,12 @@ int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) { - if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) - inet_forward_change(); - else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) + if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) + inet_forward_change(net); + else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) rt_cache_flush(0); } @@ -1407,13 +1416,14 @@ { \ .ctl_name = NET_IPV4_CONF_ ## attr, \ .procname = name, \ - .data = ipv4_devconf.data + \ + .data = ipv4_devconf_template.data + \ NET_IPV4_CONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ .strategy = sysctl, \ - .extra1 = &ipv4_devconf, \ + .extra1 = &ipv4_devconf_template, \ + .extra2 = &init_net, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ @@ -1503,25 +1513,29 @@ }, }; -static void devinet_sysctl_register(struct in_device *in_dev, +static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, struct ipv4_devconf *p) { int i; struct net_device *dev = in_dev ? 
in_dev->dev : NULL; - struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t), - GFP_KERNEL); + struct devinet_sysctl_table *t; char *dev_name = NULL; + t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); if (!t) return; for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { - t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; + t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf_template; t->devinet_vars[i].extra1 = p; + t->devinet_vars[i].extra2 = net; } if (dev) { dev_name = dev->name; t->devinet_dev[0].ctl_name = dev->ifindex; + } else if (p == net->ipv4_devconf) { + dev_name = "all"; + t->devinet_dev[0].ctl_name = NET_PROTO_CONF_ALL; } else { dev_name = "default"; t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; @@ -1542,7 +1556,7 @@ t->devinet_proto_dir[0].child = t->devinet_conf_dir; t->devinet_root_dir[0].child = t->devinet_proto_dir; - t->sysctl_header = register_sysctl_table(t->devinet_root_dir); + t->sysctl_header = register_net_sysctl_table(net, t->devinet_root_dir); if (!t->sysctl_header) goto free_procname; @@ -1562,26 +1576,59 @@ if (p->sysctl) { struct devinet_sysctl_table *t = p->sysctl; p->sysctl = NULL; - unregister_sysctl_table(t->sysctl_header); + unregister_net_sysctl_table(t->sysctl_header); kfree(t->devinet_dev[0].procname); kfree(t); } } #endif +static int devinet_net_init(struct net *net) +{ + net->ipv4_devconf = kmemdup(&ipv4_devconf_template, + sizeof(ipv4_devconf_template), GFP_KERNEL); + if (!net->ipv4_devconf) + return -ENOMEM; + + net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template, + sizeof(ipv4_devconf_template), + GFP_KERNEL); + if (!net->ipv4_devconf_dflt) { + kfree(net->ipv4_devconf); + return -ENOMEM; + } + +#ifdef CONFIG_SYSCTL + devinet_sysctl_register(net, NULL, net->ipv4_devconf); + devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt); + + multi_ipv4_table[0].data = &IPV4_DEVCONF_ALL(net, FORWARDING); +#endif + return 0; +} + +static void devinet_net_exit(struct net
*net) +{ +#ifdef CONFIG_SYSCTL + devinet_sysctl_unregister(net->ipv4_devconf_dflt); + devinet_sysctl_unregister(net->ipv4_devconf); +#endif +} + +static struct pernet_operations devinet_net_ops = { + .init = devinet_net_init, + .exit = devinet_net_exit, +}; + void __init devinet_init(void) { + register_pernet_subsys(&devinet_net_ops); register_gifconf(PF_INET, inet_gifconf); register_netdevice_notifier(&ip_netdev_notifier); rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); -#ifdef CONFIG_SYSCTL - devinet_sysctl.sysctl_header = - register_sysctl_table(devinet_sysctl.devinet_root_dir); - devinet_sysctl_register(NULL, &ipv4_devconf_dflt); -#endif } EXPORT_SYMBOL(in_dev_finish_destroy); diff -Nurb linux-2.6.22-try2/net/ipv4/esp4.c linux-2.6.22-try2-netns/net/ipv4/esp4.c --- linux-2.6.22-try2/net/ipv4/esp4.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/esp4.c 2007-12-19 22:49:18.000000000 -0500 @@ -307,6 +307,9 @@ struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; diff -Nurb linux-2.6.22-try2/net/ipv4/fib_frontend.c linux-2.6.22-try2-netns/net/ipv4/fib_frontend.c --- linux-2.6.22-try2/net/ipv4/fib_frontend.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/fib_frontend.c 2007-12-19 22:49:18.000000000 -0500 @@ -51,38 +51,34 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -struct fib_table *ip_fib_local_table; -struct fib_table *ip_fib_main_table; - #define FIB_TABLE_HASHSZ 1 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; #else #define FIB_TABLE_HASHSZ 256 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *fib_new_table(u32 id) +struct fib_table *fib_new_table(struct net 
*net, u32 id) { struct fib_table *tb; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; - tb = fib_get_table(id); + tb = fib_get_table(net, id); if (tb) return tb; tb = fib_hash_init(id); if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); - hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb_hlist, &net->ip_fib_table_hash[h]); return tb; } -struct fib_table *fib_get_table(u32 id) +struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; + struct hlist_head *head; struct hlist_node *node; unsigned int h; @@ -90,7 +86,8 @@ id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + head = &net->ip_fib_table_hash[h]; + hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -99,9 +96,10 @@ rcu_read_unlock(); return NULL; } + #endif /* CONFIG_IP_MULTIPLE_TABLES */ -static void fib_flush(void) +static void fib_flush(struct net *net) { int flushed = 0; struct fib_table *tb; @@ -109,7 +107,8 @@ unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + struct hlist_head *head = &net->ip_fib_table_hash[h]; + hlist_for_each_entry(tb, node, head, tb_hlist) flushed += tb->tb_flush(tb); } @@ -121,18 +120,23 @@ * Find the first device with a given source address. 
*/ -struct net_device * ip_dev_find(__be32 addr) +struct net_device * ip_dev_find(struct net *net, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = addr } } + }; struct fib_result res; struct net_device *dev = NULL; + struct fib_table *local_table; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (!ip_fib_local_table || - ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + local_table->tb_lookup(local_table, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -145,11 +149,15 @@ return dev; } -unsigned inet_addr_type(__be32 addr) +unsigned inet_addr_type(struct net *net, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { + .fl_net = net, + .nl_u = { .ip4_u = { .daddr = addr } } + }; struct fib_result res; unsigned ret = RTN_BROADCAST; + struct fib_table *local_table; if (ZERONET(addr) || BADCLASS(addr)) return RTN_BROADCAST; @@ -160,10 +168,10 @@ res.r = NULL; #endif - if (ip_fib_local_table) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (local_table) { ret = RTN_UNICAST; - if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, - &fl, &res)) { + if (!local_table->tb_lookup(local_table, &fl, &res)) { ret = res.type; fib_res_put(&res); } @@ -183,7 +191,8 @@ struct net_device *dev, __be32 *spec_dst, u32 *itag) { struct in_device *in_dev; - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = dev->nd_net, + .nl_u = { .ip4_u = { .daddr = src, .saddr = dst, .tos = tos } }, @@ -267,13 +276,16 @@ return len + nla_total_size(4); } -static int rtentry_to_fib_config(int cmd, struct rtentry *rt, +static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, struct fib_config *cfg) { __be32 addr; int plen; memset(cfg, 0, sizeof(*cfg)); + cfg->fc_nlinfo.pid = 0; + 
cfg->fc_nlinfo.nlh = NULL; + cfg->fc_nlinfo.net = net; if (rt->rt_dst.sa_family != AF_INET) return -EAFNOSUPPORT; @@ -334,7 +346,7 @@ colon = strchr(devname, ':'); if (colon) *colon = 0; - dev = __dev_get_by_name(devname); + dev = __dev_get_by_name(net, devname); if (!dev) return -ENODEV; cfg->fc_oif = dev->ifindex; @@ -357,7 +369,7 @@ if (rt->rt_gateway.sa_family == AF_INET && addr) { cfg->fc_gw = addr; if (rt->rt_flags & RTF_GATEWAY && - inet_addr_type(addr) == RTN_UNICAST) + inet_addr_type(net, addr) == RTN_UNICAST) cfg->fc_scope = RT_SCOPE_UNIVERSE; } @@ -398,7 +410,7 @@ * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ -int ip_rt_ioctl(unsigned int cmd, void __user *arg) +int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct fib_config cfg; struct rtentry rt; @@ -414,18 +426,18 @@ return -EFAULT; rtnl_lock(); - err = rtentry_to_fib_config(cmd, &rt, &cfg); + err = rtentry_to_fib_config(net, cmd, &rt, &cfg); if (err == 0) { struct fib_table *tb; if (cmd == SIOCDELRT) { - tb = fib_get_table(cfg.fc_table); + tb = fib_get_table(net, cfg.fc_table); if (tb) err = tb->tb_delete(tb, &cfg); else err = -ESRCH; } else { - tb = fib_new_table(cfg.fc_table); + tb = fib_new_table(net, cfg.fc_table); if (tb) err = tb->tb_insert(tb, &cfg); else @@ -480,6 +492,7 @@ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.net = skb->sk->sk_net; if (cfg->fc_type > RTN_MAX) { err = -EINVAL; @@ -527,6 +540,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_config cfg; struct fib_table *tb; int err; @@ -535,7 +549,7 @@ if (err < 0) goto errout; - tb = fib_get_table(cfg.fc_table); + tb = fib_get_table(net, cfg.fc_table); if (tb == NULL) { err = -ESRCH; goto errout; @@ -548,6 +562,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct 
fib_config cfg; struct fib_table *tb; int err; @@ -556,7 +571,7 @@ if (err < 0) goto errout; - tb = fib_new_table(cfg.fc_table); + tb = fib_new_table(net, cfg.fc_table); if (tb == NULL) { err = -ENOBUFS; goto errout; @@ -569,6 +584,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; @@ -583,8 +599,9 @@ s_e = cb->args[1]; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + struct hlist_head *head = &net->ip_fib_table_hash[h]; e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + hlist_for_each_entry(tb, node, head, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -613,6 +630,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { + struct net *net = ifa->ifa_dev->dev->nd_net; struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, @@ -622,12 +640,13 @@ .fc_prefsrc = ifa->ifa_local, .fc_oif = ifa->ifa_dev->dev->ifindex, .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + .fc_nlinfo.net = net, }; if (type == RTN_UNICAST) - tb = fib_new_table(RT_TABLE_MAIN); + tb = fib_new_table(net, RT_TABLE_MAIN); else - tb = fib_new_table(RT_TABLE_LOCAL); + tb = fib_new_table(net, RT_TABLE_LOCAL); if (tb == NULL) return; @@ -688,6 +707,7 @@ { struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; + struct net *net = dev->nd_net; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa; __be32 brd = ifa->ifa_address|~ifa->ifa_mask; @@ -736,15 +756,15 @@ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); /* Check, that this local address finally disappeared. */ - if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { + if (inet_addr_type(net, ifa->ifa_local) != RTN_LOCAL) { /* And the last, but not the least thing. We must flush stray FIB entries. First of all, we scan fib_info list searching for stray nexthop entries, then ignite fib_flush. 
*/ - if (fib_sync_down(ifa->ifa_local, NULL, 0)) - fib_flush(); + if (fib_sync_down(net, ifa->ifa_local, NULL, 0)) + fib_flush(net); } } #undef LOCAL_OK @@ -753,11 +773,12 @@ #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn, struct fib_table *tb ) { struct fib_result res; - struct flowi fl = { .mark = frn->fl_mark, + struct flowi fl = { .fl_net = net, + .mark = frn->fl_mark, .nl_u = { .ip4_u = { .daddr = frn->fl_addr, .tos = frn->fl_tos, .scope = frn->fl_scope } } }; @@ -786,6 +807,7 @@ static void nl_fib_input(struct sock *sk, int len) { + struct net *net = sk->sk_net; struct sk_buff *skb = NULL; struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; @@ -804,9 +826,9 @@ } frn = (struct fib_result_nl *) NLMSG_DATA(nlh); - tb = fib_get_table(frn->tb_id_in); + tb = fib_get_table(net, frn->tb_id_in); - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn, tb); pid = NETLINK_CB(skb).pid; /* pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ @@ -814,16 +836,36 @@ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } -static void nl_fib_lookup_init(void) +static int nl_fib_lookup_init(struct net *net) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, - THIS_MODULE); + int error = -ENOMEM; + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, + NULL, THIS_MODULE); + if (sk) { + /* Don't hold an extra reference on the namespace */ + put_net(sk->sk_net); + net->nlfl = sk; + error = 0; + } + return error; +} + +static void nl_fib_lookup_exit(struct net *net) +{ + /* At the last minute lie and say this is a socket for the + * initial network namespace. So the socket will be safe to + * free. 
+ */ + net->nlfl->sk_net = get_net(&init_net); + sock_put(net->nlfl); } static void fib_disable_ip(struct net_device *dev, int force) { - if (fib_sync_down(0, dev, force)) - fib_flush(); + struct net *net = dev->nd_net; + if (fib_sync_down(net, 0, dev, force)) + fib_flush(net); rt_cache_flush(0); arp_ifdown(dev); } @@ -860,6 +902,9 @@ struct net_device *dev = ptr; struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2); return NOTIFY_DONE; @@ -889,6 +934,85 @@ return NOTIFY_DONE; } +static int ip_fib_net_init(struct net *net) +{ + unsigned int i; + + net->ip_fib_table_hash = kzalloc( + sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); + if (!net->ip_fib_table_hash) + return -ENOMEM; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&net->ip_fib_table_hash[i]); +#ifndef CONFIG_IP_MULTIPLE_TABLES + net->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&net->ip_fib_local_table->tb_hlist, + &net->ip_fib_table_hash[0]); + net->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&net->ip_fib_main_table->tb_hlist, + &net->ip_fib_table_hash[0]); +#else + fib4_rules_init(net); +#endif + return 0; +} + +static void ip_fib_net_exit(struct net *net) +{ + unsigned int i; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + fib4_rules_exit(net); +#endif + + synchronize_rcu(); /* needed? 
*/ + for (i = 0; i < FIB_TABLE_HASHSZ; i++) { + struct fib_table *tb; + struct hlist_head *head; + struct hlist_node *node, *tmp; + + head = &net->ip_fib_table_hash[i]; + hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { + hlist_del(node); + fib_hash_exit(tb); + } + } + kfree(net->ip_fib_table_hash); +} + +static int fib_net_init(struct net *net) +{ + int error; + + error = 0; + if ((error = ip_fib_net_init(net))) + goto out; + if ((error = fib_info_init(net))) + goto out_info; + if ((error = nl_fib_lookup_init(net))) + goto out_nlfl; + if ((error = fib_proc_init(net))) + goto out_proc; +out: + return error; +out_proc: + nl_fib_lookup_exit(net); +out_nlfl: + fib_info_exit(net); +out_info: + ip_fib_net_exit(net); + goto out; +} + +static void fib_net_exit(struct net *net) +{ + fib_proc_exit(net); + nl_fib_lookup_exit(net); + fib_info_exit(net); + ip_fib_net_exit(net); +} + static struct notifier_block fib_inetaddr_notifier = { .notifier_call =fib_inetaddr_event, }; @@ -897,28 +1021,20 @@ .notifier_call =fib_netdev_event, }; +static struct pernet_operations fib_net_ops = { + .init = fib_net_init, + .exit = fib_net_exit, +}; + void __init ip_fib_init(void) { - unsigned int i; - - for (i = 0; i < FIB_TABLE_HASHSZ; i++) - INIT_HLIST_HEAD(&fib_table_hash[i]); -#ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); -#else - fib4_rules_init(); -#endif - - register_netdevice_notifier(&fib_netdev_notifier); - register_inetaddr_notifier(&fib_inetaddr_notifier); - nl_fib_lookup_init(); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); + + register_pernet_subsys(&fib_net_ops); + 
register_netdevice_notifier(&fib_netdev_notifier); + register_inetaddr_notifier(&fib_inetaddr_notifier); } EXPORT_SYMBOL(inet_addr_type); diff -Nurb linux-2.6.22-try2/net/ipv4/fib_hash.c linux-2.6.22-try2-netns/net/ipv4/fib_hash.c --- linux-2.6.22-try2/net/ipv4/fib_hash.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/fib_hash.c 2007-12-19 22:49:18.000000000 -0500 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -274,11 +275,10 @@ return err; } -static int fn_hash_last_dflt=-1; - static void fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; int order, last_idx; struct hlist_node *node; struct fib_node *f; @@ -316,12 +316,12 @@ if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, &fn_hash_last_dflt)) { + &last_idx, &net->fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + net->fn_hash_last_dflt = order; goto out; } fi = next_fi; @@ -330,16 +330,16 @@ } if (order <= 0 || fi == NULL) { - fn_hash_last_dflt = -1; + net->fn_hash_last_dflt = -1; goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { + if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + net->fn_hash_last_dflt = order; goto out; } @@ -350,7 +350,7 @@ if (last_resort) atomic_inc(&last_resort->fib_clntref); } - fn_hash_last_dflt = last_idx; + net->fn_hash_last_dflt = last_idx; out: read_unlock(&fib_hash_lock); } @@ -759,11 +759,15 @@ return skb->len; } -#ifdef CONFIG_IP_MULTIPLE_TABLES +void fib_hash_exit(struct fib_table *tb) +{ + if (!tb) + return; + fn_hash_flush(tb); + kfree(tb); +} + struct fib_table * fib_hash_init(u32 id) -#else -struct fib_table * 
__init fib_hash_init(u32 id) -#endif { struct fib_table *tb; @@ -799,6 +803,7 @@ #ifdef CONFIG_PROC_FS struct fib_iter_state { + struct net *net; struct fn_zone *zone; int bucket; struct hlist_head *hash_head; @@ -812,7 +817,8 @@ static struct fib_alias *fib_get_first(struct seq_file *seq) { struct fib_iter_state *iter = seq->private; - struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; + struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN); + struct fn_hash *table = (struct fn_hash *) main_table->tb_data; iter->bucket = 0; iter->hash_head = NULL; @@ -948,10 +954,11 @@ static void *fib_seq_start(struct seq_file *seq, loff_t *pos) { + struct fib_iter_state *iter = seq->private; void *v = NULL; read_lock(&fib_hash_lock); - if (ip_fib_main_table) + if (fib_get_table(iter->net, RT_TABLE_MAIN)) v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; return v; } @@ -1051,6 +1058,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -1058,23 +1066,32 @@ goto out; } +static int fib_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct fib_iter_state *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations fib_seq_fops = { .owner = THIS_MODULE, .open = fib_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_seq_release, }; -int __init fib_proc_init(void) +int fib_proc_init(struct net *net) { - if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) + net->fn_hash_last_dflt = -1; + if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) return -ENOMEM; return 0; } -void __init fib_proc_exit(void) +void fib_proc_exit(struct net *net) { - proc_net_remove("route"); + proc_net_remove(net, "route"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv4/fib_rules.c 
linux-2.6.22-try2-netns/net/ipv4/fib_rules.c --- linux-2.6.22-try2/net/ipv4/fib_rules.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/fib_rules.c 2007-12-19 22:49:18.000000000 -0500 @@ -32,8 +32,6 @@ #include #include -static struct fib_rules_ops fib4_rules_ops; - struct fib4_rule { struct fib_rule common; @@ -49,35 +47,14 @@ #endif }; -static struct fib4_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFF, - .table = RT_TABLE_DEFAULT, - .action = FR_ACT_TO_TBL, - }, +struct fib4_rule_table { + struct list_head fib4_rules; + struct fib4_rule default_rule; + struct fib4_rule main_rule; + struct fib4_rule local_rule; + struct fib_rules_ops fib4_rules_ops; }; -static struct fib4_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, -}; - -static struct fib4_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .table = RT_TABLE_LOCAL, - .action = FR_ACT_TO_TBL, - .flags = FIB_RULE_PERMANENT, - }, -}; - -static LIST_HEAD(fib4_rules); - #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) { @@ -87,12 +64,14 @@ int fib_lookup(struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; + struct fib4_rule_table *table = net->fib4_table; struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg); res->r = arg.rule; return err; @@ -122,7 +101,7 @@ goto errout; } - if ((tbl = fib_get_table(rule->table)) == NULL) + if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL) goto errout; err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); @@ -138,7 +117,7 @@ if (res->r && res->r->action == FR_ACT_TO_TBL && FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { struct fib_table *tb; - if ((tb = fib_get_table(res->r->table)) != NULL) + if ((tb = 
fib_get_table(flp->fl_net, res->r->table)) != NULL) tb->tb_select_default(tb, flp, res); } } @@ -159,13 +138,13 @@ return 1; } -static struct fib_table *fib_empty_table(void) +static struct fib_table *fib_empty_table(struct net *net) { u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_get_table(id) == NULL) - return fib_new_table(id); + if (fib_get_table(net, id) == NULL) + return fib_new_table(net, id); return NULL; } @@ -178,6 +157,7 @@ struct nlmsghdr *nlh, struct fib_rule_hdr *frh, struct nlattr **tb) { + struct net *net = skb->sk->sk_net; int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; @@ -188,7 +168,7 @@ if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; - table = fib_empty_table(); + table = fib_empty_table(net); if (table == NULL) { err = -ENOBUFS; goto errout; @@ -274,14 +254,15 @@ return -ENOBUFS; } -static u32 fib4_rule_default_pref(void) +static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) { - struct list_head *pos; + struct list_head *list, *pos; struct fib_rule *rule; - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { + list = ops->rules_list; + if (!list_empty(list)) { + pos = list->next; + if (pos->next != list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -298,12 +279,37 @@ + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { rt_cache_flush(-1); } -static struct fib_rules_ops fib4_rules_ops = { +static struct fib4_rule_table fib4_rule_table = { + .default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, + }, + .main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, + }, + .local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = 
FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, + }, + .fib4_rules_ops = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), @@ -317,15 +323,34 @@ .flush_cache = fib4_rule_flush_cache, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = &fib4_rules, + .rules_list = &fib4_rule_table.fib4_rules, /* &fib4_rules, */ .owner = THIS_MODULE, + }, }; -void __init fib4_rules_init(void) + +void fib4_rules_init(struct net *net) { - list_add_tail(&local_rule.common.list, &fib4_rules); - list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); + struct fib4_rule_table *table; + table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL); + if (!table) + return; + INIT_LIST_HEAD(&table->fib4_rules); + list_add_tail(&table->local_rule.common.list, &table->fib4_rules); + list_add_tail(&table->main_rule.common.list, &table->fib4_rules); + list_add_tail(&table->default_rule.common.list, &table->fib4_rules); + table->fib4_rules_ops.rules_list = &table->fib4_rules; + if (fib_rules_register(net, &table->fib4_rules_ops)) { + kfree(table); + return; + } + net->fib4_table = table; +} - fib_rules_register(&fib4_rules_ops); +void fib4_rules_exit(struct net *net) +{ + struct fib4_rule_table *table = net->fib4_table; + if (table) + fib_rules_unregister(net, &table->fib4_rules_ops); + kfree(table); } diff -Nurb linux-2.6.22-try2/net/ipv4/fib_semantics.c linux-2.6.22-try2-netns/net/ipv4/fib_semantics.c --- linux-2.6.22-try2/net/ipv4/fib_semantics.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/fib_semantics.c 2007-12-19 22:49:18.000000000 -0500 @@ -50,14 +50,9 @@ #define FSprintk(a...) 
static DEFINE_SPINLOCK(fib_info_lock); -static struct hlist_head *fib_info_hash; -static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; -static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) -static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -153,7 +148,8 @@ dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); - fib_info_cnt--; + fi->fib_net->fib_info_cnt--; + release_net(fi->fib_net); kfree(fi); } @@ -196,9 +192,9 @@ return 0; } -static inline unsigned int fib_info_hashfn(const struct fib_info *fi) +static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = net->fib_info_hash_size - 1; unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -208,15 +204,15 @@ return (val ^ (val >> 7) ^ (val >> 12)) & mask; } -static struct fib_info *fib_find_info(const struct fib_info *nfi) +static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi) { struct hlist_head *head; struct hlist_node *node; struct fib_info *fi; unsigned int hash; - hash = fib_info_hashfn(nfi); - head = &fib_info_hash[hash]; + hash = fib_info_hashfn(net, nfi); + head = &net->fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { if (fi->fib_nhs != nfi->fib_nhs) @@ -249,6 +245,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { + struct net *net = dev->nd_net; struct hlist_head *head; struct hlist_node *node; struct fib_nh *nh; @@ -257,7 +254,7 @@ spin_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; hlist_for_each_entry(nh, node, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && @@ -320,11 +317,11 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + err = rtnl_notify(skb, 
info->net, info->pid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); + rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with @@ -517,6 +514,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { + struct net *net = cfg->fc_nlinfo.net; int err; if (nh->nh_gw) { @@ -531,9 +529,9 @@ if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; - if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) + if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) + if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) return -ENODEV; if (!(dev->flags&IFF_UP)) return -ENETDOWN; @@ -544,6 +542,7 @@ } { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = nh->nh_gw, @@ -580,7 +579,7 @@ if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) return -EINVAL; - in_dev = inetdev_by_index(nh->nh_oif); + in_dev = inetdev_by_index(net, nh->nh_oif); if (in_dev == NULL) return -ENODEV; if (!(in_dev->dev->flags&IFF_UP)) { @@ -595,9 +594,9 @@ return 0; } -static inline unsigned int fib_laddr_hashfn(__be32 val) +static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = net->fib_info_hash_size - 1; return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } @@ -622,21 +621,22 @@ free_pages((unsigned long) hash, get_order(bytes)); } -static void fib_hash_move(struct hlist_head *new_info_hash, +static void fib_hash_move(struct net *net, + struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = net->fib_info_hash_size; unsigned int i, bytes; spin_lock_bh(&fib_info_lock); - old_info_hash = fib_info_hash; - old_laddrhash = 
fib_info_laddrhash; - fib_hash_size = new_size; + old_info_hash = net->fib_info_hash; + old_laddrhash = net->fib_info_laddrhash; + net->fib_info_hash_size = new_size; for (i = 0; i < old_size; i++) { - struct hlist_head *head = &fib_info_hash[i]; + struct hlist_head *head = &net->fib_info_hash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -646,15 +646,15 @@ hlist_del(&fi->fib_hash); - new_hash = fib_info_hashfn(fi); + new_hash = fib_info_hashfn(net, fi); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); } } - fib_info_hash = new_info_hash; + net->fib_info_hash = new_info_hash; for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &fib_info_laddrhash[i]; + struct hlist_head *lhead = &net->fib_info_laddrhash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -664,12 +664,12 @@ hlist_del(&fi->fib_lhash); - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); + new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc); ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); } } - fib_info_laddrhash = new_laddrhash; + net->fib_info_laddrhash = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -680,6 +680,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) { + struct net *net = cfg->fc_nlinfo.net; int err; struct fib_info *fi = NULL; struct fib_info *ofi; @@ -698,8 +699,8 @@ #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (net->fib_info_cnt >= net->fib_info_hash_size) { + unsigned int new_size = net->fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -716,18 +717,19 @@ memset(new_info_hash, 0, bytes); memset(new_laddrhash, 0, bytes); - fib_hash_move(new_info_hash, new_laddrhash, new_size); + fib_hash_move(net, new_info_hash, new_laddrhash, new_size); } - if (!fib_hash_size) + if (!net->fib_info_hash_size) goto failure; } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if 
(fi == NULL) goto failure; - fib_info_cnt++; + net->fib_info_cnt++; + fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; @@ -799,7 +801,7 @@ if (nhs != 1 || nh->nh_gw) goto err_inval; nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); + nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); err = -ENODEV; if (nh->nh_dev == NULL) goto failure; @@ -813,12 +815,12 @@ if (fi->fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fi->fib_prefsrc != cfg->fc_dst) - if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) + if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } link_it: - if ((ofi = fib_find_info(fi)) != NULL) { + if ((ofi = fib_find_info(net, fi)) != NULL) { fi->fib_dead = 1; free_fib_info(fi); ofi->fib_treeref++; @@ -829,11 +831,13 @@ atomic_inc(&fi->fib_clntref); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, - &fib_info_hash[fib_info_hashfn(fi)]); + &net->fib_info_hash[fib_info_hashfn(net, fi)]); if (fi->fib_prefsrc) { struct hlist_head *head; + unsigned int hash; - head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; + hash = fib_laddr_hashfn(net, fi->fib_prefsrc); + head = &net->fib_info_laddrhash[hash]; hlist_add_head(&fi->fib_lhash, head); } change_nexthops(fi) { @@ -843,7 +847,7 @@ if (!nh->nh_dev) continue; hash = fib_devindex_hashfn(nh->nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) spin_unlock_bh(&fib_info_lock); @@ -1030,7 +1034,7 @@ - device went down -> we must shutdown all nexthops going via it. 
*/ -int fib_sync_down(__be32 local, struct net_device *dev, int force) +int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1038,9 +1042,9 @@ if (force) scope = -1; - if (local && fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; + if (local && net->fib_info_laddrhash) { + unsigned int hash = fib_laddr_hashfn(net, local); + struct hlist_head *head = &net->fib_info_laddrhash[hash]; struct hlist_node *node; struct fib_info *fi; @@ -1055,7 +1059,7 @@ if (dev) { struct fib_info *prev_fi = NULL; unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = &net->fib_info_devhash[hash]; struct hlist_node *node; struct fib_nh *nh; @@ -1108,6 +1112,7 @@ int fib_sync_up(struct net_device *dev) { + struct net *net = dev->nd_net; struct fib_info *prev_fi; unsigned int hash; struct hlist_head *head; @@ -1120,7 +1125,7 @@ prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = &net->fib_info_devhash[hash]; ret = 0; hlist_for_each_entry(nh, node, head, nh_hash) { @@ -1210,3 +1215,17 @@ spin_unlock_bh(&fib_multipath_lock); } #endif + +int fib_info_init(struct net *net) +{ + net->fib_info_devhash = kzalloc( + sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL); + if (!net->fib_info_devhash) + return -ENOMEM; + return 0; +} + +void fib_info_exit(struct net *net) +{ + kfree(net->fib_info_devhash); +} diff -Nurb linux-2.6.22-try2/net/ipv4/fib_trie.c linux-2.6.22-try2-netns/net/ipv4/fib_trie.c --- linux-2.6.22-try2/net/ipv4/fib_trie.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/fib_trie.c 2007-12-19 22:49:18.000000000 -0500 @@ -78,6 +78,7 @@ #include #include #include +#include #include #include "fib_lookup.h" @@ -172,7 +173,6 @@ static void tnode_free(struct tnode *tn); static 
struct kmem_cache *fn_alias_kmem __read_mostly; -static struct trie *trie_local = NULL, *trie_main = NULL; /* rcu_read_lock needs to be hold by caller from readside */ @@ -290,11 +290,10 @@ WARN_ON(tn && tn->pos+tn->bits > 32); } -static int halve_threshold = 25; -static int inflate_threshold = 50; -static int halve_threshold_root = 8; -static int inflate_threshold_root = 15; - +static const int halve_threshold = 25; +static const int inflate_threshold = 50; +static const int halve_threshold_root = 15; +static const int inflate_threshold_root = 25; static void __alias_free_mem(struct rcu_head *head) { @@ -1771,11 +1770,10 @@ return found; } -static int trie_last_dflt = -1; - static void fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { + struct net *net = flp->fl_net; struct trie *t = (struct trie *) tb->tb_data; int order, last_idx; struct fib_info *fi = NULL; @@ -1819,28 +1817,28 @@ if (next_fi != res->fi) break; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, &trie_last_dflt)) { + &last_idx, &net->trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - trie_last_dflt = order; + net->trie_last_dflt = order; goto out; } fi = next_fi; order++; } if (order <= 0 || fi == NULL) { - trie_last_dflt = -1; + net->trie_last_dflt = -1; goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { + if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - trie_last_dflt = order; + net->trie_last_dflt = order; goto out; } if (last_idx >= 0) { @@ -1850,7 +1848,7 @@ if (last_resort) atomic_inc(&last_resort->fib_clntref); } - trie_last_dflt = last_idx; + net->trie_last_dflt = last_idx; out:; rcu_read_unlock(); } @@ -1957,11 +1955,15 @@ /* Fix more generic FIB names for init later */ -#ifdef CONFIG_IP_MULTIPLE_TABLES +void 
fib_hash_exit(struct fib_table *tb) +{ + if (!tb) + return; + fn_trie_flush(tb); + kfree(tb); +} + struct fib_table * fib_hash_init(u32 id) -#else -struct fib_table * __init fib_hash_init(u32 id) -#endif { struct fib_table *tb; struct trie *t; @@ -1991,11 +1993,6 @@ trie_init(t); if (id == RT_TABLE_LOCAL) - trie_local = t; - else if (id == RT_TABLE_MAIN) - trie_main = t; - - if (id == RT_TABLE_LOCAL) printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; @@ -2004,6 +2001,8 @@ #ifdef CONFIG_PROC_FS /* Depth first Trie walk iterator */ struct fib_trie_iter { + struct net *net; + struct trie *trie_local, *trie_main; struct tnode *tnode; struct trie *trie; unsigned index; @@ -2170,7 +2169,21 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; + struct trie *trie_local, *trie_main; struct trie_stat *stat; + struct fib_table *tb; + + trie_local = NULL; + tb = fib_get_table(net, RT_TABLE_LOCAL); + if (tb) + trie_local = (struct trie *) tb->tb_data; + + trie_main = NULL; + tb = fib_get_table(net, RT_TABLE_MAIN); + if (tb) + trie_main = (struct trie *) tb->tb_data; + stat = kmalloc(sizeof(*stat), GFP_KERNEL); if (!stat) @@ -2197,7 +2210,15 @@ static int fib_triestat_seq_open(struct inode *inode, struct file *file) { - return single_open(file, fib_triestat_seq_show, NULL); + return single_open(file, fib_triestat_seq_show, + get_net(PROC_NET(inode))); +} + +static int fib_triestat_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + put_net(seq->private); + return single_release(inode, file); } static const struct file_operations fib_triestat_fops = { @@ -2205,7 +2226,7 @@ .open = fib_triestat_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = fib_triestat_seq_release, }; static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, @@ -2214,13 +2235,13 @@ loff_t idx = 0; struct node *n; - for (n = 
fib_trie_get_first(iter, trie_local); + for (n = fib_trie_get_first(iter, iter->trie_local); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; } - for (n = fib_trie_get_first(iter, trie_main); + for (n = fib_trie_get_first(iter, iter->trie_main); n; ++idx, n = fib_trie_get_next(iter)) { if (pos == idx) return n; @@ -2230,10 +2251,23 @@ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) { + struct fib_trie_iter *iter = seq->private; + struct fib_table *tb; + + if (!iter->trie_local) { + tb = fib_get_table(iter->net, RT_TABLE_LOCAL); + if (tb) + iter->trie_local = (struct trie *) tb->tb_data; + } + if (!iter->trie_main) { + tb = fib_get_table(iter->net, RT_TABLE_MAIN); + if (tb) + iter->trie_main = (struct trie *) tb->tb_data; + } rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; - return fib_trie_get_idx(seq->private, *pos - 1); + return fib_trie_get_idx(iter, *pos - 1); } static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -2251,8 +2285,8 @@ return v; /* continue scan in next trie */ - if (iter->trie == trie_local) - return fib_trie_get_first(iter, trie_main); + if (iter->trie == iter->trie_local) + return fib_trie_get_first(iter, iter->trie_main); return NULL; } @@ -2318,7 +2352,7 @@ return 0; if (!NODE_PARENT(n)) { - if (iter->trie == trie_local) + if (iter->trie == iter->trie_local) seq_puts(seq, ":\n"); else seq_puts(seq, "
:\n"); @@ -2384,6 +2418,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2391,12 +2426,20 @@ goto out; } +static int fib_trie_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct fib_trie_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations fib_trie_fops = { .owner = THIS_MODULE, .open = fib_trie_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_trie_seq_release, }; static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) @@ -2434,7 +2477,7 @@ return 0; } - if (iter->trie == trie_local) + if (iter->trie == iter->trie_local) return 0; if (IS_TNODE(l)) return 0; @@ -2505,6 +2548,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2517,35 +2561,37 @@ .open = fib_route_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = fib_trie_seq_release, }; -int __init fib_proc_init(void) +int fib_proc_init(struct net *net) { - if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) + net->trie_last_dflt = -1; + + if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) goto out1; - if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) + if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) goto out2; - if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) + if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) goto out3; return 0; out3: - proc_net_remove("fib_triestat"); + proc_net_remove(net, "fib_triestat"); out2: - proc_net_remove("fib_trie"); + proc_net_remove(net, "fib_trie"); out1: return -ENOMEM; } -void __init fib_proc_exit(void) +void fib_proc_exit(struct net 
*net) { - proc_net_remove("fib_trie"); - proc_net_remove("fib_triestat"); - proc_net_remove("route"); + proc_net_remove(net, "fib_trie"); + proc_net_remove(net, "fib_triestat"); + proc_net_remove(net, "route"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv4/icmp.c linux-2.6.22-try2-netns/net/ipv4/icmp.c --- linux-2.6.22-try2/net/ipv4/icmp.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/icmp.c 2007-12-19 22:49:18.000000000 -0500 @@ -229,14 +229,13 @@ * * On SMP we have one ICMP socket per-cpu. */ -static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; -#define icmp_socket __get_cpu_var(__icmp_socket) +#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id())) -static __inline__ int icmp_xmit_lock(void) +static __inline__ int icmp_xmit_lock(struct net *net) { local_bh_disable(); - if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { + if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) { /* This can happen if the output path signals a * dst_link_failure() for an outgoing ICMP packet. 
*/ @@ -246,9 +245,9 @@ return 0; } -static void icmp_xmit_unlock(void) +static void icmp_xmit_unlock(struct net *net) { - spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); + spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock); } /* @@ -347,19 +346,20 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt) { + struct net *net = icmp_param->skb->dev->nd_net; struct sk_buff *skb; - if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, ipc, rt, MSG_DONTWAIT) < 0) - ip_flush_pending_frames(icmp_socket->sk); - else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + ip_flush_pending_frames(icmp_socket(net)->sk); + else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = icmp_hdr(skb); __wsum csum = 0; struct sk_buff *skb1; - skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { + skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) { csum = csum_add(csum, skb1->csum); } csum = csum_partial_copy_nocheck((void *)&icmp_param->data, @@ -367,7 +367,7 @@ icmp_param->head_len, csum); icmph->checksum = csum_fold(csum); skb->ip_summed = CHECKSUM_NONE; - ip_push_pending_frames(icmp_socket->sk); + ip_push_pending_frames(icmp_socket(net)->sk); } } @@ -377,7 +377,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct sock *sk = icmp_socket->sk; + struct net *net = icmp_param->skb->dev->nd_net; + struct sock *sk = icmp_socket(net)->sk; struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; @@ -386,7 +387,7 @@ if (ip_options_echo(&icmp_param->replyopts, skb)) return; - if (icmp_xmit_lock()) + if (icmp_xmit_lock(net)) return; icmp_param->data.icmph.checksum = 0; @@ -401,7 +402,8 @@ daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = 
{ .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, .tos = RT_TOS(ip_hdr(skb)->tos) } }, @@ -415,7 +417,7 @@ icmp_push_reply(icmp_param, &ipc, rt); ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(net); } @@ -436,6 +438,7 @@ int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; + struct net *net; struct ipcm_cookie ipc; __be32 saddr; u8 tos; @@ -443,6 +446,7 @@ if (!rt) goto out; + net = rt->fl.fl_net; /* * Find the original header. It is expected to be valid, of course. * Check this, icmp_send is called from the most obscure devices @@ -505,7 +509,7 @@ } } - if (icmp_xmit_lock()) + if (icmp_xmit_lock(net)) return; /* @@ -517,7 +521,7 @@ struct net_device *dev = NULL; if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(rt->fl.iif); + dev = dev_get_by_index(&init_net, rt->fl.iif); if (dev) { saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -545,12 +549,13 @@ icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); icmp_out_count(icmp_param.data.icmph.type); - inet_sk(icmp_socket->sk)->tos = tos; + inet_sk(icmp_socket(net)->sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = icmp_param.replyopts.srr ? 
@@ -593,7 +598,7 @@ ende: ip_rt_put(rt); out_unlock: - icmp_xmit_unlock(); + icmp_xmit_unlock(net); out:; } @@ -604,6 +609,7 @@ static void icmp_unreach(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; struct iphdr *iph; struct icmphdr *icmph; int hash, protocol; @@ -634,7 +640,7 @@ case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { + if (net->sysctl_ipv4_no_pmtu_disc) { LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", @@ -678,7 +684,7 @@ */ if (!sysctl_icmp_ignore_bogus_error_responses && - inet_addr_type(iph->daddr) == RTN_BROADCAST) { + inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " "type %u, code %u " @@ -707,7 +713,7 @@ hash = protocol & (MAX_INET_PROTOS - 1); read_lock(&raw_v4_lock); if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { - while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, + while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex, skb->skb_tag)) != NULL) { raw_err(raw_sk, skb, info); @@ -1179,29 +1185,54 @@ }, }; -void __init icmp_init(struct net_proto_family *ops) +static void icmp_net_exit(struct net *net) { - struct inet_sock *inet; + struct socket **sock; int i; for_each_possible_cpu(i) { + sock = percpu_ptr(net->__icmp_socket, i); + if (!*sock) + continue; + /* At the last minute lie and say this is a socket for + * the initial network namespace. So the socket will + * be safe to free. 
+ */ + (*sock)->sk->sk_net = get_net(&init_net); + sock_release(*sock); + *sock = NULL; + } + percpu_free(net->__icmp_socket); +} + +static int icmp_net_init(struct net *net) +{ + struct socket **sock; + struct inet_sock *inet; int err; + int i; + + net->__icmp_socket = alloc_percpu(struct socket *); + if (!net->__icmp_socket) + return -ENOMEM; + + for_each_possible_cpu(i) { - err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, - &per_cpu(__icmp_socket, i)); + sock = percpu_ptr(net->__icmp_socket, i); + err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock); if (err < 0) - panic("Failed to create the ICMP control socket.\n"); + goto fail; - per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; + (*sock)->sk->sk_allocation = GFP_ATOMIC; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - per_cpu(__icmp_socket, i)->sk->sk_sndbuf = + (*sock)->sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); - inet = inet_sk(per_cpu(__icmp_socket, i)->sk); + inet = inet_sk((*sock)->sk); inet->uc_ttl = -1; inet->pmtudisc = IP_PMTUDISC_DONT; @@ -1209,8 +1240,27 @@ * see it, we do not wish this socket to see incoming * packets. 
*/ - per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); + (*sock)->sk->sk_prot->unhash((*sock)->sk); + + /* Don't hold an extra reference on the namespace */ + put_net((*sock)->sk->sk_net); } + return 0; +fail: + icmp_net_exit(net); + return err; + +} + +static struct pernet_operations icmp_net_ops = { + .init = icmp_net_init, + .exit = icmp_net_exit, +}; + +void __init icmp_init(struct net_proto_family *ops) +{ + if (register_pernet_subsys(&icmp_net_ops)) + panic("Failed to create the ICMP control socket.\n"); } EXPORT_SYMBOL(icmp_err_convert); diff -Nurb linux-2.6.22-try2/net/ipv4/igmp.c linux-2.6.22-try2-netns/net/ipv4/igmp.c --- linux-2.6.22-try2/net/ipv4/igmp.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/igmp.c 2007-12-19 22:49:18.000000000 -0500 @@ -97,6 +97,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE #include @@ -129,12 +130,12 @@ */ #define IGMP_V1_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ ((in_dev)->mr_v1_seen && \ time_before(jiffies, (in_dev)->mr_v1_seen))) #define IGMP_V2_SEEN(in_dev) \ - (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ + (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) @@ -296,7 +297,8 @@ return NULL; { - struct flowi fl = { .oif = dev->ifindex, + struct flowi fl = { .fl_net = &init_net, + .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = IGMPV3_ALL_MCR } }, .proto = IPPROTO_IGMP }; @@ -646,7 +648,8 @@ dst = group; { - struct flowi fl = { .oif = dev->ifindex, + struct flowi fl = { .fl_net = &init_net, + .oif = dev->ifindex, .nl_u = { .ip4_u = { .daddr = dst } }, .proto = IPPROTO_IGMP }; if (ip_route_output_key(&rt, &fl)) @@ -929,6 +932,11 @@ struct 
in_device *in_dev = in_dev_get(skb->dev); int len = skb->len; + if (in_dev && skb->dev->nd_net != &init_net) { + in_dev_put(in_dev); /* IGMP is init_net only: drop the reference taken by in_dev_get() above */ + in_dev = NULL; /* reuse the in_dev==NULL bail-out below to free the skb and return 0 */ + } + if (in_dev==NULL) { kfree_skb(skb); return 0; @@ -1393,20 +1401,22 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } + }; struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; if (imr->imr_ifindex) { - idev = inetdev_by_index(imr->imr_ifindex); + idev = inetdev_by_index(&init_net, imr->imr_ifindex); if (idev) __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { - dev = ip_dev_find(imr->imr_address.s_addr); + dev = ip_dev_find(&init_net, imr->imr_address.s_addr); if (!dev) return NULL; dev_put(dev); @@ -2234,7 +2244,7 @@ struct in_device *in_dev; inet->mc_list = iml->next; - in_dev = inetdev_by_index(iml->multi.imr_ifindex); + in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); @@ -2291,7 +2301,7 @@ struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *in_dev; in_dev = in_dev_get(state->dev); if (!in_dev) @@ -2453,7 +2463,7 @@ state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2613,8 +2623,8 @@ int __init igmp_mc_proc_init(void) { - proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); - proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, 
&igmp_mcf_seq_fops); return 0; } #endif diff -Nurb linux-2.6.22-try2/net/ipv4/inet_connection_sock.c linux-2.6.22-try2-netns/net/ipv4/inet_connection_sock.c --- linux-2.6.22-try2/net/ipv4/inet_connection_sock.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/inet_connection_sock.c 2007-12-19 23:25:05.000000000 -0500 @@ -32,7 +32,7 @@ /* * This array holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; +//int sysctl_local_port_range[2] = { 32768, 61000 }; int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { @@ -74,6 +74,7 @@ sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && + (sk->sk_net == sk2->sk_net) && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || @@ -98,6 +99,7 @@ int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb)) { + struct net *net = sk->sk_net; struct inet_bind_hashbucket *head; struct hlist_node *node; struct inet_bind_bucket *tb; @@ -105,16 +107,16 @@ local_bh_disable(); if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int remaining = (high - low) + 1; int rover = net_random() % (high - low) + low; do { - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) + if ((tb->port == rover) && (tb->net == net)) goto next; break; next: @@ -138,10 +140,10 @@ */ snum = rover; } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) + if ((tb->port == snum) && (tb->net==net)) goto 
tb_found; } tb = NULL; @@ -161,7 +163,7 @@ } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -341,7 +343,8 @@ struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? opt->faddr : diff -Nurb linux-2.6.22-try2/net/ipv4/inet_diag.c linux-2.6.22-try2-netns/net/ipv4/inet_diag.c --- linux-2.6.22-try2/net/ipv4/inet_diag.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/inet_diag.c 2007-12-19 22:49:20.000000000 -0500 @@ -227,6 +227,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { + struct net *net = in_skb->sk->sk_net; int err; struct sock *sk; struct inet_diag_req *req = NLMSG_DATA(nlh); @@ -242,7 +243,7 @@ /* TODO: lback */ sk = inet_lookup(hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); + req->id.idiag_sport, req->id.idiag_if, net); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->idiag_family == AF_INET6) { @@ -251,7 +252,7 @@ req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, - req->id.idiag_if); + req->id.idiag_if, net); } #endif else { @@ -906,8 +907,8 @@ if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, - NULL, THIS_MODULE); + idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, + inet_diag_rcv, NULL, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; err = 0; diff -Nurb 
linux-2.6.22-try2/net/ipv4/inet_hashtables.c linux-2.6.22-try2-netns/net/ipv4/inet_hashtables.c --- linux-2.6.22-try2/net/ipv4/inet_hashtables.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/inet_hashtables.c 2007-12-19 23:27:05.000000000 -0500 @@ -29,11 +29,13 @@ */ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, + struct net *net, const unsigned short snum) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { + tb->net = net; tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -66,7 +68,7 @@ */ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) { - const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -127,7 +129,7 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, struct net *net) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -149,6 +151,8 @@ continue; score += 2; } + if (sk->sk_net != net) + continue; if (score == 5) return sk; if (score > hiscore) { @@ -163,22 +167,22 @@ /* Optimize the common listener case. 
*/ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk = NULL; const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); if (inet->num == hnum && !sk->sk_node.next && v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) + !sk->sk_bound_dev_if && (sk->sk_net == net)) /* fast path only for a listener in the caller's namespace */ goto sherry_cache; - sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(head, daddr, hnum, dif, net); } if (sk) { sherry_cache: @@ -196,12 +200,13 @@ { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); + struct net *net = sk->sk_net; __be32 daddr = inet->rcv_saddr; __be32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); - unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; const struct hlist_node *node; @@ -214,7 +219,7 @@ sk_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); - if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) { if (twsk_unique(sk, sk2, twp)) goto unique; else @@ -225,7 +230,7 @@ /* And established part... 
*/ sk_for_each(sk2, node, &head->chain) { - if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) goto not_unique; } @@ -271,6 +276,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + struct net *net = sk->sk_net; struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; struct inet_bind_hashbucket *head; @@ -278,8 +284,8 @@ int ret; if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int range = high - low; int i; int port; @@ -291,7 +297,7 @@ local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -299,7 +305,7 @@ * unique enough. 
*/ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if ((tb->port == port) && (tb->net == net)) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -311,7 +317,7 @@ } } - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port); if (!tb) { spin_unlock(&head->lock); break; @@ -346,7 +352,7 @@ goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { diff -Nurb linux-2.6.22-try2/net/ipv4/inet_timewait_sock.c linux-2.6.22-try2-netns/net/ipv4/inet_timewait_sock.c --- linux-2.6.22-try2/net/ipv4/inet_timewait_sock.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/inet_timewait_sock.c 2007-12-19 22:49:20.000000000 -0500 @@ -31,7 +31,7 @@ write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ - bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); @@ -65,7 +65,7 @@ Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; + bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; BUG_TRAP(icsk->icsk_bind_hash); diff -Nurb linux-2.6.22-try2/net/ipv4/inetpeer.c linux-2.6.22-try2-netns/net/ipv4/inetpeer.c --- linux-2.6.22-try2/net/ipv4/inetpeer.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/inetpeer.c 2007-12-19 22:49:20.000000000 -0500 @@ -81,71 +81,94 @@ .avl_height = 0 }; #define peer_avl_empty (&peer_fake_node) -static struct inet_peer *peer_root = peer_avl_empty; static DEFINE_RWLOCK(peer_pool_lock); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ -static int peer_total; -/* Exported for sysctl_net_ipv4. */ -int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more - * aggressively at this stage */ -int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ -int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -int inet_peer_gc_mintime __read_mostly = 10 * HZ; -int inet_peer_gc_maxtime __read_mostly = 120 * HZ; - -static struct inet_peer *inet_peer_unused_head; -static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; static DEFINE_SPINLOCK(inet_peer_unused_lock); static void peer_check_expire(unsigned long dummy); -static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); +static int inet_peers_net_init(struct net *net); +static void inet_peers_net_exit(struct net *net); +static struct pernet_operations inet_peers_net_ops = { + .init = inet_peers_net_init, + .exit = inet_peers_net_exit, +}; /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { + peer_cachep = kmem_cache_create("inet_peer_cache", + sizeof(struct inet_peer), + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + NULL, NULL); + + register_pernet_subsys(&inet_peers_net_ops); +} + +static int 
inet_peers_net_init(struct net *net) +{ struct sysinfo si; + net->peer_root = peer_avl_empty; + net->inet_peer_unused_tailp = &net->inet_peer_unused_head; + + net->inet_peer_threshold = 65536 + 128; /* start to throw entries more + * aggressively at this stage */ + net->inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ + net->inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ + net->inet_peer_gc_mintime = 10 * HZ; + net->inet_peer_gc_maxtime = 120 * HZ; + /* Use the straight interface to information about memory. */ si_meminfo(&si); + /* The values below were suggested by Alexey Kuznetsov * . I don't have any opinion about the values * myself. --SAW */ if (si.totalram <= (32768*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ + net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ if (si.totalram <= (16384*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* about 512KB */ + net->inet_peer_threshold >>= 1; /* about 512KB */ if (si.totalram <= (8192*1024)/PAGE_SIZE) - inet_peer_threshold >>= 2; /* about 128KB */ + net->inet_peer_threshold >>= 2; /* about 128KB */ - peer_cachep = kmem_cache_create("inet_peer_cache", - sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + init_timer(&net->peer_periodic_timer); + net->peer_periodic_timer.function = peer_check_expire; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. 
*/ - peer_periodic_timer.expires = jiffies - + net_random() % inet_peer_gc_maxtime - + inet_peer_gc_maxtime; - add_timer(&peer_periodic_timer); + net->peer_periodic_timer.expires = jiffies + + net_random() % net->inet_peer_gc_maxtime + + net->inet_peer_gc_maxtime; + /* Remember our namespace */ + net->peer_periodic_timer.data = (unsigned long)net; + add_timer(&net->peer_periodic_timer); + + return 0; +} + +static void inet_peers_net_exit(struct net *net) +{ + del_timer_sync(&net->peer_periodic_timer); /* handler re-arms itself with add_timer(): wait out a running instance */ + /* CHECKME do I need to do something to release all of the peers */ } /* Called with or without local BH being disabled. */ -static void unlink_from_unused(struct inet_peer *p) +static void unlink_from_unused(struct net *net, struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (p->unused_prevp != NULL) { /* On unused list. */ - *p->unused_prevp = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; + *p->unused_prevp = p->u.unused_next; + if (p->u.unused_next != NULL) + p->u.unused_next->unused_prevp = p->unused_prevp; else - inet_peer_unused_tailp = p->unused_prevp; + net->inet_peer_unused_tailp = p->unused_prevp; p->unused_prevp = NULL; /* mark it as removed */ + p->u.net = hold_net(net); /* Remember the net */ } spin_unlock_bh(&inet_peer_unused_lock); } @@ -160,9 +183,9 @@ struct inet_peer *u, **v; \ if (_stack) { \ stackptr = _stack; \ - *stackptr++ = &peer_root; \ + *stackptr++ = &net->peer_root; \ } \ - for (u = peer_root; u != peer_avl_empty; ) { \ + for (u = net->peer_root; u != peer_avl_empty; ) { \ if (_daddr == u->v4daddr) \ break; \ if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ @@ -279,7 +302,7 @@ } while(0) /* May be called with local BH enabled. 
*/ -static void unlink_from_pool(struct inet_peer *p) +static void unlink_from_pool(struct net *net, struct inet_peer *p) { int do_free; @@ -317,7 +340,7 @@ delp[1] = &t->avl_left; /* was &p->avl_left */ } peer_avl_rebalance(stack, stackptr); - peer_total--; + net->peer_total--; do_free = 1; } write_unlock_bh(&peer_pool_lock); @@ -335,13 +358,13 @@ } /* May be called with local BH enabled. */ -static int cleanup_once(unsigned long ttl) +static int cleanup_once(struct net *net, unsigned long ttl) { struct inet_peer *p; /* Remove the first entry from the list of unused nodes. */ spin_lock_bh(&inet_peer_unused_lock); - p = inet_peer_unused_head; + p = net->inet_peer_unused_head; if (p != NULL) { __u32 delta = (__u32)jiffies - p->dtime; if (delta < ttl) { @@ -349,12 +372,13 @@ spin_unlock_bh(&inet_peer_unused_lock); return -1; } - inet_peer_unused_head = p->unused_next; - if (p->unused_next != NULL) - p->unused_next->unused_prevp = p->unused_prevp; + net->inet_peer_unused_head = p->u.unused_next; + if (p->u.unused_next != NULL) + p->u.unused_next->unused_prevp = p->unused_prevp; else - inet_peer_unused_tailp = p->unused_prevp; + net->inet_peer_unused_tailp = p->unused_prevp; p->unused_prevp = NULL; /* mark as not on the list */ + p->u.net = hold_net(net); /* Grab an extra reference to prevent node disappearing * before unlink_from_pool() call. */ atomic_inc(&p->refcnt); @@ -367,12 +391,12 @@ * happen because of entry limits in route cache. */ return -1; - unlink_from_pool(p); + unlink_from_pool(net, p); return 0; } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__be32 daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; @@ -387,7 +411,7 @@ if (p != peer_avl_empty) { /* The existing node has been found. */ /* Remove the entry from unused list if it was there. 
*/ - unlink_from_unused(p); + unlink_from_unused(net, p); return p; } @@ -413,13 +437,13 @@ /* Link the node. */ link_to_pool(n); n->unused_prevp = NULL; /* not on the list */ - peer_total++; + n->u.net = hold_net(net); /* Remember the net */ + net->peer_total++; write_unlock_bh(&peer_pool_lock); - if (peer_total >= inet_peer_threshold) + if (net->peer_total >= net->inet_peer_threshold) /* Remove one less-recently-used entry. */ - cleanup_once(0); - + cleanup_once(net, 0); return n; out_free: @@ -427,25 +451,26 @@ atomic_inc(&p->refcnt); write_unlock_bh(&peer_pool_lock); /* Remove the entry from unused list if it was there. */ - unlink_from_unused(p); + unlink_from_unused(net, p); /* Free preallocated the preallocated node. */ kmem_cache_free(peer_cachep, n); return p; } /* Called with local BH disabled. */ -static void peer_check_expire(unsigned long dummy) +static void peer_check_expire(unsigned long arg) { + struct net *net = (void *)arg; unsigned long now = jiffies; int ttl; - if (peer_total >= inet_peer_threshold) - ttl = inet_peer_minttl; + if (net->peer_total >= net->inet_peer_threshold) + ttl = net->inet_peer_minttl; else - ttl = inet_peer_maxttl - - (inet_peer_maxttl - inet_peer_minttl) / HZ * - peer_total / inet_peer_threshold * HZ; - while (!cleanup_once(ttl)) { + ttl = net->inet_peer_maxttl + - (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ * + net->peer_total / net->inet_peer_threshold * HZ; + while (!cleanup_once(net, ttl)) { if (jiffies != now) break; } @@ -453,25 +478,30 @@ /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). 
*/ - if (peer_total >= inet_peer_threshold) - peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; + if (net->peer_total >= net->inet_peer_threshold) + net->peer_periodic_timer.expires = jiffies + + net->inet_peer_gc_mintime; else - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; - add_timer(&peer_periodic_timer); + net->peer_periodic_timer.expires = jiffies + + net->inet_peer_gc_maxtime + - (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ * + net->peer_total / net->inet_peer_threshold * HZ; + add_timer(&net->peer_periodic_timer); } void inet_putpeer(struct inet_peer *p) { spin_lock_bh(&inet_peer_unused_lock); if (atomic_dec_and_test(&p->refcnt)) { - p->unused_prevp = inet_peer_unused_tailp; - p->unused_next = NULL; - *inet_peer_unused_tailp = p; - inet_peer_unused_tailp = &p->unused_next; + struct net *net = p->u.net; + + p->unused_prevp = net->inet_peer_unused_tailp; + p->u.unused_next = NULL; + *net->inet_peer_unused_tailp = p; + net->inet_peer_unused_tailp = &p->u.unused_next; p->dtime = (__u32)jiffies; + + release_net(net); } spin_unlock_bh(&inet_peer_unused_lock); } diff -Nurb linux-2.6.22-try2/net/ipv4/ip_fragment.c linux-2.6.22-try2-netns/net/ipv4/ip_fragment.c --- linux-2.6.22-try2/net/ipv4/ip_fragment.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_fragment.c 2007-12-19 22:49:20.000000000 -0500 @@ -49,21 +49,6 @@ * as well. Or notify me, at least. --ANK */ -/* Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to measurably - * harm machine performance. - */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; - -int sysctl_ipfrag_max_dist __read_mostly = 64; - -/* Important NOTE! 
Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. - */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; - struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -96,6 +81,7 @@ int iif; unsigned int rid; struct inet_peer *peer; + struct net *net; }; /* Hash table. */ @@ -103,17 +89,13 @@ #define IPQ_HASHSZ 64 /* Per-bucket lock is easy to add now. */ -static struct hlist_head ipq_hash[IPQ_HASHSZ]; static DEFINE_RWLOCK(ipfrag_lock); -static u32 ipfrag_hash_rnd; -static LIST_HEAD(ipq_lru_list); -int ip_frag_nqueues = 0; static __inline__ void __ipq_unlink(struct ipq *qp) { hlist_del(&qp->list); list_del(&qp->lru_list); - ip_frag_nqueues--; + qp->net->ip_frag_nqueues--; } static __inline__ void ipq_unlink(struct ipq *ipq) @@ -123,70 +105,71 @@ write_unlock(&ipfrag_lock); } -static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) +static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } -static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; - -static void ipfrag_secret_rebuild(unsigned long dummy) +static void ipfrag_secret_rebuild(unsigned long arg) { + struct net *net = (void *)arg; unsigned long now = jiffies; int i; write_lock(&ipfrag_lock); - get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); + get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32)); for (i = 0; i < IPQ_HASHSZ; i++) { struct ipq *q; + struct hlist_head *head; struct hlist_node *p, *n; - hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { - unsigned int hval = ipqhashfn(q->id, q->saddr, + head = &net->ipq_hash[i]; + hlist_for_each_entry_safe(q, p, n, head, list) { + unsigned int hval = ipqhashfn(net, q->id, q->saddr, q->daddr, 
q->protocol); if (hval != i) { hlist_del(&q->list); /* Relink to new hash chain. */ - hlist_add_head(&q->list, &ipq_hash[hval]); + hlist_add_head(&q->list, &net->ipq_hash[hval]); } } } write_unlock(&ipfrag_lock); - mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); + mod_timer(&net->ipfrag_secret_timer, + now + net->sysctl_ipfrag_secret_interval); } -atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ - /* Memory Tracking Functions. */ -static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) +static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip_frag_mem); + atomic_sub(skb->truesize, &net->ip_frag_mem); kfree_skb(skb); } static __inline__ void frag_free_queue(struct ipq *qp, int *work) { + struct net *net = qp->net; if (work) *work -= sizeof(struct ipq); - atomic_sub(sizeof(struct ipq), &ip_frag_mem); + atomic_sub(sizeof(struct ipq), &net->ip_frag_mem); + release_net(net); kfree(qp); } -static __inline__ struct ipq *frag_alloc_queue(void) +static __inline__ struct ipq *frag_alloc_queue(struct net *net) { struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); if (!qp) return NULL; - atomic_add(sizeof(struct ipq), &ip_frag_mem); + atomic_add(sizeof(struct ipq), &net->ip_frag_mem); return qp; } @@ -209,7 +192,7 @@ while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp, work); + frag_kfree_skb(qp->net, fp, work); fp = xp; } @@ -241,23 +224,23 @@ /* Memory limiting on fragments. Evictor trashes the oldest * fragment queue until we are back under the threshold. 
*/ -static void ip_evictor(void) +static void ip_evictor(struct net *net) { struct ipq *qp; struct list_head *tmp; int work; - work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; + work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh; if (work <= 0) return; while (work > 0) { read_lock(&ipfrag_lock); - if (list_empty(&ipq_lru_list)) { + if (list_empty(&net->ipq_lru_list)) { read_unlock(&ipfrag_lock); return; } - tmp = ipq_lru_list.next; + tmp = net->ipq_lru_list.next; qp = list_entry(tmp, struct ipq, lru_list); atomic_inc(&qp->refcnt); read_unlock(&ipfrag_lock); @@ -292,7 +275,7 @@ if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { struct sk_buff *head = qp->fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. */ - if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { + if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); dev_put(head->dev); } @@ -304,7 +287,7 @@ /* Creation primitives. */ -static struct ipq *ip_frag_intern(struct ipq *qp_in) +static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in) { struct ipq *qp; #ifdef CONFIG_SMP @@ -313,14 +296,14 @@ unsigned int hash; write_lock(&ipfrag_lock); - hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, + hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr, qp_in->protocol); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we * promoted read lock to write lock. 
*/ - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { if (qp->id == qp_in->id && qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && @@ -336,26 +319,27 @@ #endif qp = qp_in; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) + if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time)) atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt); - hlist_add_head(&qp->list, &ipq_hash[hash]); + hlist_add_head(&qp->list, &net->ipq_hash[hash]); INIT_LIST_HEAD(&qp->lru_list); - list_add_tail(&qp->lru_list, &ipq_lru_list); - ip_frag_nqueues++; + list_add_tail(&qp->lru_list, &net->ipq_lru_list); + net->ip_frag_nqueues++; write_unlock(&ipfrag_lock); return qp; } /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ -static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) +static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user) { struct ipq *qp; - if ((qp = frag_alloc_queue()) == NULL) + if ((qp = frag_alloc_queue(net)) == NULL) goto out_nomem; + qp->net = hold_net(net); qp->protocol = iph->protocol; qp->last_in = 0; qp->id = iph->id; @@ -366,7 +350,8 @@ qp->meat = 0; qp->fragments = NULL; qp->iif = 0; - qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; + qp->peer = net->sysctl_ipfrag_max_dist ? + inet_getpeer(net, iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -375,7 +360,7 @@ spin_lock_init(&qp->lock); atomic_set(&qp->refcnt, 1); - return ip_frag_intern(qp); + return ip_frag_intern(net, qp); out_nomem: LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); @@ -385,7 +370,7 @@ /* Find the correct entry in the "incomplete datagrams" queue for * this IP datagram, and create new one, if nothing is found. 
*/ -static inline struct ipq *ip_find(struct iphdr *iph, u32 user) +static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) { __be16 id = iph->id; __be32 saddr = iph->saddr; @@ -396,8 +381,8 @@ struct hlist_node *n; read_lock(&ipfrag_lock); - hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { + hash = ipqhashfn(net, id, saddr, daddr, protocol); + hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { if (qp->id == id && qp->saddr == saddr && qp->daddr == daddr && @@ -410,14 +395,14 @@ } read_unlock(&ipfrag_lock); - return ip_frag_create(iph, user); + return ip_frag_create(net, iph, user); } /* Is the fragment too far ahead to be part of ipq? */ static inline int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; - unsigned int max = sysctl_ipfrag_max_dist; + unsigned int max = qp->net->sysctl_ipfrag_max_dist; unsigned int start, end; int rc; @@ -442,7 +427,7 @@ { struct sk_buff *fp; - if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) { atomic_inc(&qp->refcnt); return -ETIMEDOUT; } @@ -450,7 +435,7 @@ fp = qp->fragments; do { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp, NULL); + frag_kfree_skb(qp->net, fp, NULL); fp = xp; } while (fp); @@ -466,6 +451,7 @@ /* Add new segment to existing queue. 
*/ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { + struct net *net = qp->net; struct sk_buff *prev, *next; int flags, offset; int ihl, end; @@ -576,7 +562,7 @@ qp->fragments = next; qp->meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(net, free_it, NULL); } } @@ -594,12 +580,12 @@ skb->dev = NULL; qp->stamp = skb->tstamp; qp->meat += skb->len; - atomic_add(skb->truesize, &ip_frag_mem); + atomic_add(skb->truesize, &net->ip_frag_mem); if (offset == 0) qp->last_in |= FIRST_IN; write_lock(&ipfrag_lock); - list_move_tail(&qp->lru_list, &ipq_lru_list); + list_move_tail(&qp->lru_list, &net->ipq_lru_list); write_unlock(&ipfrag_lock); return; @@ -613,6 +599,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) { + struct net *net = qp->net; struct iphdr *iph; struct sk_buff *fp, *head = qp->fragments; int len; @@ -654,12 +641,12 @@ head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip_frag_mem); + atomic_add(clone->truesize, &net->ip_frag_mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip_frag_mem); + atomic_sub(head->truesize, &net->ip_frag_mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -669,7 +656,7 @@ else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip_frag_mem); + atomic_sub(fp->truesize, &net->ip_frag_mem); } head->next = NULL; @@ -700,19 +687,20 @@ /* Process an incoming IP datagram fragment. */ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) { + struct net *net = skb->dev->nd_net; struct ipq *qp; struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. 
*/ - if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) - ip_evictor(); + if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh) + ip_evictor(net); dev = skb->dev; /* Lookup (or create) queue header */ - if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { + if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { struct sk_buff *ret = NULL; spin_lock(&qp->lock); @@ -733,15 +721,70 @@ return NULL; } -void __init ipfrag_init(void) +static int ipfrag_net_init(struct net *net) { - ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + struct timer_list *secret_timer; + int i; + + /* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. + */ + net->sysctl_ipfrag_high_thresh = 256*1024; + net->sysctl_ipfrag_low_thresh = 192*1024; + net->sysctl_ipfrag_max_dist = 64; + + /* Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 
+ */ + net->sysctl_ipfrag_time = IP_FRAG_TIME; + + net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ; + + net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL); + if (!net->ipq_hash) + return -ENOMEM; + + for (i = 0; i < IPQ_HASHSZ; i++) + INIT_HLIST_HEAD(&net->ipq_hash[i]); + INIT_LIST_HEAD(&net->ipq_lru_list); + net->ip_frag_nqueues = 0; + atomic_set(&net->ip_frag_mem, 0); + + + net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ (jiffies ^ (jiffies >> 6))); - init_timer(&ipfrag_secret_timer); - ipfrag_secret_timer.function = ipfrag_secret_rebuild; - ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; - add_timer(&ipfrag_secret_timer); + secret_timer = &net->ipfrag_secret_timer; + init_timer(secret_timer); + secret_timer->function = ipfrag_secret_rebuild; + secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval; + secret_timer->data = (unsigned long)net; + add_timer(secret_timer); + + return 0; +} + +static void ipfrag_net_exit(struct net *net) +{ + del_timer_sync(&net->ipfrag_secret_timer); /* handler re-arms itself with mod_timer(): wait out a running instance */ + + net->sysctl_ipfrag_low_thresh = 0; + while (atomic_read(&net->ip_frag_mem)) + ip_evictor(net); + + kfree(net->ipq_hash); +} + +static struct pernet_operations ipfrag_net_ops = { + .init = ipfrag_net_init, + .exit = ipfrag_net_exit, +}; + +void __init ipfrag_init(void) +{ + register_pernet_subsys(&ipfrag_net_ops); } EXPORT_SYMBOL(ip_defrag); diff -Nurb linux-2.6.22-try2/net/ipv4/ip_gre.c linux-2.6.22-try2-netns/net/ipv4/ip_gre.c --- linux-2.6.22-try2/net/ipv4/ip_gre.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_gre.c 2007-12-19 22:49:20.000000000 -0500 @@ -262,7 +262,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "gre%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -397,6 +397,9 @@ struct flowi fl; struct rtable *rt; + if (skb->dev->nd_net != &init_net) + return; + if (p[1] != htons(ETH_P_IP)) return; @@ -475,6 
+478,7 @@ /* Try to guess incoming interface */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_GRE; @@ -559,6 +563,10 @@ struct ip_tunnel *tunnel; int offset = 4; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -740,7 +748,8 @@ } { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, @@ -1095,7 +1104,8 @@ struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr)) { - struct flowi fl = { .oif = t->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = t->parms.link, .nl_u = { .ip4_u = { .daddr = t->parms.iph.daddr, .saddr = t->parms.iph.saddr, @@ -1118,7 +1128,7 @@ { struct ip_tunnel *t = netdev_priv(dev); if (MULTICAST(t->parms.iph.daddr) && t->mlink) { - struct in_device *in_dev = inetdev_by_index(t->mlink); + struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink); if (in_dev) { ip_mc_dec_group(in_dev, t->parms.iph.daddr); in_dev_put(in_dev); @@ -1168,7 +1178,8 @@ /* Guess output device to choose reasonable mtu and hard_header_len */ if (iph->daddr) { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, @@ -1195,7 +1206,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len; diff -Nurb linux-2.6.22-try2/net/ipv4/ip_input.c linux-2.6.22-try2-netns/net/ipv4/ip_input.c --- linux-2.6.22-try2/net/ipv4/ip_input.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_input.c 2007-12-19 22:49:20.000000000 -0500 @@ -280,6 +280,10 @@ struct iphdr *iph; struct net_device *dev = 
skb->dev; + + if (skb->dev->nd_net != &init_net) + goto drop; + /* It looks as overkill, because not all IP options require packet mangling. But it is the easiest for now, especially taking diff -Nurb linux-2.6.22-try2/net/ipv4/ip_options.c linux-2.6.22-try2-netns/net/ipv4/ip_options.c --- linux-2.6.22-try2/net/ipv4/ip_options.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_options.c 2007-12-19 22:49:20.000000000 -0500 @@ -151,7 +151,7 @@ __be32 addr; memcpy(&addr, sptr+soffset-1, 4); - if (inet_addr_type(addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { dopt->ts_needtime = 1; soffset += 8; } @@ -400,7 +400,7 @@ { __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); - if (inet_addr_type(addr) == RTN_UNICAST) + if (inet_addr_type(&init_net, addr) == RTN_UNICAST) break; if (skb) timeptr = (__be32*)&optptr[optptr[2]+3]; diff -Nurb linux-2.6.22-try2/net/ipv4/ip_output.c linux-2.6.22-try2-netns/net/ipv4/ip_output.c --- linux-2.6.22-try2/net/ipv4/ip_output.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_output.c 2007-12-19 22:49:20.000000000 -0500 @@ -83,8 +83,6 @@ #include #include -int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; - /* Generate a checksum for an outgoing IP datagram. 
*/ __inline__ void ip_send_check(struct iphdr *iph) { @@ -317,7 +315,8 @@ daddr = opt->faddr; { - struct flowi fl = { .oif = sk->sk_bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = sk->sk_bound_dev_if, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = inet->saddr, @@ -1352,7 +1351,8 @@ } { - struct flowi fl = { .oif = arg->bound_dev_if, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = arg->bound_dev_if, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = rt->rt_spec_dst, diff -Nurb linux-2.6.22-try2/net/ipv4/ip_sockglue.c linux-2.6.22-try2-netns/net/ipv4/ip_sockglue.c --- linux-2.6.22-try2/net/ipv4/ip_sockglue.c 2007-12-19 13:37:57.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ip_sockglue.c 2007-12-19 22:49:20.000000000 -0500 @@ -411,6 +411,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); int val=0,err; @@ -596,13 +597,13 @@ err = 0; break; } - dev = ip_dev_find(mreq.imr_address.s_addr); + dev = ip_dev_find(net, mreq.imr_address.s_addr); if (dev) { mreq.imr_ifindex = dev->ifindex; dev_put(dev); } } else - dev = __dev_get_by_index(mreq.imr_ifindex); + dev = __dev_get_by_index(net, mreq.imr_ifindex); err = -EADDRNOTAVAIL; @@ -956,6 +957,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { + struct net *net = sk->sk_net; struct inet_sock *inet = inet_sk(sk); int val; int len; @@ -1023,7 +1025,7 @@ break; case IP_TTL: val = (inet->uc_ttl == -1 ? 
- sysctl_ip_default_ttl : + net->sysctl_ip_default_ttl : inet->uc_ttl); break; case IP_HDRINCL: diff -Nurb linux-2.6.22-try2/net/ipv4/ipcomp.c linux-2.6.22-try2-netns/net/ipv4/ipcomp.c --- linux-2.6.22-try2/net/ipv4/ipcomp.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipcomp.c 2007-12-19 22:49:20.000000000 -0500 @@ -175,6 +175,9 @@ struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; + if (skb->dev->nd_net != &init_net) + return; + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; diff -Nurb linux-2.6.22-try2/net/ipv4/ipconfig.c linux-2.6.22-try2-netns/net/ipv4/ipconfig.c --- linux-2.6.22-try2/net/ipv4/ipconfig.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipconfig.c 2007-12-19 22:49:20.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -184,16 +185,18 @@ struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; + struct net_device *lo; last = &ic_first_dev; rtnl_lock(); /* bring loopback device up first */ - if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); + lo = &init_net.loopback_dev; + if (dev_change_flags(lo, lo->flags | IFF_UP) < 0) + printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name); - for_each_netdev(dev) { - if (dev == &loopback_dev) + for_each_netdev(&init_net, dev) { + if (dev == lo) continue; if (user_dev_name[0] ? 
!strcmp(dev->name, user_dev_name) : (!(dev->flags & IFF_LOOPBACK) && @@ -283,7 +286,7 @@ mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = devinet_ioctl(cmd, (struct ifreq __user *) arg); + res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); set_fs(oldfs); return res; } @@ -294,7 +297,7 @@ mm_segment_t oldfs = get_fs(); set_fs(get_ds()); - res = ip_rt_ioctl(cmd, (void __user *) arg); + res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); set_fs(oldfs); return res; } @@ -425,6 +428,9 @@ unsigned char *sha, *tha; /* s for "source", t for "target" */ struct ic_device *d; + if (dev->nd_net != &init_net) + goto drop; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; @@ -834,6 +840,9 @@ struct ic_device *d; int len, ext_len; + if (dev->nd_net != &init_net) + goto drop; + /* Perform verifications before taking the lock. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; @@ -1253,7 +1262,7 @@ __be32 addr; #ifdef CONFIG_PROC_FS - proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); + proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); #endif /* CONFIG_PROC_FS */ if (!ic_enable) diff -Nurb linux-2.6.22-try2/net/ipv4/ipip.c linux-2.6.22-try2-netns/net/ipv4/ipip.c --- linux-2.6.22-try2/net/ipv4/ipip.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipip.c 2007-12-19 22:49:20.000000000 -0500 @@ -225,7 +225,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "tunl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -403,6 +403,7 @@ /* Try to guess incoming interface */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.fl4_daddr = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; @@ -542,7 +543,8 @@ } { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, @@ -806,7 +808,8 @@ 
memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); if (iph->daddr) { - struct flowi fl = { .oif = tunnel->parms.link, + struct flowi fl = { .fl_net = &init_net, + .oif = tunnel->parms.link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, @@ -821,7 +824,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff -Nurb linux-2.6.22-try2/net/ipv4/ipmr.c linux-2.6.22-try2-netns/net/ipv4/ipmr.c --- linux-2.6.22-try2/net/ipv4/ipmr.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipmr.c 2007-12-19 22:49:20.000000000 -0500 @@ -62,6 +62,7 @@ #include #include #include +#include #include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) @@ -124,7 +125,7 @@ { struct net_device *dev; - dev = __dev_get_by_name("tunl0"); + dev = __dev_get_by_name(&init_net, "tunl0"); if (dev) { int err; @@ -148,7 +149,7 @@ dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { + if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); @@ -320,7 +321,7 @@ e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -422,7 +423,7 @@ return -ENOBUFS; break; case 0: - dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr); + dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) return -EADDRNOTAVAIL; dev_put(dev); @@ -532,7 +533,7 @@ memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, NETLINK_CB(skb).pid); + rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } @@ -848,7 +849,7 @@ { rtnl_lock(); if (sk == mroute_socket) { - IPV4_DEVCONF_ALL(MC_FORWARDING)--; + IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; 
write_lock_bh(&mrt_lock); mroute_socket=NULL; @@ -897,7 +898,7 @@ mroute_socket=sk; write_unlock_bh(&mrt_lock); - IPV4_DEVCONF_ALL(MC_FORWARDING)++; + IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; } rtnl_unlock(); return ret; @@ -1082,13 +1083,18 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; struct vif_device *v; int ct; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; v=&vif_table[0]; for (ct=0;ct<maxvif;ct++,v++) { - if (v->dev==ptr) + if (v->dev==dev) vif_delete(ct); } return NOTIFY_DONE; @@ -1171,7 +1177,8 @@ #endif if (vif->flags&VIFF_TUNNEL) { - struct flowi fl = { .oif = vif->link, + struct flowi fl = { .fl_net = &init_net, + .oif = vif->link, .nl_u = { .ip4_u = { .daddr = vif->remote, .saddr = vif->local, @@ -1181,7 +1188,8 @@ goto out_free; encap = sizeof(struct iphdr); } else { - struct flowi fl = { .oif = vif->link, + struct flowi fl = { .fl_net = &init_net, + .oif = vif->link, .nl_u = { .ip4_u = { .daddr = iph->daddr, .tos = RT_TOS(iph->tos) } }, @@ -1498,6 +1506,10 @@ struct iphdr *encap; struct net_device *reg_dev = NULL; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -1922,7 +1934,7 @@ ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops); + proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); + proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); #endif } diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_app.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_app.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_app.c 2007-12-19 22:49:20.000000000 -0500 @@ -32,6 +32,7 @@
#include #include #include +#include #include @@ -616,12 +617,12 @@ int ip_vs_app_init(void) { /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops); + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } void ip_vs_app_cleanup(void) { - proc_net_remove("ip_vs_app"); + proc_net_remove(&init_net, "ip_vs_app"); } diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_conn.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_conn.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_conn.c 2007-12-19 22:49:20.000000000 -0500 @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -922,7 +923,7 @@ rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); @@ -938,6 +939,6 @@ /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove("ip_vs_conn"); + proc_net_remove(&init_net, "ip_vs_conn"); vfree(ip_vs_conn_tab); } diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_core.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_core.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_core.c 2007-12-19 22:49:20.000000000 -0500 @@ -460,7 +460,7 @@ and the destination is RTN_UNICAST (and not local), then create a cache_bypass connection entry */ if (sysctl_ip_vs_cache_bypass && svc->fwmark - && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { + && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { int ret, cs; struct ip_vs_conn *cp; @@ -530,6 +530,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if 
((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ @@ -734,6 +738,10 @@ struct ip_vs_conn *cp; int ihl; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + EnterFunction(11); if (skb->ipvs_property) @@ -818,7 +826,7 @@ * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0) goto drop; skb = *pskb; @@ -956,12 +964,16 @@ int ret, restart; int ihl; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* * Big tappo: only PACKET_HOST (neither loopback nor mcasts) * ... don't know why 1st test DOES NOT include 2nd (?) */ if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev == &loopback_dev || skb->sk)) { + || skb->dev == &init_net.loopback_dev || skb->sk)) { IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", skb->pkt_type, ip_hdr(skb)->protocol, @@ -1062,6 +1074,10 @@ { int r; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_ctl.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_ctl.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_ctl.c 2007-12-19 22:49:20.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -679,7 +680,7 @@ conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(udest->addr) == RTN_LOCAL) { + if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { conn_flags = (conn_flags & 
~IP_VS_CONN_F_FWD_MASK) | IP_VS_CONN_F_LOCALNODE; } @@ -731,7 +732,7 @@ EnterFunction(2); - atype = inet_addr_type(udest->addr); + atype = inet_addr_type(&init_net, udest->addr); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; @@ -1932,6 +1933,9 @@ struct ip_vs_service *svc; struct ip_vs_dest_user *udest; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2196,6 +2200,9 @@ unsigned char arg[128]; int ret = 0; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2356,8 +2363,8 @@ return ret; } - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); sysctl_header = register_sysctl_table(vs_root_table); @@ -2390,8 +2397,8 @@ cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); - proc_net_remove("ip_vs_stats"); - proc_net_remove("ip_vs"); + proc_net_remove(&init_net, "ip_vs_stats"); + proc_net_remove(&init_net, "ip_vs"); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_lblcr.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_lblcr.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_lblcr.c 2007-12-19 22:49:20.000000000 -0500 @@ -843,7 +843,7 @@ INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); sysctl_header = register_sysctl_table(lblcr_root_table); #ifdef CONFIG_IP_VS_LBLCR_DEBUG - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); #endif return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); } @@ -852,7 +852,7 @@ static void __exit ip_vs_lblcr_cleanup(void) { #ifdef CONFIG_IP_VS_LBLCR_DEBUG 
- proc_net_remove("ip_vs_lblcr"); + proc_net_remove(&init_net, "ip_vs_lblcr"); #endif unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_sync.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_sync.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_sync.c 2007-12-19 22:49:20.000000000 -0500 @@ -387,7 +387,7 @@ struct net_device *dev; struct inet_sock *inet = inet_sk(sk); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -412,7 +412,7 @@ int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - @@ -423,7 +423,7 @@ IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) return -ENODEV; sync_recv_mesg_maxlen = dev->mtu - @@ -451,7 +451,7 @@ memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -472,7 +472,7 @@ __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(ifname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_xmit.c 
linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_xmit.c --- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_xmit.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_xmit.c 2007-12-19 22:49:20.000000000 -0500 @@ -70,6 +70,7 @@ if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos, 0))) { struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { @@ -93,6 +94,7 @@ spin_unlock(&dest->dst_lock); } else { struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { @@ -160,6 +162,7 @@ u8 tos = iph->tos; int mtu; struct flowi fl = { + .fl_net = &init_net, .oif = 0, .nl_u = { .ip4_u = { diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/arp_tables.c linux-2.6.22-try2-netns/net/ipv4/netfilter/arp_tables.c --- linux-2.6.22-try2/net/ipv4/netfilter/arp_tables.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/arp_tables.c 2007-12-19 22:49:20.000000000 -0500 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -773,7 +774,7 @@ int ret; struct arpt_table *t; - t = xt_find_table_lock(NF_ARP, entries->name); + t = xt_find_table_lock(&init_net, NF_ARP, entries->name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -843,7 +844,7 @@ duprintf("arp_tables: Translated table\n"); - t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name), "arptable_%s", tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -936,7 +937,7 @@ goto free; } - t = xt_find_table_lock(NF_ARP, tmp.name); + t = xt_find_table_lock(&init_net, NF_ARP, tmp.name); if (!t || IS_ERR(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; @@ -971,6 +972,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -995,6 +999,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1016,7 +1023,7 @@ } name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -1116,7 +1123,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(&init_net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-try2-netns/net/ipv4/netfilter/arptable_filter.c --- linux-2.6.22-try2/net/ipv4/netfilter/arptable_filter.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/arptable_filter.c 2007-12-19 22:49:20.000000000 -0500 @@ -61,6 +61,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return arpt_do_table(pskb, hook, in, out, &packet_filter); } diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ip_queue.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_queue.c --- linux-2.6.22-try2/net/ipv4/netfilter/ip_queue.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_queue.c 2007-12-19 22:49:20.000000000 -0500 @@ -26,6 +26,7 @@ #include #include #include +#include #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip_queue" @@ -556,6 +557,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) 
ipq_dev_drop(dev->ifindex); @@ -575,7 +579,7 @@ if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL && n->pid) { write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -667,14 +671,14 @@ struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - NULL, THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, + ipq_rcv_sk, NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -695,8 +699,7 @@ cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); - + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); @@ -715,7 +718,7 @@ unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ip_tables.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_tables.c --- linux-2.6.22-try2/net/ipv4/netfilter/ip_tables.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_tables.c 2007-12-19 22:49:20.000000000 -0500 @@ -1039,7 +1039,7 @@ } #endif -static int get_info(void __user *user, int *len, int compat) +static int get_info(struct net *net, void __user *user, int *len, int compat) { char name[IPT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1059,7 +1059,7 @@ if (compat) xt_compat_lock(AF_INET); #endif - t 
= try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (t && !IS_ERR(t)) { struct ipt_getinfo info; @@ -1099,7 +1099,7 @@ } static int -get_entries(struct ipt_get_entries __user *uptr, int *len) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) { int ret; struct ipt_get_entries get; @@ -1119,7 +1119,7 @@ return -EINVAL; } - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", @@ -1142,7 +1142,7 @@ } static int -__do_replace(const char *name, unsigned int valid_hooks, +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, unsigned int num_counters, void __user *counters_ptr) { @@ -1159,7 +1159,7 @@ goto out; } - t = try_then_request_module(xt_find_table_lock(AF_INET, name), + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), "iptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1211,7 +1211,7 @@ } static int -do_replace(void __user *user, unsigned int len) +do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct ipt_replace tmp; @@ -1252,7 +1252,7 @@ duprintf("ip_tables: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, tmp.counters); if (ret) @@ -1289,7 +1289,7 @@ } static int -do_add_counters(void __user *user, unsigned int len, int compat) +do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { unsigned int i; struct xt_counters_info tmp; @@ -1341,7 +1341,7 @@ goto free; } - t = xt_find_table_lock(AF_INET, name); + t = xt_find_table_lock(net, AF_INET, name); if (!t || IS_ERR(t)) { ret = t ? 
PTR_ERR(t) : -ENOENT; goto free; @@ -1745,7 +1745,7 @@ } static int -compat_do_replace(void __user *user, unsigned int len) +compat_do_replace(struct net *net, void __user *user, unsigned int len) { int ret; struct compat_ipt_replace tmp; @@ -1786,7 +1786,7 @@ duprintf("compat_do_replace: Translated table\n"); - ret = __do_replace(tmp.name, tmp.valid_hooks, + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) @@ -1811,11 +1811,11 @@ switch (cmd) { case IPT_SO_SET_REPLACE: - ret = compat_do_replace(user, len); + ret = compat_do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 1); + ret = do_add_counters(sk->sk_net, user, len, 1); break; default: @@ -1904,7 +1904,7 @@ } static int -compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; @@ -1928,7 +1928,7 @@ } xt_compat_lock(AF_INET); - t = xt_find_table_lock(AF_INET, get.name); + t = xt_find_table_lock(net, AF_INET, get.name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1966,10 +1966,10 @@ switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 1); + ret = get_info(sk->sk_net, user, len, 1); break; case IPT_SO_GET_ENTRIES: - ret = compat_get_entries(user, len); + ret = compat_get_entries(sk->sk_net, user, len); break; default: ret = do_ipt_get_ctl(sk, cmd, user, len); @@ -1988,11 +1988,11 @@ switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = do_replace(sk->sk_net, user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len, 0); + ret = do_add_counters(sk->sk_net, user, len, 0); break; default: @@ -2013,11 +2013,11 @@ switch (cmd) { case IPT_SO_GET_INFO: - ret = get_info(user, len, 0); + ret = get_info(sk->sk_net, user, len, 0); break; 
case IPT_SO_GET_ENTRIES: - ret = get_entries(user, len); + ret = get_entries(sk->sk_net, user, len); break; case IPT_SO_GET_REVISION_MATCH: @@ -2054,7 +2054,7 @@ return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; @@ -2082,7 +2082,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_CLUSTERIP.c --- linux-2.6.22-try2/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-12-19 22:49:20.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #define CLUSTERIP_VERSION "0.8" @@ -427,7 +428,7 @@ return 0; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(&init_net, e->ip.iniface); if (!dev) { printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); return 0; @@ -523,6 +524,10 @@ struct arp_payload *payload; struct clusterip_config *c; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || arp->ar_pro != htons(ETH_P_IP) @@ -735,7 +740,7 @@ goto cleanup_target; #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); + clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); if (!clusterip_procdir) { printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); ret = -ENOMEM; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_MASQUERADE.c --- 
linux-2.6.22-try2/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-12-19 22:49:20.000000000 -0500 @@ -131,6 +131,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for conntracks which were associated with that device, diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_REJECT.c --- linux-2.6.22-try2/net/ipv4/netfilter/ipt_REJECT.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_REJECT.c 2007-12-19 22:49:20.000000000 -0500 @@ -137,7 +137,7 @@ ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(&nskb, addr_type)) + if (ip_route_me_harder(&init_net, &nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_ULOG.c --- linux-2.6.22-try2/net/ipv4/netfilter/ipt_ULOG.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_ULOG.c 2007-12-19 22:49:20.000000000 -0500 @@ -419,7 +419,8 @@ for (i = 0; i < ULOG_MAXNLGROUPS; i++) setup_timer(&ulog_buffers[i].timer, ulog_timer, i); - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + nflognl = netlink_kernel_create(&init_net, + NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_addrtype.c --- linux-2.6.22-try2/net/ipv4/netfilter/ipt_addrtype.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_addrtype.c 2007-12-19 22:49:20.000000000 -0500 @@ -24,7 +24,7 @@ static inline int match_type(__be32 addr, u_int16_t mask) { - return !!(mask & (1 << inet_addr_type(addr))); + return !!(mask & (1 << 
inet_addr_type(&init_net, addr))); } static int match(const struct sk_buff *skb, diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_recent.c --- linux-2.6.22-try2/net/ipv4/netfilter/ipt_recent.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_recent.c 2007-12-19 22:49:20.000000000 -0500 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -485,7 +486,7 @@ #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", proc_net); + proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); if (proc_dir == NULL) { xt_unregister_match(&recent_match); err = -ENOMEM; @@ -499,7 +500,7 @@ BUG_ON(!list_empty(&tables)); xt_unregister_match(&recent_match); #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_recent", proc_net); + remove_proc_entry("ipt_recent", init_net.proc_net); #endif } diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_filter.c --- linux-2.6.22-try2/net/ipv4/netfilter/iptable_filter.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_filter.c 2007-12-19 22:49:20.000000000 -0500 @@ -26,7 +26,7 @@ struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} initial_table __initdata = { +} initial_table = { .repl = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -51,7 +51,7 @@ .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static struct xt_table ip_packet_filter_dflt = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .lock = RW_LOCK_UNLOCKED, @@ -67,7 +67,9 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + struct net *net = (in?in:out)->nd_net; + + return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); } static unsigned int @@ -77,6 +79,8 @@ const struct net_device *out, 
int (*okfn)(struct sk_buff *)) { + struct net *net = (in?in:out)->nd_net; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -86,7 +90,7 @@ return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); } static struct nf_hook_ops ipt_ops[] = { @@ -117,6 +121,30 @@ static int forward = NF_ACCEPT; module_param(forward, bool, 0000); +static int iptable_filter_net_init(struct net *net) +{ + /* Allocate the table */ + net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt, + sizeof(*net->ip_packet_filter), + GFP_KERNEL); + if (!net->ip_packet_filter) + return -ENOMEM; + + /* Register table */ + return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl); +} + +static void iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net->ip_packet_filter); + kfree(net->ip_packet_filter); +} + +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; + static int __init iptable_filter_init(void) { int ret; @@ -130,7 +158,7 @@ initial_table.entries[1].target.verdict = -forward - 1; /* Register table */ - ret = ipt_register_table(&packet_filter, &initial_table.repl); + ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; @@ -142,14 +170,14 @@ return ret; cleanup_table: - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); return ret; } static void __exit iptable_filter_fini(void) { nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - ipt_unregister_table(&packet_filter); + unregister_pernet_subsys(&iptable_filter_net_ops); } module_init(iptable_filter_init); diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_mangle.c --- linux-2.6.22-try2/net/ipv4/netfilter/iptable_mangle.c 2007-12-19 
13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_mangle.c 2007-12-19 22:49:20.000000000 -0500 @@ -80,6 +80,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } @@ -96,6 +100,10 @@ __be32 saddr, daddr; u_int32_t mark; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -121,7 +129,7 @@ iph->daddr != daddr || (*pskb)->mark != mark || iph->tos != tos) - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) ret = NF_DROP; } @@ -171,7 +179,7 @@ int ret; /* Register table */ - ret = ipt_register_table(&packet_mangler, &initial_table.repl); + ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl); if (ret < 0) return ret; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_raw.c --- linux-2.6.22-try2/net/ipv4/netfilter/iptable_raw.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_raw.c 2007-12-19 22:49:20.000000000 -0500 @@ -52,6 +52,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ipt_do_table(pskb, hook, in, out, &packet_raw); } @@ -96,7 +100,7 @@ int ret; /* Register table */ - ret = ipt_register_table(&packet_raw, &initial_table.repl); + ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl); if (ret < 0) return ret; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 
linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c --- linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-12-19 22:49:20.000000000 -0500 @@ -120,6 +120,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(pskb); } @@ -135,6 +139,10 @@ struct nf_conn_help *help; struct nf_conntrack_helper *helper; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) @@ -157,6 +165,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if ((*pskb)->nfct) @@ -180,6 +192,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return nf_conntrack_in(PF_INET, hooknum, pskb); } @@ -189,6 +205,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { @@ -325,6 +345,9 @@ struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + NF_CT_TUPLE_U_BLANK(&tuple); tuple.src.u3.ip = inet->rcv_saddr; tuple.src.u.tcp.port = inet->sport; diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c --- linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-12-19 22:49:20.000000000 -0500 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -378,16 +379,16 @@ { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto err3; @@ -397,16 +398,16 @@ return 0; err3: - proc_net_remove("ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack_expect"); err2: - proc_net_remove("ip_conntrack"); + proc_net_remove(&init_net, "ip_conntrack"); err1: return -ENOMEM; } void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", proc_net_stat); - proc_net_remove("ip_conntrack_expect"); - proc_net_remove("ip_conntrack"); + remove_proc_entry("ip_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(&init_net, "ip_conntrack"); } diff -Nurb 
linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_rule.c --- linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_rule.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_rule.c 2007-12-19 22:49:20.000000000 -0500 @@ -98,7 +98,10 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) { static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; + struct flowi fl = { + .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = dstip } } + }; struct rtable *rt; if (ip_route_output_key(&rt, &fl) != 0) @@ -252,7 +255,7 @@ { int ret; - ret = ipt_register_table(&nat_table, &nat_initial_table.repl); + ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl); if (ret != 0) return ret; ret = xt_register_target(&ipt_snat_reg); diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_standalone.c --- linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_standalone.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_standalone.c 2007-12-19 22:49:20.000000000 -0500 @@ -83,6 +83,10 @@ /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. 
*/ NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); @@ -172,6 +176,10 @@ unsigned int ret; __be32 daddr = ip_hdr(*pskb)->daddr; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(*pskb)->daddr) { @@ -194,6 +202,10 @@ #endif unsigned int ret; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) @@ -227,6 +239,10 @@ enum ip_conntrack_info ctinfo; unsigned int ret; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) || ip_hdrlen(*pskb) < sizeof(struct iphdr)) @@ -239,7 +255,7 @@ if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(pskb, RTN_UNSPEC)) + if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM @@ -262,6 +278,10 @@ struct nf_conn *ct; enum ip_conntrack_info ctinfo; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + ct = nf_ct_get(*pskb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { DEBUGP("nf_nat_standalone: adjusting sequence number\n"); diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter.c linux-2.6.22-try2-netns/net/ipv4/netfilter.c --- linux-2.6.22-try2/net/ipv4/netfilter.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/netfilter.c 2007-12-19 22:49:20.000000000 -0500 @@ -8,7 +8,7 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb, unsigned 
addr_type) +int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type) { const struct iphdr *iph = ip_hdr(*pskb); struct rtable *rt; @@ -17,7 +17,8 @@ unsigned int hh_len; unsigned int type; - type = inet_addr_type(iph->saddr); + fl.fl_net = net; + type = inet_addr_type(net, iph->saddr); if (addr_type == RTN_UNSPEC) addr_type = type; @@ -155,12 +156,13 @@ const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { + struct net *net = (info->indev?info->indev:info->outdev)->nd_net; const struct iphdr *iph = ip_hdr(*pskb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb, RTN_UNSPEC); + return ip_route_me_harder(net, pskb, RTN_UNSPEC); } return 0; } diff -Nurb linux-2.6.22-try2/net/ipv4/proc.c linux-2.6.22-try2-netns/net/ipv4/proc.c --- linux-2.6.22-try2/net/ipv4/proc.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/proc.c 2007-12-19 22:49:20.000000000 -0500 @@ -44,6 +44,7 @@ #include #include #include +#include static int fold_prot_inuse(struct proto *proto) { @@ -69,8 +70,9 @@ seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, - atomic_read(&ip_frag_mem)); + seq_printf(seq, "FRAG: inuse %d memory %d\n", + init_net.ip_frag_nqueues, + atomic_read(&init_net.ip_frag_mem)); return 0; } @@ -260,7 +262,8 @@ seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", - IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); + IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 
1 : 2, + init_net.sysctl_ip_default_ttl); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) seq_printf(seq, " %lu", @@ -380,20 +383,20 @@ { int rc = 0; - if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) goto out_netstat; - if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) goto out_snmp; - if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) goto out_sockstat; out: return rc; out_sockstat: - proc_net_remove("snmp"); + proc_net_remove(&init_net, "snmp"); out_snmp: - proc_net_remove("netstat"); + proc_net_remove(&init_net, "netstat"); out_netstat: rc = -ENOMEM; goto out; diff -Nurb linux-2.6.22-try2/net/ipv4/raw.c linux-2.6.22-try2-netns/net/ipv4/raw.c --- linux-2.6.22-try2/net/ipv4/raw.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/raw.c 2007-12-19 23:30:30.000000000 -0500 @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -101,7 +102,7 @@ write_unlock_bh(&raw_v4_lock); } -struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int tag) { @@ -110,6 +111,9 @@ sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); + if (sk->sk_net != net) + continue; + if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) && @@ -152,6 +156,7 @@ */ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { + struct net *net = skb->dev->nd_net; struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -160,7 +165,7 @@ head = &raw_v4_htable[hash]; if (hlist_empty(head)) goto out; - sk = __raw_v4_lookup(__sk_head(head), iph->protocol, + 
sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex, skb->skb_tag); @@ -173,7 +178,7 @@ if (clone) raw_rcv(sk, clone); } - sk = __raw_v4_lookup(sk_next(sk), iph->protocol, + sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex, skb->skb_tag); } @@ -484,7 +489,8 @@ } { - struct flowi fl = { .oif = ipc.oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = ipc.oif, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, @@ -574,7 +580,7 @@ if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; v4_map_sock_addr(inet, addr, &nsa); - chk_addr_ret = inet_addr_type(nsa.saddr); + chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr); ret = -EADDRNOTAVAIL; if (nsa.saddr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -798,6 +804,7 @@ #ifdef CONFIG_PROC_FS struct raw_iter_state { + struct net *net; int bucket; }; @@ -811,11 +818,14 @@ for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, &raw_v4_htable[state->bucket]) + sk_for_each(sk, node, &raw_v4_htable[state->bucket]) { + if (sk->sk_net != state->net) + continue; if (sk->sk_family == PF_INET && nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) goto found; } + } sk = NULL; found: return sk; @@ -830,7 +840,7 @@ try_again: ; } while (sk && (sk->sk_family != PF_INET || - !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net))); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { sk = sk_head(&raw_v4_htable[state->bucket]); @@ -933,6 +943,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -940,23 +951,46 @@ goto out; } +static int raw_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + 
struct raw_iter_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + static const struct file_operations raw_seq_fops = { .owner = THIS_MODULE, .open = raw_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = raw_seq_release, }; -int __init raw_proc_init(void) +static int raw_proc_net_init(struct net *net) { - if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) + if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) return -ENOMEM; return 0; } +static void raw_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "raw"); +} + +static struct pernet_operations raw_proc_net_ops = { + .init = raw_proc_net_init, + .exit = raw_proc_net_exit, +}; + +int __init raw_proc_init(void) +{ + return register_pernet_subsys(&raw_proc_net_ops); +} + void __init raw_proc_exit(void) { - proc_net_remove("raw"); + unregister_pernet_subsys(&raw_proc_net_ops); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv4/route.c linux-2.6.22-try2-netns/net/ipv4/route.c --- linux-2.6.22-try2/net/ipv4/route.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/route.c 2007-12-19 22:49:20.000000000 -0500 @@ -102,6 +102,7 @@ #include #include #include +#include #include #ifdef CONFIG_SYSCTL #include @@ -265,6 +266,7 @@ #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { + struct net *net; int bucket; }; @@ -333,6 +335,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) { + struct rt_cache_iter_state *st = seq->private; if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" @@ -342,6 +345,9 @@ struct rtable *r = v; char temp[256]; + if (r->fl.fl_net != st->net) + return 0; + sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X", r->u.dst.dev ? 
r->u.dst.dev->name : "*", @@ -384,6 +390,7 @@ seq = file->private_data; seq->private = s; memset(s, 0, sizeof(*s)); + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -391,12 +398,20 @@ goto out; } +static int rt_cache_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct rt_cache_iter_state *st = seq->private; + put_net(st->net); + return seq_release_private(inode, file); +} + static const struct file_operations rt_cache_seq_fops = { .owner = THIS_MODULE, .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = rt_cache_seq_release, }; @@ -562,13 +577,14 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | (fl1->mark ^ fl2->mark) | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + (fl1->iif ^ fl2->iif)) == 0) && + fl1->fl_net == fl2->fl_net; } /* This runs via a timer and thus is always in BH context. */ @@ -963,7 +979,7 @@ static DEFINE_SPINLOCK(rt_peer_lock); struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create); spin_lock_bh(&rt_peer_lock); if (rt->peer == NULL) { @@ -1056,7 +1072,7 @@ if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { - if (inet_addr_type(new_gw) != RTN_UNICAST) + if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST) goto reject_redirect; } @@ -1097,6 +1113,7 @@ /* Copy all the information. 
*/ *rt = *rth; + hold_net(rt->fl.fl_net); INIT_RCU_HEAD(&rt->u.dst.rcu_head); rt->u.dst.__use = 1; atomic_set(&rt->u.dst.__refcnt, 1); @@ -1315,7 +1332,7 @@ __be32 daddr = iph->daddr; unsigned short est_mtu = 0; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { @@ -1397,6 +1414,7 @@ rt->idev = NULL; in_dev_put(idev); } + release_net(rt->fl.fl_net); } static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1404,8 +1422,9 @@ { struct rtable *rt = (struct rtable *) dst; struct in_device *idev = rt->idev; - if (dev != &loopback_dev && idev && idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct net *net = dev->nd_net; + if (dev != &net->loopback_dev && idev && idev->dev == dev) { + struct in_device *loopback_idev = in_dev_get(&net->loopback_dev); if (loopback_idev) { rt->idev = loopback_idev; in_dev_put(idev); @@ -1492,7 +1511,7 @@ rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) - rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; + rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl; if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) @@ -1513,6 +1532,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { + struct net *net = dev->nd_net; unsigned hash; struct rtable *rth; __be32 spec_dst; @@ -1546,6 +1566,7 @@ rth->u.dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1557,7 +1578,7 @@ #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &net->loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); 
rth->fl.oif = 0; @@ -1686,6 +1707,7 @@ rth->u.dst.flags |= DST_NOPOLICY; if (IN_DEV_CONF_GET(out_dev, NOXFRM)) rth->u.dst.flags |= DST_NOXFRM; + rth->fl.fl_net = hold_net(in_dev->dev->nd_net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1754,9 +1776,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { + struct net *net = dev->nd_net; struct fib_result res; struct in_device *in_dev = in_dev_get(dev); - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = daddr, .saddr = saddr, .tos = tos, @@ -1814,7 +1838,7 @@ if (res.type == RTN_LOCAL) { int result; result = fib_validate_source(saddr, daddr, tos, - loopback_dev.ifindex, + net->loopback_dev.ifindex, dev, &spec_dst, &itag); if (result < 0) goto martian_source; @@ -1870,6 +1894,7 @@ rth->u.dst.flags= DST_HOST; if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(net); rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1881,7 +1906,7 @@ #endif rth->rt_iif = rth->fl.iif = dev->ifindex; - rth->u.dst.dev = &loopback_dev; + rth->u.dst.dev = &net->loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->rt_gateway = daddr; @@ -1939,6 +1964,7 @@ struct rtable * rth; unsigned hash; int iif = dev->ifindex; + struct net *net = dev->nd_net; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); @@ -1951,7 +1977,8 @@ rth->fl.iif == iif && rth->fl.oif == 0 && rth->fl.mark == skb->mark && - rth->fl.fl4_tos == tos) { + rth->fl.fl4_tos == tos && + rth->fl.fl_net == net) { rth->u.dst.lastuse = jiffies; dst_hold(&rth->u.dst); rth->u.dst.__use++; @@ -2063,6 +2090,7 @@ if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) rth->u.dst.flags |= DST_NOPOLICY; + rth->fl.fl_net = hold_net(oldflp->fl_net); rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; @@ -2142,7 
+2170,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .nl_u = { .ip4_u = + struct net *net = oldflp->fl_net; + struct flowi fl = { .fl_net = net, + .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, .tos = tos & IPTOS_RT_MASK, @@ -2151,7 +2181,7 @@ RT_SCOPE_UNIVERSE), } }, .mark = oldflp->mark, - .iif = loopback_dev.ifindex, + .iif = net->loopback_dev.ifindex, .oif = oldflp->oif }; struct fib_result res; unsigned flags = 0; @@ -2173,7 +2203,7 @@ goto out; /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(oldflp->fl4_src); + dev_out = ip_dev_find(net, oldflp->fl4_src); if (dev_out == NULL) goto out; @@ -2212,7 +2242,7 @@ if (oldflp->oif) { - dev_out = dev_get_by_index(oldflp->oif); + dev_out = dev_get_by_index(net, oldflp->oif); err = -ENODEV; if (dev_out == NULL) goto out; @@ -2245,9 +2275,9 @@ fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &net->loopback_dev; dev_hold(dev_out); - fl.oif = loopback_dev.ifindex; + fl.oif = net->loopback_dev.ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2292,7 +2322,7 @@ fl.fl4_src = fl.fl4_dst; if (dev_out) dev_put(dev_out); - dev_out = &loopback_dev; + dev_out = &net->loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; if (res.fi) @@ -2346,6 +2376,7 @@ rth->fl.iif == 0 && rth->fl.oif == flp->oif && rth->fl.mark == flp->mark && + rth->fl.fl_net == flp->fl_net && !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK))) { rth->u.dst.lastuse = jiffies; @@ -2522,7 +2553,7 @@ __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && - IPV4_DEVCONF_ALL(MC_FORWARDING)) { + IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { int err = ipmr_get_route(skb, r, nowait); if (err <= 0) { if (!nowait) { @@ -2553,6 +2584,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, 
struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; @@ -2591,7 +2623,7 @@ if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(net, iif); if (dev == NULL) { err = -ENODEV; goto errout_free; @@ -2608,6 +2640,7 @@ err = -rt->u.dst.error; } else { struct flowi fl = { + .fl_net = net, .nl_u = { .ip4_u = { .daddr = dst, @@ -2632,7 +2665,7 @@ if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); errout: return err; @@ -2945,6 +2978,48 @@ } __setup("rhash_entries=", set_rhash_entries); + +static void ip_rt_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS +# ifdef CONFIG_NET_CLS_ROUTE + proc_net_remove(net, "rt_acct"); +# endif + remove_proc_entry("rt_cache", net->proc_net_stat); + proc_net_remove(net, "rt_cache"); +#endif + rt_run_flush(0); +} + +static int ip_rt_net_init(struct net *net) +{ + int error = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *rtstat_pde; + if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops)) + goto out; + if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, + net->proc_net_stat))) + goto out; + rtstat_pde->proc_fops = &rt_cpu_seq_fops; +# ifdef CONFIG_NET_CLS_ROUTE + if (!create_proc_read_entry("rt_acct", 0, net->proc_net, + ip_rt_acct_read, NULL)) + goto out; +# endif +#endif + error = 0; +out: + if (error) + ip_rt_net_exit(net); + return error; +} + +struct pernet_operations ip_rt_net_ops = { + .init = ip_rt_net_init, + .exit = ip_rt_net_exit, +}; + int __init ip_rt_init(void) { int rc = 0; @@ -3008,20 +3083,7 @@ ip_rt_secret_interval; add_timer(&rt_secret_timer); -#ifdef CONFIG_PROC_FS - { - struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ - if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || - !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, - 
proc_net_stat))) { - return -ENOMEM; - } - rtstat_pde->proc_fops = &rt_cpu_seq_fops; - } -#ifdef CONFIG_NET_CLS_ROUTE - create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); -#endif -#endif + register_pernet_subsys(&ip_rt_net_ops); #ifdef CONFIG_XFRM xfrm_init(); xfrm4_init(); diff -Nurb linux-2.6.22-try2/net/ipv4/syncookies.c linux-2.6.22-try2-netns/net/ipv4/syncookies.c --- linux-2.6.22-try2/net/ipv4/syncookies.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/syncookies.c 2007-12-19 22:49:20.000000000 -0500 @@ -253,7 +253,8 @@ * no easy way to do this. */ { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? opt->faddr : ireq->rmt_addr), diff -Nurb linux-2.6.22-try2/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-try2-netns/net/ipv4/sysctl_net_ipv4.c --- linux-2.6.22-try2/net/ipv4/sysctl_net_ipv4.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/sysctl_net_ipv4.c 2007-12-19 22:49:20.000000000 -0500 @@ -29,21 +29,21 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; #endif -struct ipv4_config ipv4_config; - #ifdef CONFIG_SYSCTL static int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - int val = IPV4_DEVCONF_ALL(FORWARDING); + struct net *net = ctl->extra2; + int *valp = ctl->data; + int old = *valp; int ret; ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && IPV4_DEVCONF_ALL(FORWARDING) != val) - inet_forward_change(); + if (write && *valp != old) + inet_forward_change(net); return ret; } @@ -53,6 +53,7 @@ void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { + struct net *net = table->extra2; int *valp = table->data; int new; @@ -85,7 +86,7 @@ } *valp = new; - inet_forward_change(); + inet_forward_change(net); return 1; } @@ -188,22 +189,6 @@ ctl_table ipv4_table[] = { { - .ctl_name = 
NET_IPV4_TCP_TIMESTAMPS, - .procname = "tcp_timestamps", - .data = &sysctl_tcp_timestamps, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, - .procname = "tcp_window_scaling", - .data = &sysctl_tcp_window_scaling, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_IPV4_TCP_SACK, .procname = "tcp_sack", .data = &sysctl_tcp_sack, @@ -220,40 +205,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_FORWARD, - .procname = "ip_forward", - .data = &IPV4_DEVCONF_ALL(FORWARDING), - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_sysctl_forward, - .strategy = &ipv4_sysctl_forward_strategy - }, - { - .ctl_name = NET_IPV4_DEFAULT_TTL, - .procname = "ip_default_ttl", - .data = &sysctl_ip_default_ttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &ipv4_doint_and_flush, - .strategy = &ipv4_doint_and_flush_strategy, - }, - { - .ctl_name = NET_IPV4_NO_PMTU_DISC, - .procname = "ip_no_pmtu_disc", - .data = &ipv4_config.no_pmtu_disc, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_NONLOCAL_BIND, - .procname = "ip_nonlocal_bind", - .data = &sysctl_ip_nonlocal_bind, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_IPV4_TCP_SYN_RETRIES, .procname = "tcp_syn_retries", .data = &sysctl_tcp_syn_retries, @@ -286,39 +237,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, - .procname = "ipfrag_high_thresh", - .data = &sysctl_ipfrag_high_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, - .procname = "ipfrag_low_thresh", - .data = &sysctl_ipfrag_low_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_DYNADDR, - .procname = "ip_dynaddr", - .data = 
&sysctl_ip_dynaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_IPFRAG_TIME, - .procname = "ipfrag_time", - .data = &sysctl_ipfrag_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, .procname = "tcp_keepalive_time", .data = &sysctl_tcp_keepalive_time, @@ -422,17 +340,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, - .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = ip_local_port_range_min, - .extra2 = ip_local_port_range_max - }, - { .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, .procname = "icmp_echo_ignore_all", .data = &sysctl_icmp_echo_ignore_all, @@ -534,50 +441,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, - .procname = "inet_peer_threshold", - .data = &inet_peer_threshold, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = NET_IPV4_INET_PEER_MINTTL, - .procname = "inet_peer_minttl", - .data = &inet_peer_minttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_MAXTTL, - .procname = "inet_peer_maxttl", - .data = &inet_peer_maxttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, - .procname = "inet_peer_gc_mintime", - .data = &inet_peer_gc_mintime, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, - .procname = "inet_peer_gc_maxtime", - .data = &inet_peer_gc_maxtime, - 
.maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { .ctl_name = NET_TCP_ORPHAN_RETRIES, .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, @@ -706,24 +569,6 @@ .proc_handler = &proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, - .procname = "ipfrag_secret_interval", - .data = &sysctl_ipfrag_secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies - }, - { - .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, - .procname = "ipfrag_max_dist", - .data = &sysctl_ipfrag_max_dist, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .extra1 = &zero - }, - { .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save, @@ -865,6 +710,181 @@ { .ctl_name = 0 } }; -#endif /* CONFIG_SYSCTL */ +struct ctl_table multi_ipv4_table[] = { + { + /* .data is filled in by devinet_net_init. + * As a consequence this table entry must be the first + * entry in multi_ipv4_table. 
+ */ + .ctl_name = NET_IPV4_FORWARD, + .procname = "ip_forward", + .data = NULL, + .extra2 = &init_net, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ipv4_sysctl_forward, + .strategy = &ipv4_sysctl_forward_strategy + }, + { + .ctl_name = NET_IPV4_DEFAULT_TTL, + .procname = "ip_default_ttl", + .data = &init_net.sysctl_ip_default_ttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ipv4_doint_and_flush, + .strategy = &ipv4_doint_and_flush_strategy, + }, + { + .ctl_name = NET_IPV4_NO_PMTU_DISC, + .procname = "ip_no_pmtu_disc", + .data = &init_net.sysctl_ipv4_no_pmtu_disc, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_NONLOCAL_BIND, + .procname = "ip_nonlocal_bind", + .data = &init_net.sysctl_ip_nonlocal_bind, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, + .procname = "ip_local_port_range", + .data = &init_net.sysctl_local_port_range, + .maxlen = sizeof(init_net.sysctl_local_port_range), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = ip_local_port_range_min, + .extra2 = ip_local_port_range_max + }, + { + .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, + .procname = "ipfrag_high_thresh", + .data = &init_net.sysctl_ipfrag_high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, + .procname = "ipfrag_low_thresh", + .data = &init_net.sysctl_ipfrag_low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_IPFRAG_TIME, + .procname = "ipfrag_time", + .data = &init_net.sysctl_ipfrag_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, + .procname = "ipfrag_secret_interval", + .data = 
&init_net.sysctl_ipfrag_secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &init_net.sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero + }, + { + .ctl_name = NET_IPV4_DYNADDR, + .procname = "ip_dynaddr", + .data = &init_net.sysctl_ip_dynaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, + .procname = "ip_local_port_range", + .data = &init_net.sysctl_local_port_range, + .maxlen = sizeof(init_net.sysctl_local_port_range), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = ip_local_port_range_min, + .extra2 = ip_local_port_range_max + }, + { + .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, + .procname = "inet_peer_threshold", + .data = &init_net.inet_peer_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = NET_IPV4_INET_PEER_MINTTL, + .procname = "inet_peer_minttl", + .data = &init_net.inet_peer_minttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_MAXTTL, + .procname = "inet_peer_maxttl", + .data = &init_net.inet_peer_maxttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, + .procname = "inet_peer_gc_mintime", + .data = &init_net.inet_peer_gc_mintime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, + .procname = "inet_peer_gc_maxtime", + .data = &init_net.inet_peer_gc_maxtime, + .maxlen = sizeof(int), + .mode = 
0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies + }, + { + .ctl_name = NET_IPV4_TCP_TIMESTAMPS, + .procname = "tcp_timestamps", + .data = &init_net.sysctl_tcp_timestamps, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + + }, + { + .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, + .procname = "tcp_window_scaling", + .data = &init_net.sysctl_tcp_window_scaling, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; -EXPORT_SYMBOL(ipv4_config); +#endif /* CONFIG_SYSCTL */ diff -Nurb linux-2.6.22-try2/net/ipv4/tcp.c linux-2.6.22-try2-netns/net/ipv4/tcp.c --- linux-2.6.22-try2/net/ipv4/tcp.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tcp.c 2007-12-19 22:49:20.000000000 -0500 @@ -2409,6 +2409,23 @@ } __setup("thash_entries=", set_thash_entries); +static int tcp_net_init(struct net *net) +{ + /* + * This array holds the first and last local port number. + */ + net->sysctl_local_port_range[0] = 32768; + net->sysctl_local_port_range[1] = 61000; + + net->sysctl_tcp_timestamps = 1; + net->sysctl_tcp_window_scaling = 1; + return 0; +} + +static struct pernet_operations tcp_net_ops = { + .init = tcp_net_init, +}; + void __init tcp_init(void) { struct sk_buff *skb = NULL; @@ -2502,6 +2519,8 @@ sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); + register_pernet_subsys(&tcp_net_ops); + printk(KERN_INFO "TCP: Hash tables configured " "(established %d bind %d)\n", tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_input.c linux-2.6.22-try2-netns/net/ipv4/tcp_input.c --- linux-2.6.22-try2/net/ipv4/tcp_input.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tcp_input.c 2007-12-19 22:49:20.000000000 -0500 @@ -72,8 +72,6 @@ #include #include -int sysctl_tcp_timestamps __read_mostly = 1; -int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int 
sysctl_tcp_fack __read_mostly = 1; int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; @@ -2922,7 +2920,7 @@ break; case TCPOPT_WINDOW: if (opsize==TCPOLEN_WINDOW && th->syn && !estab) - if (sysctl_tcp_window_scaling) { + if (init_net.sysctl_tcp_window_scaling) { __u8 snd_wscale = *(__u8 *) ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -2938,7 +2936,7 @@ case TCPOPT_TIMESTAMP: if (opsize==TCPOLEN_TIMESTAMP) { if ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps)) { + (!estab && init_net.sysctl_tcp_timestamps)) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_ipv4.c linux-2.6.22-try2-netns/net/ipv4/tcp_ipv4.c --- linux-2.6.22-try2/net/ipv4/tcp_ipv4.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tcp_ipv4.c 2007-12-19 23:33:32.000000000 -0500 @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -353,6 +354,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) { + struct net *net = skb->dev->nd_net; struct iphdr *iph = (struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); struct tcp_sock *tp; @@ -369,7 +371,7 @@ } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, inet_iif(skb)); + th->source, inet_iif(skb), net); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -1499,7 +1501,8 @@ return tcp_check_req(sk, skb, req, prev); nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + iph->daddr, th->dest, inet_iif(skb), + sk->sk_net); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1618,6 +1621,7 @@ int tcp_v4_rcv(struct sk_buff *skb) { + struct net *net = skb->dev->nd_net; const struct iphdr *iph; struct tcphdr *th; struct sock *sk; @@ -1657,7 +1661,7 @@ TCP_SKB_CB(skb)->sacked = 0; sk = 
__inet_lookup(&tcp_hashinfo, iph->saddr, th->source, - iph->daddr, th->dest, inet_iif(skb)); + iph->daddr, th->dest, inet_iif(skb), net); if (!sk) goto no_tcp_socket; @@ -1732,7 +1736,7 @@ case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, iph->daddr, th->dest, - inet_iif(skb)); + inet_iif(skb), net); if (sk2) { inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); inet_twsk_put(inet_twsk(sk)); @@ -1766,7 +1770,7 @@ int release_it = 0; if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); + peer = inet_getpeer(sk->sk_net, inet->daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -1791,7 +1795,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1); if (peer) { const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -1980,7 +1984,8 @@ if (req->sk && !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) continue; - if (req->rsk_ops->family == st->family) { + if ((req->rsk_ops->family == st->family) && + (req->sk->sk_net == st->net)) { cur = req; goto out; } @@ -2004,6 +2009,8 @@ } get_sk: sk_for_each_from(sk, node) { + if (sk->sk_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) @@ -2056,6 +2063,8 @@ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + if (sk->sk_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "sk,egf: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); @@ -2069,6 +2078,8 @@ st->state = TCP_SEQ_STATE_TIME_WAIT; inet_twsk_for_each(tw, node, &tcp_hashinfo.ehash[st->bucket].twchain) { + if (tw->tw_net != st->net) + continue; vxdprintk(VXD_CBIT(net, 6), "tw: %p [#%d] (from %d)", tw, tw->tw_nid, nx_current_nid()); @@ -2099,7 +2110,8 @@ tw = cur; tw = tw_next(tw); 
get_tw: - while (tw && (tw->tw_family != st->family || + while (tw && ((tw->tw_net != st->net) || + (tw->tw_family != st->family) || !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) { tw = tw_next(tw); } @@ -2124,6 +2136,8 @@ vxdprintk(VXD_CBIT(net, 6), "sk,egn: %p [#%d] (from %d)", sk, sk->sk_nid, nx_current_nid()); + if (sk->sk_net != st->net) + continue; if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) continue; if (sk->sk_family == st->family) @@ -2253,6 +2267,7 @@ goto out_kfree; seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -2260,20 +2275,30 @@ goto out; } -int tcp_proc_register(struct tcp_seq_afinfo *afinfo) +static int tcp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct tcp_iter_state *st = seq->private; + put_net(st->net); + return seq_release_private(inode, file); +} + +int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) { int rc = 0; struct proc_dir_entry *p; if (!afinfo) return -EINVAL; + if (net == &init_net) { afinfo->seq_fops->owner = afinfo->owner; afinfo->seq_fops->open = tcp_seq_open; afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; + afinfo->seq_fops->release = tcp_seq_release; + } - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -2281,11 +2306,12 @@ return rc; } -void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) +void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(net, afinfo->name); + if (net == &init_net) memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -2430,14 +2456,29 @@ .seq_fops = &tcp4_seq_fops, }; +static int tcp4_proc_net_init(struct net *net) +{ + return tcp_proc_register(net, 
&tcp4_seq_afinfo); +} + +static void tcp4_proc_net_exit(struct net *net) +{ + tcp_proc_unregister(net, &tcp4_seq_afinfo); +} + +static struct pernet_operations tcp4_proc_net_ops = { + .init = tcp4_proc_net_init, + .exit = tcp4_proc_net_exit, +}; + int __init tcp4_proc_init(void) { - return tcp_proc_register(&tcp4_seq_afinfo); + return register_pernet_subsys(&tcp4_proc_net_ops); } void tcp4_proc_exit(void) { - tcp_proc_unregister(&tcp4_seq_afinfo); + unregister_pernet_subsys(&tcp4_proc_net_ops); } #endif /* CONFIG_PROC_FS */ @@ -2499,6 +2540,5 @@ EXPORT_SYMBOL(tcp_proc_register); EXPORT_SYMBOL(tcp_proc_unregister); #endif -EXPORT_SYMBOL(sysctl_local_port_range); EXPORT_SYMBOL(sysctl_tcp_low_latency); diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_output.c linux-2.6.22-try2-netns/net/ipv4/tcp_output.c --- linux-2.6.22-try2/net/ipv4/tcp_output.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tcp_output.c 2007-12-19 22:49:20.000000000 -0500 @@ -432,11 +432,11 @@ sysctl_flags = 0; if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; - if (sysctl_tcp_timestamps) { + if (sk->sk_net->sysctl_tcp_timestamps) { tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; sysctl_flags |= SYSCTL_FLAG_TSTAMPS; } - if (sysctl_tcp_window_scaling) { + if (sk->sk_net->sysctl_tcp_window_scaling) { tcp_header_size += TCPOLEN_WSCALE_ALIGNED; sysctl_flags |= SYSCTL_FLAG_WSCALE; } @@ -2215,7 +2215,7 @@ * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2238,7 +2238,7 @@ tp->advmss - (tp->rx_opt.ts_recent_stamp ? 
tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + sk->sk_net->sysctl_tcp_window_scaling, &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_probe.c linux-2.6.22-try2-netns/net/ipv4/tcp_probe.c --- linux-2.6.22-try2/net/ipv4/tcp_probe.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tcp_probe.c 2007-12-19 22:49:20.000000000 -0500 @@ -172,7 +172,7 @@ if (IS_ERR(tcpw.fifo)) return PTR_ERR(tcpw.fifo); - if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) goto err0; ret = register_jprobe(&tcp_probe); @@ -182,7 +182,7 @@ pr_info("TCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(procname); + proc_net_remove(&init_net, procname); err0: kfifo_free(tcpw.fifo); return ret; @@ -192,7 +192,7 @@ static __exit void tcpprobe_exit(void) { kfifo_free(tcpw.fifo); - proc_net_remove(procname); + proc_net_remove(&init_net, procname); unregister_jprobe(&tcp_probe); } diff -Nurb linux-2.6.22-try2/net/ipv4/tunnel4.c linux-2.6.22-try2-netns/net/ipv4/tunnel4.c --- linux-2.6.22-try2/net/ipv4/tunnel4.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/tunnel4.c 2007-12-19 22:49:20.000000000 -0500 @@ -75,6 +75,10 @@ { struct xfrm_tunnel *handler; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; @@ -113,6 +117,9 @@ { struct xfrm_tunnel *handler; + if (skb->dev->nd_net != &init_net) + return; + for (handler = tunnel4_handlers; handler; handler = handler->next) if (!handler->err_handler(skb, info)) break; diff -Nurb linux-2.6.22-try2/net/ipv4/udp.c linux-2.6.22-try2-netns/net/ipv4/udp.c --- linux-2.6.22-try2/net/ipv4/udp.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/udp.c 2007-12-19 23:34:00.000000000 -0500 @@ -101,6 +101,7 @@ 
#include #include #include +#include #include "udp_impl.h" /* @@ -112,16 +113,17 @@ struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; - -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) { + if (sk->sk_net != net) + continue; if (sk->sk_hash == num) return 1; + } return 0; } @@ -148,9 +150,9 @@ if (snum == 0) { int best_size_so_far, best, result, i; - if (*port_rover > sysctl_local_port_range[1] || - *port_rover < sysctl_local_port_range[0]) - *port_rover = sysctl_local_port_range[0]; + if (*port_rover > sk->sk_net->sysctl_local_port_range[1] || + *port_rover < sk->sk_net->sysctl_local_port_range[0]) + *port_rover = sk->sk_net->sysctl_local_port_range[0]; best_size_so_far = 32767; best = result = *port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { @@ -158,9 +160,9 @@ head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & + if (result > sk->sk_net->sysctl_local_port_range[1]) + result = sk->sk_net->sysctl_local_port_range[0] + + ((result - sk->sk_net->sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); goto gotit; } @@ -177,11 +179,11 @@ result = best; for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & + if (result > sk->sk_net->sysctl_local_port_range[1]) + result = sk->sk_net->sysctl_local_port_range[0] + + ((result - sk->sk_net->sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (! 
__udp_lib_lport_inuse(result, udptable)) + if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -194,6 +196,7 @@ sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && + sk->sk_net == sk2->sk_net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -216,7 +219,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); + return __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp); } extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2); @@ -229,7 +232,8 @@ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, +static struct sock *__udp4_lib_lookup(struct net *net, + __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct hlist_head udptable[]) { @@ -243,6 +247,9 @@ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); + if (sk->sk_net != net) + continue; + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 
1 : 0); @@ -299,6 +306,9 @@ sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); + if (s->sk_net != sk->sk_net) + continue; + if (s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || @@ -328,6 +338,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) { + struct net *net = skb->dev->nd_net; struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); @@ -337,7 +348,7 @@ int harderr; int err; - sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, + sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable ); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); @@ -623,7 +634,8 @@ rt = (struct rtable*)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, + struct flowi fl = { .fl_net = sk->sk_net, + .oif = ipc.oif, .nl_u = { .ip4_u = { .daddr = faddr, .saddr = saddr, @@ -1288,6 +1300,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { + struct net *net = skb->dev->nd_net; struct sock *sk; struct udphdr *uh = udp_hdr(skb); unsigned short ulen; @@ -1322,7 +1335,7 @@ udp_ping_of_death(skb, uh, saddr); #endif - sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, + sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest, skb->dev->ifindex, udptable ); if (sk != NULL) { @@ -1651,7 +1664,7 @@ sk = sk_next(sk); try_again: ; - } while (sk && (sk->sk_family != state->family || + } while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family || !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { @@ -1717,6 +1730,7 @@ seq = file->private_data; seq->private = s; + s->net = get_net(PROC_NET(inode)); out: return rc; out_kfree: @@ -1724,21 +1738,31 @@ goto out; } +static int udp_seq_release(struct inode *inode, struct file 
*file) +{ + struct seq_file *seq = file->private_data; + struct udp_iter_state *state = seq->private; + put_net(state->net); + return seq_release_private(inode, file); +} + /* ------------------------------------------------------------------------ */ -int udp_proc_register(struct udp_seq_afinfo *afinfo) +int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) { struct proc_dir_entry *p; int rc = 0; if (!afinfo) return -EINVAL; + if (net == &init_net) { afinfo->seq_fops->owner = afinfo->owner; afinfo->seq_fops->open = udp_seq_open; afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; + afinfo->seq_fops->release = udp_seq_release; + } - p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1746,11 +1770,12 @@ return rc; } -void udp_proc_unregister(struct udp_seq_afinfo *afinfo) +void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(afinfo->name); + proc_net_remove(net, afinfo->name); + if (net == &init_net) memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -1803,14 +1828,30 @@ .seq_fops = &udp4_seq_fops, }; + +static int udp4_proc_net_init(struct net *net) +{ + return udp_proc_register(net, &udp4_seq_afinfo); +} + +static void udp4_proc_net_exit(struct net *net) +{ + udp_proc_unregister(net, &udp4_seq_afinfo); +} + +static struct pernet_operations udp4_proc_net_ops = { + .init = udp4_proc_net_init, + .exit = udp4_proc_net_exit, +}; + int __init udp4_proc_init(void) { - return udp_proc_register(&udp4_seq_afinfo); + return register_pernet_subsys(&udp4_proc_net_ops); } void udp4_proc_exit(void) { - udp_proc_unregister(&udp4_seq_afinfo); + unregister_pernet_subsys(&udp4_proc_net_ops); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv4/udplite.c 
linux-2.6.22-try2-netns/net/ipv4/udplite.c --- linux-2.6.22-try2/net/ipv4/udplite.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/udplite.c 2007-12-19 22:49:20.000000000 -0500 @@ -31,11 +31,18 @@ static int udplite_rcv(struct sk_buff *skb) { + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); } static void udplite_err(struct sk_buff *skb, u32 info) { + if (skb->dev->nd_net != &init_net) + return; + return __udp4_lib_err(skb, info, udplite_hash); } @@ -103,7 +110,7 @@ inet_register_protosw(&udplite4_protosw); #ifdef CONFIG_PROC_FS - if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ + if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */ printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); #endif return; diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_input.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_input.c --- linux-2.6.22-try2/net/ipv4/xfrm4_input.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_input.c 2007-12-19 22:49:20.000000000 -0500 @@ -18,6 +18,10 @@ int xfrm4_rcv(struct sk_buff *skb) { + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } return xfrm4_rcv_encap(skb, 0); } diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_policy.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_policy.c --- linux-2.6.22-try2/net/ipv4/xfrm4_policy.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_policy.c 2007-12-19 22:49:20.000000000 -0500 @@ -25,6 +25,7 @@ { struct rtable *rt; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .daddr = daddr->a4, @@ -73,6 +74,7 @@ struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip4_u = { .saddr = fl->fl4_src, @@ -213,6 +215,7 @@ u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct 
flowi)); + fl->fl_net = &init_net; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { case IPPROTO_UDP: @@ -306,7 +309,7 @@ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt.idev->dev == dev) { - struct in_device *loopback_idev = in_dev_get(&loopback_dev); + struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_state.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_state.c --- linux-2.6.22-try2/net/ipv4/xfrm4_state.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_state.c 2007-12-19 22:49:20.000000000 -0500 @@ -16,7 +16,7 @@ static int xfrm4_init_flags(struct xfrm_state *x) { - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) x->props.flags |= XFRM_STATE_NOPMTUDISC; return 0; } diff -Nurb linux-2.6.22-try2/net/ipv6/addrconf.c linux-2.6.22-try2-netns/net/ipv6/addrconf.c --- linux-2.6.22-try2/net/ipv6/addrconf.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/addrconf.c 2007-12-19 23:35:57.000000000 -0500 @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ struct inet6_dev *idev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -920,7 +921,7 @@ read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -1882,7 +1883,7 @@ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) goto err_exit; - dev = __dev_get_by_index(ireq.ifr6_ifindex); + dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); err = -ENODEV; if (dev == NULL) @@ -1913,7 +1914,7 @@ if (err == 0) { err = -ENOBUFS; - if ((dev = __dev_get_by_name(p.name)) == NULL) + if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) goto err_exit; err = dev_open(dev); } @@ -1943,7 +1944,7 @@ if 
(!valid_lft || prefered_lft > valid_lft) return -EINVAL; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = addrconf_add_dev(dev)) == NULL) @@ -1994,7 +1995,7 @@ struct inet6_dev *idev; struct net_device *dev; - if ((dev = __dev_get_by_index(ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) return -ENODEV; if ((idev = __in6_dev_get(dev)) == NULL) @@ -2089,7 +2090,7 @@ return; } - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2245,12 +2246,12 @@ /* first try to inherit the link-local address from the link device */ if (idev->dev->iflink && - (link_dev = __dev_get_by_index(idev->dev->iflink))) { + (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } /* then try to inherit it from any device */ - for_each_netdev(link_dev) { + for_each_netdev(&init_net, link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -2282,6 +2283,9 @@ struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch(event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { @@ -2419,7 +2423,7 @@ ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == &init_net.loopback_dev && how == 1) how = 0; rt6_ifdown(dev); @@ -2850,14 +2854,14 @@ int __init if6_proc_init(void) { - if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) + if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) return -ENOMEM; return 0; } void if6_proc_exit(void) { - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); } #endif /* CONFIG_PROC_FS */ @@ -3017,11 +3021,15 @@ static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = 
skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3074,6 +3082,7 @@ static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx; @@ -3083,6 +3092,9 @@ u8 ifa_flags; int err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) return err; @@ -3103,7 +3115,7 @@ valid_lft = INFINITY_LIFE_TIME; } - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if (dev == NULL) return -ENODEV; @@ -3292,7 +3304,7 @@ s_ip_idx = ip_idx = cb->args[1]; idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -3367,26 +3379,42 @@ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = UNICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = MULTICAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; enum addr_type_t type = ANYCAST_ADDR; + + if (net != &init_net) + return 0; + return inet6_dump_addr(skb, cb, type); } static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = in_skb->sk->sk_net; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *addr = NULL; @@ -3395,6 +3423,9 @@ struct sk_buff *skb; int 
err; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); if (err < 0) goto errout; @@ -3407,7 +3438,7 @@ ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(ifm->ifa_index); + dev = __dev_get_by_index(&init_net, ifm->ifa_index); if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { err = -EADDRNOTAVAIL; @@ -3427,7 +3458,7 @@ kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout_ifa: in6_ifa_put(ifa); errout: @@ -3450,10 +3481,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3612,19 +3643,22 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, err; int s_idx = cb->args[0]; struct net_device *dev; struct inet6_dev *idev; struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; + if (net != &init_net) + return 0; /* FIXME: maybe disable ipv6 on non v6 guests? 
if (skb->sk && skb->sk->sk_vx_info) return skb->len; */ read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (!v6_dev_in_nx_info(dev, nxi)) @@ -3661,10 +3695,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); } static inline size_t inet6_prefix_nlmsg_size(void) @@ -3730,10 +3764,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); } static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4244,16 +4278,16 @@ * device and it being up should be removed. */ rtnl_lock(); - if (!ipv6_add_dev(&loopback_dev)) + if (!ipv6_add_dev(&init_net.loopback_dev)) err = -ENOMEM; rtnl_unlock(); if (err) return err; - ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); - ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); + ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); #endif register_netdevice_notifier(&ipv6_dev_notf); @@ -4304,12 +4338,12 @@ * clean dev list. */ - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((idev = __in6_dev_get(dev)) == NULL) continue; addrconf_ifdown(dev, 1); } - addrconf_ifdown(&loopback_dev, 2); + addrconf_ifdown(&init_net.loopback_dev, 2); /* * Check hash table. 
@@ -4335,6 +4369,6 @@ rtnl_unlock(); #ifdef CONFIG_PROC_FS - proc_net_remove("if_inet6"); + proc_net_remove(&init_net, "if_inet6"); #endif } diff -Nurb linux-2.6.22-try2/net/ipv6/af_inet6.c linux-2.6.22-try2-netns/net/ipv6/af_inet6.c --- linux-2.6.22-try2/net/ipv6/af_inet6.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/af_inet6.c 2007-12-19 22:49:20.000000000 -0500 @@ -82,7 +82,7 @@ return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol) { struct inet_sock *inet; struct ipv6_pinfo *np; @@ -95,6 +95,9 @@ int try_loading_module = 0; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -163,7 +166,7 @@ BUG_TRAP(answer_prot->slab != NULL); err = -ENOBUFS; - sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; @@ -206,7 +209,7 @@ inet->mc_index = 0; inet->mc_list = NULL; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -287,7 +290,7 @@ /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(v4addr) != RTN_LOCAL) { + if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; goto out; } @@ -313,7 +316,7 @@ err = -EINVAL; goto out; } - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -672,6 +675,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); diff -Nurb linux-2.6.22-try2/net/ipv6/anycast.c linux-2.6.22-try2-netns/net/ipv6/anycast.c --- linux-2.6.22-try2/net/ipv6/anycast.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/anycast.c 2007-12-19 22:49:20.000000000 -0500 @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -112,10 +113,10 @@ } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); + dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -196,7 +197,7 @@ write_unlock_bh(&ipv6_sk_ac_lock); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); dev_put(dev); @@ -224,7 +225,7 @@ if (pac->acl_ifindex != prev_index) { if (dev) dev_put(dev); - dev = dev_get_by_index(pac->acl_ifindex); + dev = dev_get_by_index(&init_net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -429,7 +430,7 @@ if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for_each_netdev(dev) + for_each_netdev(&init_net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; @@ -453,7 +454,7 @@ struct ac6_iter_state *state = ac6_seq_private(seq); state->idev = NULL; - 
for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -579,7 +580,7 @@ int __init ac6_proc_init(void) { - if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) + if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) return -ENOMEM; return 0; @@ -587,7 +588,7 @@ void ac6_proc_exit(void) { - proc_net_remove("anycast6"); + proc_net_remove(&init_net, "anycast6"); } #endif diff -Nurb linux-2.6.22-try2/net/ipv6/datagram.c linux-2.6.22-try2-netns/net/ipv6/datagram.c --- linux-2.6.22-try2/net/ipv6/datagram.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/datagram.c 2007-12-19 22:49:20.000000000 -0500 @@ -60,6 +60,7 @@ return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { @@ -544,7 +545,7 @@ if (!src_info->ipi6_ifindex) return -EINVAL; else { - dev = dev_get_by_index(src_info->ipi6_ifindex); + dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); if (!dev) return -ENODEV; } diff -Nurb linux-2.6.22-try2/net/ipv6/fib6_rules.c linux-2.6.22-try2-netns/net/ipv6/fib6_rules.c --- linux-2.6.22-try2/net/ipv6/fib6_rules.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/fib6_rules.c 2007-12-19 22:49:20.000000000 -0500 @@ -244,7 +244,7 @@ return -ENOBUFS; } -static u32 fib6_rule_default_pref(void) +static u32 fib6_rule_default_pref(struct fib_rules_ops *ops) { return 0x3FFF; } @@ -277,10 +277,10 @@ list_add_tail(&local_rule.common.list, &fib6_rules); list_add_tail(&main_rule.common.list, &fib6_rules); - fib_rules_register(&fib6_rules_ops); + fib_rules_register(&init_net, &fib6_rules_ops); } void fib6_rules_cleanup(void) { - fib_rules_unregister(&fib6_rules_ops); + fib_rules_unregister(&init_net, &fib6_rules_ops); } diff -Nurb linux-2.6.22-try2/net/ipv6/icmp.c 
linux-2.6.22-try2-netns/net/ipv6/icmp.c --- linux-2.6.22-try2/net/ipv6/icmp.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/icmp.c 2007-12-19 22:49:20.000000000 -0500 @@ -377,6 +377,7 @@ mip6_addr_swap(skb); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); if (saddr) @@ -495,6 +496,7 @@ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); if (saddr) diff -Nurb linux-2.6.22-try2/net/ipv6/inet6_connection_sock.c linux-2.6.22-try2-netns/net/ipv6/inet6_connection_sock.c --- linux-2.6.22-try2/net/ipv6/inet6_connection_sock.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/inet6_connection_sock.c 2007-12-19 22:49:20.000000000 -0500 @@ -149,6 +149,7 @@ struct in6_addr *final_p = NULL, final; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); diff -Nurb linux-2.6.22-try2/net/ipv6/inet6_hashtables.c linux-2.6.22-try2-netns/net/ipv6/inet6_hashtables.c --- linux-2.6.22-try2/net/ipv6/inet6_hashtables.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/inet6_hashtables.c 2007-12-19 22:49:20.000000000 -0500 @@ -61,7 +61,7 @@ const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, struct net *net) { struct sock *sk; const struct hlist_node *node; @@ -105,7 +105,7 @@ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, struct net *net) { struct sock *sk; const struct hlist_node *node; @@ -113,7 +113,7 @@ int score, hiscore = 0; read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { + 
sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -152,12 +152,12 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, - const int dif) + const int dif, struct net *net) { struct sock *sk; local_bh_disable(); - sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net); local_bh_enable(); return sk; @@ -251,6 +251,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { + struct net *net = sk->sk_net; struct inet_hashinfo *hinfo = death_row->hashinfo; const unsigned short snum = inet_sk(sk)->num; struct inet_bind_hashbucket *head; @@ -258,8 +259,8 @@ int ret; if (snum == 0) { - const int low = sysctl_local_port_range[0]; - const int high = sysctl_local_port_range[1]; + const int low = sk->sk_net->sysctl_local_port_range[0]; + const int high = sk->sk_net->sysctl_local_port_range[1]; const int range = high - low; int i, port; static u32 hint; @@ -270,7 +271,7 @@ local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, @@ -278,7 +279,7 @@ * unique enough. 
*/ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->port == port) { + if ((tb->port == port) && (tb->net == net)) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; @@ -291,7 +292,7 @@ } tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, - head, port); + head, net, port); if (!tb) { spin_unlock(&head->lock); break; @@ -326,7 +327,7 @@ goto out; } - head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; + head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_fib.c linux-2.6.22-try2-netns/net/ipv6/ip6_fib.c --- linux-2.6.22-try2/net/ipv6/ip6_fib.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ip6_fib.c 2007-12-19 22:49:20.000000000 -0500 @@ -361,6 +361,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; unsigned int h, s_h; unsigned int e = 0, s_e; struct rt6_rtnl_dump_arg arg; @@ -369,6 +370,9 @@ struct hlist_node *node; int res = 0; + if (net != &init_net) + return 0; + s_h = cb->args[0]; s_e = cb->args[1]; @@ -1311,6 +1315,11 @@ static int fib6_clean_node(struct fib6_walker_t *w) { + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; int res; struct rt6_info *rt; struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; @@ -1319,7 +1328,7 @@ res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, &info); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_flowlabel.c linux-2.6.22-try2-netns/net/ipv6/ip6_flowlabel.c --- linux-2.6.22-try2/net/ipv6/ip6_flowlabel.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ip6_flowlabel.c 2007-12-19 22:49:20.000000000 -0500 @@ -22,6 +22,7 @@ #include #include 
+#include #include #include @@ -309,6 +310,7 @@ msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); + flowi.fl_net = &init_net; flowi.oif = 0; err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); @@ -690,7 +692,7 @@ void ip6_flowlabel_init(void) { #ifdef CONFIG_PROC_FS - proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); #endif } @@ -698,6 +700,6 @@ { del_timer(&ip6_fl_gc_timer); #ifdef CONFIG_PROC_FS - proc_net_remove("ip6_flowlabel"); + proc_net_remove(&init_net, "ip6_flowlabel"); #endif } diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_input.c linux-2.6.22-try2-netns/net/ipv6/ip6_input.c --- linux-2.6.22-try2/net/ipv6/ip6_input.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ip6_input.c 2007-12-19 22:49:20.000000000 -0500 @@ -61,6 +61,11 @@ u32 pkt_len; struct inet6_dev *idev; + if (dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } + if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); return 0; diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_output.c linux-2.6.22-try2-netns/net/ipv6/ip6_output.c --- linux-2.6.22-try2/net/ipv6/ip6_output.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ip6_output.c 2007-12-19 22:49:20.000000000 -0500 @@ -423,7 +423,7 @@ /* XXX: idev->cnf.proxy_ndp? 
*/ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_tunnel.c linux-2.6.22-try2-netns/net/ipv6/ip6_tunnel.c --- linux-2.6.22-try2/net/ipv6/ip6_tunnel.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ip6_tunnel.c 2007-12-19 22:49:20.000000000 -0500 @@ -235,7 +235,7 @@ int i; for (i = 1; i < IP6_TNL_MAX; i++) { sprintf(name, "ip6tnl%d", i); - if (__dev_get_by_name(name) == NULL) + if (__dev_get_by_name(&init_net, name) == NULL) break; } if (i == IP6_TNL_MAX) @@ -651,7 +651,7 @@ struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && @@ -787,7 +787,7 @@ struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(p->link); + ldev = dev_get_by_index(&init_net, p->link); if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) printk(KERN_WARNING diff -Nurb linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c linux-2.6.22-try2-netns/net/ipv6/ipv6_sockglue.c --- linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ipv6_sockglue.c 2007-12-19 22:49:20.000000000 -0500 @@ -463,6 +463,7 @@ struct flowi fl; int junk; + fl.fl_net = &init_net; fl.fl6_flowlabel = 0; fl.oif = sk->sk_bound_dev_if; @@ -547,7 +548,7 @@ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(val) == NULL) { + if (__dev_get_by_index(&init_net, val) == NULL) { retv = -ENODEV; break; } diff -Nurb linux-2.6.22-try2/net/ipv6/mcast.c linux-2.6.22-try2-netns/net/ipv6/mcast.c --- linux-2.6.22-try2/net/ipv6/mcast.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/mcast.c 2007-12-19 
22:49:20.000000000 -0500 @@ -51,6 +51,7 @@ #include #include +#include #include #include @@ -214,7 +215,7 @@ dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -265,7 +266,7 @@ *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { + if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -300,7 +301,7 @@ dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return NULL; @@ -331,7 +332,7 @@ np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(mc_lst->ifindex); + dev = dev_get_by_index(&init_net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -2332,7 +2333,7 @@ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); state->idev = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2476,7 +2477,7 @@ state->idev = NULL; state->im = NULL; - for_each_netdev(state->dev) { + for_each_netdev(&init_net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2658,8 +2659,8 @@ np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); + proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); #endif return 0; @@ -2671,7 +2672,7 @@ igmp6_socket = NULL; /* for safety */ #ifdef CONFIG_PROC_FS - proc_net_remove("mcfilter6"); - proc_net_remove("igmp6"); + proc_net_remove(&init_net, "mcfilter6"); + 
proc_net_remove(&init_net, "igmp6"); #endif } diff -Nurb linux-2.6.22-try2/net/ipv6/ndisc.c linux-2.6.22-try2-netns/net/ipv6/ndisc.c --- linux-2.6.22-try2/net/ipv6/ndisc.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/ndisc.c 2007-12-19 22:49:20.000000000 -0500 @@ -418,6 +418,7 @@ int oif) { memset(fl, 0, sizeof(*fl)); + fl->fl_net = &init_net; ipv6_addr_copy(&fl->fl6_src, saddr); ipv6_addr_copy(&fl->fl6_dst, daddr); fl->proto = IPPROTO_ICMPV6; @@ -760,7 +761,7 @@ if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, + (pneigh = pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -901,7 +902,7 @@ */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1525,6 +1526,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_queue.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6_queue.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_queue.c 2007-12-19 22:49:20.000000000 -0500 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -546,6 +547,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) ipq_dev_drop(dev->ifindex); @@ -565,7 +569,7 @@ if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW && n->pid) { 
write_lock_bh(&queue_lock); - if (n->pid == peer_pid) + if ((n->net == &init_net) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -657,14 +661,14 @@ struct proc_dir_entry *proc; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, - THIS_MODULE); + ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, + NULL, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; } - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); + proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); if (proc) proc->owner = THIS_MODULE; else { @@ -685,7 +689,7 @@ cleanup_sysctl: unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); cleanup_ipqnl: sock_release(ipqnl->sk_socket); @@ -705,7 +709,7 @@ unregister_sysctl_table(ipq_sysctl_header); unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); + proc_net_remove(&init_net, IPQ_PROC_FS_NAME); sock_release(ipqnl->sk_socket); mutex_lock(&ipqnl_mutex); diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_tables.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6_tables.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_tables.c 2007-12-19 22:49:20.000000000 -0500 @@ -906,7 +906,7 @@ int ret; struct xt_table *t; - t = xt_find_table_lock(AF_INET6, entries->name); + t = xt_find_table_lock(&init_net, AF_INET6, entries->name); if (t && !IS_ERR(t)) { struct xt_table_info *private = t->private; duprintf("t->private->number = %u\n", private->number); @@ -972,7 +972,7 @@ duprintf("ip_tables: Translated table\n"); - t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), + t = 
try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name), "ip6table_%s", tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1073,7 +1073,7 @@ goto free; } - t = xt_find_table_lock(AF_INET6, tmp.name); + t = xt_find_table_lock(&init_net, AF_INET6, tmp.name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1109,6 +1109,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1134,6 +1137,9 @@ { int ret; + if (sk->sk_net != &init_net) + return -ENOPROTOOPT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1155,7 +1161,7 @@ } name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; - t = try_then_request_module(xt_find_table_lock(AF_INET6, name), + t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), "ip6table_%s", name); if (t && !IS_ERR(t)) { struct ip6t_getinfo info; @@ -1259,7 +1265,7 @@ return ret; } - ret = xt_register_table(table, &bootstrap, newinfo); + ret = xt_register_table(&init_net, table, &bootstrap, newinfo); if (ret != 0) { xt_free_table_info(newinfo); return ret; diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6t_REJECT.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6t_REJECT.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6t_REJECT.c 2007-12-19 22:49:20.000000000 -0500 @@ -92,6 +92,7 @@ } memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); @@ -172,7 +173,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) { if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = &loopback_dev; + skb_in->dev = &init_net.loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_filter.c 
linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_filter.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_filter.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_filter.c 2007-12-19 22:49:20.000000000 -0500 @@ -65,6 +65,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } @@ -75,6 +79,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #if 0 /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct iphdr) diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_mangle.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_mangle.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_mangle.c 2007-12-19 22:49:20.000000000 -0500 @@ -79,6 +79,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } @@ -95,6 +99,10 @@ u_int8_t hop_limit; u_int32_t flowlabel, mark; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + #if 0 /* root is playing with raw sockets. 
*/ if ((*pskb)->len < sizeof(struct iphdr) diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_raw.c --- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_raw.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_raw.c 2007-12-19 22:49:20.000000000 -0500 @@ -57,6 +57,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-try2-netns/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c --- linux-2.6.22-try2/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-12-19 22:49:20.000000000 -0500 @@ -167,6 +167,10 @@ unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) @@ -203,6 +207,10 @@ { struct sk_buff *reasm; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* Previously seen (loopback)? */ if ((*pskb)->nfct) return NF_ACCEPT; @@ -231,6 +239,10 @@ { struct sk_buff *reasm = (*pskb)->nfct_reasm; + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* This packet is fragmented and has reassembled packet. */ if (reasm) { /* Reassembled packet isn't parsed yet ? 
*/ @@ -256,6 +268,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + /* root is playing with raw sockets. */ if ((*pskb)->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter.c linux-2.6.22-try2-netns/net/ipv6/netfilter.c --- linux-2.6.22-try2/net/ipv6/netfilter.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/netfilter.c 2007-12-19 22:49:20.000000000 -0500 @@ -14,6 +14,7 @@ struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { + .fl_net = &init_net, .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, .nl_u = diff -Nurb linux-2.6.22-try2/net/ipv6/proc.c linux-2.6.22-try2-netns/net/ipv6/proc.c --- linux-2.6.22-try2/net/ipv6/proc.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/proc.c 2007-12-19 22:49:20.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include static struct proc_dir_entry *proc_net_devsnmp6; @@ -231,22 +232,22 @@ { int rc = 0; - if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) + if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) goto proc_snmp6_fail; - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); + proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); if (!proc_net_devsnmp6) goto proc_dev_snmp6_fail; - if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) + if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) goto proc_sockstat6_fail; out: return rc; proc_sockstat6_fail: - proc_net_remove("dev_snmp6"); + proc_net_remove(&init_net, "dev_snmp6"); proc_dev_snmp6_fail: - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "snmp6"); proc_snmp6_fail: rc = -ENOMEM; goto out; @@ -254,8 +255,8 @@ void ipv6_misc_proc_exit(void) { - proc_net_remove("sockstat6"); - 
proc_net_remove("dev_snmp6"); - proc_net_remove("snmp6"); + proc_net_remove(&init_net, "sockstat6"); + proc_net_remove(&init_net, "dev_snmp6"); + proc_net_remove(&init_net, "snmp6"); } diff -Nurb linux-2.6.22-try2/net/ipv6/raw.c linux-2.6.22-try2-netns/net/ipv6/raw.c --- linux-2.6.22-try2/net/ipv6/raw.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/raw.c 2007-12-19 22:49:20.000000000 -0500 @@ -49,6 +49,7 @@ #include #include #include +#include #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) #include #endif @@ -282,7 +283,7 @@ if (!sk->sk_bound_dev_if) goto out; - dev = dev_get_by_index(sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out; @@ -728,6 +729,7 @@ * Get and verify the address. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -1315,13 +1317,13 @@ int __init raw6_proc_init(void) { - if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) + if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) return -ENOMEM; return 0; } void raw6_proc_exit(void) { - proc_net_remove("raw6"); + proc_net_remove(&init_net, "raw6"); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv6/reassembly.c linux-2.6.22-try2-netns/net/ipv6/reassembly.c --- linux-2.6.22-try2/net/ipv6/reassembly.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/reassembly.c 2007-12-19 22:49:20.000000000 -0500 @@ -301,7 +301,7 @@ fq_kill(fq); - dev = dev_get_by_index(fq->iif); + dev = dev_get_by_index(&init_net, fq->iif); if (!dev) goto out; diff -Nurb linux-2.6.22-try2/net/ipv6/route.c linux-2.6.22-try2-netns/net/ipv6/route.c --- linux-2.6.22-try2/net/ipv6/route.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/route.c 2007-12-19 22:49:20.000000000 -0500 @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -137,7 +138,7 @@ .dst = { .__refcnt = 
ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -163,7 +164,7 @@ .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -183,7 +184,7 @@ .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, + .dev = NULL, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -223,8 +224,8 @@ struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) { + struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); if (loopback_idev != NULL) { rt->rt6i_idev = loopback_idev; in6_dev_put(idev); @@ -564,6 +565,7 @@ int oif, int strict) { struct flowi fl = { + .fl_net = &init_net, .oif = oif, .nl_u = { .ip6_u = { @@ -611,7 +613,12 @@ int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL); + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; + return __ip6_ins_rt(rt, &info); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -742,6 +749,7 @@ struct ipv6hdr *iph = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { + .fl_net = &init_net, .iif = skb->dev->ifindex, .nl_u = { .ip6_u = { @@ -1129,7 +1137,7 @@ #endif if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(&init_net, cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1187,12 +1195,12 @@ if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. 
*/ - if (dev != &loopback_dev) { + if (dev != &init_net.loopback_dev) { if (dev) { dev_put(dev); in6_dev_put(idev); } - dev = &loopback_dev; + dev = &init_net.loopback_dev; dev_hold(dev); idev = in6_dev_get(dev); if (!idev) { @@ -1333,7 +1341,12 @@ int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL); + struct nl_info info = { + .nlh = NULL, + .pid = 0, + .net = &init_net, + }; + return __ip6_del_rt(rt, &info); } static int ip6_route_del(struct fib6_config *cfg) @@ -1444,6 +1457,7 @@ int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl = { .fl = { + .fl_net = &init_net, .oif = dev->ifindex, .nl_u = { .ip6_u = { @@ -1896,13 +1910,13 @@ if (rt == NULL) return ERR_PTR(-ENOMEM); - dev_hold(&loopback_dev); + dev_hold(&init_net.loopback_dev); in6_dev_hold(idev); rt->u.dst.flags = DST_HOST; rt->u.dst.input = ip6_input; rt->u.dst.output = ip6_output; - rt->rt6i_dev = &loopback_dev; + rt->rt6i_dev = &init_net.loopback_dev; rt->rt6i_idev = idev; rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); @@ -2033,6 +2047,7 @@ cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; cfg->fc_nlinfo.nlh = nlh; + cfg->fc_nlinfo.net = skb->sk->sk_net; if (tb[RTA_GATEWAY]) { nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); @@ -2078,9 +2093,13 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2090,9 +2109,13 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib6_config cfg; int err; + if (net != &init_net) + return -EINVAL; + err = rtm_to_fib6_config(skb, nlh, &cfg); if (err < 0) return err; @@ -2227,6 +2250,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { + 
struct net *net = in_skb->sk->sk_net; struct nlattr *tb[RTA_MAX+1]; struct rt6_info *rt; struct sk_buff *skb; @@ -2234,12 +2258,16 @@ struct flowi fl; int err, iif = 0; + if (net != &init_net) + return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); if (err < 0) goto errout; err = -EINVAL; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) @@ -2263,7 +2291,7 @@ if (iif) { struct net_device *dev; - dev = __dev_get_by_index(iif); + dev = __dev_get_by_index(&init_net, iif); if (!dev) { err = -ENODEV; goto errout; @@ -2293,7 +2321,7 @@ goto errout; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); errout: return err; } @@ -2301,17 +2329,10 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; + u32 pid = info->pid, seq = info->nlh ? 
info->nlh->nlmsg_seq : 0; + struct nlmsghdr *nlh = info->nlh; int err = -ENOBUFS; - if (info) { - pid = info->pid; - nlh = info->nlh; - if (nlh) - seq = nlh->nlmsg_seq; - } - skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); if (skb == NULL) goto errout; @@ -2323,10 +2344,10 @@ kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); } /* @@ -2558,13 +2579,19 @@ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; + /* Perform the initialization we can't perform at compile time */ + ip6_null_entry.u.dst.dev = &init_net.loopback_dev; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev; + ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev; +#endif fib6_init(); #ifdef CONFIG_PROC_FS - p = proc_net_create("ipv6_route", 0, rt6_proc_info); + p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); if (p) p->owner = THIS_MODULE; - proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); #endif #ifdef CONFIG_XFRM xfrm6_init(); @@ -2584,8 +2611,8 @@ fib6_rules_cleanup(); #endif #ifdef CONFIG_PROC_FS - proc_net_remove("ipv6_route"); - proc_net_remove("rt6_stats"); + proc_net_remove(&init_net, "ipv6_route"); + proc_net_remove(&init_net, "rt6_stats"); #endif #ifdef CONFIG_XFRM xfrm6_fini(); diff -Nurb linux-2.6.22-try2/net/ipv6/sit.c linux-2.6.22-try2-netns/net/ipv6/sit.c --- linux-2.6.22-try2/net/ipv6/sit.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/sit.c 2007-12-19 22:49:20.000000000 -0500 @@ -167,7 +167,7 @@ int i; for (i=1; i<100; i++) { sprintf(name, "sit%d", i); - if (__dev_get_by_name(name) == NULL) + if 
(__dev_get_by_name(&init_net, name) == NULL) break; } if (i==100) @@ -283,6 +283,9 @@ struct sk_buff *skb2; struct rt6_info *rt6i; + if (skb->dev->nd_net != &init_net) + return; + if (len < hlen + sizeof(struct ipv6hdr)) return; iph6 = (struct ipv6hdr*)(dp + hlen); @@ -369,6 +372,10 @@ struct iphdr *iph; struct ip_tunnel *tunnel; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -474,7 +481,8 @@ } { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = dst, .saddr = tiph->saddr, .tos = RT_TOS(tos) } }, @@ -745,7 +753,8 @@ memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .fl_net = &init_net, + .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos) } }, @@ -760,7 +769,7 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(tunnel->parms.link); + tdev = __dev_get_by_index(&init_net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); diff -Nurb linux-2.6.22-try2/net/ipv6/tcp_ipv6.c linux-2.6.22-try2-netns/net/ipv6/tcp_ipv6.c --- linux-2.6.22-try2/net/ipv6/tcp_ipv6.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/tcp_ipv6.c 2007-12-19 22:49:20.000000000 -0500 @@ -143,6 +143,7 @@ return(-EAFNOSUPPORT); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (np->sndflow) { fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; @@ -330,6 +331,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { + struct net *net = skb->dev->nd_net; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; @@ -339,7 +341,7 @@ __u32 seq; sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, 
&hdr->saddr, - th->source, skb->dev->ifindex); + th->source, skb->dev->ifindex, net); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -388,6 +390,7 @@ for now. */ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); @@ -481,6 +484,7 @@ int err = -1; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); @@ -1066,6 +1070,7 @@ buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); @@ -1167,6 +1172,7 @@ buff->csum = csum_partial((char *)t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); @@ -1224,7 +1230,8 @@ nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), inet6_iif(skb), + sk->sk_net); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1414,6 +1421,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); if (opt && opt->srcrt) { @@ -1700,6 +1708,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; + struct net *net = skb->dev->nd_net; struct tcphdr *th; struct sock *sk; int ret; @@ -1736,7 +1745,7 @@ sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); + inet6_iif(skb), net); if (!sk) goto no_tcp_socket; @@ -1816,7 +1825,8 @@ sk2 = inet6_lookup_listener(&tcp_hashinfo, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + 
ntohs(th->dest), inet6_iif(skb), + net); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); @@ -2121,12 +2131,12 @@ int __init tcp6_proc_init(void) { - return tcp_proc_register(&tcp6_seq_afinfo); + return tcp_proc_register(&init_net, &tcp6_seq_afinfo); } void tcp6_proc_exit(void) { - tcp_proc_unregister(&tcp6_seq_afinfo); + tcp_proc_unregister(&init_net, &tcp6_seq_afinfo); } #endif diff -Nurb linux-2.6.22-try2/net/ipv6/udp.c linux-2.6.22-try2-netns/net/ipv6/udp.c --- linux-2.6.22-try2/net/ipv6/udp.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/udp.c 2007-12-19 22:49:20.000000000 -0500 @@ -657,6 +657,7 @@ ulen += sizeof(struct udphdr); memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; if (sin6) { if (sin6->sin6_port == 0) @@ -967,11 +968,11 @@ int __init udp6_proc_init(void) { - return udp_proc_register(&udp6_seq_afinfo); + return udp_proc_register(&init_net, &udp6_seq_afinfo); } void udp6_proc_exit(void) { - udp_proc_unregister(&udp6_seq_afinfo); + udp_proc_unregister(&init_net, &udp6_seq_afinfo); } #endif /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/ipv6/udplite.c linux-2.6.22-try2-netns/net/ipv6/udplite.c --- linux-2.6.22-try2/net/ipv6/udplite.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/udplite.c 2007-12-19 22:49:20.000000000 -0500 @@ -95,11 +95,11 @@ int __init udplite6_proc_init(void) { - return udp_proc_register(&udplite6_seq_afinfo); + return udp_proc_register(&init_net, &udplite6_seq_afinfo); } void udplite6_proc_exit(void) { - udp_proc_unregister(&udplite6_seq_afinfo); + udp_proc_unregister(&init_net, &udplite6_seq_afinfo); } #endif diff -Nurb linux-2.6.22-try2/net/ipv6/xfrm6_policy.c linux-2.6.22-try2-netns/net/ipv6/xfrm6_policy.c --- linux-2.6.22-try2/net/ipv6/xfrm6_policy.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipv6/xfrm6_policy.c 2007-12-19 22:49:20.000000000 -0500 @@ -40,6 +40,7 @@ { struct 
rt6_info *rt; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip6_u = { .daddr = *(struct in6_addr *)&daddr->a6, @@ -132,6 +133,7 @@ struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); struct rt6_info *rt = rt0; struct flowi fl_tunnel = { + .fl_net = &init_net, .nl_u = { .ip6_u = { .saddr = fl->fl6_src, @@ -278,6 +280,7 @@ u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); + fl->fl_net = &init_net; ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); @@ -375,7 +378,7 @@ xdst = (struct xfrm_dst *)dst; if (xdst->u.rt6.rt6i_idev->dev == dev) { - struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); + struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); BUG_ON(!loopback_idev); do { diff -Nurb linux-2.6.22-try2/net/ipx/af_ipx.c linux-2.6.22-try2-netns/net/ipx/af_ipx.c --- linux-2.6.22-try2/net/ipx/af_ipx.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipx/af_ipx.c 2007-12-19 22:49:20.000000000 -0500 @@ -347,6 +347,9 @@ struct net_device *dev = ptr; struct ipx_interface *i, *tmp; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN && event != NETDEV_UP) goto out; @@ -986,7 +989,7 @@ if (intrfc) ipxitf_put(intrfc); - dev = dev_get_by_name(idef->ipx_device); + dev = dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1094,7 +1097,7 @@ if (!dlink_type) goto out; - dev = __dev_get_by_name(idef->ipx_device); + dev = __dev_get_by_name(&init_net, idef->ipx_device); rc = -ENODEV; if (!dev) goto out; @@ -1189,7 +1192,7 @@ if (copy_from_user(&ifr, arg, sizeof(ifr))) break; sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; - dev = __dev_get_by_name(ifr.ifr_name); + dev = __dev_get_by_name(&init_net, ifr.ifr_name); rc = -ENODEV; if (!dev) break; @@ -1360,11 +1363,14 @@ .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct 
socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; + if (net != &init_net) + return -EAFNOSUPPORT; + /* * SPX support is not anymore in the kernel sources. If you want to * ressurrect it, completing it and making it understand shared skbs, @@ -1375,7 +1381,7 @@ goto out; rc = -ENOMEM; - sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); + sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); if (!sk) goto out; #ifdef IPX_REFCNT_DEBUG @@ -1644,6 +1650,9 @@ u16 ipx_pktsize; int rc = 0; + if (dev->nd_net != &init_net) + goto drop; + /* Not ours */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; diff -Nurb linux-2.6.22-try2/net/ipx/ipx_proc.c linux-2.6.22-try2-netns/net/ipx/ipx_proc.c --- linux-2.6.22-try2/net/ipx/ipx_proc.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/ipx/ipx_proc.c 2007-12-19 22:49:20.000000000 -0500 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -353,7 +354,7 @@ struct proc_dir_entry *p; int rc = -ENOMEM; - ipx_proc_dir = proc_mkdir("ipx", proc_net); + ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net); if (!ipx_proc_dir) goto out; @@ -381,7 +382,7 @@ out_route: remove_proc_entry("interface", ipx_proc_dir); out_interface: - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); goto out; } @@ -390,7 +391,7 @@ remove_proc_entry("interface", ipx_proc_dir); remove_proc_entry("route", ipx_proc_dir); remove_proc_entry("socket", ipx_proc_dir); - remove_proc_entry("ipx", proc_net); + remove_proc_entry("ipx", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/irda/af_irda.c linux-2.6.22-try2-netns/net/irda/af_irda.c --- linux-2.6.22-try2/net/irda/af_irda.c 2007-12-19 13:37:58.000000000 -0500 +++ linux-2.6.22-try2-netns/net/irda/af_irda.c 2007-12-19 22:49:20.000000000 -0500 @@ -60,7 +60,7 @@ #include -static int irda_create(struct socket *sock, int protocol); +static int irda_create(struct net *net, struct socket *sock, int 
protocol); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -831,7 +831,7 @@ IRDA_DEBUG(2, "%s()\n", __FUNCTION__); - err = irda_create(newsock, sk->sk_protocol); + err = irda_create(sk->sk_net, newsock, sk->sk_protocol); if (err) return err; @@ -1057,13 +1057,16 @@ * Create IrDA socket * */ -static int irda_create(struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct irda_sock *self; IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + if (net != &init_net) + return -EAFNOSUPPORT; + /* Check for valid socket type */ switch (sock->type) { case SOCK_STREAM: /* For TTP connections with SAR disabled */ @@ -1075,7 +1078,7 @@ } /* Allocate networking socket */ - sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); if (sk == NULL) return -ENOMEM; diff -Nurb linux-2.6.22-try2/net/irda/irlap_frame.c linux-2.6.22-try2-netns/net/irda/irlap_frame.c --- linux-2.6.22-try2/net/irda/irlap_frame.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/irda/irlap_frame.c 2007-12-19 22:49:20.000000000 -0500 @@ -1319,6 +1319,9 @@ int command; __u8 control; + if (dev->nd_net != &init_net) + goto out; + /* FIXME: should we get our own field? 
*/ self = (struct irlap_cb *) dev->atalk_ptr; diff -Nurb linux-2.6.22-try2/net/irda/irproc.c linux-2.6.22-try2-netns/net/irda/irproc.c --- linux-2.6.22-try2/net/irda/irproc.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/irda/irproc.c 2007-12-19 22:49:20.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -66,7 +67,7 @@ int i; struct proc_dir_entry *d; - proc_irda = proc_mkdir("irda", proc_net); + proc_irda = proc_mkdir("irda", init_net.proc_net); if (proc_irda == NULL) return; proc_irda->owner = THIS_MODULE; @@ -92,7 +93,7 @@ for (i=0; i #include #include +#include #include @@ -136,11 +137,14 @@ .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int err; + if (net != &init_net) + return -EAFNOSUPPORT; + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) @@ -149,7 +153,7 @@ return -EPROTONOSUPPORT; err = -ENOMEM; - sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); + sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL) goto out; @@ -3781,7 +3785,7 @@ static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); - remove_proc_entry("net/pfkey", NULL); + remove_proc_entry("pfkey", init_net.proc_net); sock_unregister(PF_KEY); proto_unregister(&key_proto); } @@ -3798,7 +3802,7 @@ goto out_unregister_key_proto; #ifdef CONFIG_PROC_FS err = -ENOMEM; - if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) + if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL) goto out_sock_unregister; #endif err = xfrm_register_km(&pfkeyv2_mgr); diff -Nurb linux-2.6.22-try2/net/llc/af_llc.c linux-2.6.22-try2-netns/net/llc/af_llc.c --- linux-2.6.22-try2/net/llc/af_llc.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/llc/af_llc.c 2007-12-19 
22:49:20.000000000 -0500 @@ -150,14 +150,17 @@ * socket type we have available. * Returns 0 upon success, negative upon failure. */ -static int llc_ui_create(struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { rc = -ENOMEM; - sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); + sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); if (sk) { rc = 0; llc_ui_sk_init(sock, sk); @@ -249,7 +252,7 @@ if (!sock_flag(sk, SOCK_ZAPPED)) goto out; rc = -ENODEV; - llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); + llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); if (!llc->dev) goto out; rc = -EUSERS; @@ -300,7 +303,7 @@ goto out; rc = -ENODEV; rtnl_lock(); - llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); + llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); rtnl_unlock(); if (!llc->dev) goto out; diff -Nurb linux-2.6.22-try2/net/llc/llc_conn.c linux-2.6.22-try2-netns/net/llc/llc_conn.c --- linux-2.6.22-try2/net/llc/llc_conn.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/llc/llc_conn.c 2007-12-19 22:49:20.000000000 -0500 @@ -700,7 +700,7 @@ struct llc_addr *saddr, struct llc_addr *daddr) { - struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, + struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, sk->sk_prot); struct llc_sock *newllc, *llc = llc_sk(sk); @@ -867,9 +867,9 @@ * Allocates a LLC sock and initializes it. 
Returns the new LLC sock * or %NULL if there's no memory available for one */ -struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) +struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) { - struct sock *sk = sk_alloc(family, priority, prot, 1); + struct sock *sk = sk_alloc(net, family, priority, prot, 1); if (!sk) goto out; diff -Nurb linux-2.6.22-try2/net/llc/llc_core.c linux-2.6.22-try2-netns/net/llc/llc_core.c --- linux-2.6.22-try2/net/llc/llc_core.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/llc/llc_core.c 2007-12-19 22:49:20.000000000 -0500 @@ -19,6 +19,7 @@ #include #include #include +#include #include LIST_HEAD(llc_sap_list); @@ -162,7 +163,7 @@ { struct net_device *dev; - dev = first_net_device(); + dev = first_net_device(&init_net); if (dev != NULL) dev = next_net_device(dev); diff -Nurb linux-2.6.22-try2/net/llc/llc_input.c linux-2.6.22-try2-netns/net/llc/llc_input.c --- linux-2.6.22-try2/net/llc/llc_input.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/llc/llc_input.c 2007-12-19 22:49:20.000000000 -0500 @@ -12,6 +12,7 @@ * See the GNU General Public License for more details. */ #include +#include #include #include #include @@ -145,6 +146,9 @@ int (*rcv)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); + if (dev->nd_net != &init_net) + goto drop; + /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. 
diff -Nurb linux-2.6.22-try2/net/llc/llc_proc.c linux-2.6.22-try2-netns/net/llc/llc_proc.c --- linux-2.6.22-try2/net/llc/llc_proc.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/llc/llc_proc.c 2007-12-19 22:49:20.000000000 -0500 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -231,7 +232,7 @@ int rc = -ENOMEM; struct proc_dir_entry *p; - llc_proc_dir = proc_mkdir("llc", proc_net); + llc_proc_dir = proc_mkdir("llc", init_net.proc_net); if (!llc_proc_dir) goto out; llc_proc_dir->owner = THIS_MODULE; @@ -254,7 +255,7 @@ out_core: remove_proc_entry("socket", llc_proc_dir); out_socket: - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); goto out; } @@ -262,5 +263,5 @@ { remove_proc_entry("socket", llc_proc_dir); remove_proc_entry("core", llc_proc_dir); - remove_proc_entry("llc", proc_net); + remove_proc_entry("llc", init_net.proc_net); } diff -Nurb linux-2.6.22-try2/net/netfilter/core.c linux-2.6.22-try2-netns/net/netfilter/core.c --- linux-2.6.22-try2/net/netfilter/core.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/core.c 2007-12-19 22:49:20.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include "nf_internals.h" @@ -280,8 +281,28 @@ #endif /* CONFIG_NF_CONNTRACK */ #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); +static int netfilter_proc_init(struct net * net) +{ + int error = -ENOMEM; + net->proc_net_netfilter = proc_mkdir("netfilter", net->proc_net); + + if (net->proc_net_netfilter) { + net->proc_net_netfilter->data = net; + error = 0; + } + return error; +} + +static void netfilter_proc_exit(struct net *net) +{ + remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_proc_ops = { + .init = netfilter_proc_init, + .exit = netfilter_proc_exit, +}; + #endif void __init netfilter_init(void) @@ -293,8 +314,7 @@ } #ifdef 
CONFIG_PROC_FS - proc_net_netfilter = proc_mkdir("netfilter", proc_net); - if (!proc_net_netfilter) + if (register_pernet_subsys(&netfilter_proc_ops) < 0) panic("cannot create netfilter proc entry"); #endif diff -Nurb linux-2.6.22-try2/net/netfilter/nf_conntrack_h323_main.c linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_h323_main.c --- linux-2.6.22-try2/net/netfilter/nf_conntrack_h323_main.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_h323_main.c 2007-12-19 22:49:20.000000000 -0500 @@ -724,6 +724,8 @@ memset(&fl1, 0, sizeof(fl1)); memset(&fl2, 0, sizeof(fl2)); + fl1.fl_net = &init_net; + fl2.fl_net = &init_net; switch (family) { case AF_INET: { diff -Nurb linux-2.6.22-try2/net/netfilter/nf_conntrack_standalone.c linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_standalone.c --- linux-2.6.22-try2/net/netfilter/nf_conntrack_standalone.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_standalone.c 2007-12-19 22:49:20.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -419,14 +420,14 @@ return ret; #ifdef CONFIG_PROC_FS - proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); if (!proc_stat) goto cleanup_proc_exp; @@ -447,11 +448,11 @@ cleanup_proc_stat: #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); cleanup_proc_exp: - proc_net_remove("nf_conntrack_expect"); + 
proc_net_remove(&init_net, "nf_conntrack_expect"); cleanup_proc: - proc_net_remove("nf_conntrack"); + proc_net_remove(&init_net, "nf_conntrack"); cleanup_init: #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); @@ -464,9 +465,9 @@ unregister_sysctl_table(nf_ct_sysctl_header); #endif #ifdef CONFIG_PROC_FS - remove_proc_entry("nf_conntrack", proc_net_stat); - proc_net_remove("nf_conntrack_expect"); - proc_net_remove("nf_conntrack"); + remove_proc_entry("nf_conntrack", init_net.proc_net_stat); + proc_net_remove(&init_net, "nf_conntrack_expect"); + proc_net_remove(&init_net, "nf_conntrack"); #endif /* CNFIG_PROC_FS */ nf_conntrack_cleanup(); } diff -Nurb linux-2.6.22-try2/net/netfilter/nf_log.c linux-2.6.22-try2-netns/net/netfilter/nf_log.c --- linux-2.6.22-try2/net/netfilter/nf_log.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nf_log.c 2007-12-19 22:49:20.000000000 -0500 @@ -168,7 +168,8 @@ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; - pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + pde = create_proc_entry("nf_log", S_IRUGO, + init_net.proc_net_netfilter); if (!pde) return -1; diff -Nurb linux-2.6.22-try2/net/netfilter/nf_queue.c linux-2.6.22-try2-netns/net/netfilter/nf_queue.c --- linux-2.6.22-try2/net/netfilter/nf_queue.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nf_queue.c 2007-12-19 22:49:20.000000000 -0500 @@ -346,7 +346,7 @@ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; - pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); + pde = create_proc_entry("nf_queue", S_IRUGO, init_net.proc_net_netfilter); if (!pde) return -1; pde->proc_fops = &nfqueue_file_ops; diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink.c --- linux-2.6.22-try2/net/netfilter/nfnetlink.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink.c 2007-12-19 22:49:20.000000000 -0500 @@ -264,7 +264,7 @@ { 
printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink_log.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink_log.c --- linux-2.6.22-try2/net/netfilter/nfnetlink_log.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink_log.c 2007-12-19 22:49:20.000000000 -0500 @@ -705,7 +705,8 @@ hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { UDEBUG("node = %p\n", inst); - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -1023,7 +1024,7 @@ #ifdef CONFIG_PROC_FS proc_nful = create_proc_entry("nfnetlink_log", 0440, - proc_net_netfilter); + init_net.proc_net_netfilter); if (!proc_nful) goto cleanup_subsys; proc_nful->proc_fops = &nful_file_ops; @@ -1043,7 +1044,7 @@ { nf_log_unregister(&nfulnl_logger); #ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_log", proc_net_netfilter); + remove_proc_entry("nfnetlink_log", init_net.proc_net_netfilter); #endif nfnetlink_subsys_unregister(&nfulnl_subsys); netlink_unregister_notifier(&nfulnl_rtnl_notifier); diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink_queue.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink_queue.c --- linux-2.6.22-try2/net/netfilter/nfnetlink_queue.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink_queue.c 2007-12-19 22:49:20.000000000 -0500 @@ -734,6 +734,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) nfqnl_dev_drop(dev->ifindex); @@ -762,7 +765,8 @@ struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, 
hlist) { - if (n->pid == inst->peer_pid) + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) __instance_destroy(inst); } } @@ -1106,7 +1110,7 @@ #ifdef CONFIG_PROC_FS proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); + init_net.proc_net_netfilter); if (!proc_nfqueue) goto cleanup_subsys; proc_nfqueue->proc_fops = &nfqnl_file_ops; @@ -1129,7 +1133,7 @@ nf_unregister_queue_handlers(&nfqh); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); + remove_proc_entry("nfnetlink_queue", init_net.proc_net_netfilter); #endif nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff -Nurb linux-2.6.22-try2/net/netfilter/x_tables.c linux-2.6.22-try2-netns/net/netfilter/x_tables.c --- linux-2.6.22-try2/net/netfilter/x_tables.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/x_tables.c 2007-12-19 22:49:20.000000000 -0500 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -37,11 +38,16 @@ struct mutex mutex; struct list_head match; struct list_head target; - struct list_head tables; struct mutex compat_mutex; }; -static struct xt_af *xt; + +struct xt_af_pernet { + struct list_head tables; +}; + +static struct xt_af * xt; + #ifdef DEBUG_IP_FIREWALL_USER #define duprintf(format, args...) printk(format , ## args) @@ -286,9 +292,9 @@ return 1; } if (target == 1) - have_rev = target_revfn(af, name, revision, &best); + have_rev = target_revfn( af, name, revision, &best); else - have_rev = match_revfn(af, name, revision, &best); + have_rev = match_revfn( af, name, revision, &best); mutex_unlock(&xt[af].mutex); /* Nothing at all? Return 0 to try loading module. */ @@ -533,14 +539,14 @@ EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. 
*/ -struct xt_table *xt_find_table_lock(int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) { struct xt_table *t; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); - list_for_each_entry(t, &xt[af].tables, list) + list_for_each_entry(t, &net->xtn[af].tables, list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; mutex_unlock(&xt[af].mutex); @@ -596,7 +602,7 @@ } EXPORT_SYMBOL_GPL(xt_replace_table); -int xt_register_table(struct xt_table *table, +int xt_register_table(struct net *net, struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { @@ -609,7 +615,7 @@ return ret; /* Don't autoload: we'd eat our tail... */ - list_for_each_entry(t, &xt[table->af].tables, list) { + list_for_each_entry(t, &net->xtn[table->af].tables, list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; goto unlock; @@ -628,7 +634,7 @@ /* save number of initial entries */ private->initial_entries = private->number; - list_add(&table->list, &xt[table->af].tables); + list_add(&table->list, &net->xtn[table->af].tables); ret = 0; unlock: @@ -666,7 +672,7 @@ return pos ? 
NULL : head; } -static struct list_head *type2list(u_int16_t af, u_int16_t type) +static struct list_head *type2list(struct net *net, u_int16_t af, u_int16_t type) { struct list_head *list; @@ -678,7 +684,7 @@ list = &xt[af].match; break; case TABLE: - list = &xt[af].tables; + list = &net->xtn[af].tables; break; default: list = NULL; @@ -691,6 +697,7 @@ static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos) { struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private; + struct net *net = PDE_NET(pde); u_int16_t af = (unsigned long)pde->data & 0xffff; u_int16_t type = (unsigned long)pde->data >> 16; struct list_head *list; @@ -698,7 +705,7 @@ if (af >= NPROTO) return NULL; - list = type2list(af, type); + list = type2list(net, af, type); if (!list) return NULL; @@ -711,6 +718,7 @@ static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct proc_dir_entry *pde = seq->private; + struct net *net = PDE_NET(pde); u_int16_t af = (unsigned long)pde->data & 0xffff; u_int16_t type = (unsigned long)pde->data >> 16; struct list_head *list; @@ -718,7 +726,7 @@ if (af >= NPROTO) return NULL; - list = type2list(af, type); + list = type2list(net, af, type); if (!list) return NULL; @@ -759,6 +767,7 @@ if (!ret) { struct seq_file *seq = file->private_data; struct proc_dir_entry *pde = PDE(inode); + get_net(PROC_NET(inode)); seq->private = pde; } @@ -766,12 +775,18 @@ return ret; } +static int xt_tgt_release(struct inode *inode, struct file *file) +{ + put_net(PROC_NET(inode)); + return seq_release(inode, file); +} + static const struct file_operations xt_file_ops = { .owner = THIS_MODULE, .open = xt_tgt_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = xt_tgt_release, }; #define FORMAT_TABLES "_tables_names" @@ -794,7 +809,7 @@ #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = 
proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out; proc->data = (void *) ((unsigned long) af | (TABLE << 16)); @@ -802,14 +817,14 @@ strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_tables; proc->data = (void *) ((unsigned long) af | (MATCH << 16)); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(buf, 0440, &xt_file_ops); + proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); if (!proc) goto out_remove_matches; proc->data = (void *) ((unsigned long) af | (TARGET << 16)); @@ -821,12 +836,12 @@ out_remove_matches: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out_remove_tables: strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); out: return -1; #endif @@ -840,19 +855,42 @@ strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc_net_remove(buf); + proc_net_remove(&init_net, buf); #endif /*CONFIG_PROC_FS*/ } EXPORT_SYMBOL_GPL(xt_proto_fini); +static int xt_net_init(struct net *net) +{ + int i; + + net->xtn = kmalloc(sizeof(struct xt_af_pernet) * NPROTO, GFP_KERNEL); + if (!net->xtn) + return -ENOMEM; + + for (i = 0; i < NPROTO; i++) { + INIT_LIST_HEAD(&net->xtn[i].tables); + } + return 0; +} + +static void xt_net_exit(struct net *net) +{ + kfree(net->xtn); +} + +static struct 
pernet_operations xt_net_ops = { + .init = xt_net_init, + .exit = xt_net_exit, +}; static int __init xt_init(void) { @@ -869,13 +907,13 @@ #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); - INIT_LIST_HEAD(&xt[i].tables); } - return 0; + return register_pernet_subsys(&xt_net_ops); } static void __exit xt_fini(void) { + unregister_pernet_subsys(&xt_net_ops); kfree(xt); } diff -Nurb linux-2.6.22-try2/net/netfilter/xt_hashlimit.c linux-2.6.22-try2-netns/net/netfilter/xt_hashlimit.c --- linux-2.6.22-try2/net/netfilter/xt_hashlimit.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netfilter/xt_hashlimit.c 2007-12-19 22:49:20.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -736,13 +737,13 @@ printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); + hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); if (!hashlimit_procdir4) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); goto err3; } - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); + hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); if (!hashlimit_procdir6) { printk(KERN_ERR "xt_hashlimit: unable to create proc dir " "entry\n"); @@ -750,7 +751,7 @@ } return 0; err4: - remove_proc_entry("ipt_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); err3: kmem_cache_destroy(hashlimit_cachep); err2: @@ -762,8 +763,8 @@ static void __exit xt_hashlimit_fini(void) { - remove_proc_entry("ipt_hashlimit", proc_net); - remove_proc_entry("ip6t_hashlimit", proc_net); + remove_proc_entry("ipt_hashlimit", init_net.proc_net); + remove_proc_entry("ip6t_hashlimit", init_net.proc_net); kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); } diff -Nurb linux-2.6.22-try2/net/netlink/af_netlink.c 
linux-2.6.22-try2-netns/net/netlink/af_netlink.c --- linux-2.6.22-try2/net/netlink/af_netlink.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netlink/af_netlink.c 2007-12-19 22:49:20.000000000 -0500 @@ -63,6 +63,7 @@ #include #include #include +#include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) @@ -212,7 +213,7 @@ wake_up(&nl_table_wait); } -static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) +static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) { struct nl_pid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; @@ -222,7 +223,7 @@ read_lock(&nl_table_lock); head = nl_pid_hashfn(hash, pid); sk_for_each(sk, node, head) { - if (nlk_sk(sk)->pid == pid) { + if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { sock_hold(sk); goto found; } @@ -327,7 +328,7 @@ * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 pid) { struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; @@ -340,7 +341,7 @@ head = nl_pid_hashfn(hash, pid); len = 0; sk_for_each(osk, node, head) { - if (nlk_sk(osk)->pid == pid) + if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) break; len++; } @@ -383,15 +384,15 @@ .obj_size = sizeof(struct netlink_sock), }; -static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, - int protocol) +static int __netlink_create(struct net *net, struct socket *sock, + struct mutex *cb_mutex, int protocol) { struct sock *sk; struct netlink_sock *nlk; sock->ops = &netlink_ops; - sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) return -ENOMEM; @@ -411,7 +412,7 @@ return 0; } -static int netlink_create(struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int 
protocol) { struct module *module = NULL; struct mutex *cb_mutex; @@ -440,7 +441,7 @@ cb_mutex = nl_table[protocol].cb_mutex; netlink_unlock_table(); - if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) + if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) goto out_module; nlk = nlk_sk(sock->sk); @@ -477,6 +478,7 @@ if (nlk->pid && !nlk->subscriptions) { struct netlink_notify n = { + .net = sk->sk_net, .protocol = sk->sk_protocol, .pid = nlk->pid, }; @@ -505,6 +507,7 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; @@ -518,6 +521,8 @@ netlink_table_grab(); head = nl_pid_hashfn(hash, pid); sk_for_each(osk, node, head) { + if ((osk->sk_net != net)) + continue; if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. */ pid = rover--; @@ -529,7 +534,7 @@ } netlink_table_ungrab(); - err = netlink_insert(sk, pid); + err = netlink_insert(sk, net, pid); if (err == -EADDRINUSE) goto retry; @@ -583,6 +588,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; int err; @@ -606,7 +612,7 @@ return -EINVAL; } else { err = nladdr->nl_pid ? 
- netlink_insert(sk, nladdr->nl_pid) : + netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) return err; @@ -690,10 +696,12 @@ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) { int protocol = ssk->sk_protocol; + struct net *net; struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(protocol, pid); + net = ssk->sk_net; + sock = netlink_lookup(net, protocol, pid); if (!sock) return ERR_PTR(-ECONNREFUSED); @@ -866,6 +874,7 @@ struct netlink_broadcast_data { struct sock *exclude_sk; + struct net *net; u32 pid; u32 group; int failure; @@ -888,6 +897,9 @@ !test_bit(p->group - 1, nlk->groups)) goto out; + if ((sk->sk_net != p->net)) + goto out; + if (p->failure) { netlink_overrun(sk); goto out; @@ -926,6 +938,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, u32 group, gfp_t allocation) { + struct net *net = ssk->sk_net; struct netlink_broadcast_data info; struct hlist_node *node; struct sock *sk; @@ -933,6 +946,7 @@ skb = netlink_trim(skb, allocation); info.exclude_sk = ssk; + info.net = net; info.pid = pid; info.group = group; info.failure = 0; @@ -981,6 +995,9 @@ if (sk == p->exclude_sk) goto out; + if (sk->sk_net != p->exclude_sk->sk_net) + goto out; + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1276,7 +1293,7 @@ */ struct sock * -netlink_kernel_create(int unit, unsigned int groups, +netlink_kernel_create(struct net *net, int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct mutex *cb_mutex, struct module *module) { @@ -1293,7 +1310,7 @@ if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(sock, cb_mutex, unit) < 0) + if (__netlink_create(net, sock, cb_mutex, unit) < 0) goto out_sock_release; if (groups < 32) @@ -1308,18 +1325,20 @@ if (input) nlk_sk(sk)->data_ready = input; - if (netlink_insert(sk, 0)) + if (netlink_insert(sk, net, 0)) goto 
out_sock_release; nlk = nlk_sk(sk); nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); + if (!nl_table[unit].registered) { nl_table[unit].groups = groups; nl_table[unit].listeners = listeners; nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; + } netlink_table_ungrab(); return sk; @@ -1420,7 +1439,7 @@ atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1462,7 +1481,8 @@ if (!skb) { struct sock *sk; - sk = netlink_lookup(in_skb->sk->sk_protocol, + sk = netlink_lookup(in_skb->sk->sk_net, + in_skb->sk->sk_protocol, NETLINK_CB(in_skb).pid); if (sk) { sk->sk_err = ENOBUFS; @@ -1613,6 +1633,7 @@ #ifdef CONFIG_PROC_FS struct nl_seq_iter { + struct net *net; int link; int hash_idx; }; @@ -1630,6 +1651,8 @@ for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { + if (iter->net != s->sk_net) + continue; if (off == pos) { iter->link = i; iter->hash_idx = j; @@ -1659,11 +1682,14 @@ if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); - s = sk_next(v); + iter = seq->private; + s = v; + do { + s = sk_next(s); + } while (s && (iter->net != s->sk_net)); if (s) return s; - iter = seq->private; i = iter->link; j = iter->hash_idx + 1; @@ -1672,6 +1698,8 @@ for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); + while (s && (iter->net != s->sk_net)) + s = sk_next(s); if (s) { iter->link = i; iter->hash_idx = j; @@ -1742,15 +1770,24 @@ seq = file->private_data; seq->private = iter; + iter->net = get_net(PROC_NET(inode)); return 0; } +static int netlink_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct nl_seq_iter *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct 
file_operations netlink_seq_fops = { .owner = THIS_MODULE, .open = netlink_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = netlink_seq_release, }; #endif @@ -1792,6 +1829,27 @@ .owner = THIS_MODULE, /* for consistency 8) */ }; +static int netlink_net_init(struct net *net) +{ +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) + return -ENOMEM; +#endif + return 0; +} + +static void netlink_net_exit(struct net *net) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "netlink"); +#endif +} + +static struct pernet_operations netlink_net_ops = { + .init = netlink_net_init, + .exit = netlink_net_exit, +}; + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1837,9 +1895,7 @@ } sock_register(&netlink_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("netlink", 0, &netlink_seq_fops); -#endif + register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. 
*/ rtnetlink_init(); out: diff -Nurb linux-2.6.22-try2/net/netlink/genetlink.c linux-2.6.22-try2-netns/net/netlink/genetlink.c --- linux-2.6.22-try2/net/netlink/genetlink.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netlink/genetlink.c 2007-12-19 22:49:20.000000000 -0500 @@ -557,8 +557,9 @@ goto errout_register; netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, - genl_rcv, NULL, THIS_MODULE); + genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, + GENL_MAX_ID, genl_rcv, NULL, + THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff -Nurb linux-2.6.22-try2/net/netrom/af_netrom.c linux-2.6.22-try2-netns/net/netrom/af_netrom.c --- linux-2.6.22-try2/net/netrom/af_netrom.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netrom/af_netrom.c 2007-12-19 22:49:20.000000000 -0500 @@ -41,6 +41,7 @@ #include #include #include +#include #include static int nr_ndevs = 4; @@ -105,6 +106,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -408,15 +412,18 @@ .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct nr_sock *nr; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) return -ENOMEM; nr = nr_sk(sk); @@ -458,7 +465,7 @@ if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) return NULL; nr = nr_sk(sk); @@ 
-1447,9 +1454,9 @@ nr_loopback_init(); - proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); - proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); - proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); + proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); + proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); + proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); out: return rc; fail: @@ -1477,9 +1484,9 @@ { int i; - proc_net_remove("nr"); - proc_net_remove("nr_neigh"); - proc_net_remove("nr_nodes"); + proc_net_remove(&init_net, "nr"); + proc_net_remove(&init_net, "nr_neigh"); + proc_net_remove(&init_net, "nr_nodes"); nr_loopback_clear(); nr_rt_free(); diff -Nurb linux-2.6.22-try2/net/netrom/nr_route.c linux-2.6.22-try2-netns/net/netrom/nr_route.c --- linux-2.6.22-try2/net/netrom/nr_route.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/netrom/nr_route.c 2007-12-19 22:49:20.000000000 -0500 @@ -580,7 +580,7 @@ { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -598,7 +598,7 @@ struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,7 +618,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; diff -Nurb linux-2.6.22-try2/net/packet/af_packet.c linux-2.6.22-try2-netns/net/packet/af_packet.c --- linux-2.6.22-try2/net/packet/af_packet.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/packet/af_packet.c 2007-12-19 
22:49:20.000000000 -0500 @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -135,10 +136,6 @@ packet classifier depends on it. */ -/* List of all packet sockets. */ -static HLIST_HEAD(packet_sklist); -static DEFINE_RWLOCK(packet_sklist_lock); - static atomic_t packet_socks_nr; @@ -273,6 +270,9 @@ if (skb->pkt_type == PACKET_LOOPBACK) goto out; + if (dev->nd_net != sk->sk_net) + goto out; + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto oom; @@ -344,7 +344,7 @@ */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(sk->sk_net, saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -462,6 +462,9 @@ sk = pt->af_packet_priv; po = pkt_sk(sk); + if (dev->nd_net != sk->sk_net) + goto drop; + skb->dev = dev; if (dev->hard_header) { @@ -578,6 +581,9 @@ sk = pt->af_packet_priv; po = pkt_sk(sk); + if (dev->nd_net != sk->sk_net) + goto drop; + if (dev->hard_header) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); @@ -738,7 +744,7 @@ } - dev = dev_get_by_index(ifindex); + dev = dev_get_by_index(sk->sk_net, ifindex); err = -ENXIO; if (dev == NULL) goto out_unlock; @@ -811,15 +817,17 @@ { struct sock *sk = sock->sk; struct packet_sock *po; + struct net *net; if (!sk) return 0; + net = sk->sk_net; po = pkt_sk(sk); - write_lock_bh(&packet_sklist_lock); + write_lock_bh(&net->packet_sklist_lock); sk_del_node_init(sk); - write_unlock_bh(&packet_sklist_lock); + write_unlock_bh(&net->packet_sklist_lock); /* * Unhook packet receive handler. 
@@ -933,7 +941,7 @@ return -EINVAL; strlcpy(name,uaddr->sa_data,sizeof(name)); - dev = dev_get_by_name(name); + dev = dev_get_by_name(sk->sk_net, name); if (dev) { err = packet_do_bind(sk, dev, pkt_sk(sk)->num); dev_put(dev); @@ -960,7 +968,7 @@ if (sll->sll_ifindex) { err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); + dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex); if (dev == NULL) goto out; } @@ -982,7 +990,7 @@ * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct packet_sock *po; @@ -998,7 +1006,7 @@ sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); if (sk == NULL) goto out; @@ -1034,9 +1042,9 @@ po->running = 1; } - write_lock_bh(&packet_sklist_lock); - sk_add_node(sk, &packet_sklist); - write_unlock_bh(&packet_sklist_lock); + write_lock_bh(&net->packet_sklist_lock); + sk_add_node(sk, &net->packet_sklist); + write_unlock_bh(&net->packet_sklist_lock); return(0); out: return err; @@ -1154,7 +1162,7 @@ return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); + dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex); if (dev) { strlcpy(uaddr->sa_data, dev->name, 15); dev_put(dev); @@ -1179,7 +1187,7 @@ sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); + dev = dev_get_by_index(sk->sk_net, po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -1231,7 +1239,7 @@ rtnl_lock(); err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); + dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex); if (!dev) goto done; @@ -1285,7 +1293,7 @@ if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); 
+ dev = dev_get_by_index(sk->sk_net, ml->ifindex); if (dev) { packet_dev_mc(dev, ml, -1); dev_put(dev); @@ -1313,7 +1321,7 @@ struct net_device *dev; po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { + if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) { packet_dev_mc(dev, ml, -1); dev_put(dev); } @@ -1469,9 +1477,10 @@ struct sock *sk; struct hlist_node *node; struct net_device *dev = data; + struct net *net = dev->nd_net; - read_lock(&packet_sklist_lock); - sk_for_each(sk, node, &packet_sklist) { + read_lock(&net->packet_sklist_lock); + sk_for_each(sk, node, &net->packet_sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -1510,7 +1519,7 @@ break; } } - read_unlock(&packet_sklist_lock); + read_unlock(&net->packet_sklist_lock); return NOTIFY_DONE; } @@ -1878,12 +1887,12 @@ }; #ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(loff_t off) +static inline struct sock *packet_seq_idx(struct net *net, loff_t off) { struct sock *s; struct hlist_node *node; - sk_for_each(s, node, &packet_sklist) { + sk_for_each(s, node, &net->packet_sklist) { if (!off--) return s; } @@ -1892,21 +1901,24 @@ static void *packet_seq_start(struct seq_file *seq, loff_t *pos) { - read_lock(&packet_sklist_lock); - return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; + struct net *net = seq->private; + read_lock(&net->packet_sklist_lock); + return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq->private; ++*pos; return (v == SEQ_START_TOKEN) - ? sk_head(&packet_sklist) + ? 
sk_head(&net->packet_sklist) : sk_next((struct sock*)v) ; } static void packet_seq_stop(struct seq_file *seq, void *v) { - read_unlock(&packet_sklist_lock); + struct net *net = seq->private; + read_unlock(&net->packet_sklist_lock); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -1942,7 +1954,22 @@ static int packet_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &packet_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &packet_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int packet_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq= file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations packet_seq_fops = { @@ -1950,15 +1977,37 @@ .open = packet_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = packet_seq_release, }; #endif +static int packet_net_init(struct net *net) +{ + rwlock_init(&net->packet_sklist_lock); + INIT_HLIST_HEAD(&net->packet_sklist); + + if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) + return -ENOMEM; + + return 0; +} + +static void packet_net_exit(struct net *net) +{ + proc_net_remove(net, "packet"); +} + +static struct pernet_operations packet_net_ops = { + .init = packet_net_init, + .exit = packet_net_exit, +}; + + static void __exit packet_exit(void) { - proc_net_remove("packet"); unregister_netdevice_notifier(&packet_netdev_notifier); + unregister_pernet_subsys(&packet_net_ops); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); } @@ -1971,8 +2020,8 @@ goto out; sock_register(&packet_family_ops); + register_pernet_subsys(&packet_net_ops); register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); out: return rc; } diff -Nurb linux-2.6.22-try2/net/rose/af_rose.c 
linux-2.6.22-try2-netns/net/rose/af_rose.c --- linux-2.6.22-try2/net/rose/af_rose.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/rose/af_rose.c 2007-12-19 22:49:20.000000000 -0500 @@ -45,6 +45,7 @@ #include #include #include +#include static int rose_ndevs = 10; @@ -196,6 +197,9 @@ { struct net_device *dev = (struct net_device *)ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event != NETDEV_DOWN) return NOTIFY_DONE; @@ -498,15 +502,18 @@ .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct rose_sock *rose; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol != 0) return -ESOCKTNOSUPPORT; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return -ENOMEM; rose = rose_sk(sk); @@ -544,7 +551,7 @@ if (osk->sk_type != SOCK_SEQPACKET) return NULL; - if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) return NULL; rose = rose_sk(sk); @@ -1576,10 +1583,10 @@ rose_add_loopback_neigh(); - proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); - proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); - proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); - proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); + proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); + proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); + proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); + proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); out: return rc; fail: @@ -1606,10 +1613,10 @@ { int i; - proc_net_remove("rose"); - proc_net_remove("rose_neigh"); - 
proc_net_remove("rose_nodes"); - proc_net_remove("rose_routes"); + proc_net_remove(&init_net, "rose"); + proc_net_remove(&init_net, "rose_neigh"); + proc_net_remove(&init_net, "rose_nodes"); + proc_net_remove(&init_net, "rose_routes"); rose_loopback_clear(); rose_rt_free(); diff -Nurb linux-2.6.22-try2/net/rose/rose_route.c linux-2.6.22-try2-netns/net/rose/rose_route.c --- linux-2.6.22-try2/net/rose/rose_route.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/rose/rose_route.c 2007-12-19 22:49:20.000000000 -0500 @@ -583,7 +583,7 @@ { struct net_device *dev; - if ((dev = dev_get_by_name(devname)) == NULL) + if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) @@ -601,7 +601,7 @@ struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -619,7 +619,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -636,7 +636,7 @@ struct net_device *dev; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } diff -Nurb linux-2.6.22-try2/net/rxrpc/af_rxrpc.c linux-2.6.22-try2-netns/net/rxrpc/af_rxrpc.c --- linux-2.6.22-try2/net/rxrpc/af_rxrpc.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/rxrpc/af_rxrpc.c 2007-12-19 22:49:20.000000000 -0500 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "ar-internal.h" @@ -605,13 +606,16 @@ /* * create an RxRPC socket */ -static int rxrpc_create(struct 
socket *sock, int protocol) +static int rxrpc_create(struct net *net, struct socket *sock, int protocol) { struct rxrpc_sock *rx; struct sock *sk; _enter("%p,%d", sock, protocol); + if (net != &init_net) + return -EAFNOSUPPORT; + /* we support transport protocol UDP only */ if (protocol != PF_INET) return -EPROTONOSUPPORT; @@ -622,7 +626,7 @@ sock->ops = &rxrpc_rpc_ops; sock->state = SS_UNCONNECTED; - sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); if (!sk) return -ENOMEM; @@ -829,8 +833,8 @@ } #ifdef CONFIG_PROC_FS - proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops); - proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); + proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); #endif return 0; @@ -868,8 +872,8 @@ _debug("flush scheduled work"); flush_workqueue(rxrpc_workqueue); - proc_net_remove("rxrpc_conns"); - proc_net_remove("rxrpc_calls"); + proc_net_remove(&init_net, "rxrpc_conns"); + proc_net_remove(&init_net, "rxrpc_calls"); destroy_workqueue(rxrpc_workqueue); kmem_cache_destroy(rxrpc_call_jar); _leave(""); diff -Nurb linux-2.6.22-try2/net/sched/act_api.c linux-2.6.22-try2-netns/net/sched/act_api.c --- linux-2.6.22-try2/net/sched/act_api.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sched/act_api.c 2007-12-19 22:49:20.000000000 -0500 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -675,7 +676,7 @@ return -EINVAL; } - return rtnl_unicast(skb, pid); + return rtnl_unicast(skb, &init_net, pid); } static struct tc_action * @@ -796,7 +797,7 @@ nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -859,7 +860,7 @@ /* now 
do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -903,7 +904,7 @@ nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -941,10 +942,14 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct rtattr **tca = arg; u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; + if (net != &init_net) + return -EINVAL; + if (tca[TCA_ACT_TAB-1] == NULL) { printk("tc_ctl_action: received NO action attribs\n"); return -EINVAL; @@ -1014,6 +1019,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; @@ -1023,6 +1029,9 @@ struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct rtattr *kind = find_dump_kind(cb->nlh); + if (net != &init_net) + return 0; + if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; diff -Nurb linux-2.6.22-try2/net/sched/act_mirred.c linux-2.6.22-try2-netns/net/sched/act_mirred.c --- linux-2.6.22-try2/net/sched/act_mirred.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sched/act_mirred.c 2007-12-19 22:49:20.000000000 -0500 @@ -85,7 +85,7 @@ parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); if (parm->ifindex) { - dev = __dev_get_by_index(parm->ifindex); + dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { diff -Nurb linux-2.6.22-try2/net/sched/cls_api.c linux-2.6.22-try2-netns/net/sched/cls_api.c --- linux-2.6.22-try2/net/sched/cls_api.c 2007-12-19 13:37:59.000000000 -0500 +++ 
linux-2.6.22-try2-netns/net/sched/cls_api.c 2007-12-19 22:49:20.000000000 -0500 @@ -129,6 +129,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct rtattr **tca; struct tcmsg *t; u32 protocol; @@ -145,6 +146,9 @@ unsigned long fh; int err; + if (net != &init_net) + return -EINVAL; + replay: tca = arg; t = NLMSG_DATA(n); @@ -164,7 +168,7 @@ /* Find head of filter chain. */ /* Find link */ - if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) return -ENODEV; /* Find qdisc */ @@ -365,7 +369,7 @@ return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args @@ -385,6 +389,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -395,9 +400,12 @@ struct Qdisc_class_ops *cops; struct tcf_dump_args arg; + if (net != &init_net) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff -Nurb linux-2.6.22-try2/net/sched/em_meta.c linux-2.6.22-try2-netns/net/sched/em_meta.c --- linux-2.6.22-try2/net/sched/em_meta.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sched/em_meta.c 2007-12-19 22:49:20.000000000 -0500 @@ -291,7 +291,7 @@ } else { struct net_device *dev; - dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); if (dev) dev_put(dev); diff -Nurb linux-2.6.22-try2/net/sched/sch_api.c linux-2.6.22-try2-netns/net/sched/sch_api.c --- linux-2.6.22-try2/net/sched/sch_api.c 2007-12-19 
13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sched/sch_api.c 2007-12-19 22:49:20.000000000 -0500 @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -609,6 +610,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ -617,7 +619,10 @@ struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if (net != &init_net) + return -EINVAL; + + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -670,6 +675,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm; struct rtattr **tca; struct net_device *dev; @@ -677,6 +683,9 @@ struct Qdisc *q, *p; int err; + if (net != &init_net) + return -EINVAL; + replay: /* Reinit, just in case something touches this. 
*/ tcm = NLMSG_DATA(n); @@ -684,7 +693,7 @@ clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; if (clid) { @@ -873,7 +882,7 @@ } if (skb->len) - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -882,16 +891,20 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; struct Qdisc *q; + if (net != &init_net) + return 0; + s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); idx = 0; - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -930,6 +943,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { + struct net *net = skb->sk->sk_net; struct tcmsg *tcm = NLMSG_DATA(n); struct rtattr **tca = arg; struct net_device *dev; @@ -942,7 +956,10 @@ u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if (net != &init_net) + return -EINVAL; + + if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return -ENODEV; /* @@ -1096,7 +1113,7 @@ return -EINVAL; } - return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1116,6 +1133,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int t; int s_t; struct net_device *dev; @@ -1123,9 +1141,12 @@ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); struct qdisc_dump_args arg; + if (net != &init_net) + return 0; + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = 
dev_get_by_index(tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1252,7 +1273,7 @@ { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); - proc_net_fops_create("psched", 0, &psched_fops); + proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); diff -Nurb linux-2.6.22-try2/net/sched/sch_ingress.c linux-2.6.22-try2-netns/net/sched/sch_ingress.c --- linux-2.6.22-try2/net/sched/sch_ingress.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sched/sch_ingress.c 2007-12-19 22:49:20.000000000 -0500 @@ -243,6 +243,10 @@ struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; + /* Only filter packets in the initial network namespace */ + if ((indev?indev:outdev)->nd_net != &init_net) + return NF_ACCEPT; + DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", skb->dev ? 
(*pskb)->dev->name : "(no dev)", diff -Nurb linux-2.6.22-try2/net/sctp/input.c linux-2.6.22-try2-netns/net/sctp/input.c --- linux-2.6.22-try2/net/sctp/input.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sctp/input.c 2007-12-19 22:49:20.000000000 -0500 @@ -126,6 +126,10 @@ int family; struct sctp_af *af; + if (skb->dev->nd_net != &init_net) { + kfree_skb(skb); + return 0; + } if (skb->pkt_type!=PACKET_HOST) goto discard_it; @@ -509,6 +513,9 @@ sk_buff_data_t saveip, savesctp; int err; + if (skb->dev->nd_net != &init_net) + return; + if (skb->len < ihlen + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; diff -Nurb linux-2.6.22-try2/net/sctp/ipv6.c linux-2.6.22-try2-netns/net/sctp/ipv6.c --- linux-2.6.22-try2/net/sctp/ipv6.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sctp/ipv6.c 2007-12-19 22:49:20.000000000 -0500 @@ -189,6 +189,7 @@ memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; fl.proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb @@ -230,6 +231,7 @@ struct flowi fl; memset(&fl, 0, sizeof(fl)); + fl.fl_net = &init_net; ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl.oif = daddr->v6.sin6_scope_id; @@ -619,7 +621,7 @@ struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; - newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -664,7 +666,7 @@ newinet->mc_index = 0; newinet->mc_list = NULL; - if (ipv4_config.no_pmtu_disc) + if (init_net.sysctl_ipv4_no_pmtu_disc) newinet->pmtudisc = IP_PMTUDISC_DONT; else newinet->pmtudisc = IP_PMTUDISC_WANT; @@ -841,7 +843,7 @@ if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if 
(!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { @@ -872,7 +874,7 @@ if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(addr->v6.sin6_scope_id); + dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); if (!dev) return 0; if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { diff -Nurb linux-2.6.22-try2/net/sctp/protocol.c linux-2.6.22-try2-netns/net/sctp/protocol.c --- linux-2.6.22-try2/net/sctp/protocol.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sctp/protocol.c 2007-12-19 22:49:20.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -93,7 +94,7 @@ { if (!proc_net_sctp) { struct proc_dir_entry *ent; - ent = proc_mkdir("net/sctp", NULL); + ent = proc_mkdir("sctp", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_sctp = ent; @@ -126,7 +127,7 @@ if (proc_net_sctp) { proc_net_sctp = NULL; - remove_proc_entry("net/sctp", NULL); + remove_proc_entry("sctp", init_net.proc_net); } } @@ -170,7 +171,7 @@ struct sctp_af *af; read_lock(&dev_base_lock); - for_each_netdev(dev) { + for_each_netdev(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); @@ -354,13 +355,13 @@ /* Should this be available for binding? 
*/ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { - int ret = inet_addr_type(addr->v4.sin_addr.s_addr); + int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL && !sp->inet.freebind && - !sysctl_ip_nonlocal_bind) + !init_net.sysctl_ip_nonlocal_bind) return 0; return 1; @@ -423,6 +424,7 @@ union sctp_addr dst_saddr; memset(&fl, 0x0, sizeof(struct flowi)); + fl.fl_net = &init_net; fl.fl4_dst = daddr->v4.sin_addr.s_addr; fl.proto = IPPROTO_SCTP; if (asoc) { @@ -539,7 +541,7 @@ { struct inet_sock *inet = inet_sk(sk); struct inet_sock *newinet; - struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); + struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); if (!newsk) goto out; @@ -1122,7 +1124,7 @@ } spin_lock_init(&sctp_port_alloc_lock); - sctp_port_rover = sysctl_local_port_range[0] - 1; + sctp_port_rover = init_net.sysctl_local_port_range[0] - 1; printk(KERN_INFO "SCTP: Hash tables configured " "(established %d bind %d)\n", diff -Nurb linux-2.6.22-try2/net/sctp/socket.c linux-2.6.22-try2-netns/net/sctp/socket.c --- linux-2.6.22-try2/net/sctp/socket.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sctp/socket.c 2007-12-19 22:49:20.000000000 -0500 @@ -5021,8 +5021,8 @@ * already in the hash table; if not, we use that; if * it is, we try next. 
*/ - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; + int low = sk->sk_net->sysctl_local_port_range[0]; + int high = sk->sk_net->sysctl_local_port_range[1]; int remaining = (high - low) + 1; int rover; int index; diff -Nurb linux-2.6.22-try2/net/socket.c linux-2.6.22-try2-netns/net/socket.c --- linux-2.6.22-try2/net/socket.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/socket.c 2007-12-19 22:49:20.000000000 -0500 @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -821,9 +822,9 @@ */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; @@ -833,9 +834,9 @@ EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook) (struct net *, void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; @@ -864,16 +865,20 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; + struct sock *sk; void __user *argp = (void __user *)arg; int pid, err; + struct net *net; sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #ifdef CONFIG_WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); } else #endif /* CONFIG_WIRELESS_EXT */ switch (cmd) { @@ -899,7 +904,7 @@ mutex_lock(&br_ioctl_mutex); if 
(br_ioctl_hook) - err = br_ioctl_hook(cmd, argp); + err = br_ioctl_hook(net, cmd, argp); mutex_unlock(&br_ioctl_mutex); break; case SIOCGIFVLAN: @@ -910,7 +915,7 @@ mutex_lock(&vlan_ioctl_mutex); if (vlan_ioctl_hook) - err = vlan_ioctl_hook(argp); + err = vlan_ioctl_hook(net, argp); mutex_unlock(&vlan_ioctl_mutex); break; case SIOCADDDLCI: @@ -933,7 +938,7 @@ * to the NIC driver. */ if (err == -ENOIOCTLCMD) - err = dev_ioctl(cmd, argp); + err = dev_ioctl(net, cmd, argp); break; } return err; @@ -1102,7 +1107,7 @@ return 0; } -static int __sock_create(int family, int type, int protocol, +static int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1185,7 +1190,7 @@ /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(sock, protocol); + err = pf->create(net, sock, protocol); if (err < 0) goto out_module_put; @@ -1224,12 +1229,12 @@ int sock_create(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 0); + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } int sock_create_kern(int family, int type, int protocol, struct socket **res) { - return __sock_create(family, type, protocol, res, 1); + return __sock_create(&init_net, family, type, protocol, res, 1); } asmlinkage long sys_socket(int family, int type, int protocol) @@ -1389,8 +1394,6 @@ * ready for listening. 
*/ -int sysctl_somaxconn __read_mostly = SOMAXCONN; - asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; @@ -1398,8 +1401,9 @@ sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) - backlog = sysctl_somaxconn; + struct net *net = sock->sk->sk_net; + if ((unsigned)backlog > net->sysctl_somaxconn) + backlog = net->sysctl_somaxconn; err = security_socket_listen(sock, backlog); if (!err) @@ -2189,6 +2193,16 @@ printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); } +static int sock_pernet_init(struct net *net) +{ + net->sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations sock_net_ops = { + .init = sock_pernet_init, +}; + static int __init sock_init(void) { /* @@ -2217,6 +2231,8 @@ netfilter_init(); #endif + register_pernet_subsys(&sock_net_ops); + return 0; } diff -Nurb linux-2.6.22-try2/net/sunrpc/stats.c linux-2.6.22-try2-netns/net/sunrpc/stats.c --- linux-2.6.22-try2/net/sunrpc/stats.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sunrpc/stats.c 2007-12-19 22:49:20.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_MISC @@ -265,7 +266,7 @@ dprintk("RPC: registering /proc/net/rpc\n"); if (!proc_net_rpc) { struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", proc_net); + ent = proc_mkdir("rpc", init_net.proc_net); if (ent) { ent->owner = THIS_MODULE; proc_net_rpc = ent; @@ -279,7 +280,7 @@ dprintk("RPC: unregistering /proc/net/rpc\n"); if (proc_net_rpc) { proc_net_rpc = NULL; - remove_proc_entry("net/rpc", NULL); + remove_proc_entry("rpc", init_net.proc_net); } } diff -Nurb linux-2.6.22-try2/net/sysctl_net.c linux-2.6.22-try2-netns/net/sysctl_net.c --- linux-2.6.22-try2/net/sysctl_net.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/sysctl_net.c 2007-12-19 22:49:20.000000000 -0500 @@ -54,3 +54,31 @@ #endif { 0 }, }; + +struct ctl_table multi_net_table[] = { + 
{ + .ctl_name = NET_CORE, + .procname = "core", + .mode = 0555, + .child = multi_core_table, + }, +#ifdef CONFIG_INET + { + .ctl_name = NET_IPV4, + .procname = "ipv4", + .mode = 0555, + .child = multi_ipv4_table, + }, +#endif + {}, +}; + +struct ctl_table net_root_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = multi_net_table, + }, + {}, +}; diff -Nurb linux-2.6.22-try2/net/tipc/eth_media.c linux-2.6.22-try2-netns/net/tipc/eth_media.c --- linux-2.6.22-try2/net/tipc/eth_media.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/tipc/eth_media.c 2007-12-19 22:49:20.000000000 -0500 @@ -38,6 +38,7 @@ #include #include #include +#include #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI @@ -100,6 +101,11 @@ struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; u32 size; + if (dev->nd_net != &init_net) { + kfree_skb(buf); + return 0; + } + if (likely(eb_ptr->bearer)) { if (likely(buf->pkt_type <= PACKET_BROADCAST)) { size = msg_size((struct tipc_msg *)buf->data); @@ -129,7 +135,7 @@ /* Find device with specified name */ - for_each_netdev(pdev){ + for_each_netdev(&init_net, pdev){ if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { dev = pdev; break; @@ -192,6 +198,9 @@ struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + while ((eb_ptr->dev != dev)) { if (++eb_ptr == stop) return NOTIFY_DONE; /* couldn't find device */ diff -Nurb linux-2.6.22-try2/net/tipc/socket.c linux-2.6.22-try2-netns/net/tipc/socket.c --- linux-2.6.22-try2/net/tipc/socket.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/net/tipc/socket.c 2007-12-19 22:49:20.000000000 -0500 @@ -162,13 +162,16 @@ * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol) { struct tipc_sock
*tsock; struct tipc_port *port; struct sock *sk; u32 ref; + if (net != &init_net) + return -EAFNOSUPPORT; + if (unlikely(protocol != 0)) return -EPROTONOSUPPORT; @@ -198,7 +201,7 @@ return -EPROTOTYPE; } - sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); if (!sk) { tipc_deleteport(ref); return -ENOMEM; @@ -1372,7 +1375,7 @@ } buf = skb_peek(&sock->sk->sk_receive_queue); - res = tipc_create(newsock, 0); + res = tipc_create(sock->sk->sk_net, newsock, 0); if (!res) { struct tipc_sock *new_tsock = tipc_sk(newsock->sk); struct tipc_portid id; diff -Nurb linux-2.6.22-try2/net/unix/af_unix.c linux-2.6.22-try2-netns/net/unix/af_unix.c --- linux-2.6.22-try2/net/unix/af_unix.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/unix/af_unix.c 2007-12-19 23:38:14.000000000 -0500 @@ -117,8 +117,8 @@ #include #include #include +#include -int sysctl_unix_max_dgram_qlen __read_mostly = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); @@ -245,7 +245,8 @@ spin_unlock(&unix_table_lock); } -static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, +static struct sock *__unix_find_socket_byname(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; @@ -254,7 +255,7 @@ sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); - if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) + if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT) || (s->sk_net != net)) continue; if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) @@ -265,21 +266,22 @@ return s; } -static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, +static inline struct sock *unix_find_socket_byname(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash) { struct sock *s; spin_lock(&unix_table_lock); - s = __unix_find_socket_byname(sunname, len, type, hash); + 
s = __unix_find_socket_byname(net, sunname, len, type, hash); if (s) sock_hold(s); spin_unlock(&unix_table_lock); return s; } -static struct sock *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) { struct sock *s; struct hlist_node *node; @@ -289,6 +291,9 @@ &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; + if (s->sk_net != net) + continue; + if(dentry && dentry->d_inode == i) { sock_hold(s); @@ -571,7 +576,7 @@ */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock * unix_create1(struct socket *sock) +static struct sock * unix_create1(struct net *net, struct socket *sock) { struct sock *sk = NULL; struct unix_sock *u; @@ -579,7 +584,7 @@ if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; - sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); if (!sk) goto out; @@ -590,7 +595,7 @@ &af_unix_sk_receive_queue_lock_key); sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; + sk->sk_max_ack_backlog = net->sysctl_unix_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->dentry = NULL; @@ -604,7 +609,7 @@ return sk; } -static int unix_create(struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -631,7 +636,7 @@ return -ESOCKTNOSUPPORT; } - return unix_create1(sock) ? 0 : -ENOMEM; + return unix_create1(net, sock) ? 
0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -649,6 +654,7 @@ static int unix_autobind(struct socket *sock) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); static u32 ordernum = 1; struct unix_address * addr; @@ -675,7 +681,7 @@ spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; - if (__unix_find_socket_byname(addr->name, addr->len, sock->type, + if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); /* Sanity yield. It is unusual case, but yet... */ @@ -695,7 +701,8 @@ return err; } -static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, +static struct sock *unix_find_other(struct net *net, + struct sockaddr_un *sunname, int len, int type, unsigned hash, int *error) { struct sock *u; @@ -713,7 +720,7 @@ err = -ECONNREFUSED; if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) goto put_fail; - u=unix_find_socket_byinode(nd.dentry->d_inode); + u=unix_find_socket_byinode(net, nd.dentry->d_inode); if (!u) goto put_fail; @@ -729,7 +736,7 @@ } } else { err = -ECONNREFUSED; - u=unix_find_socket_byname(sunname, len, type, hash); + u=unix_find_socket_byname(net, sunname, len, type, hash); if (u) { struct dentry *dentry; dentry = unix_sk(u)->dentry; @@ -751,6 +758,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct dentry * dentry = NULL; @@ -825,7 +833,7 @@ if (!sunaddr->sun_path[0]) { err = -EADDRINUSE; - if (__unix_find_socket_byname(sunaddr, addr_len, + if (__unix_find_socket_byname(net, sunaddr, addr_len, sk->sk_type, hash)) { unix_release_addr(addr); goto out_unlock; @@ -891,6 +899,7 @@ int alen, int flags) { struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; struct 
sock *other; unsigned hash; @@ -907,7 +916,7 @@ goto out; restart: - other=unix_find_other(sunaddr, alen, sock->type, hash, &err); + other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err); if (!other) goto out; @@ -987,6 +996,7 @@ { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; struct sock *other = NULL; @@ -1015,7 +1025,7 @@ err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(NULL); + newsk = unix_create1(sk->sk_net, NULL); if (newsk == NULL) goto out; @@ -1026,7 +1036,7 @@ restart: /* Find listening sock. */ - other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); + other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); if (!other) goto out; @@ -1305,6 +1315,7 @@ { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; + struct net *net = sk->sk_net; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; struct sock *other = NULL; @@ -1368,7 +1379,7 @@ if (sunaddr == NULL) goto out_free; - other = unix_find_other(sunaddr, namelen, sk->sk_type, + other = unix_find_other(net, sunaddr, namelen, sk->sk_type, hash, &err); if (other==NULL) goto out_free; @@ -1974,12 +1985,18 @@ #ifdef CONFIG_PROC_FS -static struct sock *unix_seq_idx(int *iter, loff_t pos) +struct unix_iter_state { + struct net *net; + int i; +}; +static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) { loff_t off = 0; struct sock *s; - for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { + for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { + if (s->sk_net != iter->net) + continue; if (off == pos) return s; ++off; @@ -1990,17 +2007,24 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) { + struct unix_iter_state *iter = seq->private; spin_lock(&unix_table_lock); 
- return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); + return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct unix_iter_state *iter = seq->private; + struct sock *sk = v; ++*pos; if (v == (void *)1) - return first_unix_socket(seq->private); - return next_unix_socket(seq->private, v); + sk = first_unix_socket(&iter->i); + else + sk = next_unix_socket(&iter->i, sk); + while (sk && (sk->sk_net != iter->net)) + sk = next_unix_socket(&iter->i, sk); + return sk; } static void unix_seq_stop(struct seq_file *seq, void *v) @@ -2064,7 +2088,7 @@ { struct seq_file *seq; int rc = -ENOMEM; - int *iter = kmalloc(sizeof(int), GFP_KERNEL); + struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) goto out; @@ -2075,7 +2099,8 @@ seq = file->private_data; seq->private = iter; - *iter = 0; + iter->net = get_net(PROC_NET(inode)); + iter->i = 0; out: return rc; out_kfree: @@ -2083,12 +2108,20 @@ goto out; } +static int unix_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct unix_iter_state *iter = seq->private; + put_net(iter->net); + return seq_release_private(inode, file); +} + static const struct file_operations unix_seq_fops = { .owner = THIS_MODULE, .open = unix_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = unix_seq_release, }; #endif @@ -2099,6 +2132,33 @@ .owner = THIS_MODULE, }; + +static int unix_net_init(struct net *net) +{ + int error = -ENOMEM; + + net->sysctl_unix_max_dgram_qlen = 10; +#ifdef CONFIG_PROC_FS + if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) + goto out; +#endif + unix_sysctl_register(net); + error = 0; +out: + return error; /* still -ENOMEM when the "unix" proc entry could not be created */ +} + +static void unix_net_exit(struct net *net) +{ + unix_sysctl_unregister(net); + proc_net_remove(net, "unix"); +} + +static struct pernet_operations unix_net_ops = { + .init = unix_net_init, + 
.exit = unix_net_exit, +}; + static int __init af_unix_init(void) { int rc = -1; @@ -2114,10 +2174,7 @@ } sock_register(&unix_family_ops); -#ifdef CONFIG_PROC_FS - proc_net_fops_create("unix", 0, &unix_seq_fops); -#endif - unix_sysctl_register(); + register_pernet_subsys(&unix_net_ops); out: return rc; } @@ -2125,9 +2182,8 @@ static void __exit af_unix_exit(void) { sock_unregister(PF_UNIX); - unix_sysctl_unregister(); - proc_net_remove("unix"); proto_unregister(&unix_proto); + unregister_pernet_subsys(&unix_net_ops); } module_init(af_unix_init); diff -Nurb linux-2.6.22-try2/net/unix/sysctl_net_unix.c linux-2.6.22-try2-netns/net/unix/sysctl_net_unix.c --- linux-2.6.22-try2/net/unix/sysctl_net_unix.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/unix/sysctl_net_unix.c 2007-12-19 22:49:20.000000000 -0500 @@ -14,47 +14,71 @@ #include -static ctl_table unix_table[] = { +static struct unix_sysctl_table { + struct ctl_table_header *sysctl_header; + struct ctl_table unix_table[2]; + struct ctl_table unix_net_table[2]; + struct ctl_table unix_root_table[2]; +} unix_sysctl = { + .unix_table = { { .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", - .data = &sysctl_unix_max_dgram_qlen, + .data = &init_net.sysctl_unix_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec }, - { .ctl_name = 0 } -}; - -static ctl_table unix_net_table[] = { + {} + }, + .unix_net_table = { { .ctl_name = NET_UNIX, .procname = "unix", .mode = 0555, - .child = unix_table + .child = unix_sysctl.unix_table }, - { .ctl_name = 0 } -}; - -static ctl_table unix_root_table[] = { + {} + }, + .unix_root_table = { { .ctl_name = CTL_NET, .procname = "net", .mode = 0555, - .child = unix_net_table + .child = unix_sysctl.unix_net_table }, - { .ctl_name = 0 } + {} + } }; -static struct ctl_table_header * unix_sysctl_header; - -void unix_sysctl_register(void) +void unix_sysctl_register(struct net *net) { - unix_sysctl_header = 
register_sysctl_table(unix_root_table); + struct unix_sysctl_table *table; + int i; + + table = kmemdup(&unix_sysctl, sizeof(*table), GFP_KERNEL); + if (!table) + return; + for (i = 0; i < ARRAY_SIZE(table->unix_table) - 1; i++) + table->unix_table[i].data += (char *)net - (char *)&init_net; + + table->unix_net_table[0].child = table->unix_table; + table->unix_root_table[0].child = table->unix_net_table; + + table->sysctl_header = + register_net_sysctl_table(net, table->unix_root_table); + if (!table->sysctl_header) { + kfree(table); + return; + } + net->unix_sysctl = table; } -void unix_sysctl_unregister(void) +void unix_sysctl_unregister(struct net *net) { - unregister_sysctl_table(unix_sysctl_header); + struct unix_sysctl_table *table = net->unix_sysctl; + if (table) + unregister_net_sysctl_table(table->sysctl_header); + kfree(table); } diff -Nurb linux-2.6.22-try2/net/wanrouter/wanproc.c linux-2.6.22-try2-netns/net/wanrouter/wanproc.c --- linux-2.6.22-try2/net/wanrouter/wanproc.c 2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/wanrouter/wanproc.c 2007-12-19 22:49:20.000000000 -0500 @@ -28,6 +28,7 @@ #include /* WAN router API definitions */ #include #include +#include #include @@ -287,7 +288,7 @@ int __init wanrouter_proc_init(void) { struct proc_dir_entry *p; - proc_router = proc_mkdir(ROUTER_NAME, proc_net); + proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); if (!proc_router) goto fail; @@ -303,7 +304,7 @@ fail_stat: remove_proc_entry("config", proc_router); fail_config: - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); fail: return -ENOMEM; } @@ -316,7 +317,7 @@ { remove_proc_entry("config", proc_router); remove_proc_entry("status", proc_router); - remove_proc_entry(ROUTER_NAME, proc_net); + remove_proc_entry(ROUTER_NAME, init_net.proc_net); } /* diff -Nurb linux-2.6.22-try2/net/wireless/wext.c linux-2.6.22-try2-netns/net/wireless/wext.c --- linux-2.6.22-try2/net/wireless/wext.c 
2007-12-19 13:37:59.000000000 -0500 +++ linux-2.6.22-try2-netns/net/wireless/wext.c 2007-12-19 22:49:20.000000000 -0500 @@ -95,6 +95,7 @@ #include #include /* Pretty obvious */ +#include #include /* New driver API */ #include #include @@ -672,7 +673,22 @@ static int wireless_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &wireless_seq_ops); + struct seq_file *seq; + int res; + res = seq_open(file, &wireless_seq_ops); + if (!res) { + seq = file->private_data; + seq->private = get_net(PROC_NET(inode)); + } + return res; +} + +static int wireless_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct net *net = seq->private; + put_net(net); + return seq_release(inode, file); } static const struct file_operations wireless_seq_fops = { @@ -680,17 +696,22 @@ .open = wireless_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = wireless_seq_release, }; -int __init wext_proc_init(void) +int wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; return 0; } + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} #endif /* CONFIG_PROC_FS */ /************************** IOCTL SUPPORT **************************/ @@ -1010,7 +1031,7 @@ * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... 
*/ -static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) +static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) { struct net_device *dev; iw_handler handler; @@ -1019,7 +1040,7 @@ * The copy_to/from_user() of ifr is also dealt with in there */ /* Make sure the device exist */ - if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) return -ENODEV; /* A bunch of special cases, then the generic case... @@ -1053,7 +1074,7 @@ } /* entry point from dev ioctl */ -int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, +int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { int ret; @@ -1065,9 +1086,9 @@ && !capable(CAP_NET_ADMIN)) return -EPERM; - dev_load(ifr->ifr_name); + dev_load(net, ifr->ifr_name); rtnl_lock(); - ret = wireless_process_ioctl(ifr, cmd); + ret = wireless_process_ioctl(net, ifr, cmd); rtnl_unlock(); if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) return -EFAULT; @@ -1111,8 +1132,13 @@ { struct sk_buff *skb; - while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + while ((skb = skb_dequeue(&wireless_nlevent_queue))) { + struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; + skb->dev = NULL; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + dev_put(dev); + } } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); @@ -1173,6 +1199,9 @@ kfree_skb(skb); return; } + /* Remember the device until we are in process context */ + dev_hold(dev); + skb->dev = dev; NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); diff -Nurb linux-2.6.22-try2/net/x25/af_x25.c linux-2.6.22-try2-netns/net/x25/af_x25.c --- linux-2.6.22-try2/net/x25/af_x25.c 2007-12-19 13:38:00.000000000 -0500 +++ 
linux-2.6.22-try2-netns/net/x25/af_x25.c 2007-12-19 22:49:20.000000000 -0500 @@ -191,6 +191,9 @@ struct net_device *dev = ptr; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (dev->type == ARPHRD_X25 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) || dev->type == ARPHRD_ETHER @@ -466,10 +469,10 @@ .obj_size = sizeof(struct x25_sock), }; -static struct sock *x25_alloc_socket(void) +static struct sock *x25_alloc_socket(struct net *net) { struct x25_sock *x25; - struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); + struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); if (!sk) goto out; @@ -485,17 +488,20 @@ return sk; } -static int x25_create(struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; + if (net != &init_net) + return -EAFNOSUPPORT; + if (sock->type != SOCK_SEQPACKET || protocol) goto out; rc = -ENOMEM; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(net)) == NULL) goto out; x25 = x25_sk(sk); @@ -546,7 +552,7 @@ if (osk->sk_type != SOCK_SEQPACKET) goto out; - if ((sk = x25_alloc_socket()) == NULL) + if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) goto out; x25 = x25_sk(sk); diff -Nurb linux-2.6.22-try2/net/x25/x25_dev.c linux-2.6.22-try2-netns/net/x25/x25_dev.c --- linux-2.6.22-try2/net/x25/x25_dev.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/x25/x25_dev.c 2007-12-19 22:49:20.000000000 -0500 @@ -95,6 +95,9 @@ struct sk_buff *nskb; struct x25_neigh *nb; + if (dev->nd_net != &init_net) + goto drop; + nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; diff -Nurb linux-2.6.22-try2/net/x25/x25_proc.c linux-2.6.22-try2-netns/net/x25/x25_proc.c --- linux-2.6.22-try2/net/x25/x25_proc.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/x25/x25_proc.c 2007-12-19 22:49:20.000000000 -0500 @@ -20,6 +20,7 @@ 
#include #include #include +#include #include #include @@ -301,7 +302,7 @@ struct proc_dir_entry *p; int rc = -ENOMEM; - x25_proc_dir = proc_mkdir("x25", proc_net); + x25_proc_dir = proc_mkdir("x25", init_net.proc_net); if (!x25_proc_dir) goto out; @@ -328,7 +329,7 @@ out_socket: remove_proc_entry("route", x25_proc_dir); out_route: - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); goto out; } @@ -337,7 +338,7 @@ remove_proc_entry("forward", x25_proc_dir); remove_proc_entry("route", x25_proc_dir); remove_proc_entry("socket", x25_proc_dir); - remove_proc_entry("x25", proc_net); + remove_proc_entry("x25", init_net.proc_net); } #else /* CONFIG_PROC_FS */ diff -Nurb linux-2.6.22-try2/net/x25/x25_route.c linux-2.6.22-try2-netns/net/x25/x25_route.c --- linux-2.6.22-try2/net/x25/x25_route.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/x25/x25_route.c 2007-12-19 22:49:20.000000000 -0500 @@ -129,7 +129,7 @@ */ struct net_device *x25_dev_get(char *devname) { - struct net_device *dev = dev_get_by_name(devname); + struct net_device *dev = dev_get_by_name(&init_net, devname); if (dev && (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_policy.c linux-2.6.22-try2-netns/net/xfrm/xfrm_policy.c --- linux-2.6.22-try2/net/xfrm/xfrm_policy.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/xfrm/xfrm_policy.c 2007-12-19 22:49:20.000000000 -0500 @@ -30,8 +30,6 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -1570,7 +1568,7 @@ if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && init_net.sysctl_xfrm_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. 
*/ @@ -1954,8 +1952,8 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { - dst->dev = &loopback_dev; - dev_hold(&loopback_dev); + dst->dev = &init_net.loopback_dev; + dev_hold(dst->dev); dev_put(dev); } } @@ -2357,6 +2355,11 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct net_device *dev = ptr; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + switch (event) { case NETDEV_DOWN: xfrm_flush_bundles(); diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_state.c linux-2.6.22-try2-netns/net/xfrm/xfrm_state.c --- linux-2.6.22-try2/net/xfrm/xfrm_state.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/xfrm/xfrm_state.c 2007-12-19 22:49:20.000000000 -0500 @@ -28,14 +28,6 @@ struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
(input,ctl) @@ -665,8 +657,8 @@ h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; + x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); xfrm_state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); @@ -815,9 +807,9 @@ x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); @@ -1775,6 +1767,19 @@ EXPORT_SYMBOL(xfrm_init_state); + +static int xfrm_state_pernet_init(struct net *net) +{ + net->sysctl_xfrm_aevent_etime = XFRM_AE_ETIME; + net->sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->sysctl_xfrm_acq_expires = 30; + return 0; +} + +static struct pernet_operations xfrm_state_net_ops = { + .init = xfrm_state_pernet_init, +}; + void __init xfrm_state_init(void) { unsigned int sz; @@ -1789,5 +1794,7 @@ xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + + register_pernet_subsys(&xfrm_state_net_ops); } diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_user.c linux-2.6.22-try2-netns/net/xfrm/xfrm_user.c --- linux-2.6.22-try2/net/xfrm/xfrm_user.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/net/xfrm/xfrm_user.c 2007-12-19 22:49:20.000000000 -0500 @@ -374,7 +374,8 @@ return err; } -static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, +static struct xfrm_state *xfrm_state_construct(struct net 
*net, + struct xfrm_usersa_info *p, struct rtattr **xfrma, int *errp) { @@ -410,9 +411,9 @@ goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->sysctl_xfrm_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; @@ -436,6 +437,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtattr **xfrma) { + struct net *net = skb->sk->sk_net; struct xfrm_usersa_info *p = NLMSG_DATA(nlh); struct xfrm_state *x; int err; @@ -445,7 +447,7 @@ if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(net, p, xfrma, &err); if (!x) return err; @@ -2559,7 +2561,7 @@ printk(KERN_INFO "Initializing XFRM netlink socket\n"); - nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; diff -Nurb linux-2.6.22-try2/security/selinux/hooks.c linux-2.6.22-try2-netns/security/selinux/hooks.c --- linux-2.6.22-try2/security/selinux/hooks.c 2007-12-19 15:29:23.000000000 -0500 +++ linux-2.6.22-try2-netns/security/selinux/hooks.c 2007-12-19 22:49:20.000000000 -0500 @@ -3231,8 +3231,8 @@ /* Range of port numbers used to automatically bind. Need to determine whether we should perform a name_bind permission check between the socket and the port number. 
*/ -#define ip_local_port_range_0 sysctl_local_port_range[0] -#define ip_local_port_range_1 sysctl_local_port_range[1] +#define ip_local_port_range_0 (sk->sk_net->sysctl_local_port_range[0]) +#define ip_local_port_range_1 (sk->sk_net->sysctl_local_port_range[1]) static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { @@ -3976,6 +3976,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET); } @@ -3987,6 +3991,10 @@ const struct net_device *out, int (*okfn)(struct sk_buff *)) { + /* Only filter packets in the initial network namespace */ + if ((in?in:out)->nd_net != &init_net) + return NF_ACCEPT; + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6); } diff -Nurb linux-2.6.22-try2/security/selinux/netif.c linux-2.6.22-try2-netns/security/selinux/netif.c --- linux-2.6.22-try2/security/selinux/netif.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/security/selinux/netif.c 2007-12-19 22:49:20.000000000 -0500 @@ -20,6 +20,7 @@ #include #include #include +#include #include "security.h" #include "objsec.h" @@ -234,6 +235,9 @@ { struct net_device *dev = ptr; + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + if (event == NETDEV_DOWN) sel_netif_kill(dev); diff -Nurb linux-2.6.22-try2/security/selinux/netlink.c linux-2.6.22-try2-netns/security/selinux/netlink.c --- linux-2.6.22-try2/security/selinux/netlink.c 2007-12-19 13:38:00.000000000 -0500 +++ linux-2.6.22-try2-netns/security/selinux/netlink.c 2007-12-19 22:49:20.000000000 -0500 @@ -17,6 +17,7 @@ #include #include #include +#include static struct sock *selnl; @@ -104,8 +105,8 @@ static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL, - THIS_MODULE); + selnl = 
netlink_kernel_create(&init_net, NETLINK_SELINUX, + SELNLGRP_MAX, NULL, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);