diff -Nurb linux-2.6.22-594/include/linux/vserver/network.h.orig.orig linux-2.6.22-595/include/linux/vserver/network.h.orig.orig --- linux-2.6.22-594/include/linux/vserver/network.h.orig.orig 2008-03-20 00:04:54.000000000 -0400 +++ linux-2.6.22-595/include/linux/vserver/network.h.orig.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,143 +0,0 @@ -#ifndef _VX_NETWORK_H -#define _VX_NETWORK_H - -#include - - -#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ - - -/* network flags */ - -#define NXF_INFO_PRIVATE 0x00000008 - -#define NXF_SINGLE_IP 0x00000100 -#define NXF_LBACK_REMAP 0x00000200 - -#define NXF_HIDE_NETIF 0x02000000 -#define NXF_HIDE_LBACK 0x04000000 - -#define NXF_STATE_SETUP (1ULL << 32) -#define NXF_STATE_ADMIN (1ULL << 34) - -#define NXF_SC_HELPER (1ULL << 36) -#define NXF_PERSISTENT (1ULL << 38) - -#define NXF_ONE_TIME (0x0005ULL << 32) - - -#define NXF_INIT_SET (__nxf_init_set()) - -static inline uint64_t __nxf_init_set(void) { - return NXF_STATE_ADMIN -#ifdef CONFIG_VSERVER_AUTO_LBACK - | NXF_LBACK_REMAP - | NXF_HIDE_LBACK -#endif -#ifdef CONFIG_VSERVER_AUTO_SINGLE - | NXF_SINGLE_IP -#endif - | NXF_HIDE_NETIF; -} - - -/* network caps */ - -#define NXC_RAW_ICMP 0x00000100 - - -/* address types */ - -#define NXA_TYPE_IPV4 0x0001 -#define NXA_TYPE_IPV6 0x0002 - -#define NXA_TYPE_NONE 0x0000 -#define NXA_TYPE_ANY 0x00FF - -#define NXA_TYPE_ADDR 0x0010 -#define NXA_TYPE_MASK 0x0020 -#define NXA_TYPE_RANGE 0x0040 - -#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) - -#define NXA_MOD_BCAST 0x0100 -#define NXA_MOD_LBACK 0x0200 - -#define NXA_LOOPBACK 0x1000 - -#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) -#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) - -#ifdef __KERNEL__ - -#include -#include -#include -#include -#include -#include - -struct nx_addr_v4 { - struct nx_addr_v4 *next; - struct in_addr ip[2]; - struct in_addr mask; - uint16_t type; - uint16_t flags; -}; - -struct nx_addr_v6 { - struct nx_addr_v6 *next; - struct in6_addr ip; - struct in6_addr mask; - uint32_t prefix; - uint16_t type; - uint16_t flags; -}; - -struct nx_info { - struct hlist_node nx_hlist; /* linked list of nxinfos */ - nid_t nx_id; /* vnet id */ - atomic_t nx_usecnt; /* usage count */ - atomic_t nx_tasks; /* tasks count */ - int nx_state; /* context state */ - - uint64_t nx_flags; /* network flag word */ - uint64_t nx_ncaps; /* network capabilities */ - - struct in_addr v4_lback; /* Loopback address */ - struct in_addr v4_bcast; /* Broadcast address */ - struct nx_addr_v4 v4; /* First/Single ipv4 address */ -#ifdef CONFIG_IPV6 - struct nx_addr_v6 v6; /* First/Single ipv6 address */ -#endif - char nx_name[65]; /* network context name */ -}; - - -/* status flags */ - -#define NXS_HASHED 0x0001 -#define NXS_SHUTDOWN 0x0100 -#define NXS_RELEASED 0x8000 - -extern struct nx_info *lookup_nx_info(int); - -extern int get_nid_list(int, unsigned int *, int); -extern int nid_is_hashed(nid_t); - -extern int nx_migrate_task(struct task_struct *, struct nx_info *); - -extern long vs_net_change(struct nx_info *, unsigned int); - -struct sock; - - -#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) -#ifdef CONFIG_IPV6 -#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) -#else -#define NX_IPV6(n) (0) -#endif - -#endif /* __KERNEL__ */ -#endif /* _VX_NETWORK_H */ diff -Nurb linux-2.6.22-594/kernel/nsproxy.c.orig linux-2.6.22-595/kernel/nsproxy.c.orig --- linux-2.6.22-594/kernel/nsproxy.c.orig 2008-03-20 00:05:18.000000000 -0400 +++ linux-2.6.22-595/kernel/nsproxy.c.orig 
1969-12-31 19:00:00.000000000 -0500 @@ -1,264 +0,0 @@ -/* - * Copyright (C) 2006 IBM Corporation - * - * Author: Serge Hallyn - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - * - * Jun 2006 - namespaces support - * OpenVZ, SWsoft Inc. - * Pavel Emelianov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct kmem_cache *nsproxy_cachep; - -struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); - -void get_task_namespaces(struct task_struct *tsk) -{ - struct nsproxy *ns = tsk->nsproxy; - if (ns) { - get_nsproxy(ns); - } -} - -/* - * creates a copy of "orig" with refcount 1. - */ -static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) -{ - struct nsproxy *ns; - - ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); - if (ns) - atomic_set(&ns->count, 1); - vxdprintk(VXD_CBIT(space, 2), "clone_nsproxy(%p[%u] = %p[1]", - orig, atomic_read(&orig->count), ns); - atomic_inc(&vs_global_nsproxy); - return ns; -} - -/* - * Create new nsproxy and all of its the associated namespaces. - * Return the newly created nsproxy. Do not attach this to the task, - * leave it to the caller to do proper locking and attach it to task. - */ -static struct nsproxy *unshare_namespaces(int flags, struct nsproxy *orig, - struct fs_struct *new_fs) -{ - struct nsproxy *new_nsp; - int err = -ENOMEM; - - vxdprintk(VXD_CBIT(space, 4), - "unshare_namespaces(0x%08x,%p,%p)", - flags, orig, new_fs); - - new_nsp = clone_nsproxy(orig); - if (!new_nsp) - return ERR_PTR(-ENOMEM); - - new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs); - if (IS_ERR(new_nsp->mnt_ns)) - goto out_ns; - - new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns); - if (IS_ERR(new_nsp->uts_ns)) - goto out_uts; - - new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns); - if (IS_ERR(new_nsp->ipc_ns)) - goto out_ipc; - - new_nsp->pid_ns = copy_pid_ns(flags, orig->pid_ns); - if (IS_ERR(new_nsp->pid_ns)) - goto out_pid; - - new_nsp->user_ns = copy_user_ns(flags, orig->user_ns); - if (IS_ERR(new_nsp->user_ns)) - goto out_user; - - new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); - if (IS_ERR(new_nsp->net_ns)) - goto out_net; - - return new_nsp; - -out_net: - if (new_nsp->user_ns) - put_user_ns(new_nsp->user_ns); - if (new_nsp->net_ns) - put_net(new_nsp->net_ns); -out_user: - if (new_nsp->pid_ns) - put_pid_ns(new_nsp->pid_ns); -out_pid: - if (new_nsp->ipc_ns) - put_ipc_ns(new_nsp->ipc_ns); -out_ipc: - if (new_nsp->uts_ns) - put_uts_ns(new_nsp->uts_ns); -out_uts: - if (new_nsp->mnt_ns) - put_mnt_ns(new_nsp->mnt_ns); -out_ns: - kmem_cache_free(nsproxy_cachep, new_nsp); - return ERR_PTR(err); -} - -static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk, - struct fs_struct *new_fs) -{ - return unshare_namespaces(flags, tsk->nsproxy, new_fs); -} - -/* - * copies the nsproxy, setting refcount to 1, and grabbing a - * reference to all contained namespaces. - */ -struct nsproxy *copy_nsproxy(struct nsproxy *orig) -{ - struct nsproxy *ns = clone_nsproxy(orig); - - if (ns) { - if (ns->mnt_ns) - get_mnt_ns(ns->mnt_ns); - if (ns->uts_ns) - get_uts_ns(ns->uts_ns); - if (ns->ipc_ns) - get_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - get_pid_ns(ns->pid_ns); - } - return ns; -} - -/* - * called from clone. This now handles copy for nsproxy and all - * namespaces therein. 
- */ -int copy_namespaces(unsigned long flags, struct task_struct *tsk) -{ - struct nsproxy *old_ns = tsk->nsproxy; - struct nsproxy *new_ns = NULL; - int err = 0; - - vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08x,%p[%p])", - flags, tsk, old_ns); - - if (!old_ns) - return 0; - - get_nsproxy(old_ns); - return 0; - - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) - return 0; - - #ifndef CONFIG_NET_NS - if (unshare_flags & CLONE_NEWNET) - return -EINVAL; - #endif - - - if (!capable(CAP_SYS_ADMIN)) { - err = -EPERM; - goto out; - } - - new_ns = create_new_namespaces(flags, tsk, tsk->fs); - if (IS_ERR(new_ns)) { - err = PTR_ERR(new_ns); - goto out; - } - - err = ns_container_clone(tsk); - if (err) { - put_nsproxy(new_ns); - goto out; - } - - tsk->nsproxy = new_ns; - -out: - put_nsproxy(old_ns); - vxdprintk(VXD_CBIT(space, 3), - "copy_namespaces(0x%08x,%p[%p]) = %d [%p]", - flags, tsk, old_ns, err, new_ns); - return err; -} - -void free_nsproxy(struct nsproxy *ns) -{ - if (ns->mnt_ns) - put_mnt_ns(ns->mnt_ns); - if (ns->uts_ns) - put_uts_ns(ns->uts_ns); - if (ns->ipc_ns) - put_ipc_ns(ns->ipc_ns); - if (ns->pid_ns) - put_pid_ns(ns->pid_ns); - atomic_dec(&vs_global_nsproxy); - kfree(ns); -} - -/* - * Called from unshare. Unshare all the namespaces part of nsproxy. - * On success, returns the new nsproxy. - */ -int unshare_nsproxy_namespaces(unsigned long unshare_flags, - struct nsproxy **new_nsp, struct fs_struct *new_fs) -{ - int err = 0; - - vxdprintk(VXD_CBIT(space, 4), - "unshare_nsproxy_namespaces(0x%08lx,[%p])", - unshare_flags, current->nsproxy); - - if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWNET))) - return 0; - -#ifndef CONFIG_NET_NS - if (unshare_flags & CLONE_NEWNET) - return -EINVAL; -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - *new_nsp = create_new_namespaces(unshare_flags, current, - new_fs ? new_fs : current->fs); - if (IS_ERR(*new_nsp)) { - err = PTR_ERR(*new_nsp); - goto out; - } - - err = ns_container_clone(current); - if (err) - put_nsproxy(*new_nsp); - -out: - return err; -} - -static int __init nsproxy_cache_init(void) -{ - nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), - 0, SLAB_PANIC, NULL, NULL); - return 0; -} - -module_init(nsproxy_cache_init); diff -Nurb linux-2.6.22-594/kernel/user.c.orig linux-2.6.22-595/kernel/user.c.orig --- linux-2.6.22-594/kernel/user.c.orig 2008-03-20 00:05:18.000000000 -0400 +++ linux-2.6.22-595/kernel/user.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,227 +0,0 @@ -/* - * The "user cache". - * - * (C) Copyright 1991-2000 Linus Torvalds - * - * We have a per-user structure to keep track of how many - * processes, files etc the user has claimed, in order to be - * able to have per-user limits for system resources. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * UID task count cache, to get fast user lookup in "alloc_uid" - * when changing user ID's (ie setuid() and friends). - */ - -#define UIDHASH_MASK (UIDHASH_SZ - 1) -#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) -#define uidhashentry(ns, xid, uid) ((ns)->uidhash_table + __uidhashfn(xid, uid)) - -static struct kmem_cache *uid_cachep; -static struct list_head uidhash_table[UIDHASH_SZ]; - -/* - * The uidhash_lock is mostly taken from process context, but it is - * occasionally also taken from softirq/tasklet context, when - * task-structs get RCU-freed. 
Hence all locking must be softirq-safe. - * But free_uid() is also called with local interrupts disabled, and running - * local_bh_enable() with local interrupts disabled is an error - we'll run - * softirq callbacks, and they can unconditionally enable interrupts, and - * the caller of free_uid() didn't expect that.. - */ -static DEFINE_SPINLOCK(uidhash_lock); - -struct user_struct root_user = { - .__count = ATOMIC_INIT(1), - .processes = ATOMIC_INIT(1), - .files = ATOMIC_INIT(0), - .sigpending = ATOMIC_INIT(0), - .mq_bytes = 0, - .locked_shm = 0, -#ifdef CONFIG_KEYS - .uid_keyring = &root_user_keyring, - .session_keyring = &root_session_keyring, -#endif -}; - -/* - * These routines must be called with the uidhash spinlock held! - */ -static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) -{ - list_add(&up->uidhash_list, hashent); -} - -static inline void uid_hash_remove(struct user_struct *up) -{ - list_del(&up->uidhash_list); -} - -static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) -{ - struct list_head *up; - - list_for_each(up, hashent) { - struct user_struct *user; - - user = list_entry(up, struct user_struct, uidhash_list); - - if(user->uid == uid && user->xid == xid) { - atomic_inc(&user->__count); - return user; - } - } - - return NULL; -} - -/* - * Locate the user_struct for the passed UID. If found, take a ref on it. The - * caller must undo that ref with free_uid(). - * - * If the user_struct could not be found, return NULL. - */ -struct user_struct *find_user(xid_t xid, uid_t uid) -{ - struct user_struct *ret; - unsigned long flags; - struct user_namespace *ns = current->nsproxy->user_ns; - - spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(xid, uid, uidhashentry(ns, xid, uid)); - spin_unlock_irqrestore(&uidhash_lock, flags); - return ret; -} - -void free_uid(struct user_struct *up) -{ - unsigned long flags; - - if (!up) - return; - - local_irq_save(flags); - if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { - uid_hash_remove(up); - spin_unlock_irqrestore(&uidhash_lock, flags); - key_put(up->uid_keyring); - key_put(up->session_keyring); - kmem_cache_free(uid_cachep, up); - } else { - local_irq_restore(flags); - } -} - -struct user_struct * alloc_uid(xid_t xid, uid_t uid) -{ - struct user_namespace *ns = current->nsproxy->user_ns; - struct list_head *hashent = uidhashentry(ns,xid, uid); - struct user_struct *up; - - spin_lock_irq(&uidhash_lock); - up = uid_hash_find(xid, uid, hashent); - spin_unlock_irq(&uidhash_lock); - - if (!up) { - struct user_struct *new; - - new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); - if (!new) - return NULL; - new->uid = uid; - new->xid = xid; - atomic_set(&new->__count, 1); - atomic_set(&new->processes, 0); - atomic_set(&new->files, 0); - atomic_set(&new->sigpending, 0); -#ifdef CONFIG_INOTIFY_USER - atomic_set(&new->inotify_watches, 0); - atomic_set(&new->inotify_devs, 0); -#endif - - new->mq_bytes = 0; - new->locked_shm = 0; - - if (alloc_uid_keyring(new, current) < 0) { - kmem_cache_free(uid_cachep, new); - return NULL; - } - - /* - * Before adding this, check whether we raced - * on adding the same user already.. 
- */ - spin_lock_irq(&uidhash_lock); - up = uid_hash_find(xid, uid, hashent); - if (up) { - key_put(new->uid_keyring); - key_put(new->session_keyring); - kmem_cache_free(uid_cachep, new); - } else { - uid_hash_insert(new, hashent); - up = new; - } - spin_unlock_irq(&uidhash_lock); - - } - return up; -} - -void switch_uid(struct user_struct *new_user) -{ - struct user_struct *old_user; - - /* What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM - */ - old_user = current->user; - atomic_inc(&new_user->processes); - atomic_dec(&old_user->processes); - switch_uid_keyring(new_user); - current->user = new_user; - - /* - * We need to synchronize with __sigqueue_alloc() - * doing a get_uid(p->user).. If that saw the old - * user value, we need to wait until it has exited - * its critical region before we can free the old - * structure. - */ - smp_mb(); - spin_unlock_wait(¤t->sighand->siglock); - - free_uid(old_user); - suid_keys(current); -} - - -static int __init uid_cache_init(void) -{ - int n; - - uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - - for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(init_user_ns.uidhash_table + n); - - /* Insert the root user immediately (init already runs as root) */ - spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0, 0)); - spin_unlock_irq(&uidhash_lock); - - return 0; -} - -module_init(uid_cache_init); diff -Nurb linux-2.6.22-594/kernel/vserver/context.c linux-2.6.22-595/kernel/vserver/context.c --- linux-2.6.22-594/kernel/vserver/context.c 2008-03-20 00:04:46.000000000 -0400 +++ linux-2.6.22-595/kernel/vserver/context.c 2008-03-20 00:13:22.000000000 -0400 @@ -589,13 +589,13 @@ struct nsproxy *old_nsp, *new_nsp; ret = unshare_nsproxy_namespaces( - CLONE_NEWUTS | CLONE_NEWIPC, + CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET, &new_nsp, NULL); if (ret) goto out; old_nsp = xchg(&p->nsproxy, new_nsp); - vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC); + vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET); put_nsproxy(old_nsp); } } @@ -781,7 +781,7 @@ if (vs_state_change(new_vxi, VSC_STARTUP)) goto out; - ret = vx_migrate_task(current, new_vxi, (!data)); + ret = vx_migrate_task(current, new_vxi, 1 /*(!data) Hack no. 
1 - Sapan*/); if (ret) goto out; diff -Nurb linux-2.6.22-594/kernel/vserver/context.c.orig linux-2.6.22-595/kernel/vserver/context.c.orig --- linux-2.6.22-594/kernel/vserver/context.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-595/kernel/vserver/context.c.orig 2008-03-20 00:04:46.000000000 -0400 @@ -0,0 +1,966 @@ +/* + * linux/kernel/vserver/context.c + * + * Virtual Server: Context Support + * + * Copyright (C) 2003-2007 Herbert Pötzl + * + * V0.01 context helper + * V0.02 vx_ctx_kill syscall command + * V0.03 replaced context_info calls + * V0.04 redesign of struct (de)alloc + * V0.05 rlimit basic implementation + * V0.06 task_xid and info commands + * V0.07 context flags and caps + * V0.08 switch to RCU based hash + * V0.09 revert to non RCU for now + * V0.10 and back to working RCU hash + * V0.11 and back to locking again + * V0.12 referenced context store + * V0.13 separate per cpu data + * V0.14 changed vcmds to vxi arg + * V0.15 added context stat + * V0.16 have __create claim() the vxi + * V0.17 removed older and legacy stuff + * + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cvirt_init.h" +#include "cacct_init.h" +#include "limit_init.h" +#include "sched_init.h" + + +atomic_t vx_global_ctotal = ATOMIC_INIT(0); +atomic_t vx_global_cactive = ATOMIC_INIT(0); + + +/* now inactive context structures */ + +static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; + +static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED; + + +/* __alloc_vx_info() + + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ + +static struct vx_info *__alloc_vx_info(xid_t xid) +{ + struct vx_info *new = NULL; + int cpu; + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); + + /* would this benefit from a slab cache? 
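+	 * (a dedicated kmem_cache created once at init time would be the
+	 * usual alternative to the kmalloc + memset pair below)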
*/ + new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); + if (!new) + return 0; + + memset(new, 0, sizeof(struct vx_info)); +#ifdef CONFIG_SMP + new->ptr_pc = alloc_percpu(struct _vx_info_pc); + if (!new->ptr_pc) + goto error; +#endif + new->vx_id = xid; + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_usecnt, 0); + atomic_set(&new->vx_tasks, 0); + new->vx_parent = NULL; + new->vx_state = 0; + init_waitqueue_head(&new->vx_wait); + + /* prepare reaper */ + get_task_struct(init_pid_ns.child_reaper); + new->vx_reaper = init_pid_ns.child_reaper; + new->vx_badness_bias = 0; + + /* rest of init goes here */ + vx_info_init_limit(&new->limit); + vx_info_init_sched(&new->sched); + vx_info_init_cvirt(&new->cvirt); + vx_info_init_cacct(&new->cacct); + + /* per cpu data structures */ + for_each_possible_cpu(cpu) { + vx_info_init_sched_pc( + &vx_per_cpu(new, sched_pc, cpu), cpu); + vx_info_init_cvirt_pc( + &vx_per_cpu(new, cvirt_pc, cpu), cpu); + } + + new->vx_flags = VXF_INIT_SET; + new->vx_bcaps = CAP_INIT_EFF_SET; + new->vx_ccaps = 0; + new->vx_cap_bset = cap_bset; + + new->reboot_cmd = 0; + new->exit_code = 0; + + new->vx_nsproxy = copy_nsproxy(current->nsproxy); + + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + vxh_alloc_vx_info(new); + atomic_inc(&vx_global_ctotal); + return new; +#ifdef CONFIG_SMP +error: + kfree(new); + return 0; +#endif +} + +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) +{ + int cpu; + + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + vxh_dealloc_vx_info(vxi); + + vxi->vx_id = -1; + + vx_info_exit_limit(&vxi->limit); + vx_info_exit_sched(&vxi->sched); + vx_info_exit_cvirt(&vxi->cvirt); + vx_info_exit_cacct(&vxi->cacct); + + for_each_possible_cpu(cpu) { + vx_info_exit_sched_pc( + &vx_per_cpu(vxi, sched_pc, cpu), cpu); + vx_info_exit_cvirt_pc( + &vx_per_cpu(vxi, cvirt_pc, cpu), cpu); + } + + vxi->vx_state |= VXS_RELEASED; + +#ifdef CONFIG_SMP + free_percpu(vxi->ptr_pc); +#endif + kfree(vxi); + atomic_dec(&vx_global_ctotal); +} + +static void __shutdown_vx_info(struct vx_info *vxi) +{ + struct nsproxy *nsproxy; + struct fs_struct *fs; + + might_sleep(); + + vxi->vx_state |= VXS_SHUTDOWN; + vs_state_change(vxi, VSC_SHUTDOWN); + + nsproxy = xchg(&vxi->vx_nsproxy, NULL); + fs = xchg(&vxi->vx_fs, NULL); + + if (nsproxy) + put_nsproxy(nsproxy); + if (fs) + put_fs_struct(fs); +} + +/* exported stuff */ + +void free_vx_info(struct vx_info *vxi) +{ + unsigned long flags; + + /* check for reference counts first */ + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_tasks)); + + /* context must not be hashed */ + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + /* context shutdown is mandatory */ + BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); + + BUG_ON(vxi->vx_nsproxy); + BUG_ON(vxi->vx_fs); + + spin_lock_irqsave(&vx_info_inactive_lock, flags); + hlist_del(&vxi->vx_hlist); + spin_unlock_irqrestore(&vx_info_inactive_lock, flags); + + __dealloc_vx_info(vxi); +} + + +/* hash table for vx_info hash */ + +#define VX_HASH_SIZE 13 + +static struct hlist_head vx_info_hash[VX_HASH_SIZE] = + { [0 ... 
VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); +} + + + +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) +{ + struct hlist_head *head; + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_hash_vx_info(vxi); + + /* context must not be hashed */ + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + + vxi->vx_state |= VXS_HASHED; + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head(&vxi->vx_hlist, head); + atomic_inc(&vx_global_cactive); +} + +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_vx_info(struct vx_info *vxi) +{ + unsigned long flags; + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id, + atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks)); + vxh_unhash_vx_info(vxi); + + /* context must be hashed */ + BUG_ON(!vx_info_state(vxi, VXS_HASHED)); + /* but without tasks */ + BUG_ON(atomic_read(&vxi->vx_tasks)); + + vxi->vx_state &= ~VXS_HASHED; + hlist_del_init(&vxi->vx_hlist); + spin_lock_irqsave(&vx_info_inactive_lock, flags); + hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); + spin_unlock_irqrestore(&vx_info_inactive_lock, flags); + atomic_dec(&vx_global_cactive); +} + + +/* __lookup_vx_info() + + * requires the hash_lock to be held + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + struct vx_info *vxi; + + vxd_assert_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + + if (vxi->vx_id == xid) + goto found; + } + vxi = NULL; +found: + vxdprintk(VXD_CBIT(xid, 0), + "__lookup_vx_info(#%u): %p[#%u]", + xid, vxi, vxi ? vxi->vx_id : 0); + vxh_lookup_vx_info(vxi, xid); + return vxi; +} + + +/* __create_vx_info() + + * create the requested context + * get(), claim() and hash it */ + +static struct vx_info *__create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); + + if (!(new = __alloc_vx_info(id))) + return ERR_PTR(-ENOMEM); + + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); + + /* static context requested */ + if ((vxi = __lookup_vx_info(id))) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (already there)", id, vxi); + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + vxi = ERR_PTR(-EBUSY); + else + vxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* new context */ + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (new)", id, new); + claim_vx_info(new, NULL); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; + +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_create_vx_info(IS_ERR(vxi) ? 
NULL : vxi, id); + if (new) + __dealloc_vx_info(new); + return vxi; +} + + +/* exported stuff */ + + +void unhash_vx_info(struct vx_info *vxi) +{ + __shutdown_vx_info(vxi); + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); + __wakeup_vx_info(vxi); +} + + +/* lookup_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *lookup_vx_info(int id) +{ + struct vx_info *vxi = NULL; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else if (id > 1) { + spin_lock(&vx_info_hash_lock); + vxi = get_vx_info(__lookup_vx_info(id)); + spin_unlock(&vx_info_hash_lock); + } + return vxi; +} + +/* xid_is_hashed() + + * verify that xid is still hashed */ + +int xid_is_hashed(xid_t xid) +{ + int hashed; + + spin_lock(&vx_info_hash_lock); + hashed = (__lookup_vx_info(xid) != NULL); + spin_unlock(&vx_info_hash_lock); + return hashed; +} + +#ifdef CONFIG_PROC_FS + +/* get_xid_list() + + * get a subset of hashed xids for proc + * assumes size is at least one */ + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + /* only show current and children */ + if (!vx_check(0, VS_ADMIN | VS_WATCH)) { + if (index > 0) + return 0; + xids[nr_xids] = vx_current_xid(); + return 1; + } + + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + spin_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) { + spin_unlock(&vx_info_hash_lock); + goto out; + } + } + /* keep the lock time short */ + spin_unlock(&vx_info_hash_lock); + } +out: + return nr_xids; +} +#endif + +#ifdef CONFIG_VSERVER_DEBUG + +void dump_vx_info_inactive(int level) +{ + struct hlist_node *entry, *next; + + hlist_for_each_safe(entry, next, &vx_info_inactive) { + struct vx_info *vxi = + list_entry(entry, struct vx_info, vx_hlist); + + dump_vx_info(vxi, level); + } +} + +#endif + +int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) +{ + struct user_struct *new_user, *old_user; + + if (!p || !vxi) + BUG(); + + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) + return -EACCES; + + new_user = alloc_uid(vxi->vx_id, p->uid); + if (!new_user) + return -ENOMEM; + + old_user = p->user; + if (new_user != old_user) { + atomic_inc(&new_user->processes); + atomic_dec(&old_user->processes); + p->user = new_user; + } + free_uid(old_user); + return 0; +} + +void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p) +{ + p->cap_effective &= vxi->vx_cap_bset; + p->cap_inheritable &= vxi->vx_cap_bset; + p->cap_permitted &= vxi->vx_cap_bset; +} + + +#include + +static int vx_openfd_task(struct task_struct *tsk) +{ + struct files_struct *files = tsk->files; + struct fdtable *fdt; + const unsigned long *bptr; + int count, total; + + /* no rcu_read_lock() because of spin_lock() */ + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + bptr = fdt->open_fds->fds_bits; + count = fdt->max_fds / (sizeof(unsigned long) * 8); + for (total = 0; count > 0; count--) { + if (*bptr) + total += hweight_long(*bptr); + bptr++; + } + spin_unlock(&files->file_lock); + return total; +} + + +/* for *space compatibility */ + +asmlinkage long sys_unshare(unsigned long); + +/* + * migrate task to new context + * gets vxi, puts old_vxi on change + * optionally unshares namespaces (hack) + */ + +int 
vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare) +{ + struct vx_info *old_vxi; + int ret = 0; + + if (!p || !vxi) + BUG(); + + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) && + !vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + return -EACCES; + + if (vx_info_state(vxi, VXS_SHUTDOWN)) + return -EFAULT; + + old_vxi = task_get_vx_info(p); + if (old_vxi == vxi) + goto out; + + if (!(ret = vx_migrate_user(p, vxi))) { + int openfd; + + task_lock(p); + openfd = vx_openfd_task(p); + + if (old_vxi) { + atomic_dec(&old_vxi->cvirt.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_running); + __rlim_dec(&old_vxi->limit, RLIMIT_NPROC); + /* FIXME: what about the struct files here? */ + __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd); + /* account for the executable */ + __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY); + } + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.nr_running); + __rlim_inc(&vxi->limit, RLIMIT_NPROC); + /* FIXME: what about the struct files here? */ + __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd); + /* account for the executable */ + __rlim_inc(&vxi->limit, VLIMIT_DENTRY); + + if (old_vxi) { + release_vx_info(old_vxi, p); + clr_vx_info(&p->vx_info); + } + claim_vx_info(vxi, p); + set_vx_info(&p->vx_info, vxi); + p->xid = vxi->vx_id; + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + + vx_mask_cap_bset(vxi, p); + task_unlock(p); + + /* hack for *spaces to provide compatibility */ + if (unshare) { + struct nsproxy *old_nsp, *new_nsp; + + ret = unshare_nsproxy_namespaces( + CLONE_NEWUTS | CLONE_NEWIPC, + &new_nsp, NULL); + if (ret) + goto out; + + old_nsp = xchg(&p->nsproxy, new_nsp); + vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC); + put_nsproxy(old_nsp); + } + } +out: + put_vx_info(old_vxi); + return ret; +} + +int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) +{ + struct task_struct *old_reaper; + + if (!vxi) + return -EINVAL; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_reaper(%p[#%d],%p[#%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid); + + old_reaper = vxi->vx_reaper; + if (old_reaper == p) + return 0; + + /* set new child reaper */ + get_task_struct(p); + vxi->vx_reaper = p; + put_task_struct(old_reaper); + return 0; +} + +int vx_set_init(struct vx_info *vxi, struct task_struct *p) +{ + if (!vxi) + return -EINVAL; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->vx_flags &= ~VXF_STATE_INIT; + vxi->vx_initpid = p->tgid; + return 0; +} + +void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); + + vxi->exit_code = code; + vxi->vx_initpid = 0; +} + + +void vx_set_persistent(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id); + + get_vx_info(vxi); + claim_vx_info(vxi, NULL); +} + +void vx_clear_persistent(struct vx_info *vxi) +{ + vxdprintk(VXD_CBIT(xid, 6), + "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id); + + release_vx_info(vxi, NULL); + put_vx_info(vxi); +} + +void vx_update_persistent(struct vx_info *vxi) +{ + if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) + vx_set_persistent(vxi); + else + vx_clear_persistent(vxi); +} + + +/* task must be current or locked */ + +void exit_vx_info(struct task_struct *p, int code) +{ + 
struct vx_info *vxi = p->vx_info; + + if (vxi) { + atomic_dec(&vxi->cvirt.nr_threads); + vx_nproc_dec(p); + + vxi->exit_code = code; + release_vx_info(vxi, p); + } +} + +void exit_vx_info_early(struct task_struct *p, int code) +{ + struct vx_info *vxi = p->vx_info; + + if (vxi) { + if (vxi->vx_initpid == p->tgid) + vx_exit_init(vxi, p, code); + if (vxi->vx_reaper == p) + vx_set_reaper(vxi, init_pid_ns.child_reaper); + } +} + + +/* vserver syscall commands below here */ + +/* taks xid and vx_info functions */ + +#include + + +int vc_task_xid(uint32_t id) +{ + xid_t xid; + + if (id) { + struct task_struct *tsk; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + xid = (tsk) ? tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } else + xid = vx_current_xid(); + return xid; +} + + +int vc_vx_info(struct vx_info *vxi, void __user *data) +{ + struct vcmd_vx_info_v0 vc_data; + + vc_data.xid = vxi->vx_id; + vc_data.initpid = vxi->vx_initpid; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +int vc_ctx_stat(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_stat_v0 vc_data; + + vc_data.usecnt = atomic_read(&vxi->vx_usecnt); + vc_data.tasks = atomic_read(&vxi->vx_tasks); + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + + +/* context functions */ + +int vc_ctx_create(uint32_t xid, void __user *data) +{ + struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; + struct vx_info *new_vxi; + int ret; + + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + if ((xid > MAX_S_CONTEXT) || (xid < 2)) + return -EINVAL; + + new_vxi = __create_vx_info(xid); + if (IS_ERR(new_vxi)) + return PTR_ERR(new_vxi); + + /* initial flags */ + new_vxi->vx_flags = vc_data.flagword; + + ret = -ENOEXEC; + if (vs_state_change(new_vxi, VSC_STARTUP)) + goto out; + + ret = vx_migrate_task(current, new_vxi, (!data)); + if (ret) + goto out; + + /* return context id on success */ + ret = new_vxi->vx_id; + + /* get a reference for persistent contexts */ + if ((vc_data.flagword & VXF_PERSISTENT)) + vx_set_persistent(new_vxi); +out: + release_vx_info(new_vxi, NULL); + put_vx_info(new_vxi); + return ret; +} + + +int vc_ctx_migrate(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; + int ret; + + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = vx_migrate_task(current, vxi, 0); + if (ret) + return ret; + if (vc_data.flagword & VXM_SET_INIT) + ret = vx_set_init(vxi, current); + if (ret) + return ret; + if (vc_data.flagword & VXM_SET_REAPER) + ret = vx_set_reaper(vxi, current); + return ret; +} + + +int vc_get_cflags(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_flags_v0 vc_data; + + vc_data.flagword = vxi->vx_flags; + + /* special STATE flag handling */ + vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME); + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_cflags(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_flags_v0 vc_data; + uint64_t mask, trigger; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + /* special STATE flag handling */ + mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); + trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); + + if (vxi == current->vx_info) { + if (trigger & VXF_STATE_SETUP) + vx_mask_cap_bset(vxi, current); + if (trigger & 
VXF_STATE_INIT) { + int ret; + + ret = vx_set_init(vxi, current); + if (ret) + return ret; + ret = vx_set_reaper(vxi, current); + if (ret) + return ret; + } + } + + vxi->vx_flags = vs_mask_flags(vxi->vx_flags, + vc_data.flagword, mask); + if (trigger & VXF_PERSISTENT) + vx_update_persistent(vxi); + + return 0; +} + +static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps) +{ + if (bcaps) + *bcaps = vxi->vx_bcaps; + if (ccaps) + *ccaps = vxi->vx_ccaps; + + return 0; +} + +int vc_get_ccaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_caps_v1 vc_data; + int ret; + + ret = do_get_caps(vxi, NULL, &vc_data.ccaps); + if (ret) + return ret; + vc_data.cmask = ~0ULL; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +static int do_set_caps(struct vx_info *vxi, + uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask) +{ + vxi->vx_bcaps = vs_mask_flags(vxi->vx_bcaps, bcaps, bmask); + vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask); + + return 0; +} + +int vc_set_ccaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_ctx_caps_v1 vc_data; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); +} + +int vc_get_bcaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_bcaps vc_data; + int ret; + + ret = do_get_caps(vxi, &vc_data.bcaps, NULL); + if (ret) + return ret; + vc_data.bmask = ~0ULL; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_bcaps(struct vx_info *vxi, void __user *data) +{ + struct vcmd_bcaps vc_data; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); +} + + +int vc_get_badness(struct vx_info *vxi, void __user *data) +{ + struct vcmd_badness_v0 vc_data; + + vc_data.bias = vxi->vx_badness_bias; + + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + return -EFAULT; + return 0; +} + +int vc_set_badness(struct vx_info *vxi, void __user *data) +{ + struct vcmd_badness_v0 vc_data; + + if (copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + vxi->vx_badness_bias = vc_data.bias; + return 0; +} + +#include + +EXPORT_SYMBOL_GPL(free_vx_info); + diff -Nurb linux-2.6.22-594/kernel/vserver/space.c linux-2.6.22-595/kernel/vserver/space.c --- linux-2.6.22-594/kernel/vserver/space.c 2008-03-20 00:05:21.000000000 -0400 +++ linux-2.6.22-595/kernel/vserver/space.c 2008-03-20 00:08:28.000000000 -0400 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,7 @@ struct mnt_namespace *old_ns; struct uts_namespace *old_uts; struct ipc_namespace *old_ipc; + struct net *old_net; struct nsproxy *nsproxy; nsproxy = copy_nsproxy(old_nsproxy); @@ -85,12 +87,26 @@ } else old_ipc = NULL; + if (mask & CLONE_NEWNET) { + old_net = nsproxy->net_ns; + nsproxy->net_ns = new_nsproxy->net_ns; + if (nsproxy->net_ns) { + get_net(nsproxy->net_ns); + printk(KERN_ALERT "Cloning network namespace\n"); + } + } else + old_net = NULL; + + if (old_ns) put_mnt_ns(old_ns); if (old_uts) put_uts_ns(old_uts); if (old_ipc) put_ipc_ns(old_ipc); + if (old_net) + put_net(old_net); + out: return nsproxy; } @@ -251,6 +267,7 @@ int vc_enter_space(struct vx_info *vxi, void __user *data) { + /* Ask dhozac how to pass this flag from user space - Sapan*/ struct vcmd_space_mask vc_data = { .mask = 0 }; if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) diff -Nurb 
linux-2.6.22-594/kernel/vserver/space.c.orig linux-2.6.22-595/kernel/vserver/space.c.orig --- linux-2.6.22-594/kernel/vserver/space.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-595/kernel/vserver/space.c.orig 2008-03-20 00:05:28.000000000 -0400 @@ -0,0 +1,295 @@ +/* + * linux/kernel/vserver/space.c + * + * Virtual Server: Context Space Support + * + * Copyright (C) 2003-2007 Herbert Pötzl + * + * V0.01 broken out from context.c 0.07 + * V0.02 added task locking for namespace + * V0.03 broken out vx_enter_namespace + * V0.04 added *space support and commands + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + + +atomic_t vs_global_nsproxy = ATOMIC_INIT(0); +atomic_t vs_global_fs = ATOMIC_INIT(0); +atomic_t vs_global_mnt_ns = ATOMIC_INIT(0); +atomic_t vs_global_uts_ns = ATOMIC_INIT(0); +atomic_t vs_global_ipc_ns = ATOMIC_INIT(0); + + +/* namespace functions */ + +#include + +const struct vcmd_space_mask space_mask = { + .mask = CLONE_NEWNS | + CLONE_NEWUTS | + CLONE_NEWIPC | + CLONE_FS | + CLONE_NEWNET +}; + + +/* + * build a new nsproxy mix + * assumes that both proxies are 'const' + * does not touch nsproxy refcounts + * will hold a reference on the result. + */ + +struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, + struct nsproxy *new_nsproxy, unsigned long mask) +{ + struct mnt_namespace *old_ns; + struct uts_namespace *old_uts; + struct ipc_namespace *old_ipc; + struct net *old_net; + struct nsproxy *nsproxy; + + nsproxy = copy_nsproxy(old_nsproxy); + if (!nsproxy) + goto out; + + if (mask & CLONE_NEWNS) { + old_ns = nsproxy->mnt_ns; + nsproxy->mnt_ns = new_nsproxy->mnt_ns; + if (nsproxy->mnt_ns) + get_mnt_ns(nsproxy->mnt_ns); + } else + old_ns = NULL; + + if (mask & CLONE_NEWUTS) { + old_uts = nsproxy->uts_ns; + nsproxy->uts_ns = new_nsproxy->uts_ns; + if (nsproxy->uts_ns) + get_uts_ns(nsproxy->uts_ns); + } else + old_uts = NULL; + + if (mask & CLONE_NEWIPC) { + old_ipc = nsproxy->ipc_ns; + nsproxy->ipc_ns = new_nsproxy->ipc_ns; + if (nsproxy->ipc_ns) + get_ipc_ns(nsproxy->ipc_ns); + } else + old_ipc = NULL; + + if (mask & CLONE_NEWNET) { + old_net = nsproxy->net_ns; + nsproxy->net_ns = new_nsproxy->net_ns; + if (nsproxy->net_ns) { + get_net(nsproxy->net_ns); + printk(KERN_ALERT "Cloning network namespace\n"); + } + } else + old_net = NULL; + + + if (old_ns) + put_mnt_ns(old_ns); + if (old_uts) + put_uts_ns(old_uts); + if (old_ipc) + put_ipc_ns(old_ipc); + if (old_net) + put_net(old_net); + +out: + return nsproxy; +} + + +/* + * merge two nsproxy structs into a new one. + * will hold a reference on the result. + */ + +static inline +struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old, + struct nsproxy *proxy, unsigned long mask) +{ + struct nsproxy null_proxy = { .mnt_ns = NULL }; + + if (!proxy) + return NULL; + + if (mask) { + /* vs_mix_nsproxy returns with reference */ + return vs_mix_nsproxy(old ? old : &null_proxy, + proxy, mask); + } + get_nsproxy(proxy); + return proxy; +} + +/* + * merge two fs structs into a new one. + * will take a reference on the result. 
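+ * (with CLONE_FS in the mask the given fs_struct is duplicated via
+ * copy_fs_struct(); without it the caller's existing fs_struct is
+ * reused and its reference count bumped)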
+ */ + +static inline +struct fs_struct *__vs_merge_fs(struct fs_struct *old, + struct fs_struct *fs, unsigned long mask) +{ + if (!(mask & CLONE_FS)) { + if (old) + atomic_inc(&old->count); + return old; + } + + if (!fs) + return NULL; + + return copy_fs_struct(fs); +} + + +int vx_enter_space(struct vx_info *vxi, unsigned long mask) +{ + struct nsproxy *proxy, *proxy_cur, *proxy_new; + struct fs_struct *fs, *fs_cur, *fs_new; + int ret; + + if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) + return -EACCES; + + if (!mask) + mask = vxi->vx_nsmask; + + if ((mask & vxi->vx_nsmask) != mask) + return -EINVAL; + + proxy = vxi->vx_nsproxy; + fs = vxi->vx_fs; + + task_lock(current); + fs_cur = current->fs; + atomic_inc(&fs_cur->count); + proxy_cur = current->nsproxy; + get_nsproxy(proxy_cur); + task_unlock(current); + + fs_new = __vs_merge_fs(fs_cur, fs, mask); + if (IS_ERR(fs_new)) { + ret = PTR_ERR(fs_new); + goto out_put; + } + + proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask); + if (IS_ERR(proxy_new)) { + ret = PTR_ERR(proxy_new); + goto out_put_fs; + } + + fs_new = xchg(¤t->fs, fs_new); + proxy_new = xchg(¤t->nsproxy, proxy_new); + ret = 0; + + if (proxy_new) + put_nsproxy(proxy_new); +out_put_fs: + if (fs_new) + put_fs_struct(fs_new); +out_put: + if (proxy_cur) + put_nsproxy(proxy_cur); + if (fs_cur) + put_fs_struct(fs_cur); + return ret; +} + + +int vx_set_space(struct vx_info *vxi, unsigned long mask) +{ + struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new; + struct fs_struct *fs_vxi, *fs_cur, *fs_new; + int ret; + + if (!mask) + mask = space_mask.mask; + + if ((mask & space_mask.mask) != mask) + return -EINVAL; + + proxy_vxi = vxi->vx_nsproxy; + fs_vxi = vxi->vx_fs; + + task_lock(current); + fs_cur = current->fs; + atomic_inc(&fs_cur->count); + proxy_cur = current->nsproxy; + get_nsproxy(proxy_cur); + task_unlock(current); + + fs_new = __vs_merge_fs(fs_vxi, fs_cur, mask); + if (IS_ERR(fs_new)) { + ret = PTR_ERR(fs_new); + goto out_put; + } + + proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask); + if (IS_ERR(proxy_new)) { + ret = PTR_ERR(proxy_new); + goto out_put_fs; + } + + fs_new = xchg(&vxi->vx_fs, fs_new); + proxy_new = xchg(&vxi->vx_nsproxy, proxy_new); + vxi->vx_nsmask |= mask; + ret = 0; + + if (proxy_new) + put_nsproxy(proxy_new); +out_put_fs: + if (fs_new) + put_fs_struct(fs_new); +out_put: + if (proxy_cur) + put_nsproxy(proxy_cur); + if (fs_cur) + put_fs_struct(fs_cur); + return ret; +} + + +int vc_enter_space(struct vx_info *vxi, void __user *data) +{ + /* Ask dhozac how to pass this flag from user space - Sapan*/ + struct vcmd_space_mask vc_data = { .mask = CLONE_NEWNET }; + + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return vx_enter_space(vxi, vc_data.mask); +} + +int vc_set_space(struct vx_info *vxi, void __user *data) +{ + struct vcmd_space_mask vc_data = { .mask = 0 }; + + if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + return vx_set_space(vxi, vc_data.mask); +} + +int vc_get_space_mask(struct vx_info *vxi, void __user *data) +{ + if (copy_to_user(data, &space_mask, sizeof(space_mask))) + return -EFAULT; + return 0; +} + diff -Nurb linux-2.6.22-594/net/core/net_namespace.c linux-2.6.22-595/net/core/net_namespace.c --- linux-2.6.22-594/net/core/net_namespace.c 2008-03-20 00:05:18.000000000 -0400 +++ linux-2.6.22-595/net/core/net_namespace.c 2008-03-20 00:14:56.000000000 -0400 @@ -112,10 +112,12 @@ ops = list_entry(ptr, struct pernet_operations, list); if (ops->init) { error = 
ops->init(net);
-			if (error < 0)
+			if (error < 0) {
+				printk(KERN_ALERT "Error setting up netns: %p\n", ops->init);
 				goto out_undo;
 			}
 		}
+	}
 out:
 	return error;
 out_undo:
diff -Nurb linux-2.6.22-594/net/core/net_namespace.c.orig linux-2.6.22-595/net/core/net_namespace.c.orig
--- linux-2.6.22-594/net/core/net_namespace.c.orig	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-595/net/core/net_namespace.c.orig	2008-03-20 00:05:18.000000000 -0400
@@ -0,0 +1,332 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Our network namespace constructor/destructor lists
+ */
+
+static LIST_HEAD(pernet_list);
+static struct list_head *first_device = &pernet_list;
+static DEFINE_MUTEX(net_mutex);
+
+static DEFINE_MUTEX(net_list_mutex);
+LIST_HEAD(net_namespace_list);
+
+static struct kmem_cache *net_cachep;
+
+struct net init_net;
+EXPORT_SYMBOL_GPL(init_net);
+
+void net_lock(void)
+{
+	mutex_lock(&net_list_mutex);
+}
+
+void net_unlock(void)
+{
+	mutex_unlock(&net_list_mutex);
+}
+
+static struct net *net_alloc(void)
+{
+	return kmem_cache_alloc(net_cachep, GFP_KERNEL);
+}
+
+static void net_free(struct net *net)
+{
+	if (!net)
+		return;
+
+	if (unlikely(atomic_read(&net->use_count) != 0)) {
+		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+			atomic_read(&net->use_count));
+		return;
+	}
+
+	kmem_cache_free(net_cachep, net);
+}
+
+static void cleanup_net(struct work_struct *work)
+{
+	struct pernet_operations *ops;
+	struct list_head *ptr;
+	struct net *net;
+
+	net = container_of(work, struct net, work);
+
+	mutex_lock(&net_mutex);
+
+	/* Don't let anyone else find us. */
+	net_lock();
+	list_del(&net->list);
+	net_unlock();
+
+	/* Run all of the network namespace exit methods */
+	list_for_each_prev(ptr, &pernet_list) {
+		ops = list_entry(ptr, struct pernet_operations, list);
+		if (ops->exit)
+			ops->exit(net);
+	}
+
+	mutex_unlock(&net_mutex);
+
+	/* Ensure there are no outstanding rcu callbacks using this
+	 * network namespace.
+	 */
+	rcu_barrier();
+
+	/* Finally it is safe to free my network namespace structure */
+	net_free(net);
+}
+
+
+void __put_net(struct net *net)
+{
+	/* Cleanup the network namespace in process context */
+	INIT_WORK(&net->work, cleanup_net);
+	schedule_work(&net->work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+/*
+ * setup_net runs the initializers for the network namespace object.
+ */
+static int setup_net(struct net *net)
+{
+	/* Must be called with net_mutex held */
+	struct pernet_operations *ops;
+	struct list_head *ptr;
+	int error;
+
+	memset(net, 0, sizeof(struct net));
+	atomic_set(&net->count, 1);
+	atomic_set(&net->use_count, 0);
+
+	error = 0;
+	list_for_each(ptr, &pernet_list) {
+		ops = list_entry(ptr, struct pernet_operations, list);
+		if (ops->init) {
+			error = ops->init(net);
+			if (error < 0)
+				goto out_undo;
+		}
+	}
+out:
+	return error;
+out_undo:
+	/* Walk through the list backwards calling the exit functions
+	 * for the pernet modules whose init functions did not fail.
+	 */
+	for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) {
+		ops = list_entry(ptr, struct pernet_operations, list);
+		if (ops->exit)
+			ops->exit(net);
+	}
+	goto out;
+}
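+
+/*
+ * Usage sketch (hypothetical names): a subsystem hands setup_net and
+ * cleanup_net an init/exit pair through struct pernet_operations:
+ *
+ *	static int my_net_init(struct net *net)
+ *	{
+ *		return 0;
+ *	}
+ *
+ *	static void my_net_exit(struct net *net)
+ *	{
+ *	}
+ *
+ *	static struct pernet_operations my_net_ops = {
+ *		.init = my_net_init,
+ *		.exit = my_net_exit,
+ *	};
+ *
+ *	register_pernet_subsys(&my_net_ops);
+ *
+ * setup_net then invokes my_net_init for each new namespace, and
+ * cleanup_net invokes my_net_exit in reverse registration order.
+ */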
+
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+	struct net *new_net = NULL;
+	int err;
+
+	get_net(old_net);
+
+	if (!(flags & CLONE_NEWNET))
+		return old_net;
+
+	err = -EPERM;
+	if (!capable(CAP_SYS_ADMIN))
+		goto out;
+
+	err = -ENOMEM;
+	new_net = net_alloc();
+	if (!new_net)
+		goto out;
+
+	mutex_lock(&net_mutex);
+	err = setup_net(new_net);
+	if (err)
+		goto out_unlock;
+
+	net_lock();
+	list_add_tail(&new_net->list, &net_namespace_list);
+	net_unlock();
+
+
+out_unlock:
+	mutex_unlock(&net_mutex);
+out:
+	put_net(old_net);
+	if (err) {
+		net_free(new_net);
+		new_net = ERR_PTR(err);
+	}
+	return new_net;
+}
+
+static int __init net_ns_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
+	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
+					SMP_CACHE_BYTES,
+					SLAB_PANIC, NULL, NULL);
+	mutex_lock(&net_mutex);
+	err = setup_net(&init_net);
+
+	net_lock();
+	list_add_tail(&init_net.list, &net_namespace_list);
+	net_unlock();
+
+	mutex_unlock(&net_mutex);
+	if (err)
+		panic("Could not setup the initial network namespace");
+
+	return 0;
+}
+
+pure_initcall(net_ns_init);
+
+static int register_pernet_operations(struct list_head *list,
+				      struct pernet_operations *ops)
+{
+	struct net *net, *undo_net;
+	int error;
+
+	error = 0;
+	list_add_tail(&ops->list, list);
+	for_each_net(net) {
+		if (ops->init) {
+			error = ops->init(net);
+			if (error)
+				goto out_undo;
+		}
+	}
+out:
+	return error;
+
+out_undo:
+	/* If I have an error cleanup all namespaces I initialized */
+	list_del(&ops->list);
+	for_each_net(undo_net) {
+		if (undo_net == net)
+			goto undone;
+		if (ops->exit)
+			ops->exit(undo_net);
+	}
+undone:
+	goto out;
+}
+
+static void unregister_pernet_operations(struct pernet_operations *ops)
+{
+	struct net *net;
+
+	list_del(&ops->list);
+	for_each_net(net)
+		if (ops->exit)
+			ops->exit(net);
+}
+
+/**
+ * register_pernet_subsys - register a network namespace subsystem
+ * @ops: pernet operations structure for the subsystem
+ *
+ * Register a subsystem which has init and exit functions
+ * that are called when network namespaces are created and
+ * destroyed respectively.
+ *
+ * When registered all network namespace init functions are
+ * called for every existing network namespace, allowing kernel
+ * modules to have a race free view of the set of network namespaces.
+ *
+ * When a new network namespace is created all of the init
+ * methods are called in the order in which they were registered.
+ *
+ * When a network namespace is destroyed all of the exit methods
+ * are called in the reverse of the order with which they were
+ * registered.
+ */
+int register_pernet_subsys(struct pernet_operations *ops)
+{
+	int error;
+	mutex_lock(&net_mutex);
+	error = register_pernet_operations(first_device, ops);
+	mutex_unlock(&net_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(register_pernet_subsys);
+
+/**
+ * unregister_pernet_subsys - unregister a network namespace subsystem
+ * @ops: pernet operations structure to manipulate
+ *
+ * Remove the pernet operations structure from the list to be
+ * used when network namespaces are created or destroyed.  In
+ * addition run the exit method for all existing network
+ * namespaces.
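+ *
+ * (A caller pairs this with the earlier register call; for the
+ * hypothetical my_net_ops from the sketch above, module unload
+ * would call unregister_pernet_subsys(&my_net_ops).)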
+ */ +void unregister_pernet_subsys(struct pernet_operations *module) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(module); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_subsys); + +/** + * register_pernet_device - register a network namespace device + * @ops: pernet operations structure for the subsystem + * + * Register a device which has init and exit functions + * that are called when network namespaces are created and + * destroyed respectively. + * + * When registered all network namespace init functions are + * called for every existing network namespace. Allowing kernel + * modules to have a race free view of the set of network namespaces. + * + * When a new network namespace is created all of the init + * methods are called in the order in which they were registered. + * + * When a network namespace is destroyed all of the exit methods + * are called in the reverse of the order with which they were + * registered. + */ +int register_pernet_device(struct pernet_operations *ops) +{ + int error; + mutex_lock(&net_mutex); + error = register_pernet_operations(&pernet_list, ops); + if (!error && (first_device == &pernet_list)) + first_device = &ops->list; + mutex_unlock(&net_mutex); + return error; +} +EXPORT_SYMBOL_GPL(register_pernet_device); + +/** + * unregister_pernet_device - unregister a network namespace netdevice + * @ops: pernet operations structure to manipulate + * + * Remove the pernet operations structure from the list to be + * used when network namespaces are created or destoryed. In + * addition run the exit method for all existing network + * namespaces. + */ +void unregister_pernet_device(struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + if (&ops->list == first_device) + first_device = first_device->next; + unregister_pernet_operations(ops); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_device); diff -Nurb linux-2.6.22-594/net/ipv4/af_inet.c.orig linux-2.6.22-595/net/ipv4/af_inet.c.orig --- linux-2.6.22-594/net/ipv4/af_inet.c.orig 2008-03-20 00:05:18.000000000 -0400 +++ linux-2.6.22-595/net/ipv4/af_inet.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1522 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * PF_INET protocol family socket handler. - * - * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ - * - * Authors: Ross Biro - * Fred N. van Kempen, - * Florian La Roche, - * Alan Cox, - * - * Changes (see also sock.c) - * - * piggy, - * Karl Knutson : Socket protocol table - * A.N.Kuznetsov : Socket death error in accept(). - * John Richardson : Fix non blocking error in connect() - * so sockets that fail to connect - * don't return -EINPROGRESS. - * Alan Cox : Asynchronous I/O support - * Alan Cox : Keep correct socket pointer on sock - * structures - * when accept() ed - * Alan Cox : Semantics of SO_LINGER aren't state - * moved to close when you look carefully. - * With this fixed and the accept bug fixed - * some RPC stuff seems happier. - * Niibe Yutaka : 4.4BSD style write async I/O - * Alan Cox, - * Tony Gale : Fixed reuse semantics. - * Alan Cox : bind() shouldn't abort existing but dead - * sockets. Stops FTP netin:.. I hope. - * Alan Cox : bind() works correctly for RAW sockets. - * Note that FreeBSD at least was broken - * in this respect so be careful with - * compatibility tests... 
- * Alan Cox : routing cache support - * Alan Cox : memzero the socket structure for - * compactness. - * Matt Day : nonblock connect error handler - * Alan Cox : Allow large numbers of pending sockets - * (eg for big web sites), but only if - * specifically application requested. - * Alan Cox : New buffering throughout IP. Used - * dumbly. - * Alan Cox : New buffering now used smartly. - * Alan Cox : BSD rather than common sense - * interpretation of listen. - * Germano Caronni : Assorted small races. - * Alan Cox : sendmsg/recvmsg basic support. - * Alan Cox : Only sendmsg/recvmsg now supported. - * Alan Cox : Locked down bind (see security list). - * Alan Cox : Loosened bind a little. - * Mike McLagan : ADD/DEL DLCI Ioctls - * Willy Konynenberg : Transparent proxying support. - * David S. Miller : New socket lookup architecture. - * Some other random speedups. - * Cyrus Durgin : Cleaned up file for kmod hacks. - * Andi Kleen : Fix inet_stream_connect TCP race. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_IP_MROUTE -#include -#endif -#include - -DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; - -extern void ip_mc_drop_socket(struct sock *sk); - -/* The inetsw table contains everything that inet_create needs to - * build a new socket. - */ -static struct list_head inetsw[SOCK_MAX]; -static DEFINE_SPINLOCK(inetsw_lock); - -/* New destruction routine */ - -void inet_sock_destruct(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - - __skb_queue_purge(&sk->sk_receive_queue); - __skb_queue_purge(&sk->sk_error_queue); - - if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { - printk("Attempt to release TCP socket in state %d %p\n", - sk->sk_state, sk); - return; - } - if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive inet socket %p\n", sk); - return; - } - - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!sk->sk_wmem_queued); - BUG_TRAP(!sk->sk_forward_alloc); - - kfree(inet->opt); - dst_release(sk->sk_dst_cache); - sk_refcnt_debug_dec(sk); -} - -/* - * The routines beyond this point handle the behaviour of an AF_INET - * socket object. Mostly it punts to the subprotocols of IP to do - * the work. - */ - -/* - * Automatically bind an unbound socket. - */ - -static int inet_autobind(struct sock *sk) -{ - struct inet_sock *inet; - /* We may need to bind the socket. */ - lock_sock(sk); - inet = inet_sk(sk); - if (!inet->num) { - if (sk->sk_prot->get_port(sk, 0)) { - release_sock(sk); - return -EAGAIN; - } - inet->sport = htons(inet->num); - sk->sk_xid = vx_current_xid(); - sk->sk_nid = nx_current_nid(); - } - release_sock(sk); - return 0; -} - -/* - * Move a socket into listening state. 
- */ -int inet_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - unsigned char old_state; - int err; - - lock_sock(sk); - - err = -EINVAL; - if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) - goto out; - - old_state = sk->sk_state; - if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) - goto out; - - /* Really, if the socket is already in listen state - * we can only allow the backlog to be adjusted. - */ - if (old_state != TCP_LISTEN) { - err = inet_csk_listen_start(sk, backlog); - if (err) - goto out; - } - sk->sk_max_ack_backlog = backlog; - err = 0; - -out: - release_sock(sk); - return err; -} - -u32 inet_ehash_secret __read_mostly; -EXPORT_SYMBOL(inet_ehash_secret); - -/* - * inet_ehash_secret must be set exactly once - * Instead of using a dedicated spinlock, we (ab)use inetsw_lock - */ -void build_ehash_secret(void) -{ - u32 rnd; - do { - get_random_bytes(&rnd, sizeof(rnd)); - } while (rnd == 0); - spin_lock_bh(&inetsw_lock); - if (!inet_ehash_secret) - inet_ehash_secret = rnd; - spin_unlock_bh(&inetsw_lock); -} -EXPORT_SYMBOL(build_ehash_secret); - -/* - * Create an inet socket. - */ - -static int inet_create(struct socket *sock, int protocol) -{ - struct sock *sk; - struct list_head *p; - struct inet_protosw *answer; - struct inet_sock *inet; - struct proto *answer_prot; - unsigned char answer_flags; - char answer_no_check; - int try_loading_module = 0; - int err; - - if (sock->type != SOCK_RAW && - sock->type != SOCK_DGRAM && - !inet_ehash_secret) - build_ehash_secret(); - - sock->state = SS_UNCONNECTED; - - /* Look for the requested type/protocol pair. */ - answer = NULL; -lookup_protocol: - err = -ESOCKTNOSUPPORT; - rcu_read_lock(); - list_for_each_rcu(p, &inetsw[sock->type]) { - answer = list_entry(p, struct inet_protosw, list); - - /* Check the non-wild match. */ - if (protocol == answer->protocol) { - if (protocol != IPPROTO_IP) - break; - } else { - /* Check for the two wild cases. */ - if (IPPROTO_IP == protocol) { - protocol = answer->protocol; - break; - } - if (IPPROTO_IP == answer->protocol) - break; - } - err = -EPROTONOSUPPORT; - answer = NULL; - } - - if (unlikely(answer == NULL)) { - if (try_loading_module < 2) { - rcu_read_unlock(); - /* - * Be more specific, e.g. net-pf-2-proto-132-type-1 - * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) - */ - if (++try_loading_module == 1) - request_module("net-pf-%d-proto-%d-type-%d", - PF_INET, protocol, sock->type); - /* - * Fall back to generic, e.g. 
net-pf-2-proto-132 - * (net-pf-PF_INET-proto-IPPROTO_SCTP) - */ - else - request_module("net-pf-%d-proto-%d", - PF_INET, protocol); - goto lookup_protocol; - } else - goto out_rcu_unlock; - } - - err = -EPERM; - if ((protocol == IPPROTO_ICMP) && - nx_capable(answer->capability, NXC_RAW_ICMP)) - goto override; - if (sock->type == SOCK_RAW && - nx_capable(answer->capability, NXC_RAW_SOCKET)) - goto override; - if (answer->capability > 0 && !capable(answer->capability)) - goto out_rcu_unlock; -override: - sock->ops = answer->ops; - answer_prot = answer->prot; - answer_no_check = answer->no_check; - answer_flags = answer->flags; - rcu_read_unlock(); - - BUG_TRAP(answer_prot->slab != NULL); - - err = -ENOBUFS; - sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); - if (sk == NULL) - goto out; - - err = 0; - sk->sk_no_check = answer_no_check; - if (INET_PROTOSW_REUSE & answer_flags) - sk->sk_reuse = 1; - - inet = inet_sk(sk); - inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; - - if (SOCK_RAW == sock->type) { - inet->num = protocol; - if (IPPROTO_RAW == protocol) - inet->hdrincl = 1; - } - - if (ipv4_config.no_pmtu_disc) - inet->pmtudisc = IP_PMTUDISC_DONT; - else - inet->pmtudisc = IP_PMTUDISC_WANT; - - inet->id = 0; - - sock_init_data(sock, sk); - - sk->sk_destruct = inet_sock_destruct; - sk->sk_family = PF_INET; - sk->sk_protocol = protocol; - sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; - - inet->uc_ttl = -1; - inet->mc_loop = 1; - inet->mc_ttl = 1; - inet->mc_index = 0; - inet->mc_list = NULL; - - sk_refcnt_debug_inc(sk); - - if (inet->num) { - /* It assumes that any protocol which allows - * the user to assign a number at socket - * creation time automatically - * shares. - */ - inet->sport = htons(inet->num); - /* Add to protocol hash chains. */ - sk->sk_prot->hash(sk); - } - - if (sk->sk_prot->init) { - err = sk->sk_prot->init(sk); - if (err) - sk_common_release(sk); - } -out: - return err; -out_rcu_unlock: - rcu_read_unlock(); - goto out; -} - - -/* - * The peer socket should always be NULL (or else). When we call this - * function we are destroying the object and from then on nobody - * should refer to it. - */ -int inet_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk) { - long timeout; - - /* Applications forget to leave groups before exiting */ - ip_mc_drop_socket(sk); - - /* If linger is set, we don't return until the close - * is complete. Otherwise we return immediately. The - * actually closing is done the same either way. - * - * If the close is due to the process exiting, we never - * linger.. - */ - timeout = 0; - if (sock_flag(sk, SOCK_LINGER) && - !(current->flags & PF_EXITING)) - timeout = sk->sk_lingertime; - sock->sk = NULL; - sk->sk_prot->close(sk, timeout); - } - return 0; -} - -/* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind __read_mostly; - -int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; - struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct nx_v4_sock_addr nsa; - unsigned short snum; - int chk_addr_ret; - int err; - - /* If the socket has its own bind function then use it. 
(RAW) */ - if (sk->sk_prot->bind) { - err = sk->sk_prot->bind(sk, uaddr, addr_len); - goto out; - } - err = -EINVAL; - if (addr_len < sizeof(struct sockaddr_in)) - goto out; - - err = v4_map_sock_addr(inet, addr, &nsa); - if (err) - goto out; - - chk_addr_ret = inet_addr_type(nsa.saddr); - - /* Not specified by any standard per-se, however it breaks too - * many applications when removed. It is unfortunate since - * allowing applications to make a non-local bind solves - * several problems with systems using dynamic addressing. - * (ie. your servers still start up even if your ISDN link - * is temporarily down) - */ - err = -EADDRNOTAVAIL; - if (!sysctl_ip_nonlocal_bind && - !inet->freebind && - nsa.saddr != INADDR_ANY && - chk_addr_ret != RTN_LOCAL && - chk_addr_ret != RTN_MULTICAST && - chk_addr_ret != RTN_BROADCAST) - goto out; - - snum = ntohs(addr->sin_port); - err = -EACCES; - if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) - goto out; - - /* We keep a pair of addresses. rcv_saddr is the one - * used by hash lookups, and saddr is used for transmit. - * - * In the BSD API these are the same except where it - * would be illegal to use them (multicast/broadcast) in - * which case the sending device address is used. - */ - lock_sock(sk); - - /* Check these errors (active socket, double bind). */ - err = -EINVAL; - if (sk->sk_state != TCP_CLOSE || inet->num) - goto out_release_sock; - - v4_set_sock_addr(inet, &nsa); - if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ - - /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { - inet->saddr = inet->rcv_saddr = 0; - err = -EADDRINUSE; - goto out_release_sock; - } - - if (inet->rcv_saddr) - sk->sk_userlocks |= SOCK_BINDADDR_LOCK; - if (snum) - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->daddr = 0; - inet->dport = 0; - sk_dst_reset(sk); - err = 0; -out_release_sock: - release_sock(sk); -out: - return err; -} - -int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - - if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); - - if (!inet_sk(sk)->num && inet_autobind(sk)) - return -EAGAIN; - return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); -} - -static long inet_wait_for_connect(struct sock *sk, long timeo) -{ - DEFINE_WAIT(wait); - - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - - /* Basic assumption: if someone sets sk->sk_err, he _must_ - * change state of the socket from TCP_SYN_*. - * Connect() does not allow to get error notifications - * without closing the socket. - */ - while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - if (signal_pending(current) || !timeo) - break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - } - finish_wait(sk->sk_sleep, &wait); - return timeo; -} - -/* - * Connect to a remote host. There is regrettably still a little - * TCP 'magic' in here. - */ -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - int err; - long timeo; - - lock_sock(sk); - - if (uaddr->sa_family == AF_UNSPEC) { - err = sk->sk_prot->disconnect(sk, flags); - sock->state = err ? 
SS_DISCONNECTING : SS_UNCONNECTED; - goto out; - } - - switch (sock->state) { - default: - err = -EINVAL; - goto out; - case SS_CONNECTED: - err = -EISCONN; - goto out; - case SS_CONNECTING: - err = -EALREADY; - /* Fall out of switch with err, set for this state */ - break; - case SS_UNCONNECTED: - err = -EISCONN; - if (sk->sk_state != TCP_CLOSE) - goto out; - - err = sk->sk_prot->connect(sk, uaddr, addr_len); - if (err < 0) - goto out; - - sock->state = SS_CONNECTING; - - /* Just entered SS_CONNECTING state; the only - * difference is that return value in non-blocking - * case is EINPROGRESS, rather than EALREADY. - */ - err = -EINPROGRESS; - break; - } - - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - - if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - /* Error code is set above */ - if (!timeo || !inet_wait_for_connect(sk, timeo)) - goto out; - - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out; - } - - /* Connection was closed by RST, timeout, ICMP error - * or another process disconnected us. - */ - if (sk->sk_state == TCP_CLOSE) - goto sock_error; - - /* sk->sk_err may be not zero now, if RECVERR was ordered by user - * and error was received after socket entered established state. - * Hence, it is handled normally after connect() return successfully. - */ - - sock->state = SS_CONNECTED; - err = 0; -out: - release_sock(sk); - return err; - -sock_error: - err = sock_error(sk) ? : -ECONNABORTED; - sock->state = SS_UNCONNECTED; - if (sk->sk_prot->disconnect(sk, flags)) - sock->state = SS_DISCONNECTING; - goto out; -} - -/* - * Accept a pending connection. The TCP layer now gives BSD semantics. - */ - -int inet_accept(struct socket *sock, struct socket *newsock, int flags) -{ - struct sock *sk1 = sock->sk; - int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); - - if (!sk2) - goto do_err; - - lock_sock(sk2); - - BUG_TRAP((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); - - sock_graft(sk2, newsock); - - newsock->state = SS_CONNECTED; - err = 0; - release_sock(sk2); -do_err: - return err; -} - - -/* - * This does both peername and sockname. - */ -int inet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) -{ - struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; - - sin->sin_family = AF_INET; - if (peer) { - if (!inet->dport || - (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && - peer == 1)) - return -ENOTCONN; - sin->sin_port = inet->dport; - sin->sin_addr.s_addr = - nx_map_sock_lback(sk->sk_nx_info, inet->daddr); - } else { - __be32 addr = inet->rcv_saddr; - if (!addr) - addr = inet->saddr; - addr = nx_map_sock_lback(sk->sk_nx_info, addr); - sin->sin_port = inet->sport; - sin->sin_addr.s_addr = addr; - } - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *uaddr_len = sizeof(*sin); - return 0; -} - -int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, - size_t size) -{ - struct sock *sk = sock->sk; - - /* We may need to bind the socket. */ - if (!inet_sk(sk)->num && inet_autobind(sk)) - return -EAGAIN; - - return sk->sk_prot->sendmsg(iocb, sk, msg, size); -} - - -static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -{ - struct sock *sk = sock->sk; - - /* We may need to bind the socket. 
*/ - if (!inet_sk(sk)->num && inet_autobind(sk)) - return -EAGAIN; - - if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); - return sock_no_sendpage(sock, page, offset, size, flags); -} - - -int inet_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - int err = 0; - - /* This should really check to make sure - * the socket is a TCP socket. (WHY AC...) - */ - how++; /* maps 0->1 has the advantage of making bit 1 rcvs and - 1->2 bit 2 snds. - 2->3 */ - if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ - return -EINVAL; - - lock_sock(sk); - if (sock->state == SS_CONNECTING) { - if ((1 << sk->sk_state) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) - sock->state = SS_DISCONNECTING; - else - sock->state = SS_CONNECTED; - } - - switch (sk->sk_state) { - case TCP_CLOSE: - err = -ENOTCONN; - /* Hack to wake up other listeners, who can poll for - POLLHUP, even on eg. unconnected UDP sockets -- RR */ - default: - sk->sk_shutdown |= how; - if (sk->sk_prot->shutdown) - sk->sk_prot->shutdown(sk, how); - break; - - /* Remaining two branches are temporary solution for missing - * close() in multithreaded environment. It is _not_ a good idea, - * but we have no choice until close() is repaired at VFS level. - */ - case TCP_LISTEN: - if (!(how & RCV_SHUTDOWN)) - break; - /* Fall through */ - case TCP_SYN_SENT: - err = sk->sk_prot->disconnect(sk, O_NONBLOCK); - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; - break; - } - - /* Wake up anyone sleeping in poll. */ - sk->sk_state_change(sk); - release_sock(sk); - return err; -} - -/* - * ioctl() calls you can issue on an INET socket. Most of these are - * device configuration and stuff and very rarely used. Some ioctls - * pass on to the socket itself. - * - * NOTE: I like the idea of a module for the config stuff. ie ifconfig - * loads the devconfigure module does its configuring and unloads it. - * There's a good 20K of config code hanging around the kernel. 
- */ - -int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - int err = 0; - - switch (cmd) { - case SIOCGSTAMP: - err = sock_get_timestamp(sk, (struct timeval __user *)arg); - break; - case SIOCGSTAMPNS: - err = sock_get_timestampns(sk, (struct timespec __user *)arg); - break; - case SIOCADDRT: - case SIOCDELRT: - case SIOCRTMSG: - err = ip_rt_ioctl(cmd, (void __user *)arg); - break; - case SIOCDARP: - case SIOCGARP: - case SIOCSARP: - err = arp_ioctl(cmd, (void __user *)arg); - break; - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: - case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); - break; - default: - if (sk->sk_prot->ioctl) - err = sk->sk_prot->ioctl(sk, cmd, arg); - else - err = -ENOIOCTLCMD; - break; - } - return err; -} - -const struct proto_ops inet_stream_ops = { - .family = PF_INET, - .owner = THIS_MODULE, - .release = inet_release, - .bind = inet_bind, - .connect = inet_stream_connect, - .socketpair = sock_no_socketpair, - .accept = inet_accept, - .getname = inet_getname, - .poll = tcp_poll, - .ioctl = inet_ioctl, - .listen = inet_listen, - .shutdown = inet_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = tcp_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = tcp_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -const struct proto_ops inet_dgram_ops = { - .family = PF_INET, - .owner = THIS_MODULE, - .release = inet_release, - .bind = inet_bind, - .connect = inet_dgram_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = inet_getname, - .poll = udp_poll, - .ioctl = inet_ioctl, - .listen = sock_no_listen, - .shutdown = inet_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = inet_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -/* - * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without - * udp_poll - */ -static const struct proto_ops inet_sockraw_ops = { - .family = PF_INET, - .owner = THIS_MODULE, - .release = inet_release, - .bind = inet_bind, - .connect = inet_dgram_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = inet_getname, - .poll = datagram_poll, - .ioctl = inet_ioctl, - .listen = sock_no_listen, - .shutdown = inet_shutdown, - .setsockopt = sock_common_setsockopt, - .getsockopt = sock_common_getsockopt, - .sendmsg = inet_sendmsg, - .recvmsg = sock_common_recvmsg, - .mmap = sock_no_mmap, - .sendpage = inet_sendpage, -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_sock_common_setsockopt, - .compat_getsockopt = compat_sock_common_getsockopt, -#endif -}; - -static struct net_proto_family inet_family_ops = { - .family = PF_INET, - .create = inet_create, - .owner = THIS_MODULE, -}; - -/* Upon startup we insert all the elements in inetsw_array[] into - * the linked list inetsw. 
- */ -static struct inet_protosw inetsw_array[] = -{ - { - .type = SOCK_STREAM, - .protocol = IPPROTO_TCP, - .prot = &tcp_prot, - .ops = &inet_stream_ops, - .capability = -1, - .no_check = 0, - .flags = INET_PROTOSW_PERMANENT | - INET_PROTOSW_ICSK, - }, - - { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDP, - .prot = &udp_prot, - .ops = &inet_dgram_ops, - .capability = -1, - .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_PERMANENT, - }, - - - { - .type = SOCK_RAW, - .protocol = IPPROTO_IP, /* wild card */ - .prot = &raw_prot, - .ops = &inet_sockraw_ops, - .capability = CAP_NET_RAW, - .no_check = UDP_CSUM_DEFAULT, - .flags = INET_PROTOSW_REUSE, - } -}; - -#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) - -void inet_register_protosw(struct inet_protosw *p) -{ - struct list_head *lh; - struct inet_protosw *answer; - int protocol = p->protocol; - struct list_head *last_perm; - - spin_lock_bh(&inetsw_lock); - - if (p->type >= SOCK_MAX) - goto out_illegal; - - /* If we are trying to override a permanent protocol, bail. */ - answer = NULL; - last_perm = &inetsw[p->type]; - list_for_each(lh, &inetsw[p->type]) { - answer = list_entry(lh, struct inet_protosw, list); - - /* Check only the non-wild match. */ - if (INET_PROTOSW_PERMANENT & answer->flags) { - if (protocol == answer->protocol) - break; - last_perm = lh; - } - - answer = NULL; - } - if (answer) - goto out_permanent; - - /* Add the new entry after the last permanent entry if any, so that - * the new entry does not override a permanent entry when matched with - * a wild-card protocol. But it is allowed to override any existing - * non-permanent entry. This means that when we remove this entry, the - * system automatically returns to the old behavior. - */ - list_add_rcu(&p->list, last_perm); -out: - spin_unlock_bh(&inetsw_lock); - - synchronize_net(); - - return; - -out_permanent: - printk(KERN_ERR "Attempt to override permanent protocol %d.\n", - protocol); - goto out; - -out_illegal: - printk(KERN_ERR - "Ignoring attempt to register invalid socket type %d.\n", - p->type); - goto out; -} - -void inet_unregister_protosw(struct inet_protosw *p) -{ - if (INET_PROTOSW_PERMANENT & p->flags) { - printk(KERN_ERR - "Attempt to unregister permanent protocol %d.\n", - p->protocol); - } else { - spin_lock_bh(&inetsw_lock); - list_del_rcu(&p->list); - spin_unlock_bh(&inetsw_lock); - - synchronize_net(); - } -} - -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr __read_mostly; - -static int inet_sk_reselect_saddr(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; - __be32 old_saddr = inet->saddr; - __be32 new_saddr; - __be32 daddr = inet->daddr; - - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - /* Query new route. */ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - sk->sk_protocol, - inet->sport, inet->dport, sk, 0); - if (err) - return err; - - sk_setup_caps(sk, &rt->u.dst); - - new_saddr = rt->rt_src; - - if (new_saddr == old_saddr) - return 0; - - if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "%s(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - __FUNCTION__, - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); - } - - inet->saddr = inet->rcv_saddr = new_saddr; - - /* - * XXX The only one ugly spot where we need to - * XXX really change the sockets identity after - * XXX it has entered the hashes. 
-DaveM - * - * Besides that, it does not check for connection - * uniqueness. Wait for troubles. - */ - __sk_prot_rehash(sk); - return 0; -} - -int inet_sk_rebuild_header(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - __be32 daddr; - int err; - - /* Route is OK, nothing to do. */ - if (rt) - return 0; - - /* Reroute. */ - daddr = inet->daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; -{ - struct flowi fl = { - .oif = sk->sk_bound_dev_if, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = inet->saddr, - .tos = RT_CONN_FLAGS(sk), - }, - }, - .proto = sk->sk_protocol, - .uli_u = { - .ports = { - .sport = inet->sport, - .dport = inet->dport, - }, - }, - }; - - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, 0); -} - if (!err) - sk_setup_caps(sk, &rt->u.dst); - else { - /* Routing failed... */ - sk->sk_route_caps = 0; - /* - * Other protocols have to map its equivalent state to TCP_SYN_SENT. - * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme - */ - if (!sysctl_ip_dynaddr || - sk->sk_state != TCP_SYN_SENT || - (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || - (err = inet_sk_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; - } - - return err; -} - -EXPORT_SYMBOL(inet_sk_rebuild_header); - -static int inet_gso_send_check(struct sk_buff *skb) -{ - struct iphdr *iph; - struct net_protocol *ops; - int proto; - int ihl; - int err = -EINVAL; - - if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) - goto out; - - iph = ip_hdr(skb); - ihl = iph->ihl * 4; - if (ihl < sizeof(*iph)) - goto out; - - if (unlikely(!pskb_may_pull(skb, ihl))) - goto out; - - __skb_pull(skb, ihl); - skb_reset_transport_header(skb); - iph = ip_hdr(skb); - proto = iph->protocol & (MAX_INET_PROTOS - 1); - err = -EPROTONOSUPPORT; - - rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_send_check)) - err = ops->gso_send_check(skb); - rcu_read_unlock(); - -out: - return err; -} - -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct iphdr *iph; - struct net_protocol *ops; - int proto; - int ihl; - int id; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - 0))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) - goto out; - - iph = ip_hdr(skb); - ihl = iph->ihl * 4; - if (ihl < sizeof(*iph)) - goto out; - - if (unlikely(!pskb_may_pull(skb, ihl))) - goto out; - - __skb_pull(skb, ihl); - skb_reset_transport_header(skb); - iph = ip_hdr(skb); - id = ntohs(iph->id); - proto = iph->protocol & (MAX_INET_PROTOS - 1); - segs = ERR_PTR(-EPROTONOSUPPORT); - - rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_segment)) - segs = ops->gso_segment(skb, features); - rcu_read_unlock(); - - if (!segs || unlikely(IS_ERR(segs))) - goto out; - - skb = segs; - do { - iph = ip_hdr(skb); - iph->id = htons(id++); - iph->tot_len = htons(skb->len - skb->mac_len); - iph->check = 0; - iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); - } while ((skb = skb->next)); - -out: - return segs; -} - -unsigned long snmp_fold_field(void *mib[], int offt) -{ - unsigned long res = 0; - int i; - - for_each_possible_cpu(i) { - res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); - res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); - } - return res; -} 
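
[Aside on the per-CPU SNMP scheme: snmp_fold_field() above and
snmp_mib_init() below implement a two-copy MIB layout. Each statistic is
allocated twice per CPU (mib[0] for process/BH-disabled context, mib[1]
for softirq context), so writers never need atomic operations and a
reader simply sums both copies across all CPUs. A small user-space model,
with plain arrays standing in for the per-CPU allocations and all names
invented for illustration:

#include <stdio.h>

#define NCPUS   4
#define NFIELDS 8		/* stand-in for the MIB field count */

/* [copy][cpu][field]: copy 0 ~ process context, copy 1 ~ softirq */
static unsigned long mib[2][NCPUS][NFIELDS];

static unsigned long fold_field(int field)
{
	unsigned long res = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		res += mib[0][cpu][field];
		res += mib[1][cpu][field];
	}
	return res;
}

int main(void)
{
	mib[0][1][3] = 3;	/* CPU 1 bumps field 3 in process context */
	mib[1][2][3] = 4;	/* CPU 2 bumps field 3 in softirq context */
	printf("%lu\n", fold_field(3));	/* prints 7 */
	return 0;
}

The kernel version differs only in that the two copies live in per-CPU
memory obtained from __alloc_percpu() and are reached via per_cpu_ptr().]
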
-EXPORT_SYMBOL_GPL(snmp_fold_field); - -int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) -{ - BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize); - if (!ptr[0]) - goto err0; - ptr[1] = __alloc_percpu(mibsize); - if (!ptr[1]) - goto err1; - return 0; -err1: - free_percpu(ptr[0]); - ptr[0] = NULL; -err0: - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(snmp_mib_init); - -void snmp_mib_free(void *ptr[2]) -{ - BUG_ON(ptr == NULL); - free_percpu(ptr[0]); - free_percpu(ptr[1]); - ptr[0] = ptr[1] = NULL; -} -EXPORT_SYMBOL_GPL(snmp_mib_free); - -#ifdef CONFIG_IP_MULTICAST -static struct net_protocol igmp_protocol = { - .handler = igmp_rcv, -}; -#endif - -static struct net_protocol tcp_protocol = { - .handler = tcp_v4_rcv, - .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .no_policy = 1, -}; - -static struct net_protocol udp_protocol = { - .handler = udp_rcv, - .err_handler = udp_err, - .no_policy = 1, -}; - -static struct net_protocol icmp_protocol = { - .handler = icmp_rcv, -}; - -static int __init init_ipv4_mibs(void) -{ - if (snmp_mib_init((void **)net_statistics, - sizeof(struct linux_mib), - __alignof__(struct linux_mib)) < 0) - goto err_net_mib; - if (snmp_mib_init((void **)ip_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) - goto err_ip_mib; - if (snmp_mib_init((void **)icmp_statistics, - sizeof(struct icmp_mib), - __alignof__(struct icmp_mib)) < 0) - goto err_icmp_mib; - if (snmp_mib_init((void **)tcp_statistics, - sizeof(struct tcp_mib), - __alignof__(struct tcp_mib)) < 0) - goto err_tcp_mib; - if (snmp_mib_init((void **)udp_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) - goto err_udp_mib; - if (snmp_mib_init((void **)udplite_statistics, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) - goto err_udplite_mib; - - tcp_mib_init(); - - return 0; - -err_udplite_mib: - snmp_mib_free((void **)udp_statistics); -err_udp_mib: - snmp_mib_free((void **)tcp_statistics); -err_tcp_mib: - snmp_mib_free((void **)icmp_statistics); -err_icmp_mib: - snmp_mib_free((void **)ip_statistics); -err_ip_mib: - snmp_mib_free((void **)net_statistics); -err_net_mib: - return -ENOMEM; -} - -static int ipv4_proc_init(void); - -/* - * IP protocol layer initialiser - */ - -static struct packet_type ip_packet_type = { - .type = __constant_htons(ETH_P_IP), - .func = ip_rcv, - .gso_send_check = inet_gso_send_check, - .gso_segment = inet_gso_segment, -}; - -static int __init inet_init(void) -{ - struct sk_buff *dummy_skb; - struct inet_protosw *q; - struct list_head *r; - int rc = -EINVAL; - - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); - - rc = proto_register(&tcp_prot, 1); - if (rc) - goto out; - - rc = proto_register(&udp_prot, 1); - if (rc) - goto out_unregister_tcp_proto; - - rc = proto_register(&raw_prot, 1); - if (rc) - goto out_unregister_udp_proto; - - /* - * Tell SOCKET that we are alive... - */ - - (void)sock_register(&inet_family_ops); - - /* - * Add all the base protocols. 
- */ - - if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) - printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); - if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) - printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); - if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) - printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); -#ifdef CONFIG_IP_MULTICAST - if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) - printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n"); -#endif - - /* Register the socket-side information for inet_create. */ - for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) - INIT_LIST_HEAD(r); - - for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) - inet_register_protosw(q); - - /* - * Set the ARP module up - */ - - arp_init(); - - /* - * Set the IP module up - */ - - ip_init(); - - tcp_v4_init(&inet_family_ops); - - /* Setup TCP slab cache for open requests. */ - tcp_init(); - - /* Add UDP-Lite (RFC 3828) */ - udplite4_register(); - - /* - * Set the ICMP layer up - */ - - icmp_init(&inet_family_ops); - - /* - * Initialise the multicast router - */ -#if defined(CONFIG_IP_MROUTE) - ip_mr_init(); -#endif - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; - - ipv4_proc_init(); - - ipfrag_init(); - - dev_add_pack(&ip_packet_type); - - rc = 0; -out: - return rc; -out_unregister_udp_proto: - proto_unregister(&udp_prot); -out_unregister_tcp_proto: - proto_unregister(&tcp_prot); - goto out; -} - -fs_initcall(inet_init); - -/* ------------------------------------------------------------------------ */ - -#ifdef CONFIG_PROC_FS -static int __init ipv4_proc_init(void) -{ - int rc = 0; - - if (raw_proc_init()) - goto out_raw; - if (tcp4_proc_init()) - goto out_tcp; - if (udp4_proc_init()) - goto out_udp; - if (fib_proc_init()) - goto out_fib; - if (ip_misc_proc_init()) - goto out_misc; -out: - return rc; -out_misc: - fib_proc_exit(); -out_fib: - udp4_proc_exit(); -out_udp: - tcp4_proc_exit(); -out_tcp: - raw_proc_exit(); -out_raw: - rc = -ENOMEM; - goto out; -} - -#else /* CONFIG_PROC_FS */ -static int __init ipv4_proc_init(void) -{ - return 0; -} -#endif /* CONFIG_PROC_FS */ - -MODULE_ALIAS_NETPROTO(PF_INET); - -EXPORT_SYMBOL(inet_accept); -EXPORT_SYMBOL(inet_bind); -EXPORT_SYMBOL(inet_dgram_connect); -EXPORT_SYMBOL(inet_dgram_ops); -EXPORT_SYMBOL(inet_getname); -EXPORT_SYMBOL(inet_ioctl); -EXPORT_SYMBOL(inet_listen); -EXPORT_SYMBOL(inet_register_protosw); -EXPORT_SYMBOL(inet_release); -EXPORT_SYMBOL(inet_sendmsg); -EXPORT_SYMBOL(inet_shutdown); -EXPORT_SYMBOL(inet_sock_destruct); -EXPORT_SYMBOL(inet_stream_connect); -EXPORT_SYMBOL(inet_stream_ops); -EXPORT_SYMBOL(inet_unregister_protosw); -EXPORT_SYMBOL(net_statistics); -EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); diff -Nurb linux-2.6.22-594/net/netfilter/xt_MARK.c.orig linux-2.6.22-595/net/netfilter/xt_MARK.c.orig --- linux-2.6.22-594/net/netfilter/xt_MARK.c.orig 2008-03-20 00:05:19.000000000 -0400 +++ linux-2.6.22-595/net/netfilter/xt_MARK.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,283 +0,0 @@ -/* This is a module which is used for setting the NFMARK field of an skb. */ - -/* (C) 1999-2001 Marc Boucher - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("ip[6]tables MARK modification module"); -MODULE_ALIAS("ipt_MARK"); -MODULE_ALIAS("ip6t_MARK"); - -static inline u_int16_t -get_dst_port(struct nf_conntrack_tuple *tuple) -{ - switch (tuple->dst.protonum) { - case IPPROTO_GRE: - /* XXX Truncate 32-bit GRE key to 16 bits */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) - return tuple->dst.u.gre.key; -#else - return htons(ntohl(tuple->dst.u.gre.key)); -#endif - case IPPROTO_ICMP: - /* Bind on ICMP echo ID */ - return tuple->src.u.icmp.id; - case IPPROTO_TCP: - return tuple->dst.u.tcp.port; - case IPPROTO_UDP: - return tuple->dst.u.udp.port; - default: - return tuple->dst.u.all; - } -} - -static inline u_int16_t -get_src_port(struct nf_conntrack_tuple *tuple) -{ - switch (tuple->dst.protonum) { - case IPPROTO_GRE: - /* XXX Truncate 32-bit GRE key to 16 bits */ - return htons(ntohl(tuple->src.u.gre.key)); - case IPPROTO_ICMP: - /* Bind on ICMP echo ID */ - return tuple->src.u.icmp.id; - case IPPROTO_TCP: - return tuple->src.u.tcp.port; - case IPPROTO_UDP: - return tuple->src.u.udp.port; - default: - return tuple->src.u.all; - } -} - -static unsigned int -target_v0(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct xt_mark_target_info *markinfo = targinfo; - - (*pskb)->mark = markinfo->mark; - return XT_CONTINUE; -} - -static unsigned int -target_v1(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct xt_mark_target_info_v1 *markinfo = targinfo; - int mark = -1; - - switch (markinfo->mode) { - case XT_MARK_SET: - mark = markinfo->mark; - break; - - case XT_MARK_AND: - mark = (*pskb)->mark & markinfo->mark; - break; - - case XT_MARK_OR: - mark = (*pskb)->mark | markinfo->mark; - break; - - case XT_MARK_COPYXID: { - enum ip_conntrack_info ctinfo; - struct sock *connection_sk=NULL; - int dif; - - struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo); - extern struct inet_hashinfo tcp_hashinfo; - enum ip_conntrack_dir dir; - if (!ct) - break; - - dir = CTINFO2DIR(ctinfo); - u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip; - u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple); - u_int16_t proto = ct->tuplehash[dir].tuple.dst.protonum; - - u_int32_t ip; - u_int16_t port; - - dif = ((struct rtable *)(*pskb)->dst)->rt_iif; - ip = ct->tuplehash[dir].tuple.dst.u3.ip; - port = get_dst_port(&ct->tuplehash[dir].tuple); - - if (proto == 1 || proto == 17) { - if (((*pskb)->mark!=-1) && (*pskb)->mark) - ct->xid[0]=(*pskb)->mark; - if (ct->xid[0]) - mark = ct->xid[0]; - - } - else if (proto == 6) { - if ((*pskb)->sk) - connection_sk = (*pskb)->sk; - else { - connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); - } - - if (connection_sk) { - connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir]; - ct->xid[!dir]=connection_sk->sk_xid; - if (connection_sk->sk_xid != 0) - mark = connection_sk->sk_xid; - if (connection_sk != (*pskb)->sk) - sock_put(connection_sk); - } - break; - } - } - } - - if (mark != -1) - (*pskb)->mark = mark; - return XT_CONTINUE; -} - - -static int -checkentry_v0(const char *tablename, - const void *entry, - const struct 
xt_target *target, - void *targinfo, - unsigned int hook_mask) -{ - struct xt_mark_target_info *markinfo = targinfo; - - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; - } - return 1; -} - -static int -checkentry_v1(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -{ - struct xt_mark_target_info_v1 *markinfo = targinfo; - - if (markinfo->mode != XT_MARK_SET - && markinfo->mode != XT_MARK_AND - && markinfo->mode != XT_MARK_OR - && markinfo->mode != XT_MARK_COPYXID) { - printk(KERN_WARNING "MARK: unknown mode %u\n", - markinfo->mode); - return 0; - } - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return 0; - } - return 1; -} - -#ifdef CONFIG_COMPAT -struct compat_xt_mark_target_info_v1 { - compat_ulong_t mark; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void compat_from_user_v1(void *dst, void *src) -{ - struct compat_xt_mark_target_info_v1 *cm = src; - struct xt_mark_target_info_v1 m = { - .mark = cm->mark, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int compat_to_user_v1(void __user *dst, void *src) -{ - struct xt_mark_target_info_v1 *m = src; - struct compat_xt_mark_target_info_v1 cm = { - .mark = m->mark, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target xt_mark_target[] = { - { - .name = "MARK", - .family = AF_INET, - .revision = 0, - .checkentry = checkentry_v0, - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET, - .revision = 1, - .checkentry = checkentry_v1, - .target = target_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = compat_from_user_v1, - .compat_to_user = compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET6, - .revision = 0, - .checkentry = checkentry_v0, - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, -}; - -static int __init xt_mark_init(void) -{ - return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); -} - -static void __exit xt_mark_fini(void) -{ - xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); -} - -module_init(xt_mark_init); -module_exit(xt_mark_fini); diff -Nurb linux-2.6.22-594/net/packet/af_packet.c.orig linux-2.6.22-595/net/packet/af_packet.c.orig --- linux-2.6.22-594/net/packet/af_packet.c.orig 2008-03-20 00:05:19.000000000 -0400 +++ linux-2.6.22-595/net/packet/af_packet.c.orig 1969-12-31 19:00:00.000000000 -0500 @@ -1,1989 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * PACKET - implements raw packet sockets. - * - * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ - * - * Authors: Ross Biro - * Fred N. van Kempen, - * Alan Cox, - * - * Fixes: - * Alan Cox : verify_area() now used correctly - * Alan Cox : new skbuff lists, look ma no backlogs! - * Alan Cox : tidied skbuff lists. - * Alan Cox : Now uses generic datagram routines I - * added. 
Also fixed the peek/read crash - * from all old Linux datagram code. - * Alan Cox : Uses the improved datagram code. - * Alan Cox : Added NULL's for socket options. - * Alan Cox : Re-commented the code. - * Alan Cox : Use new kernel side addressing - * Rob Janssen : Correct MTU usage. - * Dave Platt : Counter leaks caused by incorrect - * interrupt locking and some slightly - * dubious gcc output. Can you read - * compiler: it said _VOLATILE_ - * Richard Kooijman : Timestamp fixes. - * Alan Cox : New buffers. Use sk->mac.raw. - * Alan Cox : sendmsg/recvmsg support. - * Alan Cox : Protocol setting support - * Alexey Kuznetsov : Untied from IPv4 stack. - * Cyrus Durgin : Fixed kerneld for kmod. - * Michal Ostrowski : Module initialization cleanup. - * Ulises Alonso : Frame number limit removal and - * packet_set_ring memory leak. - * Eric Biederman : Allow for > 8 byte hardware addresses. - * The convention is that longer addresses - * will simply extend the hardware address - * byte arrays at the end of sockaddr_ll - * and packet_mreq. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_INET -#include -#endif - -/* - Assumptions: - - if device has no dev->hard_header routine, it adds and removes ll header - inside itself. In this case ll header is invisible outside of device, - but higher levels still should reserve dev->hard_header_len. - Some devices are enough clever to reallocate skb, when header - will not fit to reserved space (tunnel), another ones are silly - (PPP). - - packet socket receives packets with pulled ll header, - so that SOCK_RAW should push it back. - -On receive: ------------ - -Incoming, dev->hard_header!=NULL - mac_header -> ll header - data -> data - -Outgoing, dev->hard_header!=NULL - mac_header -> ll header - data -> ll header - -Incoming, dev->hard_header==NULL - mac_header -> UNKNOWN position. It is very likely, that it points to ll - header. PPP makes it, that is wrong, because introduce - assymetry between rx and tx paths. - data -> data - -Outgoing, dev->hard_header==NULL - mac_header -> data. ll header is still not built! - data -> data - -Resume - If dev->hard_header==NULL we are unlikely to restore sensible ll header. - - -On transmit: ------------- - -dev->hard_header != NULL - mac_header -> ll header - data -> ll header - -dev->hard_header == NULL (ll header is added by device, we cannot control it) - mac_header -> data - data -> data - - We should set nh.raw on output to correct posistion, - packet classifier depends on it. - */ - -/* List of all packet sockets. */ -static HLIST_HEAD(packet_sklist); -static DEFINE_RWLOCK(packet_sklist_lock); - -static atomic_t packet_socks_nr; - - -/* Private packet socket structures. */ - -struct packet_mclist -{ - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; -/* identical to struct packet_mreq except it has - * a longer address field. 
- */ -struct packet_mreq_max -{ - int mr_ifindex; - unsigned short mr_type; - unsigned short mr_alen; - unsigned char mr_address[MAX_ADDR_LEN]; -}; - -#ifdef CONFIG_PACKET_MMAP -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing); -#endif - -static void packet_flush_mclist(struct sock *sk); - -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct tpacket_stats stats; -#ifdef CONFIG_PACKET_MMAP - char * *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - int copy_thresh; -#endif - struct packet_type prot_hook; - spinlock_t bind_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; -#ifdef CONFIG_PACKET_MMAP - atomic_t mapped; - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; -#endif -}; - -struct packet_skb_cb { - unsigned int origlen; - union { - struct sockaddr_pkt pkt; - struct sockaddr_ll ll; - } sa; -}; - -#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) - -#ifdef CONFIG_PACKET_MMAP - -static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position) -{ - unsigned int pg_vec_pos, frame_offset; - - pg_vec_pos = position / po->frames_per_block; - frame_offset = position % po->frames_per_block; - - return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size)); -} -#endif - -static inline struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - -static void packet_sock_destruct(struct sock *sk) -{ - BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); - BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - - if (!sock_flag(sk, SOCK_DEAD)) { - printk("Attempt to release alive packet socket: %p\n", sk); - return; - } - - atomic_dec(&packet_socks_nr); -#ifdef PACKET_REFCNT_DEBUG - printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr)); -#endif -} - - -static const struct proto_ops packet_ops; - -static const struct proto_ops packet_ops_spkt; - -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) -{ - struct sock *sk; - struct sockaddr_pkt *spkt; - - /* - * When we registered the protocol we saved the socket in the data - * field for just this event. - */ - - sk = pt->af_packet_priv; - - /* - * Yank back the headers [hope the device set this - * right or kerboom...] - * - * Incoming packets have ll header pulled, - * push it back. - * - * For outgoing ones skb->data == skb_mac_header(skb) - * so that this procedure is noop. - */ - - if (skb->pkt_type == PACKET_LOOPBACK) - goto out; - - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - goto oom; - - /* drop any routing info */ - dst_release(skb->dst); - skb->dst = NULL; - - /* drop conntrack reference */ - nf_reset(skb); - - spkt = &PACKET_SKB_CB(skb)->sa.pkt; - - skb_push(skb, skb->data - skb_mac_header(skb)); - - /* - * The SOCK_PACKET socket receives _all_ frames. - */ - - spkt->spkt_family = dev->type; - strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); - spkt->spkt_protocol = skb->protocol; - - /* - * Charge the memory to the socket. This is done specifically - * to prevent sockets using all the memory up. 
- */ - - if (sock_queue_rcv_skb(sk,skb) == 0) - return 0; - -out: - kfree_skb(skb); -oom: - return 0; -} - - -/* - * Output a raw packet to a device layer. This bypasses all the other - * protocol layers and you must therefore supply it with a complete frame - */ - -static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; - struct sk_buff *skb; - struct net_device *dev; - __be16 proto=0; - int err; - - if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) - return -EPERM; - - /* - * Get and verify the address. - */ - - if (saddr) - { - if (msg->msg_namelen < sizeof(struct sockaddr)) - return(-EINVAL); - if (msg->msg_namelen==sizeof(struct sockaddr_pkt)) - proto=saddr->spkt_protocol; - } - else - return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */ - - /* - * Find the device first to size check it - */ - - saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); - err = -ENODEV; - if (dev == NULL) - goto out_unlock; - - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_unlock; - - /* - * You may not queue a frame bigger than the mtu. This is the lowest level - * raw protocol and you must do your own fragmentation at this level. - */ - - err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len) - goto out_unlock; - - err = -ENOBUFS; - skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL); - - /* - * If the write buffer is full, then tough. At this level the user gets to - * deal with the problem - do your own algorithmic backoffs. That's far - * more flexible. - */ - - if (skb == NULL) - goto out_unlock; - - /* - * Fill it in - */ - - /* FIXME: Save some space for broken drivers that write a - * hard header at transmission time by themselves. PPP is the - * notable one here. This should really be fixed at the driver level. - */ - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); - - /* Try to align data part correctly */ - if (dev->hard_header) { - skb->data -= dev->hard_header_len; - skb->tail -= dev->hard_header_len; - if (len < dev->hard_header_len) - skb_reset_network_header(skb); - } - - /* Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); - skb->protocol = proto; - skb->dev = dev; - skb->priority = sk->sk_priority; - if (err) - goto out_free; - - /* - * Now send it - */ - - dev_queue_xmit(skb); - dev_put(dev); - return(len); - -out_free: - kfree_skb(skb); -out_unlock: - if (dev) - dev_put(dev); - return err; -} - -static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, - unsigned int res) -{ - struct sk_filter *filter; - int tag = skb->skb_tag; - - if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) - return 0; - - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) - res = sk_run_filter(skb, filter->insns, filter->len); - rcu_read_unlock_bh(); - - return res; -} - -/* - This function makes lazy skb cloning in hope that most of packets - are discarded by BPF. - - Note tricky part: we DO mangle shared skb! skb->data, skb->len - and skb->cb are mangled. It works because (and until) packets - falling here are owned by current CPU. Output packets are cloned - by dev_queue_xmit_nit(), input packets are processed by net_bh - sequencially, so that if we return skb to original state on exit, - we will not harm anyone. 
- */ - -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) -{ - struct sock *sk; - struct sockaddr_ll *sll; - struct packet_sock *po; - u8 * skb_head = skb->data; - int skb_len = skb->len; - unsigned int snaplen, res; - - if (skb->pkt_type == PACKET_LOOPBACK) - goto drop; - - sk = pt->af_packet_priv; - po = pkt_sk(sk); - - skb->dev = dev; - - if (dev->hard_header) { - /* The device has an explicit notion of ll header, - exported to higher levels. - - Otherwise, the device hides datails of it frame - structure, so that corresponding packet head - never delivered to user. - */ - if (sk->sk_type != SOCK_DGRAM) - skb_push(skb, skb->data - skb_mac_header(skb)); - else if (skb->pkt_type == PACKET_OUTGOING) { - /* Special case: outgoing packets have ll header at head */ - skb_pull(skb, skb_network_offset(skb)); - } - } - - snaplen = skb->len; - - res = run_filter(skb, sk, snaplen); - if (!res) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - - if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned)sk->sk_rcvbuf) - goto drop_n_acct; - - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - if (nskb == NULL) - goto drop_n_acct; - - if (skb_head != skb->data) { - skb->data = skb_head; - skb->len = skb_len; - } - kfree_skb(skb); - skb = nskb; - } - - BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > - sizeof(skb->cb)); - - sll = &PACKET_SKB_CB(skb)->sa.ll; - sll->sll_family = AF_PACKET; - sll->sll_hatype = dev->type; - sll->sll_protocol = skb->protocol; - sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) - sll->sll_ifindex = orig_dev->ifindex; - else - sll->sll_ifindex = dev->ifindex; - sll->sll_halen = 0; - - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); - - PACKET_SKB_CB(skb)->origlen = skb->len; - - if (pskb_trim(skb, snaplen)) - goto drop_n_acct; - - skb_set_owner_r(skb, sk); - skb->dev = NULL; - dst_release(skb->dst); - skb->dst = NULL; - - /* drop conntrack reference */ - nf_reset(skb); - - spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_packets++; - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock(&sk->sk_receive_queue.lock); - sk->sk_data_ready(sk, skb->len); - return 0; - -drop_n_acct: - spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_drops++; - spin_unlock(&sk->sk_receive_queue.lock); - -drop_n_restore: - if (skb_head != skb->data && skb_shared(skb)) { - skb->data = skb_head; - skb->len = skb_len; - } -drop: - kfree_skb(skb); - return 0; -} - -#ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) -{ - struct sock *sk; - struct packet_sock *po; - struct sockaddr_ll *sll; - struct tpacket_hdr *h; - u8 * skb_head = skb->data; - int skb_len = skb->len; - unsigned int snaplen, res; - unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; - unsigned short macoff, netoff; - struct sk_buff *copy_skb = NULL; - struct timeval tv; - - if (skb->pkt_type == PACKET_LOOPBACK) - goto drop; - - sk = pt->af_packet_priv; - po = pkt_sk(sk); - - if (dev->hard_header) { - if (sk->sk_type != SOCK_DGRAM) - skb_push(skb, skb->data - skb_mac_header(skb)); - else if (skb->pkt_type == PACKET_OUTGOING) { - /* Special case: outgoing packets have ll header at head */ - skb_pull(skb, skb_network_offset(skb)); - } - } - - if (skb->ip_summed == CHECKSUM_PARTIAL) - status |= 
TP_STATUS_CSUMNOTREADY; - - snaplen = skb->len; - - res = run_filter(skb, sk, snaplen); - if (!res) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - - if (sk->sk_type == SOCK_DGRAM) { - macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; - } else { - unsigned maclen = skb_network_offset(skb); - netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); - macoff = netoff - maclen; - } - - if (macoff + snaplen > po->frame_size) { - if (po->copy_thresh && - atomic_read(&sk->sk_rmem_alloc) + skb->truesize < - (unsigned)sk->sk_rcvbuf) { - if (skb_shared(skb)) { - copy_skb = skb_clone(skb, GFP_ATOMIC); - } else { - copy_skb = skb_get(skb); - skb_head = skb->data; - } - if (copy_skb) - skb_set_owner_r(copy_skb, sk); - } - snaplen = po->frame_size - macoff; - if ((int)snaplen < 0) - snaplen = 0; - } - - spin_lock(&sk->sk_receive_queue.lock); - h = packet_lookup_frame(po, po->head); - - if (h->tp_status) - goto ring_is_full; - po->head = po->head != po->frame_max ? po->head+1 : 0; - po->stats.tp_packets++; - if (copy_skb) { - status |= TP_STATUS_COPY; - __skb_queue_tail(&sk->sk_receive_queue, copy_skb); - } - if (!po->stats.tp_drops) - status &= ~TP_STATUS_LOSING; - spin_unlock(&sk->sk_receive_queue.lock); - - skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); - - h->tp_len = skb->len; - h->tp_snaplen = snaplen; - h->tp_mac = macoff; - h->tp_net = netoff; - if (skb->tstamp.tv64 == 0) { - __net_timestamp(skb); - sock_enable_timestamp(sk); - } - tv = ktime_to_timeval(skb->tstamp); - h->tp_sec = tv.tv_sec; - h->tp_usec = tv.tv_usec; - - sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); - sll->sll_halen = 0; - if (dev->hard_header_parse) - sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); - sll->sll_family = AF_PACKET; - sll->sll_hatype = dev->type; - sll->sll_protocol = skb->protocol; - sll->sll_pkttype = skb->pkt_type; - if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) - sll->sll_ifindex = orig_dev->ifindex; - else - sll->sll_ifindex = dev->ifindex; - - h->tp_status = status; - smp_mb(); - - { - struct page *p_start, *p_end; - u8 *h_end = (u8 *)h + macoff + snaplen - 1; - - p_start = virt_to_page(h); - p_end = virt_to_page(h_end); - while (p_start <= p_end) { - flush_dcache_page(p_start); - p_start++; - } - } - - sk->sk_data_ready(sk, 0); - -drop_n_restore: - if (skb_head != skb->data && skb_shared(skb)) { - skb->data = skb_head; - skb->len = skb_len; - } -drop: - kfree_skb(skb); - return 0; - -ring_is_full: - po->stats.tp_drops++; - spin_unlock(&sk->sk_receive_queue.lock); - - sk->sk_data_ready(sk, 0); - if (copy_skb) - kfree_skb(copy_skb); - goto drop_n_restore; -} - -#endif - - -static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name; - struct sk_buff *skb; - struct net_device *dev; - __be16 proto; - unsigned char *addr; - int ifindex, err, reserve = 0; - - if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) - return -EPERM; - - /* - * Get and verify the address. 
- */ - - if (saddr == NULL) { - struct packet_sock *po = pkt_sk(sk); - - ifindex = po->ifindex; - proto = po->num; - addr = NULL; - } else { - err = -EINVAL; - if (msg->msg_namelen < sizeof(struct sockaddr_ll)) - goto out; - if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) - goto out; - ifindex = saddr->sll_ifindex; - proto = saddr->sll_protocol; - addr = saddr->sll_addr; - } - - - dev = dev_get_by_index(ifindex); - err = -ENXIO; - if (dev == NULL) - goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_unlock; - - err = -EMSGSIZE; - if (len > dev->mtu+reserve) - goto out_unlock; - - skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), - msg->msg_flags & MSG_DONTWAIT, &err); - if (skb==NULL) - goto out_unlock; - - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); - - if (dev->hard_header) { - int res; - err = -EINVAL; - res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); - if (sock->type != SOCK_DGRAM) { - skb_reset_tail_pointer(skb); - skb->len = 0; - } else if (res < 0) - goto out_free; - } - - /* Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); - if (err) - goto out_free; - - skb->protocol = proto; - skb->dev = dev; - skb->priority = sk->sk_priority; - - /* - * Now send it - */ - - err = dev_queue_xmit(skb); - if (err > 0 && (err = net_xmit_errno(err)) != 0) - goto out_unlock; - - dev_put(dev); - - return(len); - -out_free: - kfree_skb(skb); -out_unlock: - if (dev) - dev_put(dev); -out: - return err; -} - -/* - * Close a PACKET socket. This is fairly simple. We immediately go - * to 'closed' state and remove our protocol entry in the device list. - */ - -static int packet_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - struct packet_sock *po; - - if (!sk) - return 0; - - po = pkt_sk(sk); - - write_lock_bh(&packet_sklist_lock); - sk_del_node_init(sk); - write_unlock_bh(&packet_sklist_lock); - - /* - * Unhook packet receive handler. - */ - - if (po->running) { - /* - * Remove the protocol hook - */ - dev_remove_pack(&po->prot_hook); - po->running = 0; - po->num = 0; - __sock_put(sk); - } - - packet_flush_mclist(sk); - -#ifdef CONFIG_PACKET_MMAP - if (po->pg_vec) { - struct tpacket_req req; - memset(&req, 0, sizeof(req)); - packet_set_ring(sk, &req, 1); - } -#endif - - /* - * Now the socket is dead. No more input will appear. - */ - - sock_orphan(sk); - sock->sk = NULL; - - /* Purge queues */ - - skb_queue_purge(&sk->sk_receive_queue); - - sock_put(sk); - return 0; -} - -/* - * Attach a packet hook. - */ - -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) -{ - struct packet_sock *po = pkt_sk(sk); - /* - * Detach an existing hook if present. - */ - - lock_sock(sk); - - spin_lock(&po->bind_lock); - if (po->running) { - __sock_put(sk); - po->running = 0; - po->num = 0; - spin_unlock(&po->bind_lock); - dev_remove_pack(&po->prot_hook); - spin_lock(&po->bind_lock); - } - - po->num = protocol; - po->prot_hook.type = protocol; - po->prot_hook.dev = dev; - - po->ifindex = dev ? 
dev->ifindex : 0; - - if (protocol == 0) - goto out_unlock; - - if (dev) { - if (dev->flags&IFF_UP) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } else { - sk->sk_err = ENETDOWN; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_error_report(sk); - } - } else { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } - -out_unlock: - spin_unlock(&po->bind_lock); - release_sock(sk); - return 0; -} - -/* - * Bind a packet socket to a device - */ - -static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sock *sk=sock->sk; - char name[15]; - struct net_device *dev; - int err = -ENODEV; - - /* - * Check legality - */ - - if (addr_len != sizeof(struct sockaddr)) - return -EINVAL; - strlcpy(name,uaddr->sa_data,sizeof(name)); - - dev = dev_get_by_name(name); - if (dev) { - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - dev_put(dev); - } - return err; -} - -static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; - struct sock *sk=sock->sk; - struct net_device *dev = NULL; - int err; - - - /* - * Check legality - */ - - if (addr_len < sizeof(struct sockaddr_ll)) - return -EINVAL; - if (sll->sll_family != AF_PACKET) - return -EINVAL; - - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - if (dev) - dev_put(dev); - -out: - return err; -} - -static struct proto packet_proto = { - .name = "PACKET", - .owner = THIS_MODULE, - .obj_size = sizeof(struct packet_sock), -}; - -/* - * Create a packet of type SOCK_PACKET. - */ - -static int packet_create(struct socket *sock, int protocol) -{ - struct sock *sk; - struct packet_sock *po; - __be16 proto = (__force __be16)protocol; /* weird, but documented */ - int err; - - if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) - return -EPERM; - if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && - sock->type != SOCK_PACKET) - return -ESOCKTNOSUPPORT; - - sock->state = SS_UNCONNECTED; - - err = -ENOBUFS; - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); - if (sk == NULL) - goto out; - - sock->ops = &packet_ops; - if (sock->type == SOCK_PACKET) - sock->ops = &packet_ops_spkt; - - sock_init_data(sock, sk); - - po = pkt_sk(sk); - sk->sk_family = PF_PACKET; - po->num = proto; - - sk->sk_destruct = packet_sock_destruct; - atomic_inc(&packet_socks_nr); - - /* - * Attach a protocol block - */ - - spin_lock_init(&po->bind_lock); - po->prot_hook.func = packet_rcv; - - if (sock->type == SOCK_PACKET) - po->prot_hook.func = packet_rcv_spkt; - - po->prot_hook.af_packet_priv = sk; - - if (proto) { - po->prot_hook.type = proto; - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } - - write_lock_bh(&packet_sklist_lock); - sk_add_node(sk, &packet_sklist); - write_unlock_bh(&packet_sklist_lock); - return(0); -out: - return err; -} - -/* - * Pull a packet from our receive queue and hand it to the user. - * If necessary we block. - */ - -static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) -{ - struct sock *sk = sock->sk; - struct sk_buff *skb; - int copied, err; - struct sockaddr_ll *sll; - - err = -EINVAL; - if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) - goto out; - -#if 0 - /* What error should we return now? EUNATTACH? 
*/ - if (pkt_sk(sk)->ifindex < 0) - return -ENODEV; -#endif - - /* - * Call the generic datagram receiver. This handles all sorts - * of horrible races and re-entrancy so we can forget about it - * in the protocol layers. - * - * Now it will return ENETDOWN, if device have just gone down, - * but then it will block. - */ - - skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); - - /* - * An error occurred so return it. Because skb_recv_datagram() - * handles the blocking we don't see and worry about blocking - * retries. - */ - - if (skb == NULL) - goto out; - - /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - - copied = skb->len; - if (copied > len) - { - copied=len; - msg->msg_flags|=MSG_TRUNC; - } - - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (err) - goto out_free; - - sock_recv_timestamp(msg, sk, skb); - - if (msg->msg_name) - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); - - if (pkt_sk(sk)->auxdata) { - struct tpacket_auxdata aux; - - aux.tp_status = TP_STATUS_USER; - if (skb->ip_summed == CHECKSUM_PARTIAL) - aux.tp_status |= TP_STATUS_CSUMNOTREADY; - aux.tp_len = PACKET_SKB_CB(skb)->origlen; - aux.tp_snaplen = skb->len; - aux.tp_mac = 0; - aux.tp_net = skb_network_offset(skb); - - put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); - } - - /* - * Free or return the buffer as appropriate. Again this - * hides all the races and re-entrancy issues from us. - */ - err = (flags&MSG_TRUNC) ? 
skb->len : copied; - -out_free: - skb_free_datagram(sk, skb); -out: - return err; -} - -static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) -{ - struct net_device *dev; - struct sock *sk = sock->sk; - - if (peer) - return -EOPNOTSUPP; - - uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(pkt_sk(sk)->ifindex); - if (dev) { - strlcpy(uaddr->sa_data, dev->name, 15); - dev_put(dev); - } else - memset(uaddr->sa_data, 0, 14); - *uaddr_len = sizeof(*uaddr); - - return 0; -} - -static int packet_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) -{ - struct net_device *dev; - struct sock *sk = sock->sk; - struct packet_sock *po = pkt_sk(sk); - struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; - - if (peer) - return -EOPNOTSUPP; - - sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; - dev = dev_get_by_index(po->ifindex); - if (dev) { - sll->sll_hatype = dev->type; - sll->sll_halen = dev->addr_len; - memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); - dev_put(dev); - } else { - sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ - sll->sll_halen = 0; - } - *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; - - return 0; -} - -static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) -{ - switch (i->type) { - case PACKET_MR_MULTICAST: - if (what > 0) - dev_mc_add(dev, i->addr, i->alen, 0); - else - dev_mc_delete(dev, i->addr, i->alen, 0); - break; - case PACKET_MR_PROMISC: - dev_set_promiscuity(dev, what); - break; - case PACKET_MR_ALLMULTI: - dev_set_allmulti(dev, what); - break; - default:; - } -} - -static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) -{ - for ( ; i; i=i->next) { - if (i->ifindex == dev->ifindex) - packet_dev_mc(dev, i, what); - } -} - -static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) -{ - struct packet_sock *po = pkt_sk(sk); - struct packet_mclist *ml, *i; - struct net_device *dev; - int err; - - rtnl_lock(); - - err = -ENODEV; - dev = __dev_get_by_index(mreq->mr_ifindex); - if (!dev) - goto done; - - err = -EINVAL; - if (mreq->mr_alen > dev->addr_len) - goto done; - - err = -ENOBUFS; - i = kmalloc(sizeof(*i), GFP_KERNEL); - if (i == NULL) - goto done; - - err = 0; - for (ml = po->mclist; ml; ml = ml->next) { - if (ml->ifindex == mreq->mr_ifindex && - ml->type == mreq->mr_type && - ml->alen == mreq->mr_alen && - memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { - ml->count++; - /* Free the new element ... 
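- * because a matching entry already exists and its refcount was
- * bumped just above. A minimal userspace sketch of the request this
- * services ("fd" and "ifindex" are illustrative):
- *
- *	struct packet_mreq mr = { 0 };
- *	mr.mr_ifindex = ifindex;
- *	mr.mr_type    = PACKET_MR_PROMISC;
- *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
- *	           &mr, sizeof(mr));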
*/ - kfree(i); - goto done; - } - } - - i->type = mreq->mr_type; - i->ifindex = mreq->mr_ifindex; - i->alen = mreq->mr_alen; - memcpy(i->addr, mreq->mr_address, i->alen); - i->count = 1; - i->next = po->mclist; - po->mclist = i; - packet_dev_mc(dev, i, +1); - -done: - rtnl_unlock(); - return err; -} - -static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) -{ - struct packet_mclist *ml, **mlp; - - rtnl_lock(); - - for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { - if (ml->ifindex == mreq->mr_ifindex && - ml->type == mreq->mr_type && - ml->alen == mreq->mr_alen && - memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { - if (--ml->count == 0) { - struct net_device *dev; - *mlp = ml->next; - dev = dev_get_by_index(ml->ifindex); - if (dev) { - packet_dev_mc(dev, ml, -1); - dev_put(dev); - } - kfree(ml); - } - rtnl_unlock(); - return 0; - } - } - rtnl_unlock(); - return -EADDRNOTAVAIL; -} - -static void packet_flush_mclist(struct sock *sk) -{ - struct packet_sock *po = pkt_sk(sk); - struct packet_mclist *ml; - - if (!po->mclist) - return; - - rtnl_lock(); - while ((ml = po->mclist) != NULL) { - struct net_device *dev; - - po->mclist = ml->next; - if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { - packet_dev_mc(dev, ml, -1); - dev_put(dev); - } - kfree(ml); - } - rtnl_unlock(); -} - -static int -packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) -{ - struct sock *sk = sock->sk; - struct packet_sock *po = pkt_sk(sk); - int ret; - - if (level != SOL_PACKET) - return -ENOPROTOOPT; - - switch(optname) { - case PACKET_ADD_MEMBERSHIP: - case PACKET_DROP_MEMBERSHIP: - { - struct packet_mreq_max mreq; - int len = optlen; - memset(&mreq, 0, sizeof(mreq)); - if (len < sizeof(struct packet_mreq)) - return -EINVAL; - if (len > sizeof(mreq)) - len = sizeof(mreq); - if (copy_from_user(&mreq,optval,len)) - return -EFAULT; - if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) - return -EINVAL; - if (optname == PACKET_ADD_MEMBERSHIP) - ret = packet_mc_add(sk, &mreq); - else - ret = packet_mc_drop(sk, &mreq); - return ret; - } - -#ifdef CONFIG_PACKET_MMAP - case PACKET_RX_RING: - { - struct tpacket_req req; - - if (optlen<sizeof(req)) - return -EINVAL; - if (copy_from_user(&req,optval,sizeof(req))) - return -EFAULT; - return packet_set_ring(sk, &req, 0); - } - case PACKET_COPY_THRESH: - { - int val; - - if (optlen!=sizeof(val)) - return -EINVAL; - if (copy_from_user(&val,optval,sizeof(val))) - return -EFAULT; - - pkt_sk(sk)->copy_thresh = val; - return 0; - } -#endif - case PACKET_AUXDATA: - { - int val; - - if (optlen < sizeof(val)) - return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) - return -EFAULT; - - po->auxdata = !!val; - return 0; - } - case PACKET_ORIGDEV: - { - int val; - - if (optlen < sizeof(val)) - return -EINVAL; - if (copy_from_user(&val, optval, sizeof(val))) - return -EFAULT; - - po->origdev = !!val; - return 0; - } - default: - return -ENOPROTOOPT; - } -} - -static int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - int len; - int val; - struct sock *sk = sock->sk; - struct packet_sock *po = pkt_sk(sk); - void *data; - struct tpacket_stats st; - - if (level != SOL_PACKET) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - if (len < 0) - return -EINVAL; - - switch(optname) { - case PACKET_STATISTICS: - if (len > sizeof(struct tpacket_stats)) - len = sizeof(struct tpacket_stats); - spin_lock_bh(&sk->sk_receive_queue.lock); - st = po->stats; - memset(&po->stats, 0, sizeof(st)); - spin_unlock_bh(&sk->sk_receive_queue.lock); - st.tp_packets += st.tp_drops; - - data = &st; - break; - case PACKET_AUXDATA: - if (len > sizeof(int)) - len = sizeof(int); - val = po->auxdata; - - data = &val; - break; - 
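- /*
-  * A minimal userspace sketch for the PACKET_STATISTICS case above
-  * ("fd" is an illustrative, already-open PF_PACKET socket). Note
-  * that the kernel zeroes its counters on each read and folds drops
-  * into tp_packets:
-  *
-  *	struct tpacket_stats st;
-  *	socklen_t len = sizeof(st);
-  *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
-  */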
case PACKET_ORIGDEV: - if (len > sizeof(int)) - len = sizeof(int); - val = po->origdev; - - data = &val; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, data, len)) - return -EFAULT; - return 0; -} - - -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) -{ - struct sock *sk; - struct hlist_node *node; - struct net_device *dev = data; - - read_lock(&packet_sklist_lock); - sk_for_each(sk, node, &packet_sklist) { - struct packet_sock *po = pkt_sk(sk); - - switch (msg) { - case NETDEV_UNREGISTER: - if (po->mclist) - packet_dev_mclist(dev, po->mclist, -1); - /* fallthrough */ - - case NETDEV_DOWN: - if (dev->ifindex == po->ifindex) { - spin_lock(&po->bind_lock); - if (po->running) { - __dev_remove_pack(&po->prot_hook); - __sock_put(sk); - po->running = 0; - sk->sk_err = ENETDOWN; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_error_report(sk); - } - if (msg == NETDEV_UNREGISTER) { - po->ifindex = -1; - po->prot_hook.dev = NULL; - } - spin_unlock(&po->bind_lock); - } - break; - case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; - } - spin_unlock(&po->bind_lock); - break; - } - } - read_unlock(&packet_sklist_lock); - return NOTIFY_DONE; -} - - -static int packet_ioctl(struct socket *sock, unsigned int cmd, - unsigned long arg) -{ - struct sock *sk = sock->sk; - - switch(cmd) { - case SIOCOUTQ: - { - int amount = atomic_read(&sk->sk_wmem_alloc); - return put_user(amount, (int __user *)arg); - } - case SIOCINQ: - { - struct sk_buff *skb; - int amount = 0; - - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb) - amount = skb->len; - spin_unlock_bh(&sk->sk_receive_queue.lock); - return put_user(amount, (int __user *)arg); - } - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - -#ifdef CONFIG_INET - case SIOCADDRT: - case SIOCDELRT: - case SIOCDARP: - case SIOCGARP: - case SIOCSARP: - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCSIFFLAGS: - return inet_dgram_ops.ioctl(sock, cmd, arg); -#endif - - default: - return -ENOIOCTLCMD; - } - return 0; -} - -#ifndef CONFIG_PACKET_MMAP -#define packet_mmap sock_no_mmap -#define packet_poll datagram_poll -#else - -static unsigned int packet_poll(struct file * file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct packet_sock *po = pkt_sk(sk); - unsigned int mask = datagram_poll(file, sock, wait); - - spin_lock_bh(&sk->sk_receive_queue.lock); - if (po->pg_vec) { - unsigned last = po->head ? po->head-1 : po->frame_max; - struct tpacket_hdr *h; - - h = packet_lookup_frame(po, last); - - if (h->tp_status) - mask |= POLLIN | POLLRDNORM; - } - spin_unlock_bh(&sk->sk_receive_queue.lock); - return mask; -} - - -/* Dirty? Well, I still did not learn better way to account - * for user mmaps. 
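- *
- * The counter pairs with a userspace mapping of the ring buffer; a
- * minimal sketch, assuming "fd" already has a ring attached via the
- * PACKET_RX_RING setsockopt with request "req" (both illustrative):
- *
- *	size_t sz = (size_t)req.tp_block_size * req.tp_block_nr;
- *	void *ring = mmap(NULL, sz, PROT_READ | PROT_WRITE,
- *	                  MAP_SHARED, fd, 0);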
- */ - -static void packet_mm_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket * sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_inc(&pkt_sk(sk)->mapped); -} - -static void packet_mm_close(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct socket * sock = file->private_data; - struct sock *sk = sock->sk; - - if (sk) - atomic_dec(&pkt_sk(sk)->mapped); -} - -static struct vm_operations_struct packet_mmap_ops = { - .open = packet_mm_open, - .close =packet_mm_close, -}; - -static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) -{ - return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); -} - -static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (likely(pg_vec[i])) - free_pages((unsigned long) pg_vec[i], order); - } - kfree(pg_vec); -} - -static inline char *alloc_one_pg_vec_page(unsigned long order) -{ - return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, - order); -} - -static char **alloc_pg_vec(struct tpacket_req *req, int order) -{ - unsigned int block_nr = req->tp_block_nr; - char **pg_vec; - int i; - - pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); - if (unlikely(!pg_vec)) - goto out; - - for (i = 0; i < block_nr; i++) { - pg_vec[i] = alloc_one_pg_vec_page(order); - if (unlikely(!pg_vec[i])) - goto out_free_pgvec; - } - -out: - return pg_vec; - -out_free_pgvec: - free_pg_vec(pg_vec, order, block_nr); - pg_vec = NULL; - goto out; -} - -static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) -{ - char **pg_vec = NULL; - struct packet_sock *po = pkt_sk(sk); - int was_running, order = 0; - __be16 num; - int err = 0; - - if (req->tp_block_nr) { - int i, l; - - /* Sanity tests and some calculations */ - - if (unlikely(po->pg_vec)) - return -EBUSY; - - if (unlikely((int)req->tp_block_size <= 0)) - return -EINVAL; - if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) - return -EINVAL; - if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) - return -EINVAL; - if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) - return -EINVAL; - - po->frames_per_block = req->tp_block_size/req->tp_frame_size; - if (unlikely(po->frames_per_block <= 0)) - return -EINVAL; - if (unlikely((po->frames_per_block * req->tp_block_nr) != - req->tp_frame_nr)) - return -EINVAL; - - err = -ENOMEM; - order = get_order(req->tp_block_size); - pg_vec = alloc_pg_vec(req, order); - if (unlikely(!pg_vec)) - goto out; - - l = 0; - for (i = 0; i < req->tp_block_nr; i++) { - char *ptr = pg_vec[i]; - struct tpacket_hdr *header; - int k; - - for (k = 0; k < po->frames_per_block; k++) { - header = (struct tpacket_hdr *) ptr; - header->tp_status = TP_STATUS_KERNEL; - ptr += req->tp_frame_size; - } - } - /* Done */ - } else { - if (unlikely(req->tp_frame_nr)) - return -EINVAL; - } - - lock_sock(sk); - - /* Detach socket from network */ - spin_lock(&po->bind_lock); - was_running = po->running; - num = po->num; - if (was_running) { - __dev_remove_pack(&po->prot_hook); - po->num = 0; - po->running = 0; - __sock_put(sk); - } - spin_unlock(&po->bind_lock); - - synchronize_net(); - - err = -EBUSY; - if (closing || atomic_read(&po->mapped) == 0) { - err = 0; -#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) - - spin_lock_bh(&sk->sk_receive_queue.lock); - pg_vec = XC(po->pg_vec, pg_vec); - po->frame_max = (req->tp_frame_nr - 1); - po->head = 0; - po->frame_size = 
req->tp_frame_size; - spin_unlock_bh(&sk->sk_receive_queue.lock); - - order = XC(po->pg_vec_order, order); - req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr); - - po->pg_vec_pages = req->tp_block_size/PAGE_SIZE; - po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv; - skb_queue_purge(&sk->sk_receive_queue); -#undef XC - if (atomic_read(&po->mapped)) - printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); - } - - spin_lock(&po->bind_lock); - if (was_running && !po->running) { - sock_hold(sk); - po->running = 1; - po->num = num; - dev_add_pack(&po->prot_hook); - } - spin_unlock(&po->bind_lock); - - release_sock(sk); - - if (pg_vec) - free_pg_vec(pg_vec, order, req->tp_block_nr); -out: - return err; -} - -static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) -{ - struct sock *sk = sock->sk; - struct packet_sock *po = pkt_sk(sk); - unsigned long size; - unsigned long start; - int err = -EINVAL; - int i; - - if (vma->vm_pgoff) - return -EINVAL; - - size = vma->vm_end - vma->vm_start; - - lock_sock(sk); - if (po->pg_vec == NULL) - goto out; - if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) - goto out; - - start = vma->vm_start; - for (i = 0; i < po->pg_vec_len; i++) { - struct page *page = virt_to_page(po->pg_vec[i]); - int pg_num; - - for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) { - err = vm_insert_page(vma, start, page); - if (unlikely(err)) - goto out; - start += PAGE_SIZE; - } - } - atomic_inc(&po->mapped); - vma->vm_ops = &packet_mmap_ops; - err = 0; - -out: - release_sock(sk); - return err; -} -#endif - - -static const struct proto_ops packet_ops_spkt = { - .family = PF_PACKET, - .owner = THIS_MODULE, - .release = packet_release, - .bind = packet_bind_spkt, - .connect = sock_no_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = packet_getname_spkt, - .poll = datagram_poll, - .ioctl = packet_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, - .sendmsg = packet_sendmsg_spkt, - .recvmsg = packet_recvmsg, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -static const struct proto_ops packet_ops = { - .family = PF_PACKET, - .owner = THIS_MODULE, - .release = packet_release, - .bind = packet_bind, - .connect = sock_no_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = packet_getname, - .poll = packet_poll, - .ioctl = packet_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, - .setsockopt = packet_setsockopt, - .getsockopt = packet_getsockopt, - .sendmsg = packet_sendmsg, - .recvmsg = packet_recvmsg, - .mmap = packet_mmap, - .sendpage = sock_no_sendpage, -}; - -static struct net_proto_family packet_family_ops = { - .family = PF_PACKET, - .create = packet_create, - .owner = THIS_MODULE, -}; - -static struct notifier_block packet_netdev_notifier = { - .notifier_call =packet_notifier, -}; - -#ifdef CONFIG_PROC_FS -static inline struct sock *packet_seq_idx(loff_t off) -{ - struct sock *s; - struct hlist_node *node; - - sk_for_each(s, node, &packet_sklist) { - if (!off--) - return s; - } - return NULL; -} - -static void *packet_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock(&packet_sklist_lock); - return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return (v == SEQ_START_TOKEN) - ? 
sk_head(&packet_sklist) - : sk_next((struct sock*)v) ; -} - -static void packet_seq_stop(struct seq_file *seq, void *v) -{ - read_unlock(&packet_sklist_lock); -} - -static int packet_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); - else { - struct sock *s = v; - const struct packet_sock *po = pkt_sk(s); - - seq_printf(seq, - "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", - s, - atomic_read(&s->sk_refcnt), - s->sk_type, - ntohs(po->num), - po->ifindex, - po->running, - atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), - sock_i_ino(s) ); - } - - return 0; -} - -static struct seq_operations packet_seq_ops = { - .start = packet_seq_start, - .next = packet_seq_next, - .stop = packet_seq_stop, - .show = packet_seq_show, -}; - -static int packet_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &packet_seq_ops); -} - -static const struct file_operations packet_seq_fops = { - .owner = THIS_MODULE, - .open = packet_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#endif - -static void __exit packet_exit(void) -{ - proc_net_remove("packet"); - unregister_netdevice_notifier(&packet_netdev_notifier); - sock_unregister(PF_PACKET); - proto_unregister(&packet_proto); -} - -static int __init packet_init(void) -{ - int rc = proto_register(&packet_proto, 0); - - if (rc != 0) - goto out; - - sock_register(&packet_family_ops); - register_netdevice_notifier(&packet_netdev_notifier); - proc_net_fops_create("packet", 0, &packet_seq_fops); -out: - return rc; -} - -module_init(packet_init); -module_exit(packet_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_PACKET); diff -Nurb linux-2.6.22-594/net/socket.c linux-2.6.22-595/net/socket.c --- linux-2.6.22-594/net/socket.c 2008-03-20 00:05:19.000000000 -0400 +++ linux-2.6.22-595/net/socket.c 2008-03-20 00:14:03.000000000 -0400 @@ -1122,12 +1122,17 @@ if (type < 0 || type >= SOCK_MAX) return -EINVAL; + /* + * Hack no. 2 - Sapan + * Clean this up later + * if (!nx_check(0, VS_ADMIN)) { if (family == PF_INET && !current_nx_info_has_v4()) return -EAFNOSUPPORT; if (family == PF_INET6 && !current_nx_info_has_v6()) return -EAFNOSUPPORT; } + */ /* Compatibility. diff -Nurb linux-2.6.22-594/net/socket.c.orig linux-2.6.22-595/net/socket.c.orig --- linux-2.6.22-594/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.22-595/net/socket.c.orig 2008-03-20 00:05:19.000000000 -0400 @@ -0,0 +1,2400 @@ +/* + * NET An implementation of the SOCKET network access protocol. + * + * Version: @(#)socket.c 1.1.93 18/02/95 + * + * Authors: Orest Zborowski, + * Ross Biro + * Fred N. van Kempen, + * + * Fixes: + * Anonymous : NOTSOCK/BADF cleanup. Error fix in + * shutdown() + * Alan Cox : verify_area() fixes + * Alan Cox : Removed DDI + * Jonathan Kamens : SOCK_DGRAM reconnect bug + * Alan Cox : Moved a load of checks to the very + * top level. + * Alan Cox : Move address structures to/from user + * mode above the protocol layers. + * Rob Janssen : Allow 0 length sends. + * Alan Cox : Asynchronous I/O support (cribbed from the + * tty drivers). + * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) + * Jeff Uphoff : Made max number of sockets command-line + * configurable. + * Matti Aarnio : Made the number of sockets dynamic, + * to be allocated when needed, and mr. + * Uphoff's max is used as max to be + * allowed to allocate. + * Linus : Argh. 
removed all the socket allocation + * altogether: it's in the inode now. + * Alan Cox : Made sock_alloc()/sock_release() public + * for NetROM and future kernel nfsd type + * stuff. + * Alan Cox : sendmsg/recvmsg basics. + * Tom Dyas : Export net symbols. + * Marcin Dalecki : Fixed problems with CONFIG_NET="n". + * Alan Cox : Added thread locking to sys_* calls + * for sockets. May have errors at the + * moment. + * Kevin Buhr : Fixed the dumb errors in the above. + * Andi Kleen : Some small cleanups, optimizations, + * and fixed a copy_from_user() bug. + * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) + * Tigran Aivazian : Made listen(2) backlog sanity checks + * protocol-independent + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * This module is effectively the top level interface to the BSD socket + * paradigm. + * + * Based upon Swansea University Computer Society NET3.039 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +static int sock_no_open(struct inode *irrelevant, struct file *dontcare); +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static int sock_mmap(struct file *file, struct vm_area_struct *vma); + +static int sock_close(struct inode *inode, struct file *file); +static unsigned int sock_poll(struct file *file, + struct poll_table_struct *wait); +static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#ifdef CONFIG_COMPAT +static long compat_sock_ioctl(struct file *file, + unsigned int cmd, unsigned long arg); +#endif +static int sock_fasync(int fd, struct file *filp, int on); +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more); + +/* + * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear + * in the operation structures but are done directly via the socketcall() multiplexor. + */ + +static const struct file_operations socket_file_ops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .aio_read = sock_aio_read, + .aio_write = sock_aio_write, + .poll = sock_poll, + .unlocked_ioctl = sock_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = compat_sock_ioctl, +#endif + .mmap = sock_mmap, + .open = sock_no_open, /* special open code to disallow open via /proc */ + .release = sock_close, + .fasync = sock_fasync, + .sendpage = sock_sendpage, + .splice_write = generic_splice_sendpage, +}; + +/* + * The protocol list. Each protocol is registered in here. + */ + +static DEFINE_SPINLOCK(net_family_lock); +static const struct net_proto_family *net_families[NPROTO] __read_mostly; + +/* + * Statistics counters of the socket lists + */ + +static DEFINE_PER_CPU(int, sockets_in_use) = 0; + +/* + * Support routines. 
+ * Move socket addresses back and forth across the kernel/user + * divide and look after the messy bits. + */ + +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - + 16 for IP, 16 for IPX, + 24 for IPv6, + about 80 for AX.25 + must be at least one bigger than + the AF_UNIX size (see net/unix/af_unix.c + :unix_mkname()). + */ + +/** + * move_addr_to_kernel - copy a socket address into kernel space + * @uaddr: Address in user space + * @kaddr: Address in kernel space + * @ulen: Length in user space + * + * The address is copied into kernel space. If the provided address is + * too long an error code of -EINVAL is returned. If the copy gives + * invalid addresses -EFAULT is returned. On a success 0 is returned. + */ + +int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) +{ + if (ulen < 0 || ulen > MAX_SOCK_ADDR) + return -EINVAL; + if (ulen == 0) + return 0; + if (copy_from_user(kaddr, uaddr, ulen)) + return -EFAULT; + return audit_sockaddr(ulen, kaddr); +} + +/** + * move_addr_to_user - copy an address to user space + * @kaddr: kernel space address + * @klen: length of address in kernel + * @uaddr: user space address + * @ulen: pointer to user length field + * + * The value pointed to by ulen on entry is the buffer length available. + * This is overwritten with the buffer space used. -EINVAL is returned + * if an overlong buffer is specified or a negative buffer size. -EFAULT + * is returned if either the buffer or the length field are not + * accessible. + * After copying the data up to the limit the user specifies, the true + * length of the data is written over the length limit the user + * specified. Zero is returned for a success. + */ + +int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, + int __user *ulen) +{ + int err; + int len; + + err = get_user(len, ulen); + if (err) + return err; + if (len > klen) + len = klen; + if (len < 0 || len > MAX_SOCK_ADDR) + return -EINVAL; + if (len) { + if (audit_sockaddr(klen, kaddr)) + return -ENOMEM; + if (copy_to_user(uaddr, kaddr, len)) + return -EFAULT; + } + /* + * "fromlen shall refer to the value before truncation.." 
+ * 1003.1g + */ + return __put_user(klen, ulen); +} + +#define SOCKFS_MAGIC 0x534F434B + +static struct kmem_cache *sock_inode_cachep __read_mostly; + +static struct inode *sock_alloc_inode(struct super_block *sb) +{ + struct socket_alloc *ei; + + ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); + if (!ei) + return NULL; + init_waitqueue_head(&ei->socket.wait); + + ei->socket.fasync_list = NULL; + ei->socket.state = SS_UNCONNECTED; + ei->socket.flags = 0; + ei->socket.ops = NULL; + ei->socket.sk = NULL; + ei->socket.file = NULL; + + return &ei->vfs_inode; +} + +static void sock_destroy_inode(struct inode *inode) +{ + kmem_cache_free(sock_inode_cachep, + container_of(inode, struct socket_alloc, vfs_inode)); +} + +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) +{ + struct socket_alloc *ei = (struct socket_alloc *)foo; + + inode_init_once(&ei->vfs_inode); +} + +static int init_inodecache(void) +{ + sock_inode_cachep = kmem_cache_create("sock_inode_cache", + sizeof(struct socket_alloc), + 0, + (SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD), + init_once, + NULL); + if (sock_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static struct super_operations sockfs_ops = { + .alloc_inode = sock_alloc_inode, + .destroy_inode =sock_destroy_inode, + .statfs = simple_statfs, +}; + +static int sockfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, + mnt); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .get_sb = sockfs_get_sb, + .kill_sb = kill_anon_super, +}; + +static int sockfs_delete_dentry(struct dentry *dentry) +{ + /* + * At creation time, we pretended this dentry was hashed + * (by clearing DCACHE_UNHASHED bit in d_flags) + * At delete time, we restore the truth : not hashed. + * (so that dput() can proceed correctly) + */ + dentry->d_flags |= DCACHE_UNHASHED; + return 0; +} + +/* + * sockfs_dname() is called from d_path(). + */ +static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) +{ + return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", + dentry->d_inode->i_ino); +} + +static struct dentry_operations sockfs_dentry_operations = { + .d_delete = sockfs_delete_dentry, + .d_dname = sockfs_dname, +}; + +/* + * Obtains the first available file descriptor and sets it up for use. + * + * These functions create file structures and maps them to fd space + * of the current process. On success it returns file descriptor + * and file struct implicitly stored in sock->file. + * Note that another thread may close file descriptor before we return + * from this function. We use the fact that now we do not refer + * to socket after mapping. If one day we will need it, this + * function will increment ref. count on file by 1. + * + * In any case returned fd MAY BE not valid! + * This race condition is unavoidable + * with shared fd spaces, we cannot solve it inside kernel, + * but we take care of internal coherence yet. 
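+ *
+ * A sketch of the typical in-kernel call sequence, mirroring what
+ * sys_socket() does further down in this file:
+ *
+ *	err = sock_create(family, type, protocol, &sock);
+ *	if (!err)
+ *		err = sock_map_fd(sock);
+ *
+ * where a negative result is an errno and anything else is the
+ * newly installed descriptor.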
+ */ + +static int sock_alloc_fd(struct file **filep) +{ + int fd; + + fd = get_unused_fd(); + if (likely(fd >= 0)) { + struct file *file = get_empty_filp(); + + *filep = file; + if (unlikely(!file)) { + put_unused_fd(fd); + return -ENFILE; + } + } else + *filep = NULL; + return fd; +} + +static int sock_attach_fd(struct socket *sock, struct file *file) +{ + struct qstr name = { .name = "" }; + + file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + if (unlikely(!file->f_path.dentry)) + return -ENOMEM; + + file->f_path.dentry->d_op = &sockfs_dentry_operations; + /* + * We dont want to push this dentry into global dentry hash table. + * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED + * This permits a working /proc/$pid/fd/XXX on sockets + */ + file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); + file->f_path.mnt = mntget(sock_mnt); + file->f_mapping = file->f_path.dentry->d_inode->i_mapping; + + sock->file = file; + file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; + file->f_mode = FMODE_READ | FMODE_WRITE; + file->f_flags = O_RDWR; + file->f_pos = 0; + file->private_data = sock; + + return 0; +} + +int sock_map_fd(struct socket *sock) +{ + struct file *newfile; + int fd = sock_alloc_fd(&newfile); + + if (likely(fd >= 0)) { + int err = sock_attach_fd(sock, newfile); + + if (unlikely(err < 0)) { + put_filp(newfile); + put_unused_fd(fd); + return err; + } + fd_install(fd, newfile); + } + return fd; +} + +static struct socket *sock_from_file(struct file *file, int *err) +{ + if (file->f_op == &socket_file_ops) + return file->private_data; /* set in sock_map_fd */ + + *err = -ENOTSOCK; + return NULL; +} + +/** + * sockfd_lookup - Go from a file number to its socket slot + * @fd: file handle + * @err: pointer to an error code return + * + * The file handle passed in is locked and the socket it is bound + * too is returned. If an error occurs the err pointer is overwritten + * with a negative errno code and NULL is returned. The function checks + * for both invalid handles and passing a handle which is not a socket. + * + * On a success the socket object pointer is returned. + */ + +struct socket *sockfd_lookup(int fd, int *err) +{ + struct file *file; + struct socket *sock; + + file = fget(fd); + if (!file) { + *err = -EBADF; + return NULL; + } + + sock = sock_from_file(file, err); + if (!sock) + fput(file); + return sock; +} + +static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) +{ + struct file *file; + struct socket *sock; + + *err = -EBADF; + file = fget_light(fd, fput_needed); + if (file) { + sock = sock_from_file(file, err); + if (sock) + return sock; + fput_light(file, *fput_needed); + } + return NULL; +} + +/** + * sock_alloc - allocate a socket + * + * Allocate a new inode and socket object. The two are bound together + * and initialised. The socket is then returned. If we are out of inodes + * NULL is returned. + */ + +static struct socket *sock_alloc(void) +{ + struct inode *inode; + struct socket *sock; + + inode = new_inode(sock_mnt->mnt_sb); + if (!inode) + return NULL; + + sock = SOCKET_I(inode); + + inode->i_mode = S_IFSOCK | S_IRWXUGO; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + + get_cpu_var(sockets_in_use)++; + put_cpu_var(sockets_in_use); + return sock; +} + +/* + * In theory you can't get an open on this inode, but /proc provides + * a back door. Remember to keep it shut otherwise you'll let the + * creepy crawlies in. 
+ */ + +static int sock_no_open(struct inode *irrelevant, struct file *dontcare) +{ + return -ENXIO; +} + +const struct file_operations bad_sock_fops = { + .owner = THIS_MODULE, + .open = sock_no_open, +}; + +/** + * sock_release - close a socket + * @sock: socket to close + * + * The socket is released from the protocol stack if it has a release + * callback, and the inode is then released if the socket is bound to + * an inode not a file. + */ + +void sock_release(struct socket *sock) +{ + if (sock->ops) { + struct module *owner = sock->ops->owner; + + sock->ops->release(sock); + sock->ops = NULL; + module_put(owner); + } + + if (sock->fasync_list) + printk(KERN_ERR "sock_release: fasync list not empty!\n"); + + get_cpu_var(sockets_in_use)--; + put_cpu_var(sockets_in_use); + if (!sock->file) { + iput(SOCK_INODE(sock)); + return; + } + sock->file = NULL; +} + +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) +{ + struct sock_iocb *si = kiocb_to_siocb(iocb); + int err, len; + + si->sock = sock; + si->scm = NULL; + si->msg = msg; + si->size = size; + + err = security_socket_sendmsg(sock, msg, size); + if (err) + return err; + + len = sock->ops->sendmsg(iocb, sock, msg, size); + if (sock->sk) { + if (len == size) + vx_sock_send(sock->sk, size); + else + vx_sock_fail(sock->sk, size); + } + vxdprintk(VXD_CBIT(net, 7), + "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (sock->sk)?sock->sk->sk_nid:0, + (unsigned int)size, len); + return len; +} + +int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_sendmsg(&iocb, sock, msg, size); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + +int kernel_sendmsg(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + mm_segment_t oldfs = get_fs(); + int result; + + set_fs(KERNEL_DS); + /* + * the following is safe, since for compiler definitions of kvec and + * iovec are identical, yielding the same in-core layout and alignment + */ + msg->msg_iov = (struct iovec *)vec; + msg->msg_iovlen = num; + result = sock_sendmsg(sock, msg, size); + set_fs(oldfs); + return result; +} + +/* + * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) + */ +void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + ktime_t kt = skb->tstamp; + + if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { + struct timeval tv; + /* Race occurred between timestamp enabling and packet + receiving. Fill in the current time for now. */ + if (kt.tv64 == 0) + kt = ktime_get_real(); + skb->tstamp = kt; + tv = ktime_to_timeval(kt); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); + } else { + struct timespec ts; + /* Race occurred between timestamp enabling and packet + receiving. Fill in the current time for now. 
*/ + if (kt.tv64 == 0) + kt = ktime_get_real(); + skb->tstamp = kt; + ts = ktime_to_timespec(kt); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); + } +} + +EXPORT_SYMBOL_GPL(__sock_recv_timestamp); + +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + int err, len; + struct sock_iocb *si = kiocb_to_siocb(iocb); + + si->sock = sock; + si->scm = NULL; + si->msg = msg; + si->size = size; + si->flags = flags; + + err = security_socket_recvmsg(sock, msg, size, flags); + if (err) + return err; + + len = sock->ops->recvmsg(iocb, sock, msg, size, flags); + if ((len >= 0) && sock->sk) + vx_sock_recv(sock->sk, len); + vxdprintk(VXD_CBIT(net, 7), + "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d", + sock, sock->sk, + (sock->sk)?sock->sk->sk_nx_info:0, + (sock->sk)?sock->sk->sk_vx_info:0, + (sock->sk)?sock->sk->sk_xid:0, + (sock->sk)?sock->sk->sk_nid:0, + (unsigned int)size, len); + return len; +} + +int sock_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_recvmsg(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + +int kernel_recvmsg(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size, int flags) +{ + mm_segment_t oldfs = get_fs(); + int result; + + set_fs(KERNEL_DS); + /* + * the following is safe, since for compiler definitions of kvec and + * iovec are identical, yielding the same in-core layout and alignment + */ + msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; + result = sock_recvmsg(sock, msg, size, flags); + set_fs(oldfs); + return result; +} + +static void sock_aio_dtor(struct kiocb *iocb) +{ + kfree(iocb->private); +} + +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) +{ + struct socket *sock; + int flags; + + sock = file->private_data; + + flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; + if (more) + flags |= MSG_MORE; + + return sock->ops->sendpage(sock, page, offset, size, flags); +} + +static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, + struct sock_iocb *siocb) +{ + if (!is_sync_kiocb(iocb)) { + siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); + if (!siocb) + return NULL; + iocb->ki_dtor = sock_aio_dtor; + } + + siocb->kiocb = iocb; + iocb->private = siocb; + return siocb; +} + +static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, + struct file *file, const struct iovec *iov, + unsigned long nr_segs) +{ + struct socket *sock = file->private_data; + size_t size = 0; + int i; + + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; + + msg->msg_name = NULL; + msg->msg_namelen = 0; + msg->msg_control = NULL; + msg->msg_controllen = 0; + msg->msg_iov = (struct iovec *)iov; + msg->msg_iovlen = nr_segs; + msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; + + return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); +} + +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct sock_iocb siocb, *x; + + if (pos != 0) + return -ESPIPE; + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ + return 0; + + + x = alloc_sock_iocb(iocb, &siocb); + if (!x) + return -ENOMEM; + return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); +} + +static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, + struct file *file, const struct iovec *iov, + unsigned long nr_segs) +{ + struct socket *sock = file->private_data; + size_t size = 0; + int i; + + for (i = 0; i < nr_segs; i++) + size += iov[i].iov_len; + + msg->msg_name = NULL; + msg->msg_namelen = 0; + msg->msg_control = NULL; + msg->msg_controllen = 0; + msg->msg_iov = (struct iovec *)iov; + msg->msg_iovlen = nr_segs; + msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + if (sock->type == SOCK_SEQPACKET) + msg->msg_flags |= MSG_EOR; + + return __sock_sendmsg(iocb, sock, msg, size); +} + +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct sock_iocb siocb, *x; + + if (pos != 0) + return -ESPIPE; + + x = alloc_sock_iocb(iocb, &siocb); + if (!x) + return -ENOMEM; + + return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); +} + +/* + * Atomic setting of ioctl hooks to avoid race + * with module unload. + */ + +static DEFINE_MUTEX(br_ioctl_mutex); +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; + +void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) +{ + mutex_lock(&br_ioctl_mutex); + br_ioctl_hook = hook; + mutex_unlock(&br_ioctl_mutex); +} + +EXPORT_SYMBOL(brioctl_set); + +static DEFINE_MUTEX(vlan_ioctl_mutex); +static int (*vlan_ioctl_hook) (struct net *, void __user *arg); + +void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) +{ + mutex_lock(&vlan_ioctl_mutex); + vlan_ioctl_hook = hook; + mutex_unlock(&vlan_ioctl_mutex); +} + +EXPORT_SYMBOL(vlan_ioctl_set); + +static DEFINE_MUTEX(dlci_ioctl_mutex); +static int (*dlci_ioctl_hook) (unsigned int, void __user *); + +void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) +{ + mutex_lock(&dlci_ioctl_mutex); + dlci_ioctl_hook = hook; + mutex_unlock(&dlci_ioctl_mutex); +} + +EXPORT_SYMBOL(dlci_ioctl_set); + +/* + * With an ioctl, arg may well be a user mode pointer, but we don't know + * what to do with it - that's up to the protocol still. 
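+ *
+ * The bridge, VLAN and DLCI cases below route through the hook
+ * pointers registered by the setters above. A sketch of the
+ * registration side (pattern only; "br_init" and the handler name
+ * are illustrative, not quoted from the bridge module):
+ *
+ *	static int __init br_init(void)
+ *	{
+ *		brioctl_set(br_ioctl_handler);
+ *		return 0;
+ *	}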
+ */ + +static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + struct socket *sock; + struct sock *sk; + void __user *argp = (void __user *)arg; + int pid, err; + struct net *net; + + sock = file->private_data; + sk = sock->sk; + net = sk->sk_net; + if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { + err = dev_ioctl(net, cmd, argp); + } else +#ifdef CONFIG_WIRELESS_EXT + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { + err = dev_ioctl(net, cmd, argp); + } else +#endif /* CONFIG_WIRELESS_EXT */ + switch (cmd) { + case FIOSETOWN: + case SIOCSPGRP: + err = -EFAULT; + if (get_user(pid, (int __user *)argp)) + break; + err = f_setown(sock->file, pid, 1); + break; + case FIOGETOWN: + case SIOCGPGRP: + err = put_user(f_getown(sock->file), + (int __user *)argp); + break; + case SIOCGIFBR: + case SIOCSIFBR: + case SIOCBRADDBR: + case SIOCBRDELBR: + err = -ENOPKG; + if (!br_ioctl_hook) + request_module("bridge"); + + mutex_lock(&br_ioctl_mutex); + if (br_ioctl_hook) + err = br_ioctl_hook(net, cmd, argp); + mutex_unlock(&br_ioctl_mutex); + break; + case SIOCGIFVLAN: + case SIOCSIFVLAN: + err = -ENOPKG; + if (!vlan_ioctl_hook) + request_module("8021q"); + + mutex_lock(&vlan_ioctl_mutex); + if (vlan_ioctl_hook) + err = vlan_ioctl_hook(net, argp); + mutex_unlock(&vlan_ioctl_mutex); + break; + case SIOCADDDLCI: + case SIOCDELDLCI: + err = -ENOPKG; + if (!dlci_ioctl_hook) + request_module("dlci"); + + if (dlci_ioctl_hook) { + mutex_lock(&dlci_ioctl_mutex); + err = dlci_ioctl_hook(cmd, argp); + mutex_unlock(&dlci_ioctl_mutex); + } + break; + default: + err = sock->ops->ioctl(sock, cmd, arg); + + /* + * If this ioctl is unknown try to hand it down + * to the NIC driver. + */ + if (err == -ENOIOCTLCMD) + err = dev_ioctl(net, cmd, argp); + break; + } + return err; +} + +int sock_create_lite(int family, int type, int protocol, struct socket **res) +{ + int err; + struct socket *sock = NULL; + + err = security_socket_create(family, type, protocol, 1); + if (err) + goto out; + + sock = sock_alloc(); + if (!sock) { + err = -ENOMEM; + goto out; + } + + sock->type = type; + err = security_socket_post_create(sock, family, type, protocol, 1); + if (err) + goto out_release; + +out: + *res = sock; + return err; +out_release: + sock_release(sock); + sock = NULL; + goto out; +} + +/* No kernel lock held - perfect */ +static unsigned int sock_poll(struct file *file, poll_table *wait) +{ + struct socket *sock; + + /* + * We can't return errors to poll, so it's either yes or no. + */ + sock = file->private_data; + return sock->ops->poll(file, sock, wait); +} + +static int sock_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct socket *sock = file->private_data; + + return sock->ops->mmap(file, sock, vma); +} + +static int sock_close(struct inode *inode, struct file *filp) +{ + /* + * It was possible the inode is NULL we were + * closing an unfinished socket. + */ + + if (!inode) { + printk(KERN_DEBUG "sock_close: NULL inode\n"); + return 0; + } + sock_fasync(-1, filp, 0); + sock_release(SOCKET_I(inode)); + return 0; +} + +/* + * Update the socket async list + * + * Fasync_list locking strategy. + * + * 1. fasync_list is modified only under process context socket lock + * i.e. under semaphore. + * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) + * or under socket lock. + * 3. fasync_list can be used from softirq context, so that + * modification under socket lock have to be enhanced with + * write_lock_bh(&sk->sk_callback_lock). 
+ * --ANK (990710) + */ + +static int sock_fasync(int fd, struct file *filp, int on) +{ + struct fasync_struct *fa, *fna = NULL, **prev; + struct socket *sock; + struct sock *sk; + + if (on) { + fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); + if (fna == NULL) + return -ENOMEM; + } + + sock = filp->private_data; + + sk = sock->sk; + if (sk == NULL) { + kfree(fna); + return -EINVAL; + } + + lock_sock(sk); + + prev = &(sock->fasync_list); + + for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) + if (fa->fa_file == filp) + break; + + if (on) { + if (fa != NULL) { + write_lock_bh(&sk->sk_callback_lock); + fa->fa_fd = fd; + write_unlock_bh(&sk->sk_callback_lock); + + kfree(fna); + goto out; + } + fna->fa_file = filp; + fna->fa_fd = fd; + fna->magic = FASYNC_MAGIC; + fna->fa_next = sock->fasync_list; + write_lock_bh(&sk->sk_callback_lock); + sock->fasync_list = fna; + write_unlock_bh(&sk->sk_callback_lock); + } else { + if (fa != NULL) { + write_lock_bh(&sk->sk_callback_lock); + *prev = fa->fa_next; + write_unlock_bh(&sk->sk_callback_lock); + kfree(fa); + } + } + +out: + release_sock(sock->sk); + return 0; +} + +/* This function may be called only under socket lock or callback_lock */ + +int sock_wake_async(struct socket *sock, int how, int band) +{ + if (!sock || !sock->fasync_list) + return -1; + switch (how) { + case 1: + + if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) + break; + goto call_kill; + case 2: + if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) + break; + /* fall through */ + case 0: +call_kill: + __kill_fasync(sock->fasync_list, SIGIO, band); + break; + case 3: + __kill_fasync(sock->fasync_list, SIGURG, band); + } + return 0; +} + +static int __sock_create(struct net *net, int family, int type, int protocol, + struct socket **res, int kern) +{ + int err; + struct socket *sock; + const struct net_proto_family *pf; + + /* + * Check protocol is in range + */ + if (family < 0 || family >= NPROTO) + return -EAFNOSUPPORT; + if (type < 0 || type >= SOCK_MAX) + return -EINVAL; + + if (!nx_check(0, VS_ADMIN)) { + if (family == PF_INET && !current_nx_info_has_v4()) + return -EAFNOSUPPORT; + if (family == PF_INET6 && !current_nx_info_has_v6()) + return -EAFNOSUPPORT; + } + + /* Compatibility. + + This uglymoron is moved from INET layer to here to avoid + deadlock in module load. + */ + if (family == PF_INET && type == SOCK_PACKET) { + static int warned; + if (!warned) { + warned = 1; + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); + } + family = PF_PACKET; + } + + err = security_socket_create(family, type, protocol, kern); + if (err) + return err; + + /* + * Allocate the socket and allow the family to set things up. if + * the protocol is 0, the family is instructed to select an appropriate + * default. + */ + sock = sock_alloc(); + if (!sock) { + if (net_ratelimit()) + printk(KERN_WARNING "socket: no more sockets\n"); + return -ENFILE; /* Not exactly a match, but its the + closest posix thing */ + } + + sock->type = type; + +#if defined(CONFIG_KMOD) + /* Attempt to load a protocol module if the find failed. + * + * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user + * requested real, full-featured networking support upon configuration. + * Otherwise module support will break! 
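+ *
+ * Protocol modules advertise themselves for this request with a
+ * module alias; af_packet, for example, declares
+ *
+ *	MODULE_ALIAS_NETPROTO(PF_PACKET);
+ *
+ * which expands to the "net-pf-17" alias that the request_module()
+ * call below asks for.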
+ */ + if (net_families[family] == NULL) + request_module("net-pf-%d", family); +#endif + + rcu_read_lock(); + pf = rcu_dereference(net_families[family]); + err = -EAFNOSUPPORT; + if (!pf) + goto out_release; + + /* + * We will call the ->create function, that possibly is in a loadable + * module, so we have to bump that loadable module refcnt first. + */ + if (!try_module_get(pf->owner)) + goto out_release; + + /* Now protected by module ref count */ + rcu_read_unlock(); + + err = pf->create(net, sock, protocol); + if (err < 0) + goto out_module_put; + + /* + * Now to bump the refcnt of the [loadable] module that owns this + * socket at sock_release time we decrement its refcnt. + */ + if (!try_module_get(sock->ops->owner)) + goto out_module_busy; + + /* + * Now that we're done with the ->create function, the [loadable] + * module can have its refcnt decremented + */ + module_put(pf->owner); + err = security_socket_post_create(sock, family, type, protocol, kern); + if (err) + goto out_sock_release; + *res = sock; + + return 0; + +out_module_busy: + err = -EAFNOSUPPORT; +out_module_put: + sock->ops = NULL; + module_put(pf->owner); +out_sock_release: + sock_release(sock); + return err; + +out_release: + rcu_read_unlock(); + goto out_sock_release; +} + +int sock_create(int family, int type, int protocol, struct socket **res) +{ + return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); +} + +int sock_create_kern(int family, int type, int protocol, struct socket **res) +{ + return __sock_create(&init_net, family, type, protocol, res, 1); +} + +asmlinkage long sys_socket(int family, int type, int protocol) +{ + int retval; + struct socket *sock; + + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + goto out; + + set_bit(SOCK_USER_SOCKET, &sock->flags); + retval = sock_map_fd(sock); + if (retval < 0) + goto out_release; + +out: + /* It may be already another descriptor 8) Not kernel problem. */ + return retval; + +out_release: + sock_release(sock); + return retval; +} + +/* + * Create a pair of connected sockets. + */ + +asmlinkage long sys_socketpair(int family, int type, int protocol, + int __user *usockvec) +{ + struct socket *sock1, *sock2; + int fd1, fd2, err; + struct file *newfile1, *newfile2; + + /* + * Obtain the first socket and check if the underlying protocol + * supports the socketpair call. + */ + + err = sock_create(family, type, protocol, &sock1); + if (err < 0) + goto out; + set_bit(SOCK_USER_SOCKET, &sock1->flags); + + err = sock_create(family, type, protocol, &sock2); + if (err < 0) + goto out_release_1; + set_bit(SOCK_USER_SOCKET, &sock2->flags); + + err = sock1->ops->socketpair(sock1, sock2); + if (err < 0) + goto out_release_both; + + fd1 = sock_alloc_fd(&newfile1); + if (unlikely(fd1 < 0)) { + err = fd1; + goto out_release_both; + } + + fd2 = sock_alloc_fd(&newfile2); + if (unlikely(fd2 < 0)) { + err = fd2; + put_filp(newfile1); + put_unused_fd(fd1); + goto out_release_both; + } + + err = sock_attach_fd(sock1, newfile1); + if (unlikely(err < 0)) { + goto out_fd2; + } + + err = sock_attach_fd(sock2, newfile2); + if (unlikely(err < 0)) { + fput(newfile1); + goto out_fd1; + } + + err = audit_fd_pair(fd1, fd2); + if (err < 0) { + fput(newfile1); + fput(newfile2); + goto out_fd; + } + + fd_install(fd1, newfile1); + fd_install(fd2, newfile2); + /* fd1 and fd2 may be already another descriptors. + * Not kernel problem. 
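+ *
+ * A minimal userspace sketch of the call being implemented here:
+ *
+ *	int sv[2];
+ *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
+ *		write(sv[0], "ping", 4);
+ *		... a read on sv[1] sees the data ...
+ *	}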
+ */ + + err = put_user(fd1, &usockvec[0]); + if (!err) + err = put_user(fd2, &usockvec[1]); + if (!err) + return 0; + + sys_close(fd2); + sys_close(fd1); + return err; + +out_release_both: + sock_release(sock2); +out_release_1: + sock_release(sock1); +out: + return err; + +out_fd2: + put_filp(newfile1); + sock_release(sock1); +out_fd1: + put_filp(newfile2); + sock_release(sock2); +out_fd: + put_unused_fd(fd1); + put_unused_fd(fd2); + goto out; +} + +/* + * Bind a name to a socket. Nothing much to do here since it's + * the protocol's responsibility to handle the local address. + * + * We move the socket address to kernel space before we call + * the protocol layer (having also checked the address is ok). + */ + +asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) +{ + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int err, fput_needed; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { + err = move_addr_to_kernel(umyaddr, addrlen, address); + if (err >= 0) { + err = security_socket_bind(sock, + (struct sockaddr *)address, + addrlen); + if (!err) + err = sock->ops->bind(sock, + (struct sockaddr *) + address, addrlen); + } + fput_light(sock->file, fput_needed); + } + return err; +} + +/* + * Perform a listen. Basically, we allow the protocol to do anything + * necessary for a listen, and if that works, we mark the socket as + * ready for listening. + */ + +asmlinkage long sys_listen(int fd, int backlog) +{ + struct socket *sock; + int err, fput_needed; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { + struct net *net = sock->sk->sk_net; + if ((unsigned)backlog > net->sysctl_somaxconn) + backlog = net->sysctl_somaxconn; + + err = security_socket_listen(sock, backlog); + if (!err) + err = sock->ops->listen(sock, backlog); + + fput_light(sock->file, fput_needed); + } + return err; +} + +/* + * For accept, we attempt to create a new socket, set up the link + * with the client, wake up the client, then return the new + * connected fd. We collect the address of the connector in kernel + * space and move it to user at the very end. This is unclean because + * we open the socket then return an error. + * + * 1003.1g adds the ability to recvmsg() to query connection pending + * status to recvmsg. We need to add that support in a way thats + * clean when we restucture accept also. + */ + +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen) +{ + struct socket *sock, *newsock; + struct file *newfile; + int err, len, newfd, fput_needed; + char address[MAX_SOCK_ADDR]; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + err = -ENFILE; + if (!(newsock = sock_alloc())) + goto out_put; + + newsock->type = sock->type; + newsock->ops = sock->ops; + + /* + * We don't need try_module_get here, as the listening socket (sock) + * has the protocol module (sock->ops->owner) held. 
+ */
+ __module_get(newsock->ops->owner);
+
+ newfd = sock_alloc_fd(&newfile);
+ if (unlikely(newfd < 0)) {
+ err = newfd;
+ sock_release(newsock);
+ goto out_put;
+ }
+
+ err = sock_attach_fd(newsock, newfile);
+ if (err < 0)
+ goto out_fd_simple;
+
+ err = security_socket_accept(sock, newsock);
+ if (err)
+ goto out_fd;
+
+ err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+ if (err < 0)
+ goto out_fd;
+
+ if (upeer_sockaddr) {
+ if (newsock->ops->getname(newsock, (struct sockaddr *)address,
+ &len, 2) < 0) {
+ err = -ECONNABORTED;
+ goto out_fd;
+ }
+ err = move_addr_to_user(address, len, upeer_sockaddr,
+ upeer_addrlen);
+ if (err < 0)
+ goto out_fd;
+ }
+
+ /* File flags are not inherited via accept(), unlike other OSes. */
+
+ fd_install(newfd, newfile);
+ err = newfd;
+
+ security_socket_post_accept(sock, newsock);
+
+out_put:
+ fput_light(sock->file, fput_needed);
+out:
+ return err;
+out_fd_simple:
+ sock_release(newsock);
+ put_filp(newfile);
+ put_unused_fd(newfd);
+ goto out_put;
+out_fd:
+ fput(newfile);
+ put_unused_fd(newfd);
+ goto out_put;
+}
+
+/*
+ * Attempt to connect to a socket with the server address. The address
+ * is in user space so we verify it is OK and move it to kernel space.
+ *
+ * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
+ * break bindings.
+ *
+ * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
+ * other SEQPACKET protocols that take time to connect() as it doesn't
+ * include the -EINPROGRESS status for such sockets.
+ */
+
+asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
+ int addrlen)
+{
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int err, fput_needed;
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (!sock)
+ goto out;
+ err = move_addr_to_kernel(uservaddr, addrlen, address);
+ if (err < 0)
+ goto out_put;
+
+ err =
+ security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+ if (err)
+ goto out_put;
+
+ err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
+ sock->file->f_flags);
+out_put:
+ fput_light(sock->file, fput_needed);
+out:
+ return err;
+}
+
+/*
+ * Get the local address ('name') of a socket object. Move the obtained
+ * name to user space.
+ */
+
+asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len)
+{
+ struct socket *sock;
+ char address[MAX_SOCK_ADDR];
+ int len, err, fput_needed;
+
+ sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ if (!sock)
+ goto out;
+
+ err = security_socket_getsockname(sock);
+ if (err)
+ goto out_put;
+
+ err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+ if (err)
+ goto out_put;
+ err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+ fput_light(sock->file, fput_needed);
+out:
+ return err;
+}
+
+/*
+ * Get the remote address ('name') of a socket object. Move the obtained
+ * name to user space.
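+ *
+ * Typical userspace counterpart (an illustrative sketch only):
+ *
+ *	struct sockaddr_storage ss;
+ *	socklen_t sslen = sizeof(ss);
+ *	if (getpeername(fd, (struct sockaddr *)&ss, &sslen) == 0)
+ *		... ss now holds the remote endpoint, sslen its length ...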
+ */ + +asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, + int __user *usockaddr_len) +{ + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int len, err, fput_needed; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_getpeername(sock); + if (err) { + fput_light(sock->file, fput_needed); + return err; + } + + err = + sock->ops->getname(sock, (struct sockaddr *)address, &len, + 1); + if (!err) + err = move_addr_to_user(address, len, usockaddr, + usockaddr_len); + fput_light(sock->file, fput_needed); + } + return err; +} + +/* + * Send a datagram to a given address. We move the address into kernel + * space and check the user space data area is readable before invoking + * the protocol. + */ + +asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, + unsigned flags, struct sockaddr __user *addr, + int addr_len) +{ + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int err; + struct msghdr msg; + struct iovec iov; + int fput_needed; + struct file *sock_file; + + sock_file = fget_light(fd, &fput_needed); + err = -EBADF; + if (!sock_file) + goto out; + + sock = sock_from_file(sock_file, &err); + if (!sock) + goto out_put; + iov.iov_base = buff; + iov.iov_len = len; + msg.msg_name = NULL; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + if (addr) { + err = move_addr_to_kernel(addr, addr_len, address); + if (err < 0) + goto out_put; + msg.msg_name = address; + msg.msg_namelen = addr_len; + } + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + msg.msg_flags = flags; + err = sock_sendmsg(sock, &msg, len); + +out_put: + fput_light(sock_file, fput_needed); +out: + return err; +} + +/* + * Send a datagram down a socket. + */ + +asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +{ + return sys_sendto(fd, buff, len, flags, NULL, 0); +} + +/* + * Receive a frame from the socket and optionally record the address of the + * sender. We verify the buffers are writable and if needed move the + * sender address from kernel to user space. + */ + +asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, + unsigned flags, struct sockaddr __user *addr, + int __user *addr_len) +{ + struct socket *sock; + struct iovec iov; + struct msghdr msg; + char address[MAX_SOCK_ADDR]; + int err, err2; + struct file *sock_file; + int fput_needed; + + sock_file = fget_light(fd, &fput_needed); + err = -EBADF; + if (!sock_file) + goto out; + + sock = sock_from_file(sock_file, &err); + if (!sock) + goto out_put; + + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_iovlen = 1; + msg.msg_iov = &iov; + iov.iov_len = size; + iov.iov_base = ubuf; + msg.msg_name = address; + msg.msg_namelen = MAX_SOCK_ADDR; + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err = sock_recvmsg(sock, &msg, size, flags); + + if (err >= 0 && addr != NULL) { + err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + if (err2 < 0) + err = err2; + } +out_put: + fput_light(sock_file, fput_needed); +out: + return err; +} + +/* + * Receive a datagram from a socket. + */ + +asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, + unsigned flags) +{ + return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); +} + +/* + * Set a socket option. Because we don't know the option lengths we have + * to pass the user mode parameter for the protocols to sort out. 
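+ *
+ * E.g. from userspace (illustrative only):
+ *
+ *	int one = 1;
+ *	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ *
+ * SOL_SOCKET options are handled generically by sock_setsockopt();
+ * everything else goes to the protocol's ->setsockopt() method.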
+ */ + +asmlinkage long sys_setsockopt(int fd, int level, int optname, + char __user *optval, int optlen) +{ + int err, fput_needed; + struct socket *sock; + + if (optlen < 0) + return -EINVAL; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_setsockopt(sock, level, optname); + if (err) + goto out_put; + + if (level == SOL_SOCKET) + err = + sock_setsockopt(sock, level, optname, optval, + optlen); + else + err = + sock->ops->setsockopt(sock, level, optname, optval, + optlen); +out_put: + fput_light(sock->file, fput_needed); + } + return err; +} + +/* + * Get a socket option. Because we don't know the option lengths we have + * to pass a user mode parameter for the protocols to sort out. + */ + +asmlinkage long sys_getsockopt(int fd, int level, int optname, + char __user *optval, int __user *optlen) +{ + int err, fput_needed; + struct socket *sock; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_getsockopt(sock, level, optname); + if (err) + goto out_put; + + if (level == SOL_SOCKET) + err = + sock_getsockopt(sock, level, optname, optval, + optlen); + else + err = + sock->ops->getsockopt(sock, level, optname, optval, + optlen); +out_put: + fput_light(sock->file, fput_needed); + } + return err; +} + +/* + * Shutdown a socket. + */ + +asmlinkage long sys_shutdown(int fd, int how) +{ + int err, fput_needed; + struct socket *sock; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock != NULL) { + err = security_socket_shutdown(sock, how); + if (!err) + err = sock->ops->shutdown(sock, how); + fput_light(sock->file, fput_needed); + } + return err; +} + +/* A couple of helpful macros for getting the address of the 32/64 bit + * fields which are the same type (int / unsigned) on our platforms. + */ +#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) +#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) +#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) + +/* + * BSD sendmsg interface + */ + +asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +{ + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; + struct socket *sock; + char address[MAX_SOCK_ADDR]; + struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; + unsigned char ctl[sizeof(struct cmsghdr) + 20] + __attribute__ ((aligned(sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ + unsigned char *ctl_buf = ctl; + struct msghdr msg_sys; + int err, ctl_len, iov_size, total_len; + int fput_needed; + + err = -EFAULT; + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(&msg_sys, msg_compat)) + return -EFAULT; + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + return -EFAULT; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + /* do not move before msg_sys is valid */ + err = -EMSGSIZE; + if (msg_sys.msg_iovlen > UIO_MAXIOV) + goto out_put; + + /* Check whether to allocate the iovec area */ + err = -ENOMEM; + iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); + if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + if (!iov) + goto out_put; + } + + /* This will also move the address data into kernel space */ + if (MSG_CMSG_COMPAT & flags) { + err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); + } else + err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); + if (err < 0) + goto out_freeiov; + total_len = err; + + err = -ENOBUFS; + + if (msg_sys.msg_controllen > INT_MAX) + goto out_freeiov; + ctl_len = msg_sys.msg_controllen; + if ((MSG_CMSG_COMPAT & flags) && ctl_len) { + err = + cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + sizeof(ctl)); + if (err) + goto out_freeiov; + ctl_buf = msg_sys.msg_control; + ctl_len = msg_sys.msg_controllen; + } else if (ctl_len) { + if (ctl_len > sizeof(ctl)) { + ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); + if (ctl_buf == NULL) + goto out_freeiov; + } + err = -EFAULT; + /* + * Careful! Before this, msg_sys.msg_control contains a user pointer. + * Afterwards, it will be a kernel pointer. Thus the compiler-assisted + * checking falls down on this. 
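+ *
+ * (The "compiler-assisted checking" is presumably sparse's __user
+ * address-space annotation: msg_control is a plain kernel pointer in
+ * struct msghdr, so we must cast through (void __user *) once below,
+ * after which sparse can no longer tell the two spaces apart here.)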
+ */ + if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, + ctl_len)) + goto out_freectl; + msg_sys.msg_control = ctl_buf; + } + msg_sys.msg_flags = flags; + + if (sock->file->f_flags & O_NONBLOCK) + msg_sys.msg_flags |= MSG_DONTWAIT; + err = sock_sendmsg(sock, &msg_sys, total_len); + +out_freectl: + if (ctl_buf != ctl) + sock_kfree_s(sock->sk, ctl_buf, ctl_len); +out_freeiov: + if (iov != iovstack) + sock_kfree_s(sock->sk, iov, iov_size); +out_put: + fput_light(sock->file, fput_needed); +out: + return err; +} + +/* + * BSD recvmsg interface + */ + +asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, + unsigned int flags) +{ + struct compat_msghdr __user *msg_compat = + (struct compat_msghdr __user *)msg; + struct socket *sock; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct msghdr msg_sys; + unsigned long cmsg_ptr; + int err, iov_size, total_len, len; + int fput_needed; + + /* kernel mode address */ + char addr[MAX_SOCK_ADDR]; + + /* user mode address pointers */ + struct sockaddr __user *uaddr; + int __user *uaddr_len; + + if (MSG_CMSG_COMPAT & flags) { + if (get_compat_msghdr(&msg_sys, msg_compat)) + return -EFAULT; + } + else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + return -EFAULT; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + err = -EMSGSIZE; + if (msg_sys.msg_iovlen > UIO_MAXIOV) + goto out_put; + + /* Check whether to allocate the iovec area */ + err = -ENOMEM; + iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); + if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + if (!iov) + goto out_put; + } + + /* + * Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) + */ + + uaddr = (void __user *)msg_sys.msg_name; + uaddr_len = COMPAT_NAMELEN(msg); + if (MSG_CMSG_COMPAT & flags) { + err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + } else + err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); + if (err < 0) + goto out_freeiov; + total_len = err; + + cmsg_ptr = (unsigned long)msg_sys.msg_control; + msg_sys.msg_flags = 0; + if (MSG_CMSG_COMPAT & flags) + msg_sys.msg_flags = MSG_CMSG_COMPAT; + + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err = sock_recvmsg(sock, &msg_sys, total_len, flags); + if (err < 0) + goto out_freeiov; + len = err; + + if (uaddr != NULL) { + err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, + uaddr_len); + if (err < 0) + goto out_freeiov; + } + err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + COMPAT_FLAGS(msg)); + if (err) + goto out_freeiov; + if (MSG_CMSG_COMPAT & flags) + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + &msg_compat->msg_controllen); + else + err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + &msg->msg_controllen); + if (err) + goto out_freeiov; + err = len; + +out_freeiov: + if (iov != iovstack) + sock_kfree_s(sock->sk, iov, iov_size); +out_put: + fput_light(sock->file, fput_needed); +out: + return err; +} + +#ifdef __ARCH_WANT_SYS_SOCKETCALL + +/* Argument list sizes for sys_socketcall */ +#define AL(x) ((x) * sizeof(unsigned long)) +static const unsigned char nargs[18]={ + AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) +}; + +#undef AL + +/* + * System call vectors. + * + * Argument checking cleaned up. Saved 20% in size. 
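+ *
+ * Worked example of the nargs[] table above (no new behaviour):
+ * SYS_BIND is 2 and nargs[2] == AL(3) == 3 * sizeof(unsigned long),
+ * so a bind(2) issued via socketcall copies exactly three argument
+ * words from userspace: fd, umyaddr and addrlen.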
+ * This function doesn't need to take the kernel lock because
+ * it is taken by the callees as needed.
+ */
+
+asmlinkage long sys_socketcall(int call, unsigned long __user *args)
+{
+ unsigned long a[6];
+ unsigned long a0, a1;
+ int err;
+
+ if (call < 1 || call > SYS_RECVMSG)
+ return -EINVAL;
+
+ /* copy_from_user should be SMP safe. */
+ if (copy_from_user(a, args, nargs[call]))
+ return -EFAULT;
+
+ err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+ if (err)
+ return err;
+
+ a0 = a[0];
+ a1 = a[1];
+
+ switch (call) {
+ case SYS_SOCKET:
+ err = sys_socket(a0, a1, a[2]);
+ break;
+ case SYS_BIND:
+ err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
+ break;
+ case SYS_CONNECT:
+ err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
+ break;
+ case SYS_LISTEN:
+ err = sys_listen(a0, a1);
+ break;
+ case SYS_ACCEPT:
+ err =
+ sys_accept(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_GETSOCKNAME:
+ err =
+ sys_getsockname(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_GETPEERNAME:
+ err =
+ sys_getpeername(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2]);
+ break;
+ case SYS_SOCKETPAIR:
+ err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
+ break;
+ case SYS_SEND:
+ err = sys_send(a0, (void __user *)a1, a[2], a[3]);
+ break;
+ case SYS_SENDTO:
+ err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4], a[5]);
+ break;
+ case SYS_RECV:
+ err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
+ break;
+ case SYS_RECVFROM:
+ err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
+ (struct sockaddr __user *)a[4],
+ (int __user *)a[5]);
+ break;
+ case SYS_SHUTDOWN:
+ err = sys_shutdown(a0, a1);
+ break;
+ case SYS_SETSOCKOPT:
+ err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
+ break;
+ case SYS_GETSOCKOPT:
+ err =
+ sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
+ (int __user *)a[4]);
+ break;
+ case SYS_SENDMSG:
+ err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
+ break;
+ case SYS_RECVMSG:
+ err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ return err;
+}
+
+#endif /* __ARCH_WANT_SYS_SOCKETCALL */
+
+/**
+ * sock_register - add a socket protocol handler
+ * @ops: description of protocol
+ *
+ * This function is called by a protocol handler that wants to
+ * advertise its address family, and have it linked into the
+ * socket interface. The value ops->family corresponds to the
+ * socket system call protocol family.
+ */
+int sock_register(const struct net_proto_family *ops)
+{
+ int err;
+
+ if (ops->family >= NPROTO) {
+ printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
+ NPROTO);
+ return -ENOBUFS;
+ }
+
+ spin_lock(&net_family_lock);
+ if (net_families[ops->family])
+ err = -EEXIST;
+ else {
+ net_families[ops->family] = ops;
+ err = 0;
+ }
+ spin_unlock(&net_family_lock);
+
+ printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
+ return err;
+}
+
+/**
+ * sock_unregister - remove a protocol handler
+ * @family: protocol family to remove
+ *
+ * This function is called by a protocol handler that wants to
+ * remove its address family, and have it unlinked from
+ * new socket creation.
+ *
+ * If the protocol handler is a module, then it can use module reference
+ * counts to protect against new references. If the protocol handler is
+ * not a module, then it needs to provide its own protection in
+ * the ops->create routine.
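+ *
+ * A typical caller (sketch with a made-up protocol module; the
+ * PF_MYPROTO family and myproto_proto object are hypothetical):
+ *
+ *	static void __exit myproto_exit(void)
+ *	{
+ *		sock_unregister(PF_MYPROTO);
+ *		proto_unregister(&myproto_proto);
+ *	}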
+ */ +void sock_unregister(int family) +{ + BUG_ON(family < 0 || family >= NPROTO); + + spin_lock(&net_family_lock); + net_families[family] = NULL; + spin_unlock(&net_family_lock); + + synchronize_rcu(); + + printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); +} + +static int sock_pernet_init(struct net *net) +{ + net->sysctl_somaxconn = SOMAXCONN; + return 0; +} + +static struct pernet_operations sock_net_ops = { + .init = sock_pernet_init, +}; + +static int __init sock_init(void) +{ + /* + * Initialize sock SLAB cache. + */ + + sk_init(); + + /* + * Initialize skbuff SLAB cache + */ + skb_init(); + + /* + * Initialize the protocols module. + */ + + init_inodecache(); + register_filesystem(&sock_fs_type); + sock_mnt = kern_mount(&sock_fs_type); + + /* The real protocol initialization is performed in later initcalls. + */ + +#ifdef CONFIG_NETFILTER + netfilter_init(); +#endif + + register_pernet_subsys(&sock_net_ops); + + return 0; +} + +core_initcall(sock_init); /* early initcall */ + +#ifdef CONFIG_PROC_FS +void socket_seq_show(struct seq_file *seq) +{ + int cpu; + int counter = 0; + + for_each_possible_cpu(cpu) + counter += per_cpu(sockets_in_use, cpu); + + /* It can be negative, by the way. 8) */ + if (counter < 0) + counter = 0; + + seq_printf(seq, "sockets: used %d\n", counter); +} +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_COMPAT +static long compat_sock_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + struct socket *sock = file->private_data; + int ret = -ENOIOCTLCMD; + + if (sock->ops->compat_ioctl) + ret = sock->ops->compat_ioctl(sock, cmd, arg); + + return ret; +} +#endif + +int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +{ + return sock->ops->bind(sock, addr, addrlen); +} + +int kernel_listen(struct socket *sock, int backlog) +{ + return sock->ops->listen(sock, backlog); +} + +int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +{ + struct sock *sk = sock->sk; + int err; + + err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, + newsock); + if (err < 0) + goto done; + + err = sock->ops->accept(sock, *newsock, flags); + if (err < 0) { + sock_release(*newsock); + goto done; + } + + (*newsock)->ops = sock->ops; + +done: + return err; +} + +int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, + int flags) +{ + return sock->ops->connect(sock, addr, addrlen, flags); +} + +int kernel_getsockname(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 0); +} + +int kernel_getpeername(struct socket *sock, struct sockaddr *addr, + int *addrlen) +{ + return sock->ops->getname(sock, addr, addrlen, 1); +} + +int kernel_getsockopt(struct socket *sock, int level, int optname, + char *optval, int *optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_getsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->getsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_setsockopt(struct socket *sock, int level, int optname, + char *optval, int optlen) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + if (level == SOL_SOCKET) + err = sock_setsockopt(sock, level, optname, optval, optlen); + else + err = sock->ops->setsockopt(sock, level, optname, optval, + optlen); + set_fs(oldfs); + return err; +} + +int kernel_sendpage(struct socket *sock, struct page *page, int offset, + size_t 
size, int flags) +{ + if (sock->ops->sendpage) + return sock->ops->sendpage(sock, page, offset, size, flags); + + return sock_no_sendpage(sock, page, offset, size, flags); +} + +int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +{ + mm_segment_t oldfs = get_fs(); + int err; + + set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, cmd, arg); + set_fs(oldfs); + + return err; +} + +/* ABI emulation layers need these two */ +EXPORT_SYMBOL(move_addr_to_kernel); +EXPORT_SYMBOL(move_addr_to_user); +EXPORT_SYMBOL(sock_create); +EXPORT_SYMBOL(sock_create_kern); +EXPORT_SYMBOL(sock_create_lite); +EXPORT_SYMBOL(sock_map_fd); +EXPORT_SYMBOL(sock_recvmsg); +EXPORT_SYMBOL(sock_register); +EXPORT_SYMBOL(sock_release); +EXPORT_SYMBOL(sock_sendmsg); +EXPORT_SYMBOL(sock_unregister); +EXPORT_SYMBOL(sock_wake_async); +EXPORT_SYMBOL(sockfd_lookup); +EXPORT_SYMBOL(kernel_sendmsg); +EXPORT_SYMBOL(kernel_recvmsg); +EXPORT_SYMBOL(kernel_bind); +EXPORT_SYMBOL(kernel_listen); +EXPORT_SYMBOL(kernel_accept); +EXPORT_SYMBOL(kernel_connect); +EXPORT_SYMBOL(kernel_getsockname); +EXPORT_SYMBOL(kernel_getpeername); +EXPORT_SYMBOL(kernel_getsockopt); +EXPORT_SYMBOL(kernel_setsockopt); +EXPORT_SYMBOL(kernel_sendpage); +EXPORT_SYMBOL(kernel_sock_ioctl);
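+
+/*
+ * Usage sketch for the kernel_* wrappers above (illustrative only;
+ * assumes a hypothetical caller wanting an in-kernel TCP listener,
+ * error handling abbreviated):
+ *
+ *	struct socket *srv, *conn;
+ *	struct sockaddr_in sin = {
+ *		.sin_family = AF_INET,
+ *		.sin_port = htons(5555),
+ *		.sin_addr.s_addr = htonl(INADDR_ANY),
+ *	};
+ *
+ *	sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &srv);
+ *	kernel_bind(srv, (struct sockaddr *)&sin, sizeof(sin));
+ *	kernel_listen(srv, 5);
+ *	kernel_accept(srv, &conn, 0);
+ */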