From: Sapan Bhatia Date: Thu, 20 Mar 2008 04:15:10 +0000 (+0000) Subject: Vserver/NetNS fix hopeful X-Git-Tag: trellis-2.6.22-Jan-2009~35 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=21c23b172d34b301ca2656efea87a9d6155d9909;p=linux-2.6.git Vserver/NetNS fix hopeful --- diff --git a/linux-2.6-595-vserver-new-netns.patch b/linux-2.6-595-vserver-new-netns.patch index f946b5037..c2d92cd65 100644 --- a/linux-2.6-595-vserver-new-netns.patch +++ b/linux-2.6-595-vserver-new-netns.patch @@ -1,5 +1,1650 @@ ---- linux-2.6.22-590/kernel/vserver/space.c.orig 2008-02-29 09:01:28.000000000 -0500 -+++ linux-2.6.22-590/kernel/vserver/space.c 2008-03-06 15:47:26.000000000 -0500 +diff -Nurb linux-2.6.22-594/include/linux/vserver/network.h.orig.orig linux-2.6.22-595/include/linux/vserver/network.h.orig.orig +--- linux-2.6.22-594/include/linux/vserver/network.h.orig.orig 2008-03-20 00:04:54.000000000 -0400 ++++ linux-2.6.22-595/include/linux/vserver/network.h.orig.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,143 +0,0 @@ +-#ifndef _VX_NETWORK_H +-#define _VX_NETWORK_H +- +-#include +- +- +-#define MAX_N_CONTEXT 65535 /* Arbitrary limit */ +- +- +-/* network flags */ +- +-#define NXF_INFO_PRIVATE 0x00000008 +- +-#define NXF_SINGLE_IP 0x00000100 +-#define NXF_LBACK_REMAP 0x00000200 +- +-#define NXF_HIDE_NETIF 0x02000000 +-#define NXF_HIDE_LBACK 0x04000000 +- +-#define NXF_STATE_SETUP (1ULL << 32) +-#define NXF_STATE_ADMIN (1ULL << 34) +- +-#define NXF_SC_HELPER (1ULL << 36) +-#define NXF_PERSISTENT (1ULL << 38) +- +-#define NXF_ONE_TIME (0x0005ULL << 32) +- +- +-#define NXF_INIT_SET (__nxf_init_set()) +- +-static inline uint64_t __nxf_init_set(void) { +- return NXF_STATE_ADMIN +-#ifdef CONFIG_VSERVER_AUTO_LBACK +- | NXF_LBACK_REMAP +- | NXF_HIDE_LBACK +-#endif +-#ifdef CONFIG_VSERVER_AUTO_SINGLE +- | NXF_SINGLE_IP +-#endif +- | NXF_HIDE_NETIF; +-} +- +- +-/* network caps */ +- +-#define NXC_RAW_ICMP 0x00000100 +- +- +-/* address types */ +- +-#define NXA_TYPE_IPV4 0x0001 +-#define NXA_TYPE_IPV6 0x0002 +- +-#define NXA_TYPE_NONE 0x0000 +-#define NXA_TYPE_ANY 0x00FF +- +-#define NXA_TYPE_ADDR 0x0010 +-#define NXA_TYPE_MASK 0x0020 +-#define NXA_TYPE_RANGE 0x0040 +- +-#define NXA_MASK_ALL (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE) +- +-#define NXA_MOD_BCAST 0x0100 +-#define NXA_MOD_LBACK 0x0200 +- +-#define NXA_LOOPBACK 0x1000 +- +-#define NXA_MASK_BIND (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK) +-#define NXA_MASK_SHOW (NXA_MASK_ALL | NXA_LOOPBACK) +- +-#ifdef __KERNEL__ +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-struct nx_addr_v4 { +- struct nx_addr_v4 *next; +- struct in_addr ip[2]; +- struct in_addr mask; +- uint16_t type; +- uint16_t flags; +-}; +- +-struct nx_addr_v6 { +- struct nx_addr_v6 *next; +- struct in6_addr ip; +- struct in6_addr mask; +- uint32_t prefix; +- uint16_t type; +- uint16_t flags; +-}; +- +-struct nx_info { +- struct hlist_node nx_hlist; /* linked list of nxinfos */ +- nid_t nx_id; /* vnet id */ +- atomic_t nx_usecnt; /* usage count */ +- atomic_t nx_tasks; /* tasks count */ +- int nx_state; /* context state */ +- +- uint64_t nx_flags; /* network flag word */ +- uint64_t nx_ncaps; /* network capabilities */ +- +- struct in_addr v4_lback; /* Loopback address */ +- struct in_addr v4_bcast; /* Broadcast address */ +- struct nx_addr_v4 v4; /* First/Single ipv4 address */ +-#ifdef CONFIG_IPV6 +- struct nx_addr_v6 v6; /* First/Single ipv6 address */ +-#endif +- char nx_name[65]; /* network context name */ +-}; +- +- +-/* status flags */ +- 
+-#define NXS_HASHED 0x0001 +-#define NXS_SHUTDOWN 0x0100 +-#define NXS_RELEASED 0x8000 +- +-extern struct nx_info *lookup_nx_info(int); +- +-extern int get_nid_list(int, unsigned int *, int); +-extern int nid_is_hashed(nid_t); +- +-extern int nx_migrate_task(struct task_struct *, struct nx_info *); +- +-extern long vs_net_change(struct nx_info *, unsigned int); +- +-struct sock; +- +- +-#define NX_IPV4(n) ((n)->v4.type != NXA_TYPE_NONE) +-#ifdef CONFIG_IPV6 +-#define NX_IPV6(n) ((n)->v6.type != NXA_TYPE_NONE) +-#else +-#define NX_IPV6(n) (0) +-#endif +- +-#endif /* __KERNEL__ */ +-#endif /* _VX_NETWORK_H */ +diff -Nurb linux-2.6.22-594/kernel/nsproxy.c.orig linux-2.6.22-595/kernel/nsproxy.c.orig +--- linux-2.6.22-594/kernel/nsproxy.c.orig 2008-03-20 00:05:18.000000000 -0400 ++++ linux-2.6.22-595/kernel/nsproxy.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,264 +0,0 @@ +-/* +- * Copyright (C) 2006 IBM Corporation +- * +- * Author: Serge Hallyn +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License as +- * published by the Free Software Foundation, version 2 of the +- * License. +- * +- * Jun 2006 - namespaces support +- * OpenVZ, SWsoft Inc. +- * Pavel Emelianov +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static struct kmem_cache *nsproxy_cachep; +- +-struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); +- +-void get_task_namespaces(struct task_struct *tsk) +-{ +- struct nsproxy *ns = tsk->nsproxy; +- if (ns) { +- get_nsproxy(ns); +- } +-} +- +-/* +- * creates a copy of "orig" with refcount 1. +- */ +-static inline struct nsproxy *clone_nsproxy(struct nsproxy *orig) +-{ +- struct nsproxy *ns; +- +- ns = kmemdup(orig, sizeof(struct nsproxy), GFP_KERNEL); +- if (ns) +- atomic_set(&ns->count, 1); +- vxdprintk(VXD_CBIT(space, 2), "clone_nsproxy(%p[%u] = %p[1]", +- orig, atomic_read(&orig->count), ns); +- atomic_inc(&vs_global_nsproxy); +- return ns; +-} +- +-/* +- * Create new nsproxy and all of its the associated namespaces. +- * Return the newly created nsproxy. Do not attach this to the task, +- * leave it to the caller to do proper locking and attach it to task. 
+- */ +-static struct nsproxy *unshare_namespaces(int flags, struct nsproxy *orig, +- struct fs_struct *new_fs) +-{ +- struct nsproxy *new_nsp; +- int err = -ENOMEM; +- +- vxdprintk(VXD_CBIT(space, 4), +- "unshare_namespaces(0x%08x,%p,%p)", +- flags, orig, new_fs); +- +- new_nsp = clone_nsproxy(orig); +- if (!new_nsp) +- return ERR_PTR(-ENOMEM); +- +- new_nsp->mnt_ns = copy_mnt_ns(flags, orig->mnt_ns, new_fs); +- if (IS_ERR(new_nsp->mnt_ns)) +- goto out_ns; +- +- new_nsp->uts_ns = copy_utsname(flags, orig->uts_ns); +- if (IS_ERR(new_nsp->uts_ns)) +- goto out_uts; +- +- new_nsp->ipc_ns = copy_ipcs(flags, orig->ipc_ns); +- if (IS_ERR(new_nsp->ipc_ns)) +- goto out_ipc; +- +- new_nsp->pid_ns = copy_pid_ns(flags, orig->pid_ns); +- if (IS_ERR(new_nsp->pid_ns)) +- goto out_pid; +- +- new_nsp->user_ns = copy_user_ns(flags, orig->user_ns); +- if (IS_ERR(new_nsp->user_ns)) +- goto out_user; +- +- new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); +- if (IS_ERR(new_nsp->net_ns)) +- goto out_net; +- +- return new_nsp; +- +-out_net: +- if (new_nsp->user_ns) +- put_user_ns(new_nsp->user_ns); +- if (new_nsp->net_ns) +- put_net(new_nsp->net_ns); +-out_user: +- if (new_nsp->pid_ns) +- put_pid_ns(new_nsp->pid_ns); +-out_pid: +- if (new_nsp->ipc_ns) +- put_ipc_ns(new_nsp->ipc_ns); +-out_ipc: +- if (new_nsp->uts_ns) +- put_uts_ns(new_nsp->uts_ns); +-out_uts: +- if (new_nsp->mnt_ns) +- put_mnt_ns(new_nsp->mnt_ns); +-out_ns: +- kmem_cache_free(nsproxy_cachep, new_nsp); +- return ERR_PTR(err); +-} +- +-static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk, +- struct fs_struct *new_fs) +-{ +- return unshare_namespaces(flags, tsk->nsproxy, new_fs); +-} +- +-/* +- * copies the nsproxy, setting refcount to 1, and grabbing a +- * reference to all contained namespaces. +- */ +-struct nsproxy *copy_nsproxy(struct nsproxy *orig) +-{ +- struct nsproxy *ns = clone_nsproxy(orig); +- +- if (ns) { +- if (ns->mnt_ns) +- get_mnt_ns(ns->mnt_ns); +- if (ns->uts_ns) +- get_uts_ns(ns->uts_ns); +- if (ns->ipc_ns) +- get_ipc_ns(ns->ipc_ns); +- if (ns->pid_ns) +- get_pid_ns(ns->pid_ns); +- } +- return ns; +-} +- +-/* +- * called from clone. This now handles copy for nsproxy and all +- * namespaces therein. 
+- */ +-int copy_namespaces(unsigned long flags, struct task_struct *tsk) +-{ +- struct nsproxy *old_ns = tsk->nsproxy; +- struct nsproxy *new_ns = NULL; +- int err = 0; +- +- vxdprintk(VXD_CBIT(space, 7), "copy_namespaces(0x%08x,%p[%p])", +- flags, tsk, old_ns); +- +- if (!old_ns) +- return 0; +- +- get_nsproxy(old_ns); +- return 0; +- +- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) +- return 0; +- +- #ifndef CONFIG_NET_NS +- if (unshare_flags & CLONE_NEWNET) +- return -EINVAL; +- #endif +- +- +- if (!capable(CAP_SYS_ADMIN)) { +- err = -EPERM; +- goto out; +- } +- +- new_ns = create_new_namespaces(flags, tsk, tsk->fs); +- if (IS_ERR(new_ns)) { +- err = PTR_ERR(new_ns); +- goto out; +- } +- +- err = ns_container_clone(tsk); +- if (err) { +- put_nsproxy(new_ns); +- goto out; +- } +- +- tsk->nsproxy = new_ns; +- +-out: +- put_nsproxy(old_ns); +- vxdprintk(VXD_CBIT(space, 3), +- "copy_namespaces(0x%08x,%p[%p]) = %d [%p]", +- flags, tsk, old_ns, err, new_ns); +- return err; +-} +- +-void free_nsproxy(struct nsproxy *ns) +-{ +- if (ns->mnt_ns) +- put_mnt_ns(ns->mnt_ns); +- if (ns->uts_ns) +- put_uts_ns(ns->uts_ns); +- if (ns->ipc_ns) +- put_ipc_ns(ns->ipc_ns); +- if (ns->pid_ns) +- put_pid_ns(ns->pid_ns); +- atomic_dec(&vs_global_nsproxy); +- kfree(ns); +-} +- +-/* +- * Called from unshare. Unshare all the namespaces part of nsproxy. +- * On success, returns the new nsproxy. +- */ +-int unshare_nsproxy_namespaces(unsigned long unshare_flags, +- struct nsproxy **new_nsp, struct fs_struct *new_fs) +-{ +- int err = 0; +- +- vxdprintk(VXD_CBIT(space, 4), +- "unshare_nsproxy_namespaces(0x%08lx,[%p])", +- unshare_flags, current->nsproxy); +- +- if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | +- CLONE_NEWUSER | CLONE_NEWNET))) +- return 0; +- +-#ifndef CONFIG_NET_NS +- if (unshare_flags & CLONE_NEWNET) +- return -EINVAL; +-#endif +- if (!capable(CAP_SYS_ADMIN)) +- return -EPERM; +- +- *new_nsp = create_new_namespaces(unshare_flags, current, +- new_fs ? new_fs : current->fs); +- if (IS_ERR(*new_nsp)) { +- err = PTR_ERR(*new_nsp); +- goto out; +- } +- +- err = ns_container_clone(current); +- if (err) +- put_nsproxy(*new_nsp); +- +-out: +- return err; +-} +- +-static int __init nsproxy_cache_init(void) +-{ +- nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), +- 0, SLAB_PANIC, NULL, NULL); +- return 0; +-} +- +-module_init(nsproxy_cache_init); +diff -Nurb linux-2.6.22-594/kernel/user.c.orig linux-2.6.22-595/kernel/user.c.orig +--- linux-2.6.22-594/kernel/user.c.orig 2008-03-20 00:05:18.000000000 -0400 ++++ linux-2.6.22-595/kernel/user.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,227 +0,0 @@ +-/* +- * The "user cache". +- * +- * (C) Copyright 1991-2000 Linus Torvalds +- * +- * We have a per-user structure to keep track of how many +- * processes, files etc the user has claimed, in order to be +- * able to have per-user limits for system resources. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * UID task count cache, to get fast user lookup in "alloc_uid" +- * when changing user ID's (ie setuid() and friends). 
+- */ +- +-#define UIDHASH_MASK (UIDHASH_SZ - 1) +-#define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +-#define uidhashentry(ns, xid, uid) ((ns)->uidhash_table + __uidhashfn(xid, uid)) +- +-static struct kmem_cache *uid_cachep; +-static struct list_head uidhash_table[UIDHASH_SZ]; +- +-/* +- * The uidhash_lock is mostly taken from process context, but it is +- * occasionally also taken from softirq/tasklet context, when +- * task-structs get RCU-freed. Hence all locking must be softirq-safe. +- * But free_uid() is also called with local interrupts disabled, and running +- * local_bh_enable() with local interrupts disabled is an error - we'll run +- * softirq callbacks, and they can unconditionally enable interrupts, and +- * the caller of free_uid() didn't expect that.. +- */ +-static DEFINE_SPINLOCK(uidhash_lock); +- +-struct user_struct root_user = { +- .__count = ATOMIC_INIT(1), +- .processes = ATOMIC_INIT(1), +- .files = ATOMIC_INIT(0), +- .sigpending = ATOMIC_INIT(0), +- .mq_bytes = 0, +- .locked_shm = 0, +-#ifdef CONFIG_KEYS +- .uid_keyring = &root_user_keyring, +- .session_keyring = &root_session_keyring, +-#endif +-}; +- +-/* +- * These routines must be called with the uidhash spinlock held! +- */ +-static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) +-{ +- list_add(&up->uidhash_list, hashent); +-} +- +-static inline void uid_hash_remove(struct user_struct *up) +-{ +- list_del(&up->uidhash_list); +-} +- +-static inline struct user_struct *uid_hash_find(xid_t xid, uid_t uid, struct list_head *hashent) +-{ +- struct list_head *up; +- +- list_for_each(up, hashent) { +- struct user_struct *user; +- +- user = list_entry(up, struct user_struct, uidhash_list); +- +- if(user->uid == uid && user->xid == xid) { +- atomic_inc(&user->__count); +- return user; +- } +- } +- +- return NULL; +-} +- +-/* +- * Locate the user_struct for the passed UID. If found, take a ref on it. The +- * caller must undo that ref with free_uid(). +- * +- * If the user_struct could not be found, return NULL. 
+- */ +-struct user_struct *find_user(xid_t xid, uid_t uid) +-{ +- struct user_struct *ret; +- unsigned long flags; +- struct user_namespace *ns = current->nsproxy->user_ns; +- +- spin_lock_irqsave(&uidhash_lock, flags); +- ret = uid_hash_find(xid, uid, uidhashentry(ns, xid, uid)); +- spin_unlock_irqrestore(&uidhash_lock, flags); +- return ret; +-} +- +-void free_uid(struct user_struct *up) +-{ +- unsigned long flags; +- +- if (!up) +- return; +- +- local_irq_save(flags); +- if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { +- uid_hash_remove(up); +- spin_unlock_irqrestore(&uidhash_lock, flags); +- key_put(up->uid_keyring); +- key_put(up->session_keyring); +- kmem_cache_free(uid_cachep, up); +- } else { +- local_irq_restore(flags); +- } +-} +- +-struct user_struct * alloc_uid(xid_t xid, uid_t uid) +-{ +- struct user_namespace *ns = current->nsproxy->user_ns; +- struct list_head *hashent = uidhashentry(ns,xid, uid); +- struct user_struct *up; +- +- spin_lock_irq(&uidhash_lock); +- up = uid_hash_find(xid, uid, hashent); +- spin_unlock_irq(&uidhash_lock); +- +- if (!up) { +- struct user_struct *new; +- +- new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); +- if (!new) +- return NULL; +- new->uid = uid; +- new->xid = xid; +- atomic_set(&new->__count, 1); +- atomic_set(&new->processes, 0); +- atomic_set(&new->files, 0); +- atomic_set(&new->sigpending, 0); +-#ifdef CONFIG_INOTIFY_USER +- atomic_set(&new->inotify_watches, 0); +- atomic_set(&new->inotify_devs, 0); +-#endif +- +- new->mq_bytes = 0; +- new->locked_shm = 0; +- +- if (alloc_uid_keyring(new, current) < 0) { +- kmem_cache_free(uid_cachep, new); +- return NULL; +- } +- +- /* +- * Before adding this, check whether we raced +- * on adding the same user already.. +- */ +- spin_lock_irq(&uidhash_lock); +- up = uid_hash_find(xid, uid, hashent); +- if (up) { +- key_put(new->uid_keyring); +- key_put(new->session_keyring); +- kmem_cache_free(uid_cachep, new); +- } else { +- uid_hash_insert(new, hashent); +- up = new; +- } +- spin_unlock_irq(&uidhash_lock); +- +- } +- return up; +-} +- +-void switch_uid(struct user_struct *new_user) +-{ +- struct user_struct *old_user; +- +- /* What if a process setreuid()'s and this brings the +- * new uid over his NPROC rlimit? We can check this now +- * cheaply with the new uid cache, so if it matters +- * we should be checking for it. -DaveM +- */ +- old_user = current->user; +- atomic_inc(&new_user->processes); +- atomic_dec(&old_user->processes); +- switch_uid_keyring(new_user); +- current->user = new_user; +- +- /* +- * We need to synchronize with __sigqueue_alloc() +- * doing a get_uid(p->user).. If that saw the old +- * user value, we need to wait until it has exited +- * its critical region before we can free the old +- * structure. 
+- */ +- smp_mb(); +- spin_unlock_wait(¤t->sighand->siglock); +- +- free_uid(old_user); +- suid_keys(current); +-} +- +- +-static int __init uid_cache_init(void) +-{ +- int n; +- +- uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); +- +- for(n = 0; n < UIDHASH_SZ; ++n) +- INIT_LIST_HEAD(init_user_ns.uidhash_table + n); +- +- /* Insert the root user immediately (init already runs as root) */ +- spin_lock_irq(&uidhash_lock); +- uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0, 0)); +- spin_unlock_irq(&uidhash_lock); +- +- return 0; +-} +- +-module_init(uid_cache_init); +diff -Nurb linux-2.6.22-594/kernel/vserver/context.c linux-2.6.22-595/kernel/vserver/context.c +--- linux-2.6.22-594/kernel/vserver/context.c 2008-03-20 00:04:46.000000000 -0400 ++++ linux-2.6.22-595/kernel/vserver/context.c 2008-03-20 00:13:22.000000000 -0400 +@@ -589,13 +589,13 @@ + struct nsproxy *old_nsp, *new_nsp; + + ret = unshare_nsproxy_namespaces( +- CLONE_NEWUTS | CLONE_NEWIPC, ++ CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET, + &new_nsp, NULL); + if (ret) + goto out; + + old_nsp = xchg(&p->nsproxy, new_nsp); +- vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC); ++ vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET); + put_nsproxy(old_nsp); + } + } +@@ -781,7 +781,7 @@ + if (vs_state_change(new_vxi, VSC_STARTUP)) + goto out; + +- ret = vx_migrate_task(current, new_vxi, (!data)); ++ ret = vx_migrate_task(current, new_vxi, 1 /*(!data) Hack no. 1 - Sapan*/); + if (ret) + goto out; + +diff -Nurb linux-2.6.22-594/kernel/vserver/context.c.orig linux-2.6.22-595/kernel/vserver/context.c.orig +--- linux-2.6.22-594/kernel/vserver/context.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-595/kernel/vserver/context.c.orig 2008-03-20 00:04:46.000000000 -0400 +@@ -0,0 +1,966 @@ ++/* ++ * linux/kernel/vserver/context.c ++ * ++ * Virtual Server: Context Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 context helper ++ * V0.02 vx_ctx_kill syscall command ++ * V0.03 replaced context_info calls ++ * V0.04 redesign of struct (de)alloc ++ * V0.05 rlimit basic implementation ++ * V0.06 task_xid and info commands ++ * V0.07 context flags and caps ++ * V0.08 switch to RCU based hash ++ * V0.09 revert to non RCU for now ++ * V0.10 and back to working RCU hash ++ * V0.11 and back to locking again ++ * V0.12 referenced context store ++ * V0.13 separate per cpu data ++ * V0.14 changed vcmds to vxi arg ++ * V0.15 added context stat ++ * V0.16 have __create claim() the vxi ++ * V0.17 removed older and legacy stuff ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "cvirt_init.h" ++#include "cacct_init.h" ++#include "limit_init.h" ++#include "sched_init.h" ++ ++ ++atomic_t vx_global_ctotal = ATOMIC_INIT(0); ++atomic_t vx_global_cactive = ATOMIC_INIT(0); ++ ++ ++/* now inactive context structures */ ++ ++static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT; ++ ++static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED; ++ ++ ++/* __alloc_vx_info() ++ ++ * allocate an initialized vx_info struct ++ * doesn't make it visible (hash) */ ++ ++static struct vx_info *__alloc_vx_info(xid_t xid) ++{ ++ struct vx_info *new = NULL; ++ int cpu; ++ ++ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); ++ ++ /* would this benefit from a slab cache? 
*/ ++ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); ++ if (!new) ++ return 0; ++ ++ memset(new, 0, sizeof(struct vx_info)); ++#ifdef CONFIG_SMP ++ new->ptr_pc = alloc_percpu(struct _vx_info_pc); ++ if (!new->ptr_pc) ++ goto error; ++#endif ++ new->vx_id = xid; ++ INIT_HLIST_NODE(&new->vx_hlist); ++ atomic_set(&new->vx_usecnt, 0); ++ atomic_set(&new->vx_tasks, 0); ++ new->vx_parent = NULL; ++ new->vx_state = 0; ++ init_waitqueue_head(&new->vx_wait); ++ ++ /* prepare reaper */ ++ get_task_struct(init_pid_ns.child_reaper); ++ new->vx_reaper = init_pid_ns.child_reaper; ++ new->vx_badness_bias = 0; ++ ++ /* rest of init goes here */ ++ vx_info_init_limit(&new->limit); ++ vx_info_init_sched(&new->sched); ++ vx_info_init_cvirt(&new->cvirt); ++ vx_info_init_cacct(&new->cacct); ++ ++ /* per cpu data structures */ ++ for_each_possible_cpu(cpu) { ++ vx_info_init_sched_pc( ++ &vx_per_cpu(new, sched_pc, cpu), cpu); ++ vx_info_init_cvirt_pc( ++ &vx_per_cpu(new, cvirt_pc, cpu), cpu); ++ } ++ ++ new->vx_flags = VXF_INIT_SET; ++ new->vx_bcaps = CAP_INIT_EFF_SET; ++ new->vx_ccaps = 0; ++ new->vx_cap_bset = cap_bset; ++ ++ new->reboot_cmd = 0; ++ new->exit_code = 0; ++ ++ new->vx_nsproxy = copy_nsproxy(current->nsproxy); ++ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "alloc_vx_info(%d) = %p", xid, new); ++ vxh_alloc_vx_info(new); ++ atomic_inc(&vx_global_ctotal); ++ return new; ++#ifdef CONFIG_SMP ++error: ++ kfree(new); ++ return 0; ++#endif ++} ++ ++/* __dealloc_vx_info() ++ ++ * final disposal of vx_info */ ++ ++static void __dealloc_vx_info(struct vx_info *vxi) ++{ ++ int cpu; ++ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "dealloc_vx_info(%p)", vxi); ++ vxh_dealloc_vx_info(vxi); ++ ++ vxi->vx_id = -1; ++ ++ vx_info_exit_limit(&vxi->limit); ++ vx_info_exit_sched(&vxi->sched); ++ vx_info_exit_cvirt(&vxi->cvirt); ++ vx_info_exit_cacct(&vxi->cacct); ++ ++ for_each_possible_cpu(cpu) { ++ vx_info_exit_sched_pc( ++ &vx_per_cpu(vxi, sched_pc, cpu), cpu); ++ vx_info_exit_cvirt_pc( ++ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu); ++ } ++ ++ vxi->vx_state |= VXS_RELEASED; ++ ++#ifdef CONFIG_SMP ++ free_percpu(vxi->ptr_pc); ++#endif ++ kfree(vxi); ++ atomic_dec(&vx_global_ctotal); ++} ++ ++static void __shutdown_vx_info(struct vx_info *vxi) ++{ ++ struct nsproxy *nsproxy; ++ struct fs_struct *fs; ++ ++ might_sleep(); ++ ++ vxi->vx_state |= VXS_SHUTDOWN; ++ vs_state_change(vxi, VSC_SHUTDOWN); ++ ++ nsproxy = xchg(&vxi->vx_nsproxy, NULL); ++ fs = xchg(&vxi->vx_fs, NULL); ++ ++ if (nsproxy) ++ put_nsproxy(nsproxy); ++ if (fs) ++ put_fs_struct(fs); ++} ++ ++/* exported stuff */ ++ ++void free_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ ++ /* check for reference counts first */ ++ BUG_ON(atomic_read(&vxi->vx_usecnt)); ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ /* context shutdown is mandatory */ ++ BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN)); ++ ++ BUG_ON(vxi->vx_nsproxy); ++ BUG_ON(vxi->vx_fs); ++ ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_del(&vxi->vx_hlist); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ ++ __dealloc_vx_info(vxi); ++} ++ ++ ++/* hash table for vx_info hash */ ++ ++#define VX_HASH_SIZE 13 ++ ++static struct hlist_head vx_info_hash[VX_HASH_SIZE] = ++ { [0 ... 
VX_HASH_SIZE-1] = HLIST_HEAD_INIT }; ++ ++static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; ++ ++ ++static inline unsigned int __hashval(xid_t xid) ++{ ++ return (xid % VX_HASH_SIZE); ++} ++ ++ ++ ++/* __hash_vx_info() ++ ++ * add the vxi to the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __hash_vx_info(struct vx_info *vxi) ++{ ++ struct hlist_head *head; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); ++ vxh_hash_vx_info(vxi); ++ ++ /* context must not be hashed */ ++ BUG_ON(vx_info_state(vxi, VXS_HASHED)); ++ ++ vxi->vx_state |= VXS_HASHED; ++ head = &vx_info_hash[__hashval(vxi->vx_id)]; ++ hlist_add_head(&vxi->vx_hlist, head); ++ atomic_inc(&vx_global_cactive); ++} ++ ++/* __unhash_vx_info() ++ ++ * remove the vxi from the global hash table ++ * requires the hash_lock to be held */ ++ ++static inline void __unhash_vx_info(struct vx_info *vxi) ++{ ++ unsigned long flags; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ vxdprintk(VXD_CBIT(xid, 4), ++ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id, ++ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks)); ++ vxh_unhash_vx_info(vxi); ++ ++ /* context must be hashed */ ++ BUG_ON(!vx_info_state(vxi, VXS_HASHED)); ++ /* but without tasks */ ++ BUG_ON(atomic_read(&vxi->vx_tasks)); ++ ++ vxi->vx_state &= ~VXS_HASHED; ++ hlist_del_init(&vxi->vx_hlist); ++ spin_lock_irqsave(&vx_info_inactive_lock, flags); ++ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive); ++ spin_unlock_irqrestore(&vx_info_inactive_lock, flags); ++ atomic_dec(&vx_global_cactive); ++} ++ ++ ++/* __lookup_vx_info() ++ ++ * requires the hash_lock to be held ++ * doesn't increment the vx_refcnt */ ++ ++static inline struct vx_info *__lookup_vx_info(xid_t xid) ++{ ++ struct hlist_head *head = &vx_info_hash[__hashval(xid)]; ++ struct hlist_node *pos; ++ struct vx_info *vxi; ++ ++ vxd_assert_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ ++ if (vxi->vx_id == xid) ++ goto found; ++ } ++ vxi = NULL; ++found: ++ vxdprintk(VXD_CBIT(xid, 0), ++ "__lookup_vx_info(#%u): %p[#%u]", ++ xid, vxi, vxi ? vxi->vx_id : 0); ++ vxh_lookup_vx_info(vxi, xid); ++ return vxi; ++} ++ ++ ++/* __create_vx_info() ++ ++ * create the requested context ++ * get(), claim() and hash it */ ++ ++static struct vx_info *__create_vx_info(int id) ++{ ++ struct vx_info *new, *vxi = NULL; ++ ++ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); ++ ++ if (!(new = __alloc_vx_info(id))) ++ return ERR_PTR(-ENOMEM); ++ ++ /* required to make dynamic xids unique */ ++ spin_lock(&vx_info_hash_lock); ++ ++ /* static context requested */ ++ if ((vxi = __lookup_vx_info(id))) { ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (already there)", id, vxi); ++ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ vxi = ERR_PTR(-EBUSY); ++ else ++ vxi = ERR_PTR(-EEXIST); ++ goto out_unlock; ++ } ++ /* new context */ ++ vxdprintk(VXD_CBIT(xid, 0), ++ "create_vx_info(%d) = %p (new)", id, new); ++ claim_vx_info(new, NULL); ++ __hash_vx_info(get_vx_info(new)); ++ vxi = new, new = NULL; ++ ++out_unlock: ++ spin_unlock(&vx_info_hash_lock); ++ vxh_create_vx_info(IS_ERR(vxi) ? 
NULL : vxi, id); ++ if (new) ++ __dealloc_vx_info(new); ++ return vxi; ++} ++ ++ ++/* exported stuff */ ++ ++ ++void unhash_vx_info(struct vx_info *vxi) ++{ ++ __shutdown_vx_info(vxi); ++ spin_lock(&vx_info_hash_lock); ++ __unhash_vx_info(vxi); ++ spin_unlock(&vx_info_hash_lock); ++ __wakeup_vx_info(vxi); ++} ++ ++ ++/* lookup_vx_info() ++ ++ * search for a vx_info and get() it ++ * negative id means current */ ++ ++struct vx_info *lookup_vx_info(int id) ++{ ++ struct vx_info *vxi = NULL; ++ ++ if (id < 0) { ++ vxi = get_vx_info(current->vx_info); ++ } else if (id > 1) { ++ spin_lock(&vx_info_hash_lock); ++ vxi = get_vx_info(__lookup_vx_info(id)); ++ spin_unlock(&vx_info_hash_lock); ++ } ++ return vxi; ++} ++ ++/* xid_is_hashed() ++ ++ * verify that xid is still hashed */ ++ ++int xid_is_hashed(xid_t xid) ++{ ++ int hashed; ++ ++ spin_lock(&vx_info_hash_lock); ++ hashed = (__lookup_vx_info(xid) != NULL); ++ spin_unlock(&vx_info_hash_lock); ++ return hashed; ++} ++ ++#ifdef CONFIG_PROC_FS ++ ++/* get_xid_list() ++ ++ * get a subset of hashed xids for proc ++ * assumes size is at least one */ ++ ++int get_xid_list(int index, unsigned int *xids, int size) ++{ ++ int hindex, nr_xids = 0; ++ ++ /* only show current and children */ ++ if (!vx_check(0, VS_ADMIN | VS_WATCH)) { ++ if (index > 0) ++ return 0; ++ xids[nr_xids] = vx_current_xid(); ++ return 1; ++ } ++ ++ for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { ++ struct hlist_head *head = &vx_info_hash[hindex]; ++ struct hlist_node *pos; ++ ++ spin_lock(&vx_info_hash_lock); ++ hlist_for_each(pos, head) { ++ struct vx_info *vxi; ++ ++ if (--index > 0) ++ continue; ++ ++ vxi = hlist_entry(pos, struct vx_info, vx_hlist); ++ xids[nr_xids] = vxi->vx_id; ++ if (++nr_xids >= size) { ++ spin_unlock(&vx_info_hash_lock); ++ goto out; ++ } ++ } ++ /* keep the lock time short */ ++ spin_unlock(&vx_info_hash_lock); ++ } ++out: ++ return nr_xids; ++} ++#endif ++ ++#ifdef CONFIG_VSERVER_DEBUG ++ ++void dump_vx_info_inactive(int level) ++{ ++ struct hlist_node *entry, *next; ++ ++ hlist_for_each_safe(entry, next, &vx_info_inactive) { ++ struct vx_info *vxi = ++ list_entry(entry, struct vx_info, vx_hlist); ++ ++ dump_vx_info(vxi, level); ++ } ++} ++ ++#endif ++ ++int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) ++{ ++ struct user_struct *new_user, *old_user; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ new_user = alloc_uid(vxi->vx_id, p->uid); ++ if (!new_user) ++ return -ENOMEM; ++ ++ old_user = p->user; ++ if (new_user != old_user) { ++ atomic_inc(&new_user->processes); ++ atomic_dec(&old_user->processes); ++ p->user = new_user; ++ } ++ free_uid(old_user); ++ return 0; ++} ++ ++void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p) ++{ ++ p->cap_effective &= vxi->vx_cap_bset; ++ p->cap_inheritable &= vxi->vx_cap_bset; ++ p->cap_permitted &= vxi->vx_cap_bset; ++} ++ ++ ++#include ++ ++static int vx_openfd_task(struct task_struct *tsk) ++{ ++ struct files_struct *files = tsk->files; ++ struct fdtable *fdt; ++ const unsigned long *bptr; ++ int count, total; ++ ++ /* no rcu_read_lock() because of spin_lock() */ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ bptr = fdt->open_fds->fds_bits; ++ count = fdt->max_fds / (sizeof(unsigned long) * 8); ++ for (total = 0; count > 0; count--) { ++ if (*bptr) ++ total += hweight_long(*bptr); ++ bptr++; ++ } ++ spin_unlock(&files->file_lock); ++ return total; ++} ++ ++ ++/* for *space compatibility */ ++ 
++asmlinkage long sys_unshare(unsigned long); ++ ++/* ++ * migrate task to new context ++ * gets vxi, puts old_vxi on change ++ * optionally unshares namespaces (hack) ++ */ ++ ++int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare) ++{ ++ struct vx_info *old_vxi; ++ int ret = 0; ++ ++ if (!p || !vxi) ++ BUG(); ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, ++ vxi->vx_id, atomic_read(&vxi->vx_usecnt)); ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) && ++ !vx_info_flags(vxi, VXF_STATE_SETUP, 0)) ++ return -EACCES; ++ ++ if (vx_info_state(vxi, VXS_SHUTDOWN)) ++ return -EFAULT; ++ ++ old_vxi = task_get_vx_info(p); ++ if (old_vxi == vxi) ++ goto out; ++ ++ if (!(ret = vx_migrate_user(p, vxi))) { ++ int openfd; ++ ++ task_lock(p); ++ openfd = vx_openfd_task(p); ++ ++ if (old_vxi) { ++ atomic_dec(&old_vxi->cvirt.nr_threads); ++ atomic_dec(&old_vxi->cvirt.nr_running); ++ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? */ ++ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY); ++ } ++ atomic_inc(&vxi->cvirt.nr_threads); ++ atomic_inc(&vxi->cvirt.nr_running); ++ __rlim_inc(&vxi->limit, RLIMIT_NPROC); ++ /* FIXME: what about the struct files here? */ ++ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd); ++ /* account for the executable */ ++ __rlim_inc(&vxi->limit, VLIMIT_DENTRY); ++ ++ if (old_vxi) { ++ release_vx_info(old_vxi, p); ++ clr_vx_info(&p->vx_info); ++ } ++ claim_vx_info(vxi, p); ++ set_vx_info(&p->vx_info, vxi); ++ p->xid = vxi->vx_id; ++ ++ vxdprintk(VXD_CBIT(xid, 5), ++ "moved task %p into vxi:%p[#%d]", ++ p, vxi, vxi->vx_id); ++ ++ vx_mask_cap_bset(vxi, p); ++ task_unlock(p); ++ ++ /* hack for *spaces to provide compatibility */ ++ if (unshare) { ++ struct nsproxy *old_nsp, *new_nsp; ++ ++ ret = unshare_nsproxy_namespaces( ++ CLONE_NEWUTS | CLONE_NEWIPC, ++ &new_nsp, NULL); ++ if (ret) ++ goto out; ++ ++ old_nsp = xchg(&p->nsproxy, new_nsp); ++ vx_set_space(vxi, CLONE_NEWUTS | CLONE_NEWIPC); ++ put_nsproxy(old_nsp); ++ } ++ } ++out: ++ put_vx_info(old_vxi); ++ return ret; ++} ++ ++int vx_set_reaper(struct vx_info *vxi, struct task_struct *p) ++{ ++ struct task_struct *old_reaper; ++ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_reaper(%p[#%d],%p[#%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid); ++ ++ old_reaper = vxi->vx_reaper; ++ if (old_reaper == p) ++ return 0; ++ ++ /* set new child reaper */ ++ get_task_struct(p); ++ vxi->vx_reaper = p; ++ put_task_struct(old_reaper); ++ return 0; ++} ++ ++int vx_set_init(struct vx_info *vxi, struct task_struct *p) ++{ ++ if (!vxi) ++ return -EINVAL; ++ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->vx_flags &= ~VXF_STATE_INIT; ++ vxi->vx_initpid = p->tgid; ++ return 0; ++} ++ ++void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_exit_init(%p[#%d],%p[#%d,%d,%d])", ++ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); ++ ++ vxi->exit_code = code; ++ vxi->vx_initpid = 0; ++} ++ ++ ++void vx_set_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id); ++ ++ get_vx_info(vxi); ++ claim_vx_info(vxi, NULL); ++} ++ ++void vx_clear_persistent(struct vx_info *vxi) ++{ ++ vxdprintk(VXD_CBIT(xid, 6), ++ "vx_clear_persistent(%p[#%d])", vxi, 
vxi->vx_id); ++ ++ release_vx_info(vxi, NULL); ++ put_vx_info(vxi); ++} ++ ++void vx_update_persistent(struct vx_info *vxi) ++{ ++ if (vx_info_flags(vxi, VXF_PERSISTENT, 0)) ++ vx_set_persistent(vxi); ++ else ++ vx_clear_persistent(vxi); ++} ++ ++ ++/* task must be current or locked */ ++ ++void exit_vx_info(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ atomic_dec(&vxi->cvirt.nr_threads); ++ vx_nproc_dec(p); ++ ++ vxi->exit_code = code; ++ release_vx_info(vxi, p); ++ } ++} ++ ++void exit_vx_info_early(struct task_struct *p, int code) ++{ ++ struct vx_info *vxi = p->vx_info; ++ ++ if (vxi) { ++ if (vxi->vx_initpid == p->tgid) ++ vx_exit_init(vxi, p, code); ++ if (vxi->vx_reaper == p) ++ vx_set_reaper(vxi, init_pid_ns.child_reaper); ++ } ++} ++ ++ ++/* vserver syscall commands below here */ ++ ++/* taks xid and vx_info functions */ ++ ++#include ++ ++ ++int vc_task_xid(uint32_t id) ++{ ++ xid_t xid; ++ ++ if (id) { ++ struct task_struct *tsk; ++ ++ read_lock(&tasklist_lock); ++ tsk = find_task_by_real_pid(id); ++ xid = (tsk) ? tsk->xid : -ESRCH; ++ read_unlock(&tasklist_lock); ++ } else ++ xid = vx_current_xid(); ++ return xid; ++} ++ ++ ++int vc_vx_info(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_vx_info_v0 vc_data; ++ ++ vc_data.xid = vxi->vx_id; ++ vc_data.initpid = vxi->vx_initpid; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++int vc_ctx_stat(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_stat_v0 vc_data; ++ ++ vc_data.usecnt = atomic_read(&vxi->vx_usecnt); ++ vc_data.tasks = atomic_read(&vxi->vx_tasks); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++ ++/* context functions */ ++ ++int vc_ctx_create(uint32_t xid, void __user *data) ++{ ++ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET }; ++ struct vx_info *new_vxi; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ if ((xid > MAX_S_CONTEXT) || (xid < 2)) ++ return -EINVAL; ++ ++ new_vxi = __create_vx_info(xid); ++ if (IS_ERR(new_vxi)) ++ return PTR_ERR(new_vxi); ++ ++ /* initial flags */ ++ new_vxi->vx_flags = vc_data.flagword; ++ ++ ret = -ENOEXEC; ++ if (vs_state_change(new_vxi, VSC_STARTUP)) ++ goto out; ++ ++ ret = vx_migrate_task(current, new_vxi, (!data)); ++ if (ret) ++ goto out; ++ ++ /* return context id on success */ ++ ret = new_vxi->vx_id; ++ ++ /* get a reference for persistent contexts */ ++ if ((vc_data.flagword & VXF_PERSISTENT)) ++ vx_set_persistent(new_vxi); ++out: ++ release_vx_info(new_vxi, NULL); ++ put_vx_info(new_vxi); ++ return ret; ++} ++ ++ ++int vc_ctx_migrate(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_migrate vc_data = { .flagword = 0 }; ++ int ret; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ ret = vx_migrate_task(current, vxi, 0); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_INIT) ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ if (vc_data.flagword & VXM_SET_REAPER) ++ ret = vx_set_reaper(vxi, current); ++ return ret; ++} ++ ++ ++int vc_get_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ ++ vc_data.flagword = vxi->vx_flags; ++ ++ /* special STATE flag handling */ ++ vc_data.mask = vs_mask_flags(~0ULL, vxi->vx_flags, VXF_ONE_TIME); ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; 
++} ++ ++int vc_set_cflags(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_flags_v0 vc_data; ++ uint64_t mask, trigger; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ /* special STATE flag handling */ ++ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME); ++ trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword); ++ ++ if (vxi == current->vx_info) { ++ if (trigger & VXF_STATE_SETUP) ++ vx_mask_cap_bset(vxi, current); ++ if (trigger & VXF_STATE_INIT) { ++ int ret; ++ ++ ret = vx_set_init(vxi, current); ++ if (ret) ++ return ret; ++ ret = vx_set_reaper(vxi, current); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ vxi->vx_flags = vs_mask_flags(vxi->vx_flags, ++ vc_data.flagword, mask); ++ if (trigger & VXF_PERSISTENT) ++ vx_update_persistent(vxi); ++ ++ return 0; ++} ++ ++static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps) ++{ ++ if (bcaps) ++ *bcaps = vxi->vx_bcaps; ++ if (ccaps) ++ *ccaps = vxi->vx_ccaps; ++ ++ return 0; ++} ++ ++int vc_get_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, NULL, &vc_data.ccaps); ++ if (ret) ++ return ret; ++ vc_data.cmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int do_set_caps(struct vx_info *vxi, ++ uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask) ++{ ++ vxi->vx_bcaps = vs_mask_flags(vxi->vx_bcaps, bcaps, bmask); ++ vxi->vx_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask); ++ ++ return 0; ++} ++ ++int vc_set_ccaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_ctx_caps_v1 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask); ++} ++ ++int vc_get_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ int ret; ++ ++ ret = do_get_caps(vxi, &vc_data.bcaps, NULL); ++ if (ret) ++ return ret; ++ vc_data.bmask = ~0ULL; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_bcaps(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_bcaps vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0); ++} ++ ++ ++int vc_get_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ vc_data.bias = vxi->vx_badness_bias; ++ ++ if (copy_to_user(data, &vc_data, sizeof(vc_data))) ++ return -EFAULT; ++ return 0; ++} ++ ++int vc_set_badness(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_badness_v0 vc_data; ++ ++ if (copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ vxi->vx_badness_bias = vc_data.bias; ++ return 0; ++} ++ ++#include ++ ++EXPORT_SYMBOL_GPL(free_vx_info); ++ +diff -Nurb linux-2.6.22-594/kernel/vserver/space.c linux-2.6.22-595/kernel/vserver/space.c +--- linux-2.6.22-594/kernel/vserver/space.c 2008-03-20 00:05:21.000000000 -0400 ++++ linux-2.6.22-595/kernel/vserver/space.c 2008-03-20 00:08:28.000000000 -0400 @@ -15,6 +15,7 @@ #include #include @@ -8,7 +1653,7 @@ #include #include -@@ -54,6 +55,7 @@ +@@ -55,6 +56,7 @@ struct mnt_namespace *old_ns; struct uts_namespace *old_uts; struct ipc_namespace *old_ipc; @@ -16,11 +1661,10 @@ struct nsproxy *nsproxy; nsproxy = copy_nsproxy(old_nsproxy); -@@ -83,6 +85,17 @@ - get_ipc_ns(nsproxy->ipc_ns); +@@ -85,12 
+87,26 @@ } else old_ipc = NULL; -+ + + if (mask & CLONE_NEWNET) { + old_net = nsproxy->net_ns; + nsproxy->net_ns = new_nsproxy->net_ns; @@ -31,10 +1675,10 @@ + } else + old_net = NULL; + - ++ if (old_ns) put_mnt_ns(old_ns); -@@ -90,6 +101,9 @@ + if (old_uts) put_uts_ns(old_uts); if (old_ipc) put_ipc_ns(old_ipc); @@ -44,13 +1688,6894 @@ out: return nsproxy; } -@@ -250,7 +264,8 @@ +@@ -251,6 +267,7 @@ int vc_enter_space(struct vx_info *vxi, void __user *data) { -- struct vcmd_space_mask vc_data = { .mask = 0 }; + /* Ask dhozac how to pass this flag from user space - Sapan*/ -+ struct vcmd_space_mask vc_data = { .mask = CLONE_NEWNET }; + struct vcmd_space_mask vc_data = { .mask = 0 }; if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) - return -EFAULT; +diff -Nurb linux-2.6.22-594/kernel/vserver/space.c.orig linux-2.6.22-595/kernel/vserver/space.c.orig +--- linux-2.6.22-594/kernel/vserver/space.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-595/kernel/vserver/space.c.orig 2008-03-20 00:05:28.000000000 -0400 +@@ -0,0 +1,295 @@ ++/* ++ * linux/kernel/vserver/space.c ++ * ++ * Virtual Server: Context Space Support ++ * ++ * Copyright (C) 2003-2007 Herbert Pötzl ++ * ++ * V0.01 broken out from context.c 0.07 ++ * V0.02 added task locking for namespace ++ * V0.03 broken out vx_enter_namespace ++ * V0.04 added *space support and commands ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++ ++atomic_t vs_global_nsproxy = ATOMIC_INIT(0); ++atomic_t vs_global_fs = ATOMIC_INIT(0); ++atomic_t vs_global_mnt_ns = ATOMIC_INIT(0); ++atomic_t vs_global_uts_ns = ATOMIC_INIT(0); ++atomic_t vs_global_ipc_ns = ATOMIC_INIT(0); ++ ++ ++/* namespace functions */ ++ ++#include ++ ++const struct vcmd_space_mask space_mask = { ++ .mask = CLONE_NEWNS | ++ CLONE_NEWUTS | ++ CLONE_NEWIPC | ++ CLONE_FS | ++ CLONE_NEWNET ++}; ++ ++ ++/* ++ * build a new nsproxy mix ++ * assumes that both proxies are 'const' ++ * does not touch nsproxy refcounts ++ * will hold a reference on the result. ++ */ ++ ++struct nsproxy *vs_mix_nsproxy(struct nsproxy *old_nsproxy, ++ struct nsproxy *new_nsproxy, unsigned long mask) ++{ ++ struct mnt_namespace *old_ns; ++ struct uts_namespace *old_uts; ++ struct ipc_namespace *old_ipc; ++ struct net *old_net; ++ struct nsproxy *nsproxy; ++ ++ nsproxy = copy_nsproxy(old_nsproxy); ++ if (!nsproxy) ++ goto out; ++ ++ if (mask & CLONE_NEWNS) { ++ old_ns = nsproxy->mnt_ns; ++ nsproxy->mnt_ns = new_nsproxy->mnt_ns; ++ if (nsproxy->mnt_ns) ++ get_mnt_ns(nsproxy->mnt_ns); ++ } else ++ old_ns = NULL; ++ ++ if (mask & CLONE_NEWUTS) { ++ old_uts = nsproxy->uts_ns; ++ nsproxy->uts_ns = new_nsproxy->uts_ns; ++ if (nsproxy->uts_ns) ++ get_uts_ns(nsproxy->uts_ns); ++ } else ++ old_uts = NULL; ++ ++ if (mask & CLONE_NEWIPC) { ++ old_ipc = nsproxy->ipc_ns; ++ nsproxy->ipc_ns = new_nsproxy->ipc_ns; ++ if (nsproxy->ipc_ns) ++ get_ipc_ns(nsproxy->ipc_ns); ++ } else ++ old_ipc = NULL; ++ ++ if (mask & CLONE_NEWNET) { ++ old_net = nsproxy->net_ns; ++ nsproxy->net_ns = new_nsproxy->net_ns; ++ if (nsproxy->net_ns) { ++ get_net(nsproxy->net_ns); ++ printk(KERN_ALERT "Cloning network namespace\n"); ++ } ++ } else ++ old_net = NULL; ++ ++ ++ if (old_ns) ++ put_mnt_ns(old_ns); ++ if (old_uts) ++ put_uts_ns(old_uts); ++ if (old_ipc) ++ put_ipc_ns(old_ipc); ++ if (old_net) ++ put_net(old_net); ++ ++out: ++ return nsproxy; ++} ++ ++ ++/* ++ * merge two nsproxy structs into a new one. ++ * will hold a reference on the result. 
++ */ ++ ++static inline ++struct nsproxy *__vs_merge_nsproxy(struct nsproxy *old, ++ struct nsproxy *proxy, unsigned long mask) ++{ ++ struct nsproxy null_proxy = { .mnt_ns = NULL }; ++ ++ if (!proxy) ++ return NULL; ++ ++ if (mask) { ++ /* vs_mix_nsproxy returns with reference */ ++ return vs_mix_nsproxy(old ? old : &null_proxy, ++ proxy, mask); ++ } ++ get_nsproxy(proxy); ++ return proxy; ++} ++ ++/* ++ * merge two fs structs into a new one. ++ * will take a reference on the result. ++ */ ++ ++static inline ++struct fs_struct *__vs_merge_fs(struct fs_struct *old, ++ struct fs_struct *fs, unsigned long mask) ++{ ++ if (!(mask & CLONE_FS)) { ++ if (old) ++ atomic_inc(&old->count); ++ return old; ++ } ++ ++ if (!fs) ++ return NULL; ++ ++ return copy_fs_struct(fs); ++} ++ ++ ++int vx_enter_space(struct vx_info *vxi, unsigned long mask) ++{ ++ struct nsproxy *proxy, *proxy_cur, *proxy_new; ++ struct fs_struct *fs, *fs_cur, *fs_new; ++ int ret; ++ ++ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0)) ++ return -EACCES; ++ ++ if (!mask) ++ mask = vxi->vx_nsmask; ++ ++ if ((mask & vxi->vx_nsmask) != mask) ++ return -EINVAL; ++ ++ proxy = vxi->vx_nsproxy; ++ fs = vxi->vx_fs; ++ ++ task_lock(current); ++ fs_cur = current->fs; ++ atomic_inc(&fs_cur->count); ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ fs_new = __vs_merge_fs(fs_cur, fs, mask); ++ if (IS_ERR(fs_new)) { ++ ret = PTR_ERR(fs_new); ++ goto out_put; ++ } ++ ++ proxy_new = __vs_merge_nsproxy(proxy_cur, proxy, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put_fs; ++ } ++ ++ fs_new = xchg(¤t->fs, fs_new); ++ proxy_new = xchg(¤t->nsproxy, proxy_new); ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put_fs: ++ if (fs_new) ++ put_fs_struct(fs_new); ++out_put: ++ if (proxy_cur) ++ put_nsproxy(proxy_cur); ++ if (fs_cur) ++ put_fs_struct(fs_cur); ++ return ret; ++} ++ ++ ++int vx_set_space(struct vx_info *vxi, unsigned long mask) ++{ ++ struct nsproxy *proxy_vxi, *proxy_cur, *proxy_new; ++ struct fs_struct *fs_vxi, *fs_cur, *fs_new; ++ int ret; ++ ++ if (!mask) ++ mask = space_mask.mask; ++ ++ if ((mask & space_mask.mask) != mask) ++ return -EINVAL; ++ ++ proxy_vxi = vxi->vx_nsproxy; ++ fs_vxi = vxi->vx_fs; ++ ++ task_lock(current); ++ fs_cur = current->fs; ++ atomic_inc(&fs_cur->count); ++ proxy_cur = current->nsproxy; ++ get_nsproxy(proxy_cur); ++ task_unlock(current); ++ ++ fs_new = __vs_merge_fs(fs_vxi, fs_cur, mask); ++ if (IS_ERR(fs_new)) { ++ ret = PTR_ERR(fs_new); ++ goto out_put; ++ } ++ ++ proxy_new = __vs_merge_nsproxy(proxy_vxi, proxy_cur, mask); ++ if (IS_ERR(proxy_new)) { ++ ret = PTR_ERR(proxy_new); ++ goto out_put_fs; ++ } ++ ++ fs_new = xchg(&vxi->vx_fs, fs_new); ++ proxy_new = xchg(&vxi->vx_nsproxy, proxy_new); ++ vxi->vx_nsmask |= mask; ++ ret = 0; ++ ++ if (proxy_new) ++ put_nsproxy(proxy_new); ++out_put_fs: ++ if (fs_new) ++ put_fs_struct(fs_new); ++out_put: ++ if (proxy_cur) ++ put_nsproxy(proxy_cur); ++ if (fs_cur) ++ put_fs_struct(fs_cur); ++ return ret; ++} ++ ++ ++int vc_enter_space(struct vx_info *vxi, void __user *data) ++{ ++ /* Ask dhozac how to pass this flag from user space - Sapan*/ ++ struct vcmd_space_mask vc_data = { .mask = CLONE_NEWNET }; ++ ++ if (data && copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_enter_space(vxi, vc_data.mask); ++} ++ ++int vc_set_space(struct vx_info *vxi, void __user *data) ++{ ++ struct vcmd_space_mask vc_data = { .mask = 0 }; ++ ++ if (data && 
copy_from_user(&vc_data, data, sizeof(vc_data))) ++ return -EFAULT; ++ ++ return vx_set_space(vxi, vc_data.mask); ++} ++ ++int vc_get_space_mask(struct vx_info *vxi, void __user *data) ++{ ++ if (copy_to_user(data, &space_mask, sizeof(space_mask))) ++ return -EFAULT; ++ return 0; ++} ++ +diff -Nurb linux-2.6.22-594/net/core/net_namespace.c linux-2.6.22-595/net/core/net_namespace.c +--- linux-2.6.22-594/net/core/net_namespace.c 2008-03-20 00:05:18.000000000 -0400 ++++ linux-2.6.22-595/net/core/net_namespace.c 2008-03-20 00:14:56.000000000 -0400 +@@ -112,10 +112,12 @@ + ops = list_entry(ptr, struct pernet_operations, list); + if (ops->init) { + error = ops->init(net); +- if (error < 0) ++ if (error < 0) { ++ printk(KERN_ALERT "Error setting up netns: %x\n", ops->init); + goto out_undo; + } + } ++ } + out: + return error; + out_undo: +diff -Nurb linux-2.6.22-594/net/core/net_namespace.c.orig linux-2.6.22-595/net/core/net_namespace.c.orig +--- linux-2.6.22-594/net/core/net_namespace.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-595/net/core/net_namespace.c.orig 2008-03-20 00:05:18.000000000 -0400 +@@ -0,0 +1,332 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Our network namespace constructor/destructor lists ++ */ ++ ++static LIST_HEAD(pernet_list); ++static struct list_head *first_device = &pernet_list; ++static DEFINE_MUTEX(net_mutex); ++ ++static DEFINE_MUTEX(net_list_mutex); ++LIST_HEAD(net_namespace_list); ++ ++static struct kmem_cache *net_cachep; ++ ++struct net init_net; ++EXPORT_SYMBOL_GPL(init_net); ++ ++void net_lock(void) ++{ ++ mutex_lock(&net_list_mutex); ++} ++ ++void net_unlock(void) ++{ ++ mutex_unlock(&net_list_mutex); ++} ++ ++static struct net *net_alloc(void) ++{ ++ return kmem_cache_alloc(net_cachep, GFP_KERNEL); ++} ++ ++static void net_free(struct net *net) ++{ ++ if (!net) ++ return; ++ ++ if (unlikely(atomic_read(&net->use_count) != 0)) { ++ printk(KERN_EMERG "network namespace not free! Usage: %d\n", ++ atomic_read(&net->use_count)); ++ return; ++ } ++ ++ kmem_cache_free(net_cachep, net); ++} ++ ++static void cleanup_net(struct work_struct *work) ++{ ++ struct pernet_operations *ops; ++ struct list_head *ptr; ++ struct net *net; ++ ++ net = container_of(work, struct net, work); ++ ++ mutex_lock(&net_mutex); ++ ++ /* Don't let anyone else find us. */ ++ net_lock(); ++ list_del(&net->list); ++ net_unlock(); ++ ++ /* Run all of the network namespace exit methods */ ++ list_for_each_prev(ptr, &pernet_list) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->exit) ++ ops->exit(net); ++ } ++ ++ mutex_unlock(&net_mutex); ++ ++ /* Ensure there are no outstanding rcu callbacks using this ++ * network namespace. ++ */ ++ rcu_barrier(); ++ ++ /* Finally it is safe to free my network namespace structure */ ++ net_free(net); ++} ++ ++ ++void __put_net(struct net *net) ++{ ++ /* Cleanup the network namespace in process context */ ++ INIT_WORK(&net->work, cleanup_net); ++ schedule_work(&net->work); ++} ++EXPORT_SYMBOL_GPL(__put_net); ++ ++/* ++ * setup_net runs the initializers for the network namespace object. 
++ */ ++static int setup_net(struct net *net) ++{ ++ /* Must be called with net_mutex held */ ++ struct pernet_operations *ops; ++ struct list_head *ptr; ++ int error; ++ ++ memset(net, 0, sizeof(struct net)); ++ atomic_set(&net->count, 1); ++ atomic_set(&net->use_count, 0); ++ ++ error = 0; ++ list_for_each(ptr, &pernet_list) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->init) { ++ error = ops->init(net); ++ if (error < 0) ++ goto out_undo; ++ } ++ } ++out: ++ return error; ++out_undo: ++ /* Walk through the list backwards calling the exit functions ++ * for the pernet modules whose init functions did not fail. ++ */ ++ for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->exit) ++ ops->exit(net); ++ } ++ goto out; ++} ++ ++struct net *copy_net_ns(unsigned long flags, struct net *old_net) ++{ ++ struct net *new_net = NULL; ++ int err; ++ ++ get_net(old_net); ++ ++ if (!(flags & CLONE_NEWNET)) ++ return old_net; ++ ++ err = -EPERM; ++ if (!capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ err = -ENOMEM; ++ new_net = net_alloc(); ++ if (!new_net) ++ goto out; ++ ++ mutex_lock(&net_mutex); ++ err = setup_net(new_net); ++ if (err) ++ goto out_unlock; ++ ++ net_lock(); ++ list_add_tail(&new_net->list, &net_namespace_list); ++ net_unlock(); ++ ++ ++out_unlock: ++ mutex_unlock(&net_mutex); ++out: ++ put_net(old_net); ++ if (err) { ++ net_free(new_net); ++ new_net = ERR_PTR(err); ++ } ++ return new_net; ++} ++ ++static int __init net_ns_init(void) ++{ ++ int err; ++ ++ printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); ++ net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), ++ SMP_CACHE_BYTES, ++ SLAB_PANIC, NULL, NULL); ++ mutex_lock(&net_mutex); ++ err = setup_net(&init_net); ++ ++ net_lock(); ++ list_add_tail(&init_net.list, &net_namespace_list); ++ net_unlock(); ++ ++ mutex_unlock(&net_mutex); ++ if (err) ++ panic("Could not setup the initial network namespace"); ++ ++ return 0; ++} ++ ++pure_initcall(net_ns_init); ++ ++static int register_pernet_operations(struct list_head *list, ++ struct pernet_operations *ops) ++{ ++ struct net *net, *undo_net; ++ int error; ++ ++ error = 0; ++ list_add_tail(&ops->list, list); ++ for_each_net(net) { ++ if (ops->init) { ++ error = ops->init(net); ++ if (error) ++ goto out_undo; ++ } ++ } ++out: ++ return error; ++ ++out_undo: ++ /* If I have an error cleanup all namespaces I initialized */ ++ list_del(&ops->list); ++ for_each_net(undo_net) { ++ if (undo_net == net) ++ goto undone; ++ if (ops->exit) ++ ops->exit(undo_net); ++ } ++undone: ++ goto out; ++} ++ ++static void unregister_pernet_operations(struct pernet_operations *ops) ++{ ++ struct net *net; ++ ++ list_del(&ops->list); ++ for_each_net(net) ++ if (ops->exit) ++ ops->exit(net); ++} ++ ++/** ++ * register_pernet_subsys - register a network namespace subsystem ++ * @ops: pernet operations structure for the subsystem ++ * ++ * Register a subsystem which has init and exit functions ++ * that are called when network namespaces are created and ++ * destroyed respectively. ++ * ++ * When registered all network namespace init functions are ++ * called for every existing network namespace. Allowing kernel ++ * modules to have a race free view of the set of network namespaces. ++ * ++ * When a new network namespace is created all of the init ++ * methods are called in the order in which they were registered. 
++ * ++ * When a network namespace is destroyed all of the exit methods ++ * are called in the reverse of the order with which they were ++ * registered. ++ */ ++int register_pernet_subsys(struct pernet_operations *ops) ++{ ++ int error; ++ mutex_lock(&net_mutex); ++ error = register_pernet_operations(first_device, ops); ++ mutex_unlock(&net_mutex); ++ return error; ++} ++EXPORT_SYMBOL_GPL(register_pernet_subsys); ++ ++/** ++ * unregister_pernet_subsys - unregister a network namespace subsystem ++ * @ops: pernet operations structure to manipulate ++ * ++ * Remove the pernet operations structure from the list to be ++ * used when network namespaces are created or destoryed. In ++ * addition run the exit method for all existing network ++ * namespaces. ++ */ ++void unregister_pernet_subsys(struct pernet_operations *module) ++{ ++ mutex_lock(&net_mutex); ++ unregister_pernet_operations(module); ++ mutex_unlock(&net_mutex); ++} ++EXPORT_SYMBOL_GPL(unregister_pernet_subsys); ++ ++/** ++ * register_pernet_device - register a network namespace device ++ * @ops: pernet operations structure for the subsystem ++ * ++ * Register a device which has init and exit functions ++ * that are called when network namespaces are created and ++ * destroyed respectively. ++ * ++ * When registered all network namespace init functions are ++ * called for every existing network namespace. Allowing kernel ++ * modules to have a race free view of the set of network namespaces. ++ * ++ * When a new network namespace is created all of the init ++ * methods are called in the order in which they were registered. ++ * ++ * When a network namespace is destroyed all of the exit methods ++ * are called in the reverse of the order with which they were ++ * registered. ++ */ ++int register_pernet_device(struct pernet_operations *ops) ++{ ++ int error; ++ mutex_lock(&net_mutex); ++ error = register_pernet_operations(&pernet_list, ops); ++ if (!error && (first_device == &pernet_list)) ++ first_device = &ops->list; ++ mutex_unlock(&net_mutex); ++ return error; ++} ++EXPORT_SYMBOL_GPL(register_pernet_device); ++ ++/** ++ * unregister_pernet_device - unregister a network namespace netdevice ++ * @ops: pernet operations structure to manipulate ++ * ++ * Remove the pernet operations structure from the list to be ++ * used when network namespaces are created or destoryed. In ++ * addition run the exit method for all existing network ++ * namespaces. ++ */ ++void unregister_pernet_device(struct pernet_operations *ops) ++{ ++ mutex_lock(&net_mutex); ++ if (&ops->list == first_device) ++ first_device = first_device->next; ++ unregister_pernet_operations(ops); ++ mutex_unlock(&net_mutex); ++} ++EXPORT_SYMBOL_GPL(unregister_pernet_device); +diff -Nurb linux-2.6.22-594/net/ipv4/af_inet.c.orig linux-2.6.22-595/net/ipv4/af_inet.c.orig +--- linux-2.6.22-594/net/ipv4/af_inet.c.orig 2008-03-20 00:05:18.000000000 -0400 ++++ linux-2.6.22-595/net/ipv4/af_inet.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1522 +0,0 @@ +-/* +- * INET An implementation of the TCP/IP protocol suite for the LINUX +- * operating system. INET is implemented using the BSD Socket +- * interface as the means of communication with the user level. +- * +- * PF_INET protocol family socket handler. +- * +- * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ +- * +- * Authors: Ross Biro +- * Fred N. 
van Kempen, +- * Florian La Roche, +- * Alan Cox, +- * +- * Changes (see also sock.c) +- * +- * piggy, +- * Karl Knutson : Socket protocol table +- * A.N.Kuznetsov : Socket death error in accept(). +- * John Richardson : Fix non blocking error in connect() +- * so sockets that fail to connect +- * don't return -EINPROGRESS. +- * Alan Cox : Asynchronous I/O support +- * Alan Cox : Keep correct socket pointer on sock +- * structures +- * when accept() ed +- * Alan Cox : Semantics of SO_LINGER aren't state +- * moved to close when you look carefully. +- * With this fixed and the accept bug fixed +- * some RPC stuff seems happier. +- * Niibe Yutaka : 4.4BSD style write async I/O +- * Alan Cox, +- * Tony Gale : Fixed reuse semantics. +- * Alan Cox : bind() shouldn't abort existing but dead +- * sockets. Stops FTP netin:.. I hope. +- * Alan Cox : bind() works correctly for RAW sockets. +- * Note that FreeBSD at least was broken +- * in this respect so be careful with +- * compatibility tests... +- * Alan Cox : routing cache support +- * Alan Cox : memzero the socket structure for +- * compactness. +- * Matt Day : nonblock connect error handler +- * Alan Cox : Allow large numbers of pending sockets +- * (eg for big web sites), but only if +- * specifically application requested. +- * Alan Cox : New buffering throughout IP. Used +- * dumbly. +- * Alan Cox : New buffering now used smartly. +- * Alan Cox : BSD rather than common sense +- * interpretation of listen. +- * Germano Caronni : Assorted small races. +- * Alan Cox : sendmsg/recvmsg basic support. +- * Alan Cox : Only sendmsg/recvmsg now supported. +- * Alan Cox : Locked down bind (see security list). +- * Alan Cox : Loosened bind a little. +- * Mike McLagan : ADD/DEL DLCI Ioctls +- * Willy Konynenberg : Transparent proxying support. +- * David S. Miller : New socket lookup architecture. +- * Some other random speedups. +- * Cyrus Durgin : Cleaned up file for kmod hacks. +- * Andi Kleen : Fix inet_stream_connect TCP race. +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#ifdef CONFIG_IP_MROUTE +-#include +-#endif +-#include +- +-DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly; +- +-extern void ip_mc_drop_socket(struct sock *sk); +- +-/* The inetsw table contains everything that inet_create needs to +- * build a new socket. 
+- */ +-static struct list_head inetsw[SOCK_MAX]; +-static DEFINE_SPINLOCK(inetsw_lock); +- +-/* New destruction routine */ +- +-void inet_sock_destruct(struct sock *sk) +-{ +- struct inet_sock *inet = inet_sk(sk); +- +- __skb_queue_purge(&sk->sk_receive_queue); +- __skb_queue_purge(&sk->sk_error_queue); +- +- if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { +- printk("Attempt to release TCP socket in state %d %p\n", +- sk->sk_state, sk); +- return; +- } +- if (!sock_flag(sk, SOCK_DEAD)) { +- printk("Attempt to release alive inet socket %p\n", sk); +- return; +- } +- +- BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); +- BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); +- BUG_TRAP(!sk->sk_wmem_queued); +- BUG_TRAP(!sk->sk_forward_alloc); +- +- kfree(inet->opt); +- dst_release(sk->sk_dst_cache); +- sk_refcnt_debug_dec(sk); +-} +- +-/* +- * The routines beyond this point handle the behaviour of an AF_INET +- * socket object. Mostly it punts to the subprotocols of IP to do +- * the work. +- */ +- +-/* +- * Automatically bind an unbound socket. +- */ +- +-static int inet_autobind(struct sock *sk) +-{ +- struct inet_sock *inet; +- /* We may need to bind the socket. */ +- lock_sock(sk); +- inet = inet_sk(sk); +- if (!inet->num) { +- if (sk->sk_prot->get_port(sk, 0)) { +- release_sock(sk); +- return -EAGAIN; +- } +- inet->sport = htons(inet->num); +- sk->sk_xid = vx_current_xid(); +- sk->sk_nid = nx_current_nid(); +- } +- release_sock(sk); +- return 0; +-} +- +-/* +- * Move a socket into listening state. +- */ +-int inet_listen(struct socket *sock, int backlog) +-{ +- struct sock *sk = sock->sk; +- unsigned char old_state; +- int err; +- +- lock_sock(sk); +- +- err = -EINVAL; +- if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) +- goto out; +- +- old_state = sk->sk_state; +- if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) +- goto out; +- +- /* Really, if the socket is already in listen state +- * we can only allow the backlog to be adjusted. +- */ +- if (old_state != TCP_LISTEN) { +- err = inet_csk_listen_start(sk, backlog); +- if (err) +- goto out; +- } +- sk->sk_max_ack_backlog = backlog; +- err = 0; +- +-out: +- release_sock(sk); +- return err; +-} +- +-u32 inet_ehash_secret __read_mostly; +-EXPORT_SYMBOL(inet_ehash_secret); +- +-/* +- * inet_ehash_secret must be set exactly once +- * Instead of using a dedicated spinlock, we (ab)use inetsw_lock +- */ +-void build_ehash_secret(void) +-{ +- u32 rnd; +- do { +- get_random_bytes(&rnd, sizeof(rnd)); +- } while (rnd == 0); +- spin_lock_bh(&inetsw_lock); +- if (!inet_ehash_secret) +- inet_ehash_secret = rnd; +- spin_unlock_bh(&inetsw_lock); +-} +-EXPORT_SYMBOL(build_ehash_secret); +- +-/* +- * Create an inet socket. +- */ +- +-static int inet_create(struct socket *sock, int protocol) +-{ +- struct sock *sk; +- struct list_head *p; +- struct inet_protosw *answer; +- struct inet_sock *inet; +- struct proto *answer_prot; +- unsigned char answer_flags; +- char answer_no_check; +- int try_loading_module = 0; +- int err; +- +- if (sock->type != SOCK_RAW && +- sock->type != SOCK_DGRAM && +- !inet_ehash_secret) +- build_ehash_secret(); +- +- sock->state = SS_UNCONNECTED; +- +- /* Look for the requested type/protocol pair. */ +- answer = NULL; +-lookup_protocol: +- err = -ESOCKTNOSUPPORT; +- rcu_read_lock(); +- list_for_each_rcu(p, &inetsw[sock->type]) { +- answer = list_entry(p, struct inet_protosw, list); +- +- /* Check the non-wild match. 
*/ +- if (protocol == answer->protocol) { +- if (protocol != IPPROTO_IP) +- break; +- } else { +- /* Check for the two wild cases. */ +- if (IPPROTO_IP == protocol) { +- protocol = answer->protocol; +- break; +- } +- if (IPPROTO_IP == answer->protocol) +- break; +- } +- err = -EPROTONOSUPPORT; +- answer = NULL; +- } +- +- if (unlikely(answer == NULL)) { +- if (try_loading_module < 2) { +- rcu_read_unlock(); +- /* +- * Be more specific, e.g. net-pf-2-proto-132-type-1 +- * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM) +- */ +- if (++try_loading_module == 1) +- request_module("net-pf-%d-proto-%d-type-%d", +- PF_INET, protocol, sock->type); +- /* +- * Fall back to generic, e.g. net-pf-2-proto-132 +- * (net-pf-PF_INET-proto-IPPROTO_SCTP) +- */ +- else +- request_module("net-pf-%d-proto-%d", +- PF_INET, protocol); +- goto lookup_protocol; +- } else +- goto out_rcu_unlock; +- } +- +- err = -EPERM; +- if ((protocol == IPPROTO_ICMP) && +- nx_capable(answer->capability, NXC_RAW_ICMP)) +- goto override; +- if (sock->type == SOCK_RAW && +- nx_capable(answer->capability, NXC_RAW_SOCKET)) +- goto override; +- if (answer->capability > 0 && !capable(answer->capability)) +- goto out_rcu_unlock; +-override: +- sock->ops = answer->ops; +- answer_prot = answer->prot; +- answer_no_check = answer->no_check; +- answer_flags = answer->flags; +- rcu_read_unlock(); +- +- BUG_TRAP(answer_prot->slab != NULL); +- +- err = -ENOBUFS; +- sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); +- if (sk == NULL) +- goto out; +- +- err = 0; +- sk->sk_no_check = answer_no_check; +- if (INET_PROTOSW_REUSE & answer_flags) +- sk->sk_reuse = 1; +- +- inet = inet_sk(sk); +- inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; +- +- if (SOCK_RAW == sock->type) { +- inet->num = protocol; +- if (IPPROTO_RAW == protocol) +- inet->hdrincl = 1; +- } +- +- if (ipv4_config.no_pmtu_disc) +- inet->pmtudisc = IP_PMTUDISC_DONT; +- else +- inet->pmtudisc = IP_PMTUDISC_WANT; +- +- inet->id = 0; +- +- sock_init_data(sock, sk); +- +- sk->sk_destruct = inet_sock_destruct; +- sk->sk_family = PF_INET; +- sk->sk_protocol = protocol; +- sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; +- +- inet->uc_ttl = -1; +- inet->mc_loop = 1; +- inet->mc_ttl = 1; +- inet->mc_index = 0; +- inet->mc_list = NULL; +- +- sk_refcnt_debug_inc(sk); +- +- if (inet->num) { +- /* It assumes that any protocol which allows +- * the user to assign a number at socket +- * creation time automatically +- * shares. +- */ +- inet->sport = htons(inet->num); +- /* Add to protocol hash chains. */ +- sk->sk_prot->hash(sk); +- } +- +- if (sk->sk_prot->init) { +- err = sk->sk_prot->init(sk); +- if (err) +- sk_common_release(sk); +- } +-out: +- return err; +-out_rcu_unlock: +- rcu_read_unlock(); +- goto out; +-} +- +- +-/* +- * The peer socket should always be NULL (or else). When we call this +- * function we are destroying the object and from then on nobody +- * should refer to it. +- */ +-int inet_release(struct socket *sock) +-{ +- struct sock *sk = sock->sk; +- +- if (sk) { +- long timeout; +- +- /* Applications forget to leave groups before exiting */ +- ip_mc_drop_socket(sk); +- +- /* If linger is set, we don't return until the close +- * is complete. Otherwise we return immediately. The +- * actually closing is done the same either way. +- * +- * If the close is due to the process exiting, we never +- * linger.. 
+- */ +- timeout = 0; +- if (sock_flag(sk, SOCK_LINGER) && +- !(current->flags & PF_EXITING)) +- timeout = sk->sk_lingertime; +- sock->sk = NULL; +- sk->sk_prot->close(sk, timeout); +- } +- return 0; +-} +- +-/* It is off by default, see below. */ +-int sysctl_ip_nonlocal_bind __read_mostly; +- +-int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +-{ +- struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; +- struct sock *sk = sock->sk; +- struct inet_sock *inet = inet_sk(sk); +- struct nx_v4_sock_addr nsa; +- unsigned short snum; +- int chk_addr_ret; +- int err; +- +- /* If the socket has its own bind function then use it. (RAW) */ +- if (sk->sk_prot->bind) { +- err = sk->sk_prot->bind(sk, uaddr, addr_len); +- goto out; +- } +- err = -EINVAL; +- if (addr_len < sizeof(struct sockaddr_in)) +- goto out; +- +- err = v4_map_sock_addr(inet, addr, &nsa); +- if (err) +- goto out; +- +- chk_addr_ret = inet_addr_type(nsa.saddr); +- +- /* Not specified by any standard per-se, however it breaks too +- * many applications when removed. It is unfortunate since +- * allowing applications to make a non-local bind solves +- * several problems with systems using dynamic addressing. +- * (ie. your servers still start up even if your ISDN link +- * is temporarily down) +- */ +- err = -EADDRNOTAVAIL; +- if (!sysctl_ip_nonlocal_bind && +- !inet->freebind && +- nsa.saddr != INADDR_ANY && +- chk_addr_ret != RTN_LOCAL && +- chk_addr_ret != RTN_MULTICAST && +- chk_addr_ret != RTN_BROADCAST) +- goto out; +- +- snum = ntohs(addr->sin_port); +- err = -EACCES; +- if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) +- goto out; +- +- /* We keep a pair of addresses. rcv_saddr is the one +- * used by hash lookups, and saddr is used for transmit. +- * +- * In the BSD API these are the same except where it +- * would be illegal to use them (multicast/broadcast) in +- * which case the sending device address is used. +- */ +- lock_sock(sk); +- +- /* Check these errors (active socket, double bind). */ +- err = -EINVAL; +- if (sk->sk_state != TCP_CLOSE || inet->num) +- goto out_release_sock; +- +- v4_set_sock_addr(inet, &nsa); +- if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) +- inet->saddr = 0; /* Use device */ +- +- /* Make sure we are allowed to bind here. */ +- if (sk->sk_prot->get_port(sk, snum)) { +- inet->saddr = inet->rcv_saddr = 0; +- err = -EADDRINUSE; +- goto out_release_sock; +- } +- +- if (inet->rcv_saddr) +- sk->sk_userlocks |= SOCK_BINDADDR_LOCK; +- if (snum) +- sk->sk_userlocks |= SOCK_BINDPORT_LOCK; +- inet->sport = htons(inet->num); +- inet->daddr = 0; +- inet->dport = 0; +- sk_dst_reset(sk); +- err = 0; +-out_release_sock: +- release_sock(sk); +-out: +- return err; +-} +- +-int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, +- int addr_len, int flags) +-{ +- struct sock *sk = sock->sk; +- +- if (uaddr->sa_family == AF_UNSPEC) +- return sk->sk_prot->disconnect(sk, flags); +- +- if (!inet_sk(sk)->num && inet_autobind(sk)) +- return -EAGAIN; +- return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); +-} +- +-static long inet_wait_for_connect(struct sock *sk, long timeo) +-{ +- DEFINE_WAIT(wait); +- +- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); +- +- /* Basic assumption: if someone sets sk->sk_err, he _must_ +- * change state of the socket from TCP_SYN_*. +- * Connect() does not allow to get error notifications +- * without closing the socket. 
+- */ +- while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { +- release_sock(sk); +- timeo = schedule_timeout(timeo); +- lock_sock(sk); +- if (signal_pending(current) || !timeo) +- break; +- prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); +- } +- finish_wait(sk->sk_sleep, &wait); +- return timeo; +-} +- +-/* +- * Connect to a remote host. There is regrettably still a little +- * TCP 'magic' in here. +- */ +-int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, +- int addr_len, int flags) +-{ +- struct sock *sk = sock->sk; +- int err; +- long timeo; +- +- lock_sock(sk); +- +- if (uaddr->sa_family == AF_UNSPEC) { +- err = sk->sk_prot->disconnect(sk, flags); +- sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; +- goto out; +- } +- +- switch (sock->state) { +- default: +- err = -EINVAL; +- goto out; +- case SS_CONNECTED: +- err = -EISCONN; +- goto out; +- case SS_CONNECTING: +- err = -EALREADY; +- /* Fall out of switch with err, set for this state */ +- break; +- case SS_UNCONNECTED: +- err = -EISCONN; +- if (sk->sk_state != TCP_CLOSE) +- goto out; +- +- err = sk->sk_prot->connect(sk, uaddr, addr_len); +- if (err < 0) +- goto out; +- +- sock->state = SS_CONNECTING; +- +- /* Just entered SS_CONNECTING state; the only +- * difference is that return value in non-blocking +- * case is EINPROGRESS, rather than EALREADY. +- */ +- err = -EINPROGRESS; +- break; +- } +- +- timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); +- +- if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { +- /* Error code is set above */ +- if (!timeo || !inet_wait_for_connect(sk, timeo)) +- goto out; +- +- err = sock_intr_errno(timeo); +- if (signal_pending(current)) +- goto out; +- } +- +- /* Connection was closed by RST, timeout, ICMP error +- * or another process disconnected us. +- */ +- if (sk->sk_state == TCP_CLOSE) +- goto sock_error; +- +- /* sk->sk_err may be not zero now, if RECVERR was ordered by user +- * and error was received after socket entered established state. +- * Hence, it is handled normally after connect() return successfully. +- */ +- +- sock->state = SS_CONNECTED; +- err = 0; +-out: +- release_sock(sk); +- return err; +- +-sock_error: +- err = sock_error(sk) ? : -ECONNABORTED; +- sock->state = SS_UNCONNECTED; +- if (sk->sk_prot->disconnect(sk, flags)) +- sock->state = SS_DISCONNECTING; +- goto out; +-} +- +-/* +- * Accept a pending connection. The TCP layer now gives BSD semantics. +- */ +- +-int inet_accept(struct socket *sock, struct socket *newsock, int flags) +-{ +- struct sock *sk1 = sock->sk; +- int err = -EINVAL; +- struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); +- +- if (!sk2) +- goto do_err; +- +- lock_sock(sk2); +- +- BUG_TRAP((1 << sk2->sk_state) & +- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); +- +- sock_graft(sk2, newsock); +- +- newsock->state = SS_CONNECTED; +- err = 0; +- release_sock(sk2); +-do_err: +- return err; +-} +- +- +-/* +- * This does both peername and sockname. 
+- */ +-int inet_getname(struct socket *sock, struct sockaddr *uaddr, +- int *uaddr_len, int peer) +-{ +- struct sock *sk = sock->sk; +- struct inet_sock *inet = inet_sk(sk); +- struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; +- +- sin->sin_family = AF_INET; +- if (peer) { +- if (!inet->dport || +- (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && +- peer == 1)) +- return -ENOTCONN; +- sin->sin_port = inet->dport; +- sin->sin_addr.s_addr = +- nx_map_sock_lback(sk->sk_nx_info, inet->daddr); +- } else { +- __be32 addr = inet->rcv_saddr; +- if (!addr) +- addr = inet->saddr; +- addr = nx_map_sock_lback(sk->sk_nx_info, addr); +- sin->sin_port = inet->sport; +- sin->sin_addr.s_addr = addr; +- } +- memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); +- *uaddr_len = sizeof(*sin); +- return 0; +-} +- +-int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, +- size_t size) +-{ +- struct sock *sk = sock->sk; +- +- /* We may need to bind the socket. */ +- if (!inet_sk(sk)->num && inet_autobind(sk)) +- return -EAGAIN; +- +- return sk->sk_prot->sendmsg(iocb, sk, msg, size); +-} +- +- +-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) +-{ +- struct sock *sk = sock->sk; +- +- /* We may need to bind the socket. */ +- if (!inet_sk(sk)->num && inet_autobind(sk)) +- return -EAGAIN; +- +- if (sk->sk_prot->sendpage) +- return sk->sk_prot->sendpage(sk, page, offset, size, flags); +- return sock_no_sendpage(sock, page, offset, size, flags); +-} +- +- +-int inet_shutdown(struct socket *sock, int how) +-{ +- struct sock *sk = sock->sk; +- int err = 0; +- +- /* This should really check to make sure +- * the socket is a TCP socket. (WHY AC...) +- */ +- how++; /* maps 0->1 has the advantage of making bit 1 rcvs and +- 1->2 bit 2 snds. +- 2->3 */ +- if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ +- return -EINVAL; +- +- lock_sock(sk); +- if (sock->state == SS_CONNECTING) { +- if ((1 << sk->sk_state) & +- (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) +- sock->state = SS_DISCONNECTING; +- else +- sock->state = SS_CONNECTED; +- } +- +- switch (sk->sk_state) { +- case TCP_CLOSE: +- err = -ENOTCONN; +- /* Hack to wake up other listeners, who can poll for +- POLLHUP, even on eg. unconnected UDP sockets -- RR */ +- default: +- sk->sk_shutdown |= how; +- if (sk->sk_prot->shutdown) +- sk->sk_prot->shutdown(sk, how); +- break; +- +- /* Remaining two branches are temporary solution for missing +- * close() in multithreaded environment. It is _not_ a good idea, +- * but we have no choice until close() is repaired at VFS level. +- */ +- case TCP_LISTEN: +- if (!(how & RCV_SHUTDOWN)) +- break; +- /* Fall through */ +- case TCP_SYN_SENT: +- err = sk->sk_prot->disconnect(sk, O_NONBLOCK); +- sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; +- break; +- } +- +- /* Wake up anyone sleeping in poll. */ +- sk->sk_state_change(sk); +- release_sock(sk); +- return err; +-} +- +-/* +- * ioctl() calls you can issue on an INET socket. Most of these are +- * device configuration and stuff and very rarely used. Some ioctls +- * pass on to the socket itself. +- * +- * NOTE: I like the idea of a module for the config stuff. ie ifconfig +- * loads the devconfigure module does its configuring and unloads it. +- * There's a good 20K of config code hanging around the kernel. 
+- */ +- +-int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +-{ +- struct sock *sk = sock->sk; +- int err = 0; +- +- switch (cmd) { +- case SIOCGSTAMP: +- err = sock_get_timestamp(sk, (struct timeval __user *)arg); +- break; +- case SIOCGSTAMPNS: +- err = sock_get_timestampns(sk, (struct timespec __user *)arg); +- break; +- case SIOCADDRT: +- case SIOCDELRT: +- case SIOCRTMSG: +- err = ip_rt_ioctl(cmd, (void __user *)arg); +- break; +- case SIOCDARP: +- case SIOCGARP: +- case SIOCSARP: +- err = arp_ioctl(cmd, (void __user *)arg); +- break; +- case SIOCGIFADDR: +- case SIOCSIFADDR: +- case SIOCGIFBRDADDR: +- case SIOCSIFBRDADDR: +- case SIOCGIFNETMASK: +- case SIOCSIFNETMASK: +- case SIOCGIFDSTADDR: +- case SIOCSIFDSTADDR: +- case SIOCSIFPFLAGS: +- case SIOCGIFPFLAGS: +- case SIOCSIFFLAGS: +- err = devinet_ioctl(cmd, (void __user *)arg); +- break; +- default: +- if (sk->sk_prot->ioctl) +- err = sk->sk_prot->ioctl(sk, cmd, arg); +- else +- err = -ENOIOCTLCMD; +- break; +- } +- return err; +-} +- +-const struct proto_ops inet_stream_ops = { +- .family = PF_INET, +- .owner = THIS_MODULE, +- .release = inet_release, +- .bind = inet_bind, +- .connect = inet_stream_connect, +- .socketpair = sock_no_socketpair, +- .accept = inet_accept, +- .getname = inet_getname, +- .poll = tcp_poll, +- .ioctl = inet_ioctl, +- .listen = inet_listen, +- .shutdown = inet_shutdown, +- .setsockopt = sock_common_setsockopt, +- .getsockopt = sock_common_getsockopt, +- .sendmsg = tcp_sendmsg, +- .recvmsg = sock_common_recvmsg, +- .mmap = sock_no_mmap, +- .sendpage = tcp_sendpage, +-#ifdef CONFIG_COMPAT +- .compat_setsockopt = compat_sock_common_setsockopt, +- .compat_getsockopt = compat_sock_common_getsockopt, +-#endif +-}; +- +-const struct proto_ops inet_dgram_ops = { +- .family = PF_INET, +- .owner = THIS_MODULE, +- .release = inet_release, +- .bind = inet_bind, +- .connect = inet_dgram_connect, +- .socketpair = sock_no_socketpair, +- .accept = sock_no_accept, +- .getname = inet_getname, +- .poll = udp_poll, +- .ioctl = inet_ioctl, +- .listen = sock_no_listen, +- .shutdown = inet_shutdown, +- .setsockopt = sock_common_setsockopt, +- .getsockopt = sock_common_getsockopt, +- .sendmsg = inet_sendmsg, +- .recvmsg = sock_common_recvmsg, +- .mmap = sock_no_mmap, +- .sendpage = inet_sendpage, +-#ifdef CONFIG_COMPAT +- .compat_setsockopt = compat_sock_common_setsockopt, +- .compat_getsockopt = compat_sock_common_getsockopt, +-#endif +-}; +- +-/* +- * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without +- * udp_poll +- */ +-static const struct proto_ops inet_sockraw_ops = { +- .family = PF_INET, +- .owner = THIS_MODULE, +- .release = inet_release, +- .bind = inet_bind, +- .connect = inet_dgram_connect, +- .socketpair = sock_no_socketpair, +- .accept = sock_no_accept, +- .getname = inet_getname, +- .poll = datagram_poll, +- .ioctl = inet_ioctl, +- .listen = sock_no_listen, +- .shutdown = inet_shutdown, +- .setsockopt = sock_common_setsockopt, +- .getsockopt = sock_common_getsockopt, +- .sendmsg = inet_sendmsg, +- .recvmsg = sock_common_recvmsg, +- .mmap = sock_no_mmap, +- .sendpage = inet_sendpage, +-#ifdef CONFIG_COMPAT +- .compat_setsockopt = compat_sock_common_setsockopt, +- .compat_getsockopt = compat_sock_common_getsockopt, +-#endif +-}; +- +-static struct net_proto_family inet_family_ops = { +- .family = PF_INET, +- .create = inet_create, +- .owner = THIS_MODULE, +-}; +- +-/* Upon startup we insert all the elements in inetsw_array[] into +- * the linked list inetsw. 
+- */ +-static struct inet_protosw inetsw_array[] = +-{ +- { +- .type = SOCK_STREAM, +- .protocol = IPPROTO_TCP, +- .prot = &tcp_prot, +- .ops = &inet_stream_ops, +- .capability = -1, +- .no_check = 0, +- .flags = INET_PROTOSW_PERMANENT | +- INET_PROTOSW_ICSK, +- }, +- +- { +- .type = SOCK_DGRAM, +- .protocol = IPPROTO_UDP, +- .prot = &udp_prot, +- .ops = &inet_dgram_ops, +- .capability = -1, +- .no_check = UDP_CSUM_DEFAULT, +- .flags = INET_PROTOSW_PERMANENT, +- }, +- +- +- { +- .type = SOCK_RAW, +- .protocol = IPPROTO_IP, /* wild card */ +- .prot = &raw_prot, +- .ops = &inet_sockraw_ops, +- .capability = CAP_NET_RAW, +- .no_check = UDP_CSUM_DEFAULT, +- .flags = INET_PROTOSW_REUSE, +- } +-}; +- +-#define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) +- +-void inet_register_protosw(struct inet_protosw *p) +-{ +- struct list_head *lh; +- struct inet_protosw *answer; +- int protocol = p->protocol; +- struct list_head *last_perm; +- +- spin_lock_bh(&inetsw_lock); +- +- if (p->type >= SOCK_MAX) +- goto out_illegal; +- +- /* If we are trying to override a permanent protocol, bail. */ +- answer = NULL; +- last_perm = &inetsw[p->type]; +- list_for_each(lh, &inetsw[p->type]) { +- answer = list_entry(lh, struct inet_protosw, list); +- +- /* Check only the non-wild match. */ +- if (INET_PROTOSW_PERMANENT & answer->flags) { +- if (protocol == answer->protocol) +- break; +- last_perm = lh; +- } +- +- answer = NULL; +- } +- if (answer) +- goto out_permanent; +- +- /* Add the new entry after the last permanent entry if any, so that +- * the new entry does not override a permanent entry when matched with +- * a wild-card protocol. But it is allowed to override any existing +- * non-permanent entry. This means that when we remove this entry, the +- * system automatically returns to the old behavior. +- */ +- list_add_rcu(&p->list, last_perm); +-out: +- spin_unlock_bh(&inetsw_lock); +- +- synchronize_net(); +- +- return; +- +-out_permanent: +- printk(KERN_ERR "Attempt to override permanent protocol %d.\n", +- protocol); +- goto out; +- +-out_illegal: +- printk(KERN_ERR +- "Ignoring attempt to register invalid socket type %d.\n", +- p->type); +- goto out; +-} +- +-void inet_unregister_protosw(struct inet_protosw *p) +-{ +- if (INET_PROTOSW_PERMANENT & p->flags) { +- printk(KERN_ERR +- "Attempt to unregister permanent protocol %d.\n", +- p->protocol); +- } else { +- spin_lock_bh(&inetsw_lock); +- list_del_rcu(&p->list); +- spin_unlock_bh(&inetsw_lock); +- +- synchronize_net(); +- } +-} +- +-/* +- * Shall we try to damage output packets if routing dev changes? +- */ +- +-int sysctl_ip_dynaddr __read_mostly; +- +-static int inet_sk_reselect_saddr(struct sock *sk) +-{ +- struct inet_sock *inet = inet_sk(sk); +- int err; +- struct rtable *rt; +- __be32 old_saddr = inet->saddr; +- __be32 new_saddr; +- __be32 daddr = inet->daddr; +- +- if (inet->opt && inet->opt->srr) +- daddr = inet->opt->faddr; +- +- /* Query new route. 
*/ +- err = ip_route_connect(&rt, daddr, 0, +- RT_CONN_FLAGS(sk), +- sk->sk_bound_dev_if, +- sk->sk_protocol, +- inet->sport, inet->dport, sk, 0); +- if (err) +- return err; +- +- sk_setup_caps(sk, &rt->u.dst); +- +- new_saddr = rt->rt_src; +- +- if (new_saddr == old_saddr) +- return 0; +- +- if (sysctl_ip_dynaddr > 1) { +- printk(KERN_INFO "%s(): shifting inet->" +- "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", +- __FUNCTION__, +- NIPQUAD(old_saddr), +- NIPQUAD(new_saddr)); +- } +- +- inet->saddr = inet->rcv_saddr = new_saddr; +- +- /* +- * XXX The only one ugly spot where we need to +- * XXX really change the sockets identity after +- * XXX it has entered the hashes. -DaveM +- * +- * Besides that, it does not check for connection +- * uniqueness. Wait for troubles. +- */ +- __sk_prot_rehash(sk); +- return 0; +-} +- +-int inet_sk_rebuild_header(struct sock *sk) +-{ +- struct inet_sock *inet = inet_sk(sk); +- struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); +- __be32 daddr; +- int err; +- +- /* Route is OK, nothing to do. */ +- if (rt) +- return 0; +- +- /* Reroute. */ +- daddr = inet->daddr; +- if (inet->opt && inet->opt->srr) +- daddr = inet->opt->faddr; +-{ +- struct flowi fl = { +- .oif = sk->sk_bound_dev_if, +- .nl_u = { +- .ip4_u = { +- .daddr = daddr, +- .saddr = inet->saddr, +- .tos = RT_CONN_FLAGS(sk), +- }, +- }, +- .proto = sk->sk_protocol, +- .uli_u = { +- .ports = { +- .sport = inet->sport, +- .dport = inet->dport, +- }, +- }, +- }; +- +- security_sk_classify_flow(sk, &fl); +- err = ip_route_output_flow(&rt, &fl, sk, 0); +-} +- if (!err) +- sk_setup_caps(sk, &rt->u.dst); +- else { +- /* Routing failed... */ +- sk->sk_route_caps = 0; +- /* +- * Other protocols have to map its equivalent state to TCP_SYN_SENT. +- * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. 
-acme +- */ +- if (!sysctl_ip_dynaddr || +- sk->sk_state != TCP_SYN_SENT || +- (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || +- (err = inet_sk_reselect_saddr(sk)) != 0) +- sk->sk_err_soft = -err; +- } +- +- return err; +-} +- +-EXPORT_SYMBOL(inet_sk_rebuild_header); +- +-static int inet_gso_send_check(struct sk_buff *skb) +-{ +- struct iphdr *iph; +- struct net_protocol *ops; +- int proto; +- int ihl; +- int err = -EINVAL; +- +- if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) +- goto out; +- +- iph = ip_hdr(skb); +- ihl = iph->ihl * 4; +- if (ihl < sizeof(*iph)) +- goto out; +- +- if (unlikely(!pskb_may_pull(skb, ihl))) +- goto out; +- +- __skb_pull(skb, ihl); +- skb_reset_transport_header(skb); +- iph = ip_hdr(skb); +- proto = iph->protocol & (MAX_INET_PROTOS - 1); +- err = -EPROTONOSUPPORT; +- +- rcu_read_lock(); +- ops = rcu_dereference(inet_protos[proto]); +- if (likely(ops && ops->gso_send_check)) +- err = ops->gso_send_check(skb); +- rcu_read_unlock(); +- +-out: +- return err; +-} +- +-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +-{ +- struct sk_buff *segs = ERR_PTR(-EINVAL); +- struct iphdr *iph; +- struct net_protocol *ops; +- int proto; +- int ihl; +- int id; +- +- if (unlikely(skb_shinfo(skb)->gso_type & +- ~(SKB_GSO_TCPV4 | +- SKB_GSO_UDP | +- SKB_GSO_DODGY | +- SKB_GSO_TCP_ECN | +- 0))) +- goto out; +- +- if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) +- goto out; +- +- iph = ip_hdr(skb); +- ihl = iph->ihl * 4; +- if (ihl < sizeof(*iph)) +- goto out; +- +- if (unlikely(!pskb_may_pull(skb, ihl))) +- goto out; +- +- __skb_pull(skb, ihl); +- skb_reset_transport_header(skb); +- iph = ip_hdr(skb); +- id = ntohs(iph->id); +- proto = iph->protocol & (MAX_INET_PROTOS - 1); +- segs = ERR_PTR(-EPROTONOSUPPORT); +- +- rcu_read_lock(); +- ops = rcu_dereference(inet_protos[proto]); +- if (likely(ops && ops->gso_segment)) +- segs = ops->gso_segment(skb, features); +- rcu_read_unlock(); +- +- if (!segs || unlikely(IS_ERR(segs))) +- goto out; +- +- skb = segs; +- do { +- iph = ip_hdr(skb); +- iph->id = htons(id++); +- iph->tot_len = htons(skb->len - skb->mac_len); +- iph->check = 0; +- iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); +- } while ((skb = skb->next)); +- +-out: +- return segs; +-} +- +-unsigned long snmp_fold_field(void *mib[], int offt) +-{ +- unsigned long res = 0; +- int i; +- +- for_each_possible_cpu(i) { +- res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt); +- res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt); +- } +- return res; +-} +-EXPORT_SYMBOL_GPL(snmp_fold_field); +- +-int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign) +-{ +- BUG_ON(ptr == NULL); +- ptr[0] = __alloc_percpu(mibsize); +- if (!ptr[0]) +- goto err0; +- ptr[1] = __alloc_percpu(mibsize); +- if (!ptr[1]) +- goto err1; +- return 0; +-err1: +- free_percpu(ptr[0]); +- ptr[0] = NULL; +-err0: +- return -ENOMEM; +-} +-EXPORT_SYMBOL_GPL(snmp_mib_init); +- +-void snmp_mib_free(void *ptr[2]) +-{ +- BUG_ON(ptr == NULL); +- free_percpu(ptr[0]); +- free_percpu(ptr[1]); +- ptr[0] = ptr[1] = NULL; +-} +-EXPORT_SYMBOL_GPL(snmp_mib_free); +- +-#ifdef CONFIG_IP_MULTICAST +-static struct net_protocol igmp_protocol = { +- .handler = igmp_rcv, +-}; +-#endif +- +-static struct net_protocol tcp_protocol = { +- .handler = tcp_v4_rcv, +- .err_handler = tcp_v4_err, +- .gso_send_check = tcp_v4_gso_send_check, +- .gso_segment = tcp_tso_segment, +- .no_policy = 1, +-}; +- +-static struct net_protocol udp_protocol = { +- .handler = udp_rcv, +- .err_handler = 
udp_err, +- .no_policy = 1, +-}; +- +-static struct net_protocol icmp_protocol = { +- .handler = icmp_rcv, +-}; +- +-static int __init init_ipv4_mibs(void) +-{ +- if (snmp_mib_init((void **)net_statistics, +- sizeof(struct linux_mib), +- __alignof__(struct linux_mib)) < 0) +- goto err_net_mib; +- if (snmp_mib_init((void **)ip_statistics, +- sizeof(struct ipstats_mib), +- __alignof__(struct ipstats_mib)) < 0) +- goto err_ip_mib; +- if (snmp_mib_init((void **)icmp_statistics, +- sizeof(struct icmp_mib), +- __alignof__(struct icmp_mib)) < 0) +- goto err_icmp_mib; +- if (snmp_mib_init((void **)tcp_statistics, +- sizeof(struct tcp_mib), +- __alignof__(struct tcp_mib)) < 0) +- goto err_tcp_mib; +- if (snmp_mib_init((void **)udp_statistics, +- sizeof(struct udp_mib), +- __alignof__(struct udp_mib)) < 0) +- goto err_udp_mib; +- if (snmp_mib_init((void **)udplite_statistics, +- sizeof(struct udp_mib), +- __alignof__(struct udp_mib)) < 0) +- goto err_udplite_mib; +- +- tcp_mib_init(); +- +- return 0; +- +-err_udplite_mib: +- snmp_mib_free((void **)udp_statistics); +-err_udp_mib: +- snmp_mib_free((void **)tcp_statistics); +-err_tcp_mib: +- snmp_mib_free((void **)icmp_statistics); +-err_icmp_mib: +- snmp_mib_free((void **)ip_statistics); +-err_ip_mib: +- snmp_mib_free((void **)net_statistics); +-err_net_mib: +- return -ENOMEM; +-} +- +-static int ipv4_proc_init(void); +- +-/* +- * IP protocol layer initialiser +- */ +- +-static struct packet_type ip_packet_type = { +- .type = __constant_htons(ETH_P_IP), +- .func = ip_rcv, +- .gso_send_check = inet_gso_send_check, +- .gso_segment = inet_gso_segment, +-}; +- +-static int __init inet_init(void) +-{ +- struct sk_buff *dummy_skb; +- struct inet_protosw *q; +- struct list_head *r; +- int rc = -EINVAL; +- +- BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); +- +- rc = proto_register(&tcp_prot, 1); +- if (rc) +- goto out; +- +- rc = proto_register(&udp_prot, 1); +- if (rc) +- goto out_unregister_tcp_proto; +- +- rc = proto_register(&raw_prot, 1); +- if (rc) +- goto out_unregister_udp_proto; +- +- /* +- * Tell SOCKET that we are alive... +- */ +- +- (void)sock_register(&inet_family_ops); +- +- /* +- * Add all the base protocols. +- */ +- +- if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) +- printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); +- if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) +- printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); +- if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) +- printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); +-#ifdef CONFIG_IP_MULTICAST +- if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) +- printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n"); +-#endif +- +- /* Register the socket-side information for inet_create. */ +- for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) +- INIT_LIST_HEAD(r); +- +- for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) +- inet_register_protosw(q); +- +- /* +- * Set the ARP module up +- */ +- +- arp_init(); +- +- /* +- * Set the IP module up +- */ +- +- ip_init(); +- +- tcp_v4_init(&inet_family_ops); +- +- /* Setup TCP slab cache for open requests. 
*/ +- tcp_init(); +- +- /* Add UDP-Lite (RFC 3828) */ +- udplite4_register(); +- +- /* +- * Set the ICMP layer up +- */ +- +- icmp_init(&inet_family_ops); +- +- /* +- * Initialise the multicast router +- */ +-#if defined(CONFIG_IP_MROUTE) +- ip_mr_init(); +-#endif +- /* +- * Initialise per-cpu ipv4 mibs +- */ +- +- if (init_ipv4_mibs()) +- printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; +- +- ipv4_proc_init(); +- +- ipfrag_init(); +- +- dev_add_pack(&ip_packet_type); +- +- rc = 0; +-out: +- return rc; +-out_unregister_udp_proto: +- proto_unregister(&udp_prot); +-out_unregister_tcp_proto: +- proto_unregister(&tcp_prot); +- goto out; +-} +- +-fs_initcall(inet_init); +- +-/* ------------------------------------------------------------------------ */ +- +-#ifdef CONFIG_PROC_FS +-static int __init ipv4_proc_init(void) +-{ +- int rc = 0; +- +- if (raw_proc_init()) +- goto out_raw; +- if (tcp4_proc_init()) +- goto out_tcp; +- if (udp4_proc_init()) +- goto out_udp; +- if (fib_proc_init()) +- goto out_fib; +- if (ip_misc_proc_init()) +- goto out_misc; +-out: +- return rc; +-out_misc: +- fib_proc_exit(); +-out_fib: +- udp4_proc_exit(); +-out_udp: +- tcp4_proc_exit(); +-out_tcp: +- raw_proc_exit(); +-out_raw: +- rc = -ENOMEM; +- goto out; +-} +- +-#else /* CONFIG_PROC_FS */ +-static int __init ipv4_proc_init(void) +-{ +- return 0; +-} +-#endif /* CONFIG_PROC_FS */ +- +-MODULE_ALIAS_NETPROTO(PF_INET); +- +-EXPORT_SYMBOL(inet_accept); +-EXPORT_SYMBOL(inet_bind); +-EXPORT_SYMBOL(inet_dgram_connect); +-EXPORT_SYMBOL(inet_dgram_ops); +-EXPORT_SYMBOL(inet_getname); +-EXPORT_SYMBOL(inet_ioctl); +-EXPORT_SYMBOL(inet_listen); +-EXPORT_SYMBOL(inet_register_protosw); +-EXPORT_SYMBOL(inet_release); +-EXPORT_SYMBOL(inet_sendmsg); +-EXPORT_SYMBOL(inet_shutdown); +-EXPORT_SYMBOL(inet_sock_destruct); +-EXPORT_SYMBOL(inet_stream_connect); +-EXPORT_SYMBOL(inet_stream_ops); +-EXPORT_SYMBOL(inet_unregister_protosw); +-EXPORT_SYMBOL(net_statistics); +-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); +diff -Nurb linux-2.6.22-594/net/netfilter/xt_MARK.c.orig linux-2.6.22-595/net/netfilter/xt_MARK.c.orig +--- linux-2.6.22-594/net/netfilter/xt_MARK.c.orig 2008-03-20 00:05:19.000000000 -0400 ++++ linux-2.6.22-595/net/netfilter/xt_MARK.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,283 +0,0 @@ +-/* This is a module which is used for setting the NFMARK field of an skb. */ +- +-/* (C) 1999-2001 Marc Boucher +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. 
+- * +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-MODULE_LICENSE("GPL"); +-MODULE_AUTHOR("Marc Boucher "); +-MODULE_DESCRIPTION("ip[6]tables MARK modification module"); +-MODULE_ALIAS("ipt_MARK"); +-MODULE_ALIAS("ip6t_MARK"); +- +-static inline u_int16_t +-get_dst_port(struct nf_conntrack_tuple *tuple) +-{ +- switch (tuple->dst.protonum) { +- case IPPROTO_GRE: +- /* XXX Truncate 32-bit GRE key to 16 bits */ +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) +- return tuple->dst.u.gre.key; +-#else +- return htons(ntohl(tuple->dst.u.gre.key)); +-#endif +- case IPPROTO_ICMP: +- /* Bind on ICMP echo ID */ +- return tuple->src.u.icmp.id; +- case IPPROTO_TCP: +- return tuple->dst.u.tcp.port; +- case IPPROTO_UDP: +- return tuple->dst.u.udp.port; +- default: +- return tuple->dst.u.all; +- } +-} +- +-static inline u_int16_t +-get_src_port(struct nf_conntrack_tuple *tuple) +-{ +- switch (tuple->dst.protonum) { +- case IPPROTO_GRE: +- /* XXX Truncate 32-bit GRE key to 16 bits */ +- return htons(ntohl(tuple->src.u.gre.key)); +- case IPPROTO_ICMP: +- /* Bind on ICMP echo ID */ +- return tuple->src.u.icmp.id; +- case IPPROTO_TCP: +- return tuple->src.u.tcp.port; +- case IPPROTO_UDP: +- return tuple->src.u.udp.port; +- default: +- return tuple->src.u.all; +- } +-} +- +-static unsigned int +-target_v0(struct sk_buff **pskb, +- const struct net_device *in, +- const struct net_device *out, +- unsigned int hooknum, +- const struct xt_target *target, +- const void *targinfo) +-{ +- const struct xt_mark_target_info *markinfo = targinfo; +- +- (*pskb)->mark = markinfo->mark; +- return XT_CONTINUE; +-} +- +-static unsigned int +-target_v1(struct sk_buff **pskb, +- const struct net_device *in, +- const struct net_device *out, +- unsigned int hooknum, +- const struct xt_target *target, +- const void *targinfo) +-{ +- const struct xt_mark_target_info_v1 *markinfo = targinfo; +- int mark = -1; +- +- switch (markinfo->mode) { +- case XT_MARK_SET: +- mark = markinfo->mark; +- break; +- +- case XT_MARK_AND: +- mark = (*pskb)->mark & markinfo->mark; +- break; +- +- case XT_MARK_OR: +- mark = (*pskb)->mark | markinfo->mark; +- break; +- +- case XT_MARK_COPYXID: { +- enum ip_conntrack_info ctinfo; +- struct sock *connection_sk=NULL; +- int dif; +- +- struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo); +- extern struct inet_hashinfo tcp_hashinfo; +- enum ip_conntrack_dir dir; +- if (!ct) +- break; +- +- dir = CTINFO2DIR(ctinfo); +- u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip; +- u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple); +- u_int16_t proto = ct->tuplehash[dir].tuple.dst.protonum; +- +- u_int32_t ip; +- u_int16_t port; +- +- dif = ((struct rtable *)(*pskb)->dst)->rt_iif; +- ip = ct->tuplehash[dir].tuple.dst.u3.ip; +- port = get_dst_port(&ct->tuplehash[dir].tuple); +- +- if (proto == 1 || proto == 17) { +- if (((*pskb)->mark!=-1) && (*pskb)->mark) +- ct->xid[0]=(*pskb)->mark; +- if (ct->xid[0]) +- mark = ct->xid[0]; +- +- } +- else if (proto == 6) { +- if ((*pskb)->sk) +- connection_sk = (*pskb)->sk; +- else { +- connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); +- } +- +- if (connection_sk) { +- connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir]; +- ct->xid[!dir]=connection_sk->sk_xid; +- if (connection_sk->sk_xid != 0) +- mark = connection_sk->sk_xid; +- if (connection_sk != (*pskb)->sk) +- sock_put(connection_sk); +- } +- break; +- } +- } +- } +- +- if (mark != 
-1) +- (*pskb)->mark = mark; +- return XT_CONTINUE; +-} +- +- +-static int +-checkentry_v0(const char *tablename, +- const void *entry, +- const struct xt_target *target, +- void *targinfo, +- unsigned int hook_mask) +-{ +- struct xt_mark_target_info *markinfo = targinfo; +- +- if (markinfo->mark > 0xffffffff) { +- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); +- return 0; +- } +- return 1; +-} +- +-static int +-checkentry_v1(const char *tablename, +- const void *entry, +- const struct xt_target *target, +- void *targinfo, +- unsigned int hook_mask) +-{ +- struct xt_mark_target_info_v1 *markinfo = targinfo; +- +- if (markinfo->mode != XT_MARK_SET +- && markinfo->mode != XT_MARK_AND +- && markinfo->mode != XT_MARK_OR +- && markinfo->mode != XT_MARK_COPYXID) { +- printk(KERN_WARNING "MARK: unknown mode %u\n", +- markinfo->mode); +- return 0; +- } +- if (markinfo->mark > 0xffffffff) { +- printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); +- return 0; +- } +- return 1; +-} +- +-#ifdef CONFIG_COMPAT +-struct compat_xt_mark_target_info_v1 { +- compat_ulong_t mark; +- u_int8_t mode; +- u_int8_t __pad1; +- u_int16_t __pad2; +-}; +- +-static void compat_from_user_v1(void *dst, void *src) +-{ +- struct compat_xt_mark_target_info_v1 *cm = src; +- struct xt_mark_target_info_v1 m = { +- .mark = cm->mark, +- .mode = cm->mode, +- }; +- memcpy(dst, &m, sizeof(m)); +-} +- +-static int compat_to_user_v1(void __user *dst, void *src) +-{ +- struct xt_mark_target_info_v1 *m = src; +- struct compat_xt_mark_target_info_v1 cm = { +- .mark = m->mark, +- .mode = m->mode, +- }; +- return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; +-} +-#endif /* CONFIG_COMPAT */ +- +-static struct xt_target xt_mark_target[] = { +- { +- .name = "MARK", +- .family = AF_INET, +- .revision = 0, +- .checkentry = checkentry_v0, +- .target = target_v0, +- .targetsize = sizeof(struct xt_mark_target_info), +- .table = "mangle", +- .me = THIS_MODULE, +- }, +- { +- .name = "MARK", +- .family = AF_INET, +- .revision = 1, +- .checkentry = checkentry_v1, +- .target = target_v1, +- .targetsize = sizeof(struct xt_mark_target_info_v1), +-#ifdef CONFIG_COMPAT +- .compatsize = sizeof(struct compat_xt_mark_target_info_v1), +- .compat_from_user = compat_from_user_v1, +- .compat_to_user = compat_to_user_v1, +-#endif +- .table = "mangle", +- .me = THIS_MODULE, +- }, +- { +- .name = "MARK", +- .family = AF_INET6, +- .revision = 0, +- .checkentry = checkentry_v0, +- .target = target_v0, +- .targetsize = sizeof(struct xt_mark_target_info), +- .table = "mangle", +- .me = THIS_MODULE, +- }, +-}; +- +-static int __init xt_mark_init(void) +-{ +- return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); +-} +- +-static void __exit xt_mark_fini(void) +-{ +- xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); +-} +- +-module_init(xt_mark_init); +-module_exit(xt_mark_fini); +diff -Nurb linux-2.6.22-594/net/packet/af_packet.c.orig linux-2.6.22-595/net/packet/af_packet.c.orig +--- linux-2.6.22-594/net/packet/af_packet.c.orig 2008-03-20 00:05:19.000000000 -0400 ++++ linux-2.6.22-595/net/packet/af_packet.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1989 +0,0 @@ +-/* +- * INET An implementation of the TCP/IP protocol suite for the LINUX +- * operating system. INET is implemented using the BSD Socket +- * interface as the means of communication with the user level. +- * +- * PACKET - implements raw packet sockets. 
+- * +- * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ +- * +- * Authors: Ross Biro +- * Fred N. van Kempen, +- * Alan Cox, +- * +- * Fixes: +- * Alan Cox : verify_area() now used correctly +- * Alan Cox : new skbuff lists, look ma no backlogs! +- * Alan Cox : tidied skbuff lists. +- * Alan Cox : Now uses generic datagram routines I +- * added. Also fixed the peek/read crash +- * from all old Linux datagram code. +- * Alan Cox : Uses the improved datagram code. +- * Alan Cox : Added NULL's for socket options. +- * Alan Cox : Re-commented the code. +- * Alan Cox : Use new kernel side addressing +- * Rob Janssen : Correct MTU usage. +- * Dave Platt : Counter leaks caused by incorrect +- * interrupt locking and some slightly +- * dubious gcc output. Can you read +- * compiler: it said _VOLATILE_ +- * Richard Kooijman : Timestamp fixes. +- * Alan Cox : New buffers. Use sk->mac.raw. +- * Alan Cox : sendmsg/recvmsg support. +- * Alan Cox : Protocol setting support +- * Alexey Kuznetsov : Untied from IPv4 stack. +- * Cyrus Durgin : Fixed kerneld for kmod. +- * Michal Ostrowski : Module initialization cleanup. +- * Ulises Alonso : Frame number limit removal and +- * packet_set_ring memory leak. +- * Eric Biederman : Allow for > 8 byte hardware addresses. +- * The convention is that longer addresses +- * will simply extend the hardware address +- * byte arrays at the end of sockaddr_ll +- * and packet_mreq. +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- * +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_INET +-#include +-#endif +- +-/* +- Assumptions: +- - if device has no dev->hard_header routine, it adds and removes ll header +- inside itself. In this case ll header is invisible outside of device, +- but higher levels still should reserve dev->hard_header_len. +- Some devices are enough clever to reallocate skb, when header +- will not fit to reserved space (tunnel), another ones are silly +- (PPP). +- - packet socket receives packets with pulled ll header, +- so that SOCK_RAW should push it back. +- +-On receive: +------------ +- +-Incoming, dev->hard_header!=NULL +- mac_header -> ll header +- data -> data +- +-Outgoing, dev->hard_header!=NULL +- mac_header -> ll header +- data -> ll header +- +-Incoming, dev->hard_header==NULL +- mac_header -> UNKNOWN position. It is very likely, that it points to ll +- header. PPP makes it, that is wrong, because introduce +- assymetry between rx and tx paths. +- data -> data +- +-Outgoing, dev->hard_header==NULL +- mac_header -> data. ll header is still not built! +- data -> data +- +-Resume +- If dev->hard_header==NULL we are unlikely to restore sensible ll header. +- +- +-On transmit: +------------- +- +-dev->hard_header != NULL +- mac_header -> ll header +- data -> ll header +- +-dev->hard_header == NULL (ll header is added by device, we cannot control it) +- mac_header -> data +- data -> data +- +- We should set nh.raw on output to correct posistion, +- packet classifier depends on it. 
+- */ +- +-/* List of all packet sockets. */ +-static HLIST_HEAD(packet_sklist); +-static DEFINE_RWLOCK(packet_sklist_lock); +- +-static atomic_t packet_socks_nr; +- +- +-/* Private packet socket structures. */ +- +-struct packet_mclist +-{ +- struct packet_mclist *next; +- int ifindex; +- int count; +- unsigned short type; +- unsigned short alen; +- unsigned char addr[MAX_ADDR_LEN]; +-}; +-/* identical to struct packet_mreq except it has +- * a longer address field. +- */ +-struct packet_mreq_max +-{ +- int mr_ifindex; +- unsigned short mr_type; +- unsigned short mr_alen; +- unsigned char mr_address[MAX_ADDR_LEN]; +-}; +- +-#ifdef CONFIG_PACKET_MMAP +-static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing); +-#endif +- +-static void packet_flush_mclist(struct sock *sk); +- +-struct packet_sock { +- /* struct sock has to be the first member of packet_sock */ +- struct sock sk; +- struct tpacket_stats stats; +-#ifdef CONFIG_PACKET_MMAP +- char * *pg_vec; +- unsigned int head; +- unsigned int frames_per_block; +- unsigned int frame_size; +- unsigned int frame_max; +- int copy_thresh; +-#endif +- struct packet_type prot_hook; +- spinlock_t bind_lock; +- unsigned int running:1, /* prot_hook is attached*/ +- auxdata:1, +- origdev:1; +- int ifindex; /* bound device */ +- __be16 num; +- struct packet_mclist *mclist; +-#ifdef CONFIG_PACKET_MMAP +- atomic_t mapped; +- unsigned int pg_vec_order; +- unsigned int pg_vec_pages; +- unsigned int pg_vec_len; +-#endif +-}; +- +-struct packet_skb_cb { +- unsigned int origlen; +- union { +- struct sockaddr_pkt pkt; +- struct sockaddr_ll ll; +- } sa; +-}; +- +-#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) +- +-#ifdef CONFIG_PACKET_MMAP +- +-static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position) +-{ +- unsigned int pg_vec_pos, frame_offset; +- +- pg_vec_pos = position / po->frames_per_block; +- frame_offset = position % po->frames_per_block; +- +- return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size)); +-} +-#endif +- +-static inline struct packet_sock *pkt_sk(struct sock *sk) +-{ +- return (struct packet_sock *)sk; +-} +- +-static void packet_sock_destruct(struct sock *sk) +-{ +- BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); +- BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); +- +- if (!sock_flag(sk, SOCK_DEAD)) { +- printk("Attempt to release alive packet socket: %p\n", sk); +- return; +- } +- +- atomic_dec(&packet_socks_nr); +-#ifdef PACKET_REFCNT_DEBUG +- printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr)); +-#endif +-} +- +- +-static const struct proto_ops packet_ops; +- +-static const struct proto_ops packet_ops_spkt; +- +-static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +-{ +- struct sock *sk; +- struct sockaddr_pkt *spkt; +- +- /* +- * When we registered the protocol we saved the socket in the data +- * field for just this event. +- */ +- +- sk = pt->af_packet_priv; +- +- /* +- * Yank back the headers [hope the device set this +- * right or kerboom...] +- * +- * Incoming packets have ll header pulled, +- * push it back. +- * +- * For outgoing ones skb->data == skb_mac_header(skb) +- * so that this procedure is noop. 
+- */ +- +- if (skb->pkt_type == PACKET_LOOPBACK) +- goto out; +- +- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) +- goto oom; +- +- /* drop any routing info */ +- dst_release(skb->dst); +- skb->dst = NULL; +- +- /* drop conntrack reference */ +- nf_reset(skb); +- +- spkt = &PACKET_SKB_CB(skb)->sa.pkt; +- +- skb_push(skb, skb->data - skb_mac_header(skb)); +- +- /* +- * The SOCK_PACKET socket receives _all_ frames. +- */ +- +- spkt->spkt_family = dev->type; +- strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); +- spkt->spkt_protocol = skb->protocol; +- +- /* +- * Charge the memory to the socket. This is done specifically +- * to prevent sockets using all the memory up. +- */ +- +- if (sock_queue_rcv_skb(sk,skb) == 0) +- return 0; +- +-out: +- kfree_skb(skb); +-oom: +- return 0; +-} +- +- +-/* +- * Output a raw packet to a device layer. This bypasses all the other +- * protocol layers and you must therefore supply it with a complete frame +- */ +- +-static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, +- struct msghdr *msg, size_t len) +-{ +- struct sock *sk = sock->sk; +- struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name; +- struct sk_buff *skb; +- struct net_device *dev; +- __be16 proto=0; +- int err; +- +- if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) +- return -EPERM; +- +- /* +- * Get and verify the address. +- */ +- +- if (saddr) +- { +- if (msg->msg_namelen < sizeof(struct sockaddr)) +- return(-EINVAL); +- if (msg->msg_namelen==sizeof(struct sockaddr_pkt)) +- proto=saddr->spkt_protocol; +- } +- else +- return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */ +- +- /* +- * Find the device first to size check it +- */ +- +- saddr->spkt_device[13] = 0; +- dev = dev_get_by_name(saddr->spkt_device); +- err = -ENODEV; +- if (dev == NULL) +- goto out_unlock; +- +- err = -ENETDOWN; +- if (!(dev->flags & IFF_UP)) +- goto out_unlock; +- +- /* +- * You may not queue a frame bigger than the mtu. This is the lowest level +- * raw protocol and you must do your own fragmentation at this level. +- */ +- +- err = -EMSGSIZE; +- if (len > dev->mtu + dev->hard_header_len) +- goto out_unlock; +- +- err = -ENOBUFS; +- skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL); +- +- /* +- * If the write buffer is full, then tough. At this level the user gets to +- * deal with the problem - do your own algorithmic backoffs. That's far +- * more flexible. +- */ +- +- if (skb == NULL) +- goto out_unlock; +- +- /* +- * Fill it in +- */ +- +- /* FIXME: Save some space for broken drivers that write a +- * hard header at transmission time by themselves. PPP is the +- * notable one here. This should really be fixed at the driver level. 
+- */ +- skb_reserve(skb, LL_RESERVED_SPACE(dev)); +- skb_reset_network_header(skb); +- +- /* Try to align data part correctly */ +- if (dev->hard_header) { +- skb->data -= dev->hard_header_len; +- skb->tail -= dev->hard_header_len; +- if (len < dev->hard_header_len) +- skb_reset_network_header(skb); +- } +- +- /* Returns -EFAULT on error */ +- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); +- skb->protocol = proto; +- skb->dev = dev; +- skb->priority = sk->sk_priority; +- if (err) +- goto out_free; +- +- /* +- * Now send it +- */ +- +- dev_queue_xmit(skb); +- dev_put(dev); +- return(len); +- +-out_free: +- kfree_skb(skb); +-out_unlock: +- if (dev) +- dev_put(dev); +- return err; +-} +- +-static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, +- unsigned int res) +-{ +- struct sk_filter *filter; +- int tag = skb->skb_tag; +- +- if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) +- return 0; +- +- rcu_read_lock_bh(); +- filter = rcu_dereference(sk->sk_filter); +- if (filter != NULL) +- res = sk_run_filter(skb, filter->insns, filter->len); +- rcu_read_unlock_bh(); +- +- return res; +-} +- +-/* +- This function makes lazy skb cloning in hope that most of packets +- are discarded by BPF. +- +- Note tricky part: we DO mangle shared skb! skb->data, skb->len +- and skb->cb are mangled. It works because (and until) packets +- falling here are owned by current CPU. Output packets are cloned +- by dev_queue_xmit_nit(), input packets are processed by net_bh +- sequencially, so that if we return skb to original state on exit, +- we will not harm anyone. +- */ +- +-static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +-{ +- struct sock *sk; +- struct sockaddr_ll *sll; +- struct packet_sock *po; +- u8 * skb_head = skb->data; +- int skb_len = skb->len; +- unsigned int snaplen, res; +- +- if (skb->pkt_type == PACKET_LOOPBACK) +- goto drop; +- +- sk = pt->af_packet_priv; +- po = pkt_sk(sk); +- +- skb->dev = dev; +- +- if (dev->hard_header) { +- /* The device has an explicit notion of ll header, +- exported to higher levels. +- +- Otherwise, the device hides datails of it frame +- structure, so that corresponding packet head +- never delivered to user. 
+- */ +- if (sk->sk_type != SOCK_DGRAM) +- skb_push(skb, skb->data - skb_mac_header(skb)); +- else if (skb->pkt_type == PACKET_OUTGOING) { +- /* Special case: outgoing packets have ll header at head */ +- skb_pull(skb, skb_network_offset(skb)); +- } +- } +- +- snaplen = skb->len; +- +- res = run_filter(skb, sk, snaplen); +- if (!res) +- goto drop_n_restore; +- if (snaplen > res) +- snaplen = res; +- +- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= +- (unsigned)sk->sk_rcvbuf) +- goto drop_n_acct; +- +- if (skb_shared(skb)) { +- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); +- if (nskb == NULL) +- goto drop_n_acct; +- +- if (skb_head != skb->data) { +- skb->data = skb_head; +- skb->len = skb_len; +- } +- kfree_skb(skb); +- skb = nskb; +- } +- +- BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 > +- sizeof(skb->cb)); +- +- sll = &PACKET_SKB_CB(skb)->sa.ll; +- sll->sll_family = AF_PACKET; +- sll->sll_hatype = dev->type; +- sll->sll_protocol = skb->protocol; +- sll->sll_pkttype = skb->pkt_type; +- if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) +- sll->sll_ifindex = orig_dev->ifindex; +- else +- sll->sll_ifindex = dev->ifindex; +- sll->sll_halen = 0; +- +- if (dev->hard_header_parse) +- sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); +- +- PACKET_SKB_CB(skb)->origlen = skb->len; +- +- if (pskb_trim(skb, snaplen)) +- goto drop_n_acct; +- +- skb_set_owner_r(skb, sk); +- skb->dev = NULL; +- dst_release(skb->dst); +- skb->dst = NULL; +- +- /* drop conntrack reference */ +- nf_reset(skb); +- +- spin_lock(&sk->sk_receive_queue.lock); +- po->stats.tp_packets++; +- __skb_queue_tail(&sk->sk_receive_queue, skb); +- spin_unlock(&sk->sk_receive_queue.lock); +- sk->sk_data_ready(sk, skb->len); +- return 0; +- +-drop_n_acct: +- spin_lock(&sk->sk_receive_queue.lock); +- po->stats.tp_drops++; +- spin_unlock(&sk->sk_receive_queue.lock); +- +-drop_n_restore: +- if (skb_head != skb->data && skb_shared(skb)) { +- skb->data = skb_head; +- skb->len = skb_len; +- } +-drop: +- kfree_skb(skb); +- return 0; +-} +- +-#ifdef CONFIG_PACKET_MMAP +-static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +-{ +- struct sock *sk; +- struct packet_sock *po; +- struct sockaddr_ll *sll; +- struct tpacket_hdr *h; +- u8 * skb_head = skb->data; +- int skb_len = skb->len; +- unsigned int snaplen, res; +- unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; +- unsigned short macoff, netoff; +- struct sk_buff *copy_skb = NULL; +- struct timeval tv; +- +- if (skb->pkt_type == PACKET_LOOPBACK) +- goto drop; +- +- sk = pt->af_packet_priv; +- po = pkt_sk(sk); +- +- if (dev->hard_header) { +- if (sk->sk_type != SOCK_DGRAM) +- skb_push(skb, skb->data - skb_mac_header(skb)); +- else if (skb->pkt_type == PACKET_OUTGOING) { +- /* Special case: outgoing packets have ll header at head */ +- skb_pull(skb, skb_network_offset(skb)); +- } +- } +- +- if (skb->ip_summed == CHECKSUM_PARTIAL) +- status |= TP_STATUS_CSUMNOTREADY; +- +- snaplen = skb->len; +- +- res = run_filter(skb, sk, snaplen); +- if (!res) +- goto drop_n_restore; +- if (snaplen > res) +- snaplen = res; +- +- if (sk->sk_type == SOCK_DGRAM) { +- macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; +- } else { +- unsigned maclen = skb_network_offset(skb); +- netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 
16 : maclen)); +- macoff = netoff - maclen; +- } +- +- if (macoff + snaplen > po->frame_size) { +- if (po->copy_thresh && +- atomic_read(&sk->sk_rmem_alloc) + skb->truesize < +- (unsigned)sk->sk_rcvbuf) { +- if (skb_shared(skb)) { +- copy_skb = skb_clone(skb, GFP_ATOMIC); +- } else { +- copy_skb = skb_get(skb); +- skb_head = skb->data; +- } +- if (copy_skb) +- skb_set_owner_r(copy_skb, sk); +- } +- snaplen = po->frame_size - macoff; +- if ((int)snaplen < 0) +- snaplen = 0; +- } +- +- spin_lock(&sk->sk_receive_queue.lock); +- h = packet_lookup_frame(po, po->head); +- +- if (h->tp_status) +- goto ring_is_full; +- po->head = po->head != po->frame_max ? po->head+1 : 0; +- po->stats.tp_packets++; +- if (copy_skb) { +- status |= TP_STATUS_COPY; +- __skb_queue_tail(&sk->sk_receive_queue, copy_skb); +- } +- if (!po->stats.tp_drops) +- status &= ~TP_STATUS_LOSING; +- spin_unlock(&sk->sk_receive_queue.lock); +- +- skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); +- +- h->tp_len = skb->len; +- h->tp_snaplen = snaplen; +- h->tp_mac = macoff; +- h->tp_net = netoff; +- if (skb->tstamp.tv64 == 0) { +- __net_timestamp(skb); +- sock_enable_timestamp(sk); +- } +- tv = ktime_to_timeval(skb->tstamp); +- h->tp_sec = tv.tv_sec; +- h->tp_usec = tv.tv_usec; +- +- sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); +- sll->sll_halen = 0; +- if (dev->hard_header_parse) +- sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr); +- sll->sll_family = AF_PACKET; +- sll->sll_hatype = dev->type; +- sll->sll_protocol = skb->protocol; +- sll->sll_pkttype = skb->pkt_type; +- if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST) +- sll->sll_ifindex = orig_dev->ifindex; +- else +- sll->sll_ifindex = dev->ifindex; +- +- h->tp_status = status; +- smp_mb(); +- +- { +- struct page *p_start, *p_end; +- u8 *h_end = (u8 *)h + macoff + snaplen - 1; +- +- p_start = virt_to_page(h); +- p_end = virt_to_page(h_end); +- while (p_start <= p_end) { +- flush_dcache_page(p_start); +- p_start++; +- } +- } +- +- sk->sk_data_ready(sk, 0); +- +-drop_n_restore: +- if (skb_head != skb->data && skb_shared(skb)) { +- skb->data = skb_head; +- skb->len = skb_len; +- } +-drop: +- kfree_skb(skb); +- return 0; +- +-ring_is_full: +- po->stats.tp_drops++; +- spin_unlock(&sk->sk_receive_queue.lock); +- +- sk->sk_data_ready(sk, 0); +- if (copy_skb) +- kfree_skb(copy_skb); +- goto drop_n_restore; +-} +- +-#endif +- +- +-static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, +- struct msghdr *msg, size_t len) +-{ +- struct sock *sk = sock->sk; +- struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name; +- struct sk_buff *skb; +- struct net_device *dev; +- __be16 proto; +- unsigned char *addr; +- int ifindex, err, reserve = 0; +- +- if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) +- return -EPERM; +- +- /* +- * Get and verify the address. 
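tpacket_rcv() above is the mmap()-ed ring variant of the receive path: it picks the next free frame slot, copies the link-level address and up to snaplen bytes of the packet into it, fills in the struct tpacket_hdr offsets and timestamp, and finally flips tp_status so user space can see the frame. A rough user-space counterpart is sketched below; it is not part of the patch, assumes 4 KiB pages and a kernel built with CONFIG_PACKET_MMAP, and leaves out error handling and memory barriers for brevity.

/* User-space sketch of a PACKET_RX_RING consumer (TPACKET_V1 layout). */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <poll.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>    /* PACKET_RX_RING, struct tpacket_req/_hdr */

int main(void)
{
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("socket");
        return 1;
    }

    /* 64 one-page blocks, two 2 KiB frames per block -> 128 frames. */
    struct tpacket_req req = {
        .tp_block_size = 4096,
        .tp_block_nr   = 64,
        .tp_frame_size = 2048,
        .tp_frame_nr   = 128,
    };
    if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req)) < 0) {
        perror("PACKET_RX_RING");
        return 1;
    }

    size_t ring_len = (size_t)req.tp_block_size * req.tp_block_nr;
    char *ring = mmap(NULL, ring_len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (ring == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    unsigned int frame = 0;
    for (;;) {
        struct tpacket_hdr *h =
            (struct tpacket_hdr *)(ring + frame * req.tp_frame_size);

        if (!(h->tp_status & TP_STATUS_USER)) {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };
            poll(&pfd, 1, -1);          /* wait until tpacket_rcv() fills this slot */
            continue;
        }
        printf("frame %u: %u bytes captured (%u on the wire)\n",
               frame, h->tp_snaplen, h->tp_len);

        h->tp_status = TP_STATUS_KERNEL;    /* hand the slot back to the kernel */
        frame = (frame + 1) % req.tp_frame_nr;
    }
}

TP_STATUS_KERNEL is zero, so the assignment at the bottom is what marks a slot free again; tpacket_rcv() treats any non-zero status as "ring full".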
+- */ +- +- if (saddr == NULL) { +- struct packet_sock *po = pkt_sk(sk); +- +- ifindex = po->ifindex; +- proto = po->num; +- addr = NULL; +- } else { +- err = -EINVAL; +- if (msg->msg_namelen < sizeof(struct sockaddr_ll)) +- goto out; +- if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) +- goto out; +- ifindex = saddr->sll_ifindex; +- proto = saddr->sll_protocol; +- addr = saddr->sll_addr; +- } +- +- +- dev = dev_get_by_index(ifindex); +- err = -ENXIO; +- if (dev == NULL) +- goto out_unlock; +- if (sock->type == SOCK_RAW) +- reserve = dev->hard_header_len; +- +- err = -ENETDOWN; +- if (!(dev->flags & IFF_UP)) +- goto out_unlock; +- +- err = -EMSGSIZE; +- if (len > dev->mtu+reserve) +- goto out_unlock; +- +- skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), +- msg->msg_flags & MSG_DONTWAIT, &err); +- if (skb==NULL) +- goto out_unlock; +- +- skb_reserve(skb, LL_RESERVED_SPACE(dev)); +- skb_reset_network_header(skb); +- +- if (dev->hard_header) { +- int res; +- err = -EINVAL; +- res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); +- if (sock->type != SOCK_DGRAM) { +- skb_reset_tail_pointer(skb); +- skb->len = 0; +- } else if (res < 0) +- goto out_free; +- } +- +- /* Returns -EFAULT on error */ +- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); +- if (err) +- goto out_free; +- +- skb->protocol = proto; +- skb->dev = dev; +- skb->priority = sk->sk_priority; +- +- /* +- * Now send it +- */ +- +- err = dev_queue_xmit(skb); +- if (err > 0 && (err = net_xmit_errno(err)) != 0) +- goto out_unlock; +- +- dev_put(dev); +- +- return(len); +- +-out_free: +- kfree_skb(skb); +-out_unlock: +- if (dev) +- dev_put(dev); +-out: +- return err; +-} +- +-/* +- * Close a PACKET socket. This is fairly simple. We immediately go +- * to 'closed' state and remove our protocol entry in the device list. +- */ +- +-static int packet_release(struct socket *sock) +-{ +- struct sock *sk = sock->sk; +- struct packet_sock *po; +- +- if (!sk) +- return 0; +- +- po = pkt_sk(sk); +- +- write_lock_bh(&packet_sklist_lock); +- sk_del_node_init(sk); +- write_unlock_bh(&packet_sklist_lock); +- +- /* +- * Unhook packet receive handler. +- */ +- +- if (po->running) { +- /* +- * Remove the protocol hook +- */ +- dev_remove_pack(&po->prot_hook); +- po->running = 0; +- po->num = 0; +- __sock_put(sk); +- } +- +- packet_flush_mclist(sk); +- +-#ifdef CONFIG_PACKET_MMAP +- if (po->pg_vec) { +- struct tpacket_req req; +- memset(&req, 0, sizeof(req)); +- packet_set_ring(sk, &req, 1); +- } +-#endif +- +- /* +- * Now the socket is dead. No more input will appear. +- */ +- +- sock_orphan(sk); +- sock->sk = NULL; +- +- /* Purge queues */ +- +- skb_queue_purge(&sk->sk_receive_queue); +- +- sock_put(sk); +- return 0; +-} +- +-/* +- * Attach a packet hook. +- */ +- +-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol) +-{ +- struct packet_sock *po = pkt_sk(sk); +- /* +- * Detach an existing hook if present. +- */ +- +- lock_sock(sk); +- +- spin_lock(&po->bind_lock); +- if (po->running) { +- __sock_put(sk); +- po->running = 0; +- po->num = 0; +- spin_unlock(&po->bind_lock); +- dev_remove_pack(&po->prot_hook); +- spin_lock(&po->bind_lock); +- } +- +- po->num = protocol; +- po->prot_hook.type = protocol; +- po->prot_hook.dev = dev; +- +- po->ifindex = dev ? 
dev->ifindex : 0; +- +- if (protocol == 0) +- goto out_unlock; +- +- if (dev) { +- if (dev->flags&IFF_UP) { +- dev_add_pack(&po->prot_hook); +- sock_hold(sk); +- po->running = 1; +- } else { +- sk->sk_err = ENETDOWN; +- if (!sock_flag(sk, SOCK_DEAD)) +- sk->sk_error_report(sk); +- } +- } else { +- dev_add_pack(&po->prot_hook); +- sock_hold(sk); +- po->running = 1; +- } +- +-out_unlock: +- spin_unlock(&po->bind_lock); +- release_sock(sk); +- return 0; +-} +- +-/* +- * Bind a packet socket to a device +- */ +- +-static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) +-{ +- struct sock *sk=sock->sk; +- char name[15]; +- struct net_device *dev; +- int err = -ENODEV; +- +- /* +- * Check legality +- */ +- +- if (addr_len != sizeof(struct sockaddr)) +- return -EINVAL; +- strlcpy(name,uaddr->sa_data,sizeof(name)); +- +- dev = dev_get_by_name(name); +- if (dev) { +- err = packet_do_bind(sk, dev, pkt_sk(sk)->num); +- dev_put(dev); +- } +- return err; +-} +- +-static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +-{ +- struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; +- struct sock *sk=sock->sk; +- struct net_device *dev = NULL; +- int err; +- +- +- /* +- * Check legality +- */ +- +- if (addr_len < sizeof(struct sockaddr_ll)) +- return -EINVAL; +- if (sll->sll_family != AF_PACKET) +- return -EINVAL; +- +- if (sll->sll_ifindex) { +- err = -ENODEV; +- dev = dev_get_by_index(sll->sll_ifindex); +- if (dev == NULL) +- goto out; +- } +- err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); +- if (dev) +- dev_put(dev); +- +-out: +- return err; +-} +- +-static struct proto packet_proto = { +- .name = "PACKET", +- .owner = THIS_MODULE, +- .obj_size = sizeof(struct packet_sock), +-}; +- +-/* +- * Create a packet of type SOCK_PACKET. +- */ +- +-static int packet_create(struct socket *sock, int protocol) +-{ +- struct sock *sk; +- struct packet_sock *po; +- __be16 proto = (__force __be16)protocol; /* weird, but documented */ +- int err; +- +- if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) +- return -EPERM; +- if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && +- sock->type != SOCK_PACKET) +- return -ESOCKTNOSUPPORT; +- +- sock->state = SS_UNCONNECTED; +- +- err = -ENOBUFS; +- sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); +- if (sk == NULL) +- goto out; +- +- sock->ops = &packet_ops; +- if (sock->type == SOCK_PACKET) +- sock->ops = &packet_ops_spkt; +- +- sock_init_data(sock, sk); +- +- po = pkt_sk(sk); +- sk->sk_family = PF_PACKET; +- po->num = proto; +- +- sk->sk_destruct = packet_sock_destruct; +- atomic_inc(&packet_socks_nr); +- +- /* +- * Attach a protocol block +- */ +- +- spin_lock_init(&po->bind_lock); +- po->prot_hook.func = packet_rcv; +- +- if (sock->type == SOCK_PACKET) +- po->prot_hook.func = packet_rcv_spkt; +- +- po->prot_hook.af_packet_priv = sk; +- +- if (proto) { +- po->prot_hook.type = proto; +- dev_add_pack(&po->prot_hook); +- sock_hold(sk); +- po->running = 1; +- } +- +- write_lock_bh(&packet_sklist_lock); +- sk_add_node(sk, &packet_sklist); +- write_unlock_bh(&packet_sklist_lock); +- return(0); +-out: +- return err; +-} +- +-/* +- * Pull a packet from our receive queue and hand it to the user. +- * If necessary we block. 
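packet_do_bind() above swaps the socket's prot_hook onto a specific device (or onto all devices when dev is NULL), and packet_create() registers the hook immediately when a non-zero protocol is passed to socket(2); with this patch, creating the socket is additionally gated by nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET). A minimal user-space sketch of the bind path follows; it is not part of the patch and the interface name "eth0" is only a placeholder.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <net/if.h>              /* if_nametoindex() */
#include <arpa/inet.h>
#include <linux/if_ether.h>      /* ETH_P_ALL */
#include <linux/if_packet.h>     /* struct sockaddr_ll */

int main(void)
{
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0) {
        perror("socket");
        return 1;
    }

    struct sockaddr_ll sll;
    memset(&sll, 0, sizeof(sll));
    sll.sll_family   = AF_PACKET;
    sll.sll_protocol = htons(ETH_P_ALL);
    sll.sll_ifindex  = if_nametoindex("eth0");   /* placeholder interface name */

    if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0)
        perror("bind");
    else
        printf("bound to ifindex %d\n", sll.sll_ifindex);
    return 0;
}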
+- */ +- +-static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, +- struct msghdr *msg, size_t len, int flags) +-{ +- struct sock *sk = sock->sk; +- struct sk_buff *skb; +- int copied, err; +- struct sockaddr_ll *sll; +- +- err = -EINVAL; +- if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) +- goto out; +- +-#if 0 +- /* What error should we return now? EUNATTACH? */ +- if (pkt_sk(sk)->ifindex < 0) +- return -ENODEV; +-#endif +- +- /* +- * Call the generic datagram receiver. This handles all sorts +- * of horrible races and re-entrancy so we can forget about it +- * in the protocol layers. +- * +- * Now it will return ENETDOWN, if device have just gone down, +- * but then it will block. +- */ +- +- skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); +- +- /* +- * An error occurred so return it. Because skb_recv_datagram() +- * handles the blocking we don't see and worry about blocking +- * retries. +- */ +- +- if (skb == NULL) +- goto out; +- +- /* +- * If the address length field is there to be filled in, we fill +- * it in now. +- */ +- +- sll = &PACKET_SKB_CB(skb)->sa.ll; +- if (sock->type == SOCK_PACKET) +- msg->msg_namelen = sizeof(struct sockaddr_pkt); +- else +- msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); +- +- /* +- * You lose any data beyond the buffer you gave. If it worries a +- * user program they can ask the device for its MTU anyway. +- */ +- +- copied = skb->len; +- if (copied > len) +- { +- copied=len; +- msg->msg_flags|=MSG_TRUNC; +- } +- +- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); +- if (err) +- goto out_free; +- +- sock_recv_timestamp(msg, sk, skb); +- +- if (msg->msg_name) +- memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, +- msg->msg_namelen); +- +- if (pkt_sk(sk)->auxdata) { +- struct tpacket_auxdata aux; +- +- aux.tp_status = TP_STATUS_USER; +- if (skb->ip_summed == CHECKSUM_PARTIAL) +- aux.tp_status |= TP_STATUS_CSUMNOTREADY; +- aux.tp_len = PACKET_SKB_CB(skb)->origlen; +- aux.tp_snaplen = skb->len; +- aux.tp_mac = 0; +- aux.tp_net = skb_network_offset(skb); +- +- put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); +- } +- +- /* +- * Free or return the buffer as appropriate. Again this +- * hides all the races and re-entrancy issues from us. +- */ +- err = (flags&MSG_TRUNC) ? 
skb->len : copied; +- +-out_free: +- skb_free_datagram(sk, skb); +-out: +- return err; +-} +- +-static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, +- int *uaddr_len, int peer) +-{ +- struct net_device *dev; +- struct sock *sk = sock->sk; +- +- if (peer) +- return -EOPNOTSUPP; +- +- uaddr->sa_family = AF_PACKET; +- dev = dev_get_by_index(pkt_sk(sk)->ifindex); +- if (dev) { +- strlcpy(uaddr->sa_data, dev->name, 15); +- dev_put(dev); +- } else +- memset(uaddr->sa_data, 0, 14); +- *uaddr_len = sizeof(*uaddr); +- +- return 0; +-} +- +-static int packet_getname(struct socket *sock, struct sockaddr *uaddr, +- int *uaddr_len, int peer) +-{ +- struct net_device *dev; +- struct sock *sk = sock->sk; +- struct packet_sock *po = pkt_sk(sk); +- struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr; +- +- if (peer) +- return -EOPNOTSUPP; +- +- sll->sll_family = AF_PACKET; +- sll->sll_ifindex = po->ifindex; +- sll->sll_protocol = po->num; +- dev = dev_get_by_index(po->ifindex); +- if (dev) { +- sll->sll_hatype = dev->type; +- sll->sll_halen = dev->addr_len; +- memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); +- dev_put(dev); +- } else { +- sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ +- sll->sll_halen = 0; +- } +- *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; +- +- return 0; +-} +- +-static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what) +-{ +- switch (i->type) { +- case PACKET_MR_MULTICAST: +- if (what > 0) +- dev_mc_add(dev, i->addr, i->alen, 0); +- else +- dev_mc_delete(dev, i->addr, i->alen, 0); +- break; +- case PACKET_MR_PROMISC: +- dev_set_promiscuity(dev, what); +- break; +- case PACKET_MR_ALLMULTI: +- dev_set_allmulti(dev, what); +- break; +- default:; +- } +-} +- +-static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) +-{ +- for ( ; i; i=i->next) { +- if (i->ifindex == dev->ifindex) +- packet_dev_mc(dev, i, what); +- } +-} +- +-static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) +-{ +- struct packet_sock *po = pkt_sk(sk); +- struct packet_mclist *ml, *i; +- struct net_device *dev; +- int err; +- +- rtnl_lock(); +- +- err = -ENODEV; +- dev = __dev_get_by_index(mreq->mr_ifindex); +- if (!dev) +- goto done; +- +- err = -EINVAL; +- if (mreq->mr_alen > dev->addr_len) +- goto done; +- +- err = -ENOBUFS; +- i = kmalloc(sizeof(*i), GFP_KERNEL); +- if (i == NULL) +- goto done; +- +- err = 0; +- for (ml = po->mclist; ml; ml = ml->next) { +- if (ml->ifindex == mreq->mr_ifindex && +- ml->type == mreq->mr_type && +- ml->alen == mreq->mr_alen && +- memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { +- ml->count++; +- /* Free the new element ... 
*/ +- kfree(i); +- goto done; +- } +- } +- +- i->type = mreq->mr_type; +- i->ifindex = mreq->mr_ifindex; +- i->alen = mreq->mr_alen; +- memcpy(i->addr, mreq->mr_address, i->alen); +- i->count = 1; +- i->next = po->mclist; +- po->mclist = i; +- packet_dev_mc(dev, i, +1); +- +-done: +- rtnl_unlock(); +- return err; +-} +- +-static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) +-{ +- struct packet_mclist *ml, **mlp; +- +- rtnl_lock(); +- +- for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { +- if (ml->ifindex == mreq->mr_ifindex && +- ml->type == mreq->mr_type && +- ml->alen == mreq->mr_alen && +- memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { +- if (--ml->count == 0) { +- struct net_device *dev; +- *mlp = ml->next; +- dev = dev_get_by_index(ml->ifindex); +- if (dev) { +- packet_dev_mc(dev, ml, -1); +- dev_put(dev); +- } +- kfree(ml); +- } +- rtnl_unlock(); +- return 0; +- } +- } +- rtnl_unlock(); +- return -EADDRNOTAVAIL; +-} +- +-static void packet_flush_mclist(struct sock *sk) +-{ +- struct packet_sock *po = pkt_sk(sk); +- struct packet_mclist *ml; +- +- if (!po->mclist) +- return; +- +- rtnl_lock(); +- while ((ml = po->mclist) != NULL) { +- struct net_device *dev; +- +- po->mclist = ml->next; +- if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { +- packet_dev_mc(dev, ml, -1); +- dev_put(dev); +- } +- kfree(ml); +- } +- rtnl_unlock(); +-} +- +-static int +-packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) +-{ +- struct sock *sk = sock->sk; +- struct packet_sock *po = pkt_sk(sk); +- int ret; +- +- if (level != SOL_PACKET) +- return -ENOPROTOOPT; +- +- switch(optname) { +- case PACKET_ADD_MEMBERSHIP: +- case PACKET_DROP_MEMBERSHIP: +- { +- struct packet_mreq_max mreq; +- int len = optlen; +- memset(&mreq, 0, sizeof(mreq)); +- if (len < sizeof(struct packet_mreq)) +- return -EINVAL; +- if (len > sizeof(mreq)) +- len = sizeof(mreq); +- if (copy_from_user(&mreq,optval,len)) +- return -EFAULT; +- if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) +- return -EINVAL; +- if (optname == PACKET_ADD_MEMBERSHIP) +- ret = packet_mc_add(sk, &mreq); +- else +- ret = packet_mc_drop(sk, &mreq); +- return ret; +- } +- +-#ifdef CONFIG_PACKET_MMAP +- case PACKET_RX_RING: +- { +- struct tpacket_req req; +- +- if (optlencopy_thresh = val; +- return 0; +- } +-#endif +- case PACKET_AUXDATA: +- { +- int val; +- +- if (optlen < sizeof(val)) +- return -EINVAL; +- if (copy_from_user(&val, optval, sizeof(val))) +- return -EFAULT; +- +- po->auxdata = !!val; +- return 0; +- } +- case PACKET_ORIGDEV: +- { +- int val; +- +- if (optlen < sizeof(val)) +- return -EINVAL; +- if (copy_from_user(&val, optval, sizeof(val))) +- return -EFAULT; +- +- po->origdev = !!val; +- return 0; +- } +- default: +- return -ENOPROTOOPT; +- } +-} +- +-static int packet_getsockopt(struct socket *sock, int level, int optname, +- char __user *optval, int __user *optlen) +-{ +- int len; +- int val; +- struct sock *sk = sock->sk; +- struct packet_sock *po = pkt_sk(sk); +- void *data; +- struct tpacket_stats st; +- +- if (level != SOL_PACKET) +- return -ENOPROTOOPT; +- +- if (get_user(len, optlen)) +- return -EFAULT; +- +- if (len < 0) +- return -EINVAL; +- +- switch(optname) { +- case PACKET_STATISTICS: +- if (len > sizeof(struct tpacket_stats)) +- len = sizeof(struct tpacket_stats); +- spin_lock_bh(&sk->sk_receive_queue.lock); +- st = po->stats; +- memset(&po->stats, 0, sizeof(st)); +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- 
st.tp_packets += st.tp_drops; +- +- data = &st; +- break; +- case PACKET_AUXDATA: +- if (len > sizeof(int)) +- len = sizeof(int); +- val = po->auxdata; +- +- data = &val; +- break; +- case PACKET_ORIGDEV: +- if (len > sizeof(int)) +- len = sizeof(int); +- val = po->origdev; +- +- data = &val; +- break; +- default: +- return -ENOPROTOOPT; +- } +- +- if (put_user(len, optlen)) +- return -EFAULT; +- if (copy_to_user(optval, data, len)) +- return -EFAULT; +- return 0; +-} +- +- +-static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +-{ +- struct sock *sk; +- struct hlist_node *node; +- struct net_device *dev = data; +- +- read_lock(&packet_sklist_lock); +- sk_for_each(sk, node, &packet_sklist) { +- struct packet_sock *po = pkt_sk(sk); +- +- switch (msg) { +- case NETDEV_UNREGISTER: +- if (po->mclist) +- packet_dev_mclist(dev, po->mclist, -1); +- /* fallthrough */ +- +- case NETDEV_DOWN: +- if (dev->ifindex == po->ifindex) { +- spin_lock(&po->bind_lock); +- if (po->running) { +- __dev_remove_pack(&po->prot_hook); +- __sock_put(sk); +- po->running = 0; +- sk->sk_err = ENETDOWN; +- if (!sock_flag(sk, SOCK_DEAD)) +- sk->sk_error_report(sk); +- } +- if (msg == NETDEV_UNREGISTER) { +- po->ifindex = -1; +- po->prot_hook.dev = NULL; +- } +- spin_unlock(&po->bind_lock); +- } +- break; +- case NETDEV_UP: +- spin_lock(&po->bind_lock); +- if (dev->ifindex == po->ifindex && po->num && +- !po->running) { +- dev_add_pack(&po->prot_hook); +- sock_hold(sk); +- po->running = 1; +- } +- spin_unlock(&po->bind_lock); +- break; +- } +- } +- read_unlock(&packet_sklist_lock); +- return NOTIFY_DONE; +-} +- +- +-static int packet_ioctl(struct socket *sock, unsigned int cmd, +- unsigned long arg) +-{ +- struct sock *sk = sock->sk; +- +- switch(cmd) { +- case SIOCOUTQ: +- { +- int amount = atomic_read(&sk->sk_wmem_alloc); +- return put_user(amount, (int __user *)arg); +- } +- case SIOCINQ: +- { +- struct sk_buff *skb; +- int amount = 0; +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- skb = skb_peek(&sk->sk_receive_queue); +- if (skb) +- amount = skb->len; +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- return put_user(amount, (int __user *)arg); +- } +- case SIOCGSTAMP: +- return sock_get_timestamp(sk, (struct timeval __user *)arg); +- case SIOCGSTAMPNS: +- return sock_get_timestampns(sk, (struct timespec __user *)arg); +- +-#ifdef CONFIG_INET +- case SIOCADDRT: +- case SIOCDELRT: +- case SIOCDARP: +- case SIOCGARP: +- case SIOCSARP: +- case SIOCGIFADDR: +- case SIOCSIFADDR: +- case SIOCGIFBRDADDR: +- case SIOCSIFBRDADDR: +- case SIOCGIFNETMASK: +- case SIOCSIFNETMASK: +- case SIOCGIFDSTADDR: +- case SIOCSIFDSTADDR: +- case SIOCSIFFLAGS: +- return inet_dgram_ops.ioctl(sock, cmd, arg); +-#endif +- +- default: +- return -ENOIOCTLCMD; +- } +- return 0; +-} +- +-#ifndef CONFIG_PACKET_MMAP +-#define packet_mmap sock_no_mmap +-#define packet_poll datagram_poll +-#else +- +-static unsigned int packet_poll(struct file * file, struct socket *sock, +- poll_table *wait) +-{ +- struct sock *sk = sock->sk; +- struct packet_sock *po = pkt_sk(sk); +- unsigned int mask = datagram_poll(file, sock, wait); +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- if (po->pg_vec) { +- unsigned last = po->head ? po->head-1 : po->frame_max; +- struct tpacket_hdr *h; +- +- h = packet_lookup_frame(po, last); +- +- if (h->tp_status) +- mask |= POLLIN | POLLRDNORM; +- } +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- return mask; +-} +- +- +-/* Dirty? 
Well, I still did not learn better way to account +- * for user mmaps. +- */ +- +-static void packet_mm_open(struct vm_area_struct *vma) +-{ +- struct file *file = vma->vm_file; +- struct socket * sock = file->private_data; +- struct sock *sk = sock->sk; +- +- if (sk) +- atomic_inc(&pkt_sk(sk)->mapped); +-} +- +-static void packet_mm_close(struct vm_area_struct *vma) +-{ +- struct file *file = vma->vm_file; +- struct socket * sock = file->private_data; +- struct sock *sk = sock->sk; +- +- if (sk) +- atomic_dec(&pkt_sk(sk)->mapped); +-} +- +-static struct vm_operations_struct packet_mmap_ops = { +- .open = packet_mm_open, +- .close =packet_mm_close, +-}; +- +-static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order) +-{ +- return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1); +-} +- +-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) +-{ +- int i; +- +- for (i = 0; i < len; i++) { +- if (likely(pg_vec[i])) +- free_pages((unsigned long) pg_vec[i], order); +- } +- kfree(pg_vec); +-} +- +-static inline char *alloc_one_pg_vec_page(unsigned long order) +-{ +- return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, +- order); +-} +- +-static char **alloc_pg_vec(struct tpacket_req *req, int order) +-{ +- unsigned int block_nr = req->tp_block_nr; +- char **pg_vec; +- int i; +- +- pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL); +- if (unlikely(!pg_vec)) +- goto out; +- +- for (i = 0; i < block_nr; i++) { +- pg_vec[i] = alloc_one_pg_vec_page(order); +- if (unlikely(!pg_vec[i])) +- goto out_free_pgvec; +- } +- +-out: +- return pg_vec; +- +-out_free_pgvec: +- free_pg_vec(pg_vec, order, block_nr); +- pg_vec = NULL; +- goto out; +-} +- +-static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing) +-{ +- char **pg_vec = NULL; +- struct packet_sock *po = pkt_sk(sk); +- int was_running, order = 0; +- __be16 num; +- int err = 0; +- +- if (req->tp_block_nr) { +- int i, l; +- +- /* Sanity tests and some calculations */ +- +- if (unlikely(po->pg_vec)) +- return -EBUSY; +- +- if (unlikely((int)req->tp_block_size <= 0)) +- return -EINVAL; +- if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) +- return -EINVAL; +- if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) +- return -EINVAL; +- if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) +- return -EINVAL; +- +- po->frames_per_block = req->tp_block_size/req->tp_frame_size; +- if (unlikely(po->frames_per_block <= 0)) +- return -EINVAL; +- if (unlikely((po->frames_per_block * req->tp_block_nr) != +- req->tp_frame_nr)) +- return -EINVAL; +- +- err = -ENOMEM; +- order = get_order(req->tp_block_size); +- pg_vec = alloc_pg_vec(req, order); +- if (unlikely(!pg_vec)) +- goto out; +- +- l = 0; +- for (i = 0; i < req->tp_block_nr; i++) { +- char *ptr = pg_vec[i]; +- struct tpacket_hdr *header; +- int k; +- +- for (k = 0; k < po->frames_per_block; k++) { +- header = (struct tpacket_hdr *) ptr; +- header->tp_status = TP_STATUS_KERNEL; +- ptr += req->tp_frame_size; +- } +- } +- /* Done */ +- } else { +- if (unlikely(req->tp_frame_nr)) +- return -EINVAL; +- } +- +- lock_sock(sk); +- +- /* Detach socket from network */ +- spin_lock(&po->bind_lock); +- was_running = po->running; +- num = po->num; +- if (was_running) { +- __dev_remove_pack(&po->prot_hook); +- po->num = 0; +- po->running = 0; +- __sock_put(sk); +- } +- spin_unlock(&po->bind_lock); +- +- synchronize_net(); +- +- err = -EBUSY; +- if (closing || atomic_read(&po->mapped) == 0) { +- err = 0; +-#define XC(a, b) ({ 
__typeof__ ((a)) __t; __t = (a); (a) = (b); __t; }) +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- pg_vec = XC(po->pg_vec, pg_vec); +- po->frame_max = (req->tp_frame_nr - 1); +- po->head = 0; +- po->frame_size = req->tp_frame_size; +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- +- order = XC(po->pg_vec_order, order); +- req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr); +- +- po->pg_vec_pages = req->tp_block_size/PAGE_SIZE; +- po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv; +- skb_queue_purge(&sk->sk_receive_queue); +-#undef XC +- if (atomic_read(&po->mapped)) +- printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped)); +- } +- +- spin_lock(&po->bind_lock); +- if (was_running && !po->running) { +- sock_hold(sk); +- po->running = 1; +- po->num = num; +- dev_add_pack(&po->prot_hook); +- } +- spin_unlock(&po->bind_lock); +- +- release_sock(sk); +- +- if (pg_vec) +- free_pg_vec(pg_vec, order, req->tp_block_nr); +-out: +- return err; +-} +- +-static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) +-{ +- struct sock *sk = sock->sk; +- struct packet_sock *po = pkt_sk(sk); +- unsigned long size; +- unsigned long start; +- int err = -EINVAL; +- int i; +- +- if (vma->vm_pgoff) +- return -EINVAL; +- +- size = vma->vm_end - vma->vm_start; +- +- lock_sock(sk); +- if (po->pg_vec == NULL) +- goto out; +- if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE) +- goto out; +- +- start = vma->vm_start; +- for (i = 0; i < po->pg_vec_len; i++) { +- struct page *page = virt_to_page(po->pg_vec[i]); +- int pg_num; +- +- for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) { +- err = vm_insert_page(vma, start, page); +- if (unlikely(err)) +- goto out; +- start += PAGE_SIZE; +- } +- } +- atomic_inc(&po->mapped); +- vma->vm_ops = &packet_mmap_ops; +- err = 0; +- +-out: +- release_sock(sk); +- return err; +-} +-#endif +- +- +-static const struct proto_ops packet_ops_spkt = { +- .family = PF_PACKET, +- .owner = THIS_MODULE, +- .release = packet_release, +- .bind = packet_bind_spkt, +- .connect = sock_no_connect, +- .socketpair = sock_no_socketpair, +- .accept = sock_no_accept, +- .getname = packet_getname_spkt, +- .poll = datagram_poll, +- .ioctl = packet_ioctl, +- .listen = sock_no_listen, +- .shutdown = sock_no_shutdown, +- .setsockopt = sock_no_setsockopt, +- .getsockopt = sock_no_getsockopt, +- .sendmsg = packet_sendmsg_spkt, +- .recvmsg = packet_recvmsg, +- .mmap = sock_no_mmap, +- .sendpage = sock_no_sendpage, +-}; +- +-static const struct proto_ops packet_ops = { +- .family = PF_PACKET, +- .owner = THIS_MODULE, +- .release = packet_release, +- .bind = packet_bind, +- .connect = sock_no_connect, +- .socketpair = sock_no_socketpair, +- .accept = sock_no_accept, +- .getname = packet_getname, +- .poll = packet_poll, +- .ioctl = packet_ioctl, +- .listen = sock_no_listen, +- .shutdown = sock_no_shutdown, +- .setsockopt = packet_setsockopt, +- .getsockopt = packet_getsockopt, +- .sendmsg = packet_sendmsg, +- .recvmsg = packet_recvmsg, +- .mmap = packet_mmap, +- .sendpage = sock_no_sendpage, +-}; +- +-static struct net_proto_family packet_family_ops = { +- .family = PF_PACKET, +- .create = packet_create, +- .owner = THIS_MODULE, +-}; +- +-static struct notifier_block packet_netdev_notifier = { +- .notifier_call =packet_notifier, +-}; +- +-#ifdef CONFIG_PROC_FS +-static inline struct sock *packet_seq_idx(loff_t off) +-{ +- struct sock *s; +- struct hlist_node *node; +- +- sk_for_each(s, node, &packet_sklist) { +- if (!off--) +- 
return s; +- } +- return NULL; +-} +- +-static void *packet_seq_start(struct seq_file *seq, loff_t *pos) +-{ +- read_lock(&packet_sklist_lock); +- return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; +-} +- +-static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) +-{ +- ++*pos; +- return (v == SEQ_START_TOKEN) +- ? sk_head(&packet_sklist) +- : sk_next((struct sock*)v) ; +-} +- +-static void packet_seq_stop(struct seq_file *seq, void *v) +-{ +- read_unlock(&packet_sklist_lock); +-} +- +-static int packet_seq_show(struct seq_file *seq, void *v) +-{ +- if (v == SEQ_START_TOKEN) +- seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); +- else { +- struct sock *s = v; +- const struct packet_sock *po = pkt_sk(s); +- +- seq_printf(seq, +- "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", +- s, +- atomic_read(&s->sk_refcnt), +- s->sk_type, +- ntohs(po->num), +- po->ifindex, +- po->running, +- atomic_read(&s->sk_rmem_alloc), +- sock_i_uid(s), +- sock_i_ino(s) ); +- } +- +- return 0; +-} +- +-static struct seq_operations packet_seq_ops = { +- .start = packet_seq_start, +- .next = packet_seq_next, +- .stop = packet_seq_stop, +- .show = packet_seq_show, +-}; +- +-static int packet_seq_open(struct inode *inode, struct file *file) +-{ +- return seq_open(file, &packet_seq_ops); +-} +- +-static const struct file_operations packet_seq_fops = { +- .owner = THIS_MODULE, +- .open = packet_seq_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = seq_release, +-}; +- +-#endif +- +-static void __exit packet_exit(void) +-{ +- proc_net_remove("packet"); +- unregister_netdevice_notifier(&packet_netdev_notifier); +- sock_unregister(PF_PACKET); +- proto_unregister(&packet_proto); +-} +- +-static int __init packet_init(void) +-{ +- int rc = proto_register(&packet_proto, 0); +- +- if (rc != 0) +- goto out; +- +- sock_register(&packet_family_ops); +- register_netdevice_notifier(&packet_netdev_notifier); +- proc_net_fops_create("packet", 0, &packet_seq_fops); +-out: +- return rc; +-} +- +-module_init(packet_init); +-module_exit(packet_exit); +-MODULE_LICENSE("GPL"); +-MODULE_ALIAS_NETPROTO(PF_PACKET); +diff -Nurb linux-2.6.22-594/net/socket.c linux-2.6.22-595/net/socket.c +--- linux-2.6.22-594/net/socket.c 2008-03-20 00:05:19.000000000 -0400 ++++ linux-2.6.22-595/net/socket.c 2008-03-20 00:14:03.000000000 -0400 +@@ -1122,12 +1122,17 @@ + if (type < 0 || type >= SOCK_MAX) + return -EINVAL; + ++ /* ++ * Hack no. 2 - Sapan ++ * Clean this up later ++ * + if (!nx_check(0, VS_ADMIN)) { + if (family == PF_INET && !current_nx_info_has_v4()) + return -EAFNOSUPPORT; + if (family == PF_INET6 && !current_nx_info_has_v6()) + return -EAFNOSUPPORT; + } ++ */ + + /* Compatibility. + +diff -Nurb linux-2.6.22-594/net/socket.c.orig linux-2.6.22-595/net/socket.c.orig +--- linux-2.6.22-594/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-595/net/socket.c.orig 2008-03-20 00:05:19.000000000 -0400 +@@ -0,0 +1,2400 @@ ++/* ++ * NET An implementation of the SOCKET network access protocol. ++ * ++ * Version: @(#)socket.c 1.1.93 18/02/95 ++ * ++ * Authors: Orest Zborowski, ++ * Ross Biro ++ * Fred N. van Kempen, ++ * ++ * Fixes: ++ * Anonymous : NOTSOCK/BADF cleanup. Error fix in ++ * shutdown() ++ * Alan Cox : verify_area() fixes ++ * Alan Cox : Removed DDI ++ * Jonathan Kamens : SOCK_DGRAM reconnect bug ++ * Alan Cox : Moved a load of checks to the very ++ * top level. ++ * Alan Cox : Move address structures to/from user ++ * mode above the protocol layers. 
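The functional change of this revision is the small net/socket.c hunk above: the per-network-context address-family gate (nx_check(0, VS_ADMIN) together with current_nx_info_has_v4()/_v6()) is commented out of the socket-creation path, so a context whose IPv4/IPv6 addresses are not set up yet can still create PF_INET/PF_INET6 sockets. Illustrative only, not part of the patch: with the gate active such a context saw socket(2) fail up front, as below; with the gate removed the call succeeds and later operations decide.

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0)
        /* With the nx_check() gate in place, a context without IPv4
         * addresses got EAFNOSUPPORT here; with the gate commented out
         * the call succeeds even before addresses are assigned. */
        printf("socket: %s\n", strerror(errno));
    else
        printf("socket created: fd %d\n", fd);
    return 0;
}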
++ * Rob Janssen : Allow 0 length sends. ++ * Alan Cox : Asynchronous I/O support (cribbed from the ++ * tty drivers). ++ * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) ++ * Jeff Uphoff : Made max number of sockets command-line ++ * configurable. ++ * Matti Aarnio : Made the number of sockets dynamic, ++ * to be allocated when needed, and mr. ++ * Uphoff's max is used as max to be ++ * allowed to allocate. ++ * Linus : Argh. removed all the socket allocation ++ * altogether: it's in the inode now. ++ * Alan Cox : Made sock_alloc()/sock_release() public ++ * for NetROM and future kernel nfsd type ++ * stuff. ++ * Alan Cox : sendmsg/recvmsg basics. ++ * Tom Dyas : Export net symbols. ++ * Marcin Dalecki : Fixed problems with CONFIG_NET="n". ++ * Alan Cox : Added thread locking to sys_* calls ++ * for sockets. May have errors at the ++ * moment. ++ * Kevin Buhr : Fixed the dumb errors in the above. ++ * Andi Kleen : Some small cleanups, optimizations, ++ * and fixed a copy_from_user() bug. ++ * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) ++ * Tigran Aivazian : Made listen(2) backlog sanity checks ++ * protocol-independent ++ * ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ * ++ * ++ * This module is effectively the top level interface to the BSD socket ++ * paradigm. ++ * ++ * Based upon Swansea University Computer Society NET3.039 ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int sock_no_open(struct inode *irrelevant, struct file *dontcare); ++static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos); ++static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos); ++static int sock_mmap(struct file *file, struct vm_area_struct *vma); ++ ++static int sock_close(struct inode *inode, struct file *file); ++static unsigned int sock_poll(struct file *file, ++ struct poll_table_struct *wait); ++static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ++#ifdef CONFIG_COMPAT ++static long compat_sock_ioctl(struct file *file, ++ unsigned int cmd, unsigned long arg); ++#endif ++static int sock_fasync(int fd, struct file *filp, int on); ++static ssize_t sock_sendpage(struct file *file, struct page *page, ++ int offset, size_t size, loff_t *ppos, int more); ++ ++/* ++ * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear ++ * in the operation structures but are done directly via the socketcall() multiplexor. 
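socket_file_ops above is what lets a socket descriptor behave like any other file: read(2)/write(2), poll(2), mmap(2) and ioctl(2) on the fd are routed through these file operations into the socket layer, while the socket-specific calls arrive via the socketcall() multiplexor instead. A small illustration, not taken from the patch, of plain write(2) on a connected UDP socket; port 9 (discard) is just a convenient destination that needs no listener.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    struct sockaddr_in dst;

    memset(&dst, 0, sizeof(dst));
    dst.sin_family      = AF_INET;
    dst.sin_port        = htons(9);                  /* discard service */
    dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    if (fd < 0 || connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
        perror("socket/connect");
        return 1;
    }
    /* write(2) instead of send(2): the fd's file_operations forward to the socket layer. */
    if (write(fd, "ping", 4) != 4)
        perror("write");
    close(fd);
    return 0;
}

The write is funneled into the same sendmsg path the socket calls use, so accounting hooks such as vx_sock_send() further down see it as well.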
++ */ ++ ++static const struct file_operations socket_file_ops = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .aio_read = sock_aio_read, ++ .aio_write = sock_aio_write, ++ .poll = sock_poll, ++ .unlocked_ioctl = sock_ioctl, ++#ifdef CONFIG_COMPAT ++ .compat_ioctl = compat_sock_ioctl, ++#endif ++ .mmap = sock_mmap, ++ .open = sock_no_open, /* special open code to disallow open via /proc */ ++ .release = sock_close, ++ .fasync = sock_fasync, ++ .sendpage = sock_sendpage, ++ .splice_write = generic_splice_sendpage, ++}; ++ ++/* ++ * The protocol list. Each protocol is registered in here. ++ */ ++ ++static DEFINE_SPINLOCK(net_family_lock); ++static const struct net_proto_family *net_families[NPROTO] __read_mostly; ++ ++/* ++ * Statistics counters of the socket lists ++ */ ++ ++static DEFINE_PER_CPU(int, sockets_in_use) = 0; ++ ++/* ++ * Support routines. ++ * Move socket addresses back and forth across the kernel/user ++ * divide and look after the messy bits. ++ */ ++ ++#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - ++ 16 for IP, 16 for IPX, ++ 24 for IPv6, ++ about 80 for AX.25 ++ must be at least one bigger than ++ the AF_UNIX size (see net/unix/af_unix.c ++ :unix_mkname()). ++ */ ++ ++/** ++ * move_addr_to_kernel - copy a socket address into kernel space ++ * @uaddr: Address in user space ++ * @kaddr: Address in kernel space ++ * @ulen: Length in user space ++ * ++ * The address is copied into kernel space. If the provided address is ++ * too long an error code of -EINVAL is returned. If the copy gives ++ * invalid addresses -EFAULT is returned. On a success 0 is returned. ++ */ ++ ++int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) ++{ ++ if (ulen < 0 || ulen > MAX_SOCK_ADDR) ++ return -EINVAL; ++ if (ulen == 0) ++ return 0; ++ if (copy_from_user(kaddr, uaddr, ulen)) ++ return -EFAULT; ++ return audit_sockaddr(ulen, kaddr); ++} ++ ++/** ++ * move_addr_to_user - copy an address to user space ++ * @kaddr: kernel space address ++ * @klen: length of address in kernel ++ * @uaddr: user space address ++ * @ulen: pointer to user length field ++ * ++ * The value pointed to by ulen on entry is the buffer length available. ++ * This is overwritten with the buffer space used. -EINVAL is returned ++ * if an overlong buffer is specified or a negative buffer size. -EFAULT ++ * is returned if either the buffer or the length field are not ++ * accessible. ++ * After copying the data up to the limit the user specifies, the true ++ * length of the data is written over the length limit the user ++ * specified. Zero is returned for a success. ++ */ ++ ++int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, ++ int __user *ulen) ++{ ++ int err; ++ int len; ++ ++ err = get_user(len, ulen); ++ if (err) ++ return err; ++ if (len > klen) ++ len = klen; ++ if (len < 0 || len > MAX_SOCK_ADDR) ++ return -EINVAL; ++ if (len) { ++ if (audit_sockaddr(klen, kaddr)) ++ return -ENOMEM; ++ if (copy_to_user(uaddr, kaddr, len)) ++ return -EFAULT; ++ } ++ /* ++ * "fromlen shall refer to the value before truncation.." 
++ * 1003.1g ++ */ ++ return __put_user(klen, ulen); ++} ++ ++#define SOCKFS_MAGIC 0x534F434B ++ ++static struct kmem_cache *sock_inode_cachep __read_mostly; ++ ++static struct inode *sock_alloc_inode(struct super_block *sb) ++{ ++ struct socket_alloc *ei; ++ ++ ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); ++ if (!ei) ++ return NULL; ++ init_waitqueue_head(&ei->socket.wait); ++ ++ ei->socket.fasync_list = NULL; ++ ei->socket.state = SS_UNCONNECTED; ++ ei->socket.flags = 0; ++ ei->socket.ops = NULL; ++ ei->socket.sk = NULL; ++ ei->socket.file = NULL; ++ ++ return &ei->vfs_inode; ++} ++ ++static void sock_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(sock_inode_cachep, ++ container_of(inode, struct socket_alloc, vfs_inode)); ++} ++ ++static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) ++{ ++ struct socket_alloc *ei = (struct socket_alloc *)foo; ++ ++ inode_init_once(&ei->vfs_inode); ++} ++ ++static int init_inodecache(void) ++{ ++ sock_inode_cachep = kmem_cache_create("sock_inode_cache", ++ sizeof(struct socket_alloc), ++ 0, ++ (SLAB_HWCACHE_ALIGN | ++ SLAB_RECLAIM_ACCOUNT | ++ SLAB_MEM_SPREAD), ++ init_once, ++ NULL); ++ if (sock_inode_cachep == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++static struct super_operations sockfs_ops = { ++ .alloc_inode = sock_alloc_inode, ++ .destroy_inode =sock_destroy_inode, ++ .statfs = simple_statfs, ++}; ++ ++static int sockfs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, void *data, ++ struct vfsmount *mnt) ++{ ++ return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, ++ mnt); ++} ++ ++static struct vfsmount *sock_mnt __read_mostly; ++ ++static struct file_system_type sock_fs_type = { ++ .name = "sockfs", ++ .get_sb = sockfs_get_sb, ++ .kill_sb = kill_anon_super, ++}; ++ ++static int sockfs_delete_dentry(struct dentry *dentry) ++{ ++ /* ++ * At creation time, we pretended this dentry was hashed ++ * (by clearing DCACHE_UNHASHED bit in d_flags) ++ * At delete time, we restore the truth : not hashed. ++ * (so that dput() can proceed correctly) ++ */ ++ dentry->d_flags |= DCACHE_UNHASHED; ++ return 0; ++} ++ ++/* ++ * sockfs_dname() is called from d_path(). ++ */ ++static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) ++{ ++ return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", ++ dentry->d_inode->i_ino); ++} ++ ++static struct dentry_operations sockfs_dentry_operations = { ++ .d_delete = sockfs_delete_dentry, ++ .d_dname = sockfs_dname, ++}; ++ ++/* ++ * Obtains the first available file descriptor and sets it up for use. ++ * ++ * These functions create file structures and maps them to fd space ++ * of the current process. On success it returns file descriptor ++ * and file struct implicitly stored in sock->file. ++ * Note that another thread may close file descriptor before we return ++ * from this function. We use the fact that now we do not refer ++ * to socket after mapping. If one day we will need it, this ++ * function will increment ref. count on file by 1. ++ * ++ * In any case returned fd MAY BE not valid! ++ * This race condition is unavoidable ++ * with shared fd spaces, we cannot solve it inside kernel, ++ * but we take care of internal coherence yet. 
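The pretend-hashed dentry trick and sockfs_dname() above are what make the /proc/<pid>/fd entry of a socket resolve to the synthetic name socket:[inode] rather than a real path. A quick way to observe this from user space (illustrative only, not part of the patch):

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
    char path[64], target[64];
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    ssize_t n;

    if (fd < 0) {
        perror("socket");
        return 1;
    }
    snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
    n = readlink(path, target, sizeof(target) - 1);
    if (n < 0) {
        perror("readlink");
        return 1;
    }
    target[n] = '\0';
    printf("%s -> %s\n", path, target);   /* e.g. "socket:[12345]" */
    return 0;
}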
++ */ ++ ++static int sock_alloc_fd(struct file **filep) ++{ ++ int fd; ++ ++ fd = get_unused_fd(); ++ if (likely(fd >= 0)) { ++ struct file *file = get_empty_filp(); ++ ++ *filep = file; ++ if (unlikely(!file)) { ++ put_unused_fd(fd); ++ return -ENFILE; ++ } ++ } else ++ *filep = NULL; ++ return fd; ++} ++ ++static int sock_attach_fd(struct socket *sock, struct file *file) ++{ ++ struct qstr name = { .name = "" }; ++ ++ file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); ++ if (unlikely(!file->f_path.dentry)) ++ return -ENOMEM; ++ ++ file->f_path.dentry->d_op = &sockfs_dentry_operations; ++ /* ++ * We dont want to push this dentry into global dentry hash table. ++ * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED ++ * This permits a working /proc/$pid/fd/XXX on sockets ++ */ ++ file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; ++ d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); ++ file->f_path.mnt = mntget(sock_mnt); ++ file->f_mapping = file->f_path.dentry->d_inode->i_mapping; ++ ++ sock->file = file; ++ file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; ++ file->f_mode = FMODE_READ | FMODE_WRITE; ++ file->f_flags = O_RDWR; ++ file->f_pos = 0; ++ file->private_data = sock; ++ ++ return 0; ++} ++ ++int sock_map_fd(struct socket *sock) ++{ ++ struct file *newfile; ++ int fd = sock_alloc_fd(&newfile); ++ ++ if (likely(fd >= 0)) { ++ int err = sock_attach_fd(sock, newfile); ++ ++ if (unlikely(err < 0)) { ++ put_filp(newfile); ++ put_unused_fd(fd); ++ return err; ++ } ++ fd_install(fd, newfile); ++ } ++ return fd; ++} ++ ++static struct socket *sock_from_file(struct file *file, int *err) ++{ ++ if (file->f_op == &socket_file_ops) ++ return file->private_data; /* set in sock_map_fd */ ++ ++ *err = -ENOTSOCK; ++ return NULL; ++} ++ ++/** ++ * sockfd_lookup - Go from a file number to its socket slot ++ * @fd: file handle ++ * @err: pointer to an error code return ++ * ++ * The file handle passed in is locked and the socket it is bound ++ * too is returned. If an error occurs the err pointer is overwritten ++ * with a negative errno code and NULL is returned. The function checks ++ * for both invalid handles and passing a handle which is not a socket. ++ * ++ * On a success the socket object pointer is returned. ++ */ ++ ++struct socket *sockfd_lookup(int fd, int *err) ++{ ++ struct file *file; ++ struct socket *sock; ++ ++ file = fget(fd); ++ if (!file) { ++ *err = -EBADF; ++ return NULL; ++ } ++ ++ sock = sock_from_file(file, err); ++ if (!sock) ++ fput(file); ++ return sock; ++} ++ ++static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) ++{ ++ struct file *file; ++ struct socket *sock; ++ ++ *err = -EBADF; ++ file = fget_light(fd, fput_needed); ++ if (file) { ++ sock = sock_from_file(file, err); ++ if (sock) ++ return sock; ++ fput_light(file, *fput_needed); ++ } ++ return NULL; ++} ++ ++/** ++ * sock_alloc - allocate a socket ++ * ++ * Allocate a new inode and socket object. The two are bound together ++ * and initialised. The socket is then returned. If we are out of inodes ++ * NULL is returned. 
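sock_from_file()/sockfd_lookup() above are the gatekeepers for every socket system call that starts from a plain file descriptor: if the fd's file operations are not socket_file_ops, the caller gets -ENOTSOCK. That is easy to provoke from user space (illustrative only, not part of the patch):

#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
    struct sockaddr_storage ss;
    socklen_t len = sizeof(ss);
    int fd = open("/dev/null", O_RDONLY);   /* any non-socket fd will do */

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (getsockname(fd, (struct sockaddr *)&ss, &len) < 0)
        printf("getsockname: %s (expected ENOTSOCK)\n", strerror(errno));
    return 0;
}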
++ */ ++ ++static struct socket *sock_alloc(void) ++{ ++ struct inode *inode; ++ struct socket *sock; ++ ++ inode = new_inode(sock_mnt->mnt_sb); ++ if (!inode) ++ return NULL; ++ ++ sock = SOCKET_I(inode); ++ ++ inode->i_mode = S_IFSOCK | S_IRWXUGO; ++ inode->i_uid = current->fsuid; ++ inode->i_gid = current->fsgid; ++ ++ get_cpu_var(sockets_in_use)++; ++ put_cpu_var(sockets_in_use); ++ return sock; ++} ++ ++/* ++ * In theory you can't get an open on this inode, but /proc provides ++ * a back door. Remember to keep it shut otherwise you'll let the ++ * creepy crawlies in. ++ */ ++ ++static int sock_no_open(struct inode *irrelevant, struct file *dontcare) ++{ ++ return -ENXIO; ++} ++ ++const struct file_operations bad_sock_fops = { ++ .owner = THIS_MODULE, ++ .open = sock_no_open, ++}; ++ ++/** ++ * sock_release - close a socket ++ * @sock: socket to close ++ * ++ * The socket is released from the protocol stack if it has a release ++ * callback, and the inode is then released if the socket is bound to ++ * an inode not a file. ++ */ ++ ++void sock_release(struct socket *sock) ++{ ++ if (sock->ops) { ++ struct module *owner = sock->ops->owner; ++ ++ sock->ops->release(sock); ++ sock->ops = NULL; ++ module_put(owner); ++ } ++ ++ if (sock->fasync_list) ++ printk(KERN_ERR "sock_release: fasync list not empty!\n"); ++ ++ get_cpu_var(sockets_in_use)--; ++ put_cpu_var(sockets_in_use); ++ if (!sock->file) { ++ iput(SOCK_INODE(sock)); ++ return; ++ } ++ sock->file = NULL; ++} ++ ++static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, ++ struct msghdr *msg, size_t size) ++{ ++ struct sock_iocb *si = kiocb_to_siocb(iocb); ++ int err, len; ++ ++ si->sock = sock; ++ si->scm = NULL; ++ si->msg = msg; ++ si->size = size; ++ ++ err = security_socket_sendmsg(sock, msg, size); ++ if (err) ++ return err; ++ ++ len = sock->ops->sendmsg(iocb, sock, msg, size); ++ if (sock->sk) { ++ if (len == size) ++ vx_sock_send(sock->sk, size); ++ else ++ vx_sock_fail(sock->sk, size); ++ } ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_sendmsg: %p[%p,%p,%p;%d/%d]:%d/%d", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, ++ (sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; ++} ++ ++int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ++{ ++ struct kiocb iocb; ++ struct sock_iocb siocb; ++ int ret; ++ ++ init_sync_kiocb(&iocb, NULL); ++ iocb.private = &siocb; ++ ret = __sock_sendmsg(&iocb, sock, msg, size); ++ if (-EIOCBQUEUED == ret) ++ ret = wait_on_sync_kiocb(&iocb); ++ return ret; ++} ++ ++int kernel_sendmsg(struct socket *sock, struct msghdr *msg, ++ struct kvec *vec, size_t num, size_t size) ++{ ++ mm_segment_t oldfs = get_fs(); ++ int result; ++ ++ set_fs(KERNEL_DS); ++ /* ++ * the following is safe, since for compiler definitions of kvec and ++ * iovec are identical, yielding the same in-core layout and alignment ++ */ ++ msg->msg_iov = (struct iovec *)vec; ++ msg->msg_iovlen = num; ++ result = sock_sendmsg(sock, msg, size); ++ set_fs(oldfs); ++ return result; ++} ++ ++/* ++ * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) ++ */ ++void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, ++ struct sk_buff *skb) ++{ ++ ktime_t kt = skb->tstamp; ++ ++ if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { ++ struct timeval tv; ++ /* Race occurred between timestamp enabling and packet ++ receiving. Fill in the current time for now. 
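__sock_recv_timestamp(), which begins just above, turns the skb timestamp into an SCM_TIMESTAMP (or SCM_TIMESTAMPNS) control message once SO_TIMESTAMP has been enabled on the socket. The sketch below, not part of the patch, enables the option on a loopback UDP socket, sends itself a datagram, and reads the timestamp back with recvmsg(2).

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
    int on = 1;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    struct sockaddr_in addr;
    socklen_t alen = sizeof(addr);

    memset(&addr, 0, sizeof(addr));
    addr.sin_family      = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    addr.sin_port        = 0;                 /* let the kernel pick a port */

    if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        getsockname(fd, (struct sockaddr *)&addr, &alen) < 0 ||
        setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on)) < 0) {
        perror("setup");
        return 1;
    }
    sendto(fd, "x", 1, 0, (struct sockaddr *)&addr, sizeof(addr));

    char data[16], cbuf[64];
    struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
    struct msghdr msg = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
    };
    if (recvmsg(fd, &msg, 0) < 0) {
        perror("recvmsg");
        return 1;
    }

    struct cmsghdr *cm;
    for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
        if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMP) {
            struct timeval tv;
            memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
            printf("received at %ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
        }
    return 0;
}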
*/ ++ if (kt.tv64 == 0) ++ kt = ktime_get_real(); ++ skb->tstamp = kt; ++ tv = ktime_to_timeval(kt); ++ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); ++ } else { ++ struct timespec ts; ++ /* Race occurred between timestamp enabling and packet ++ receiving. Fill in the current time for now. */ ++ if (kt.tv64 == 0) ++ kt = ktime_get_real(); ++ skb->tstamp = kt; ++ ts = ktime_to_timespec(kt); ++ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(__sock_recv_timestamp); ++ ++static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, ++ struct msghdr *msg, size_t size, int flags) ++{ ++ int err, len; ++ struct sock_iocb *si = kiocb_to_siocb(iocb); ++ ++ si->sock = sock; ++ si->scm = NULL; ++ si->msg = msg; ++ si->size = size; ++ si->flags = flags; ++ ++ err = security_socket_recvmsg(sock, msg, size, flags); ++ if (err) ++ return err; ++ ++ len = sock->ops->recvmsg(iocb, sock, msg, size, flags); ++ if ((len >= 0) && sock->sk) ++ vx_sock_recv(sock->sk, len); ++ vxdprintk(VXD_CBIT(net, 7), ++ "__sock_recvmsg: %p[%p,%p,%p;%d/%d]:%d/%d", ++ sock, sock->sk, ++ (sock->sk)?sock->sk->sk_nx_info:0, ++ (sock->sk)?sock->sk->sk_vx_info:0, ++ (sock->sk)?sock->sk->sk_xid:0, ++ (sock->sk)?sock->sk->sk_nid:0, ++ (unsigned int)size, len); ++ return len; ++} ++ ++int sock_recvmsg(struct socket *sock, struct msghdr *msg, ++ size_t size, int flags) ++{ ++ struct kiocb iocb; ++ struct sock_iocb siocb; ++ int ret; ++ ++ init_sync_kiocb(&iocb, NULL); ++ iocb.private = &siocb; ++ ret = __sock_recvmsg(&iocb, sock, msg, size, flags); ++ if (-EIOCBQUEUED == ret) ++ ret = wait_on_sync_kiocb(&iocb); ++ return ret; ++} ++ ++int kernel_recvmsg(struct socket *sock, struct msghdr *msg, ++ struct kvec *vec, size_t num, size_t size, int flags) ++{ ++ mm_segment_t oldfs = get_fs(); ++ int result; ++ ++ set_fs(KERNEL_DS); ++ /* ++ * the following is safe, since for compiler definitions of kvec and ++ * iovec are identical, yielding the same in-core layout and alignment ++ */ ++ msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; ++ result = sock_recvmsg(sock, msg, size, flags); ++ set_fs(oldfs); ++ return result; ++} ++ ++static void sock_aio_dtor(struct kiocb *iocb) ++{ ++ kfree(iocb->private); ++} ++ ++static ssize_t sock_sendpage(struct file *file, struct page *page, ++ int offset, size_t size, loff_t *ppos, int more) ++{ ++ struct socket *sock; ++ int flags; ++ ++ sock = file->private_data; ++ ++ flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; ++ if (more) ++ flags |= MSG_MORE; ++ ++ return sock->ops->sendpage(sock, page, offset, size, flags); ++} ++ ++static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, ++ struct sock_iocb *siocb) ++{ ++ if (!is_sync_kiocb(iocb)) { ++ siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); ++ if (!siocb) ++ return NULL; ++ iocb->ki_dtor = sock_aio_dtor; ++ } ++ ++ siocb->kiocb = iocb; ++ iocb->private = siocb; ++ return siocb; ++} ++ ++static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, ++ struct file *file, const struct iovec *iov, ++ unsigned long nr_segs) ++{ ++ struct socket *sock = file->private_data; ++ size_t size = 0; ++ int i; ++ ++ for (i = 0; i < nr_segs; i++) ++ size += iov[i].iov_len; ++ ++ msg->msg_name = NULL; ++ msg->msg_namelen = 0; ++ msg->msg_control = NULL; ++ msg->msg_controllen = 0; ++ msg->msg_iov = (struct iovec *)iov; ++ msg->msg_iovlen = nr_segs; ++ msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; ++ ++ return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); ++} ++ ++static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ struct sock_iocb siocb, *x; ++ ++ if (pos != 0) ++ return -ESPIPE; ++ ++ if (iocb->ki_left == 0) /* Match SYS5 behaviour */ ++ return 0; ++ ++ ++ x = alloc_sock_iocb(iocb, &siocb); ++ if (!x) ++ return -ENOMEM; ++ return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); ++} ++ ++static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, ++ struct file *file, const struct iovec *iov, ++ unsigned long nr_segs) ++{ ++ struct socket *sock = file->private_data; ++ size_t size = 0; ++ int i; ++ ++ for (i = 0; i < nr_segs; i++) ++ size += iov[i].iov_len; ++ ++ msg->msg_name = NULL; ++ msg->msg_namelen = 0; ++ msg->msg_control = NULL; ++ msg->msg_controllen = 0; ++ msg->msg_iov = (struct iovec *)iov; ++ msg->msg_iovlen = nr_segs; ++ msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; ++ if (sock->type == SOCK_SEQPACKET) ++ msg->msg_flags |= MSG_EOR; ++ ++ return __sock_sendmsg(iocb, sock, msg, size); ++} ++ ++static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ struct sock_iocb siocb, *x; ++ ++ if (pos != 0) ++ return -ESPIPE; ++ ++ x = alloc_sock_iocb(iocb, &siocb); ++ if (!x) ++ return -ENOMEM; ++ ++ return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); ++} ++ ++/* ++ * Atomic setting of ioctl hooks to avoid race ++ * with module unload. ++ */ ++ ++static DEFINE_MUTEX(br_ioctl_mutex); ++static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; ++ ++void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) ++{ ++ mutex_lock(&br_ioctl_mutex); ++ br_ioctl_hook = hook; ++ mutex_unlock(&br_ioctl_mutex); ++} ++ ++EXPORT_SYMBOL(brioctl_set); ++ ++static DEFINE_MUTEX(vlan_ioctl_mutex); ++static int (*vlan_ioctl_hook) (struct net *, void __user *arg); ++ ++void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) ++{ ++ mutex_lock(&vlan_ioctl_mutex); ++ vlan_ioctl_hook = hook; ++ mutex_unlock(&vlan_ioctl_mutex); ++} ++ ++EXPORT_SYMBOL(vlan_ioctl_set); ++ ++static DEFINE_MUTEX(dlci_ioctl_mutex); ++static int (*dlci_ioctl_hook) (unsigned int, void __user *); ++ ++void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) ++{ ++ mutex_lock(&dlci_ioctl_mutex); ++ dlci_ioctl_hook = hook; ++ mutex_unlock(&dlci_ioctl_mutex); ++} ++ ++EXPORT_SYMBOL(dlci_ioctl_set); ++ ++/* ++ * With an ioctl, arg may well be a user mode pointer, but we don't know ++ * what to do with it - that's up to the protocol still. 
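sock_ioctl(), which follows, dispatches the device-private and wireless command ranges straight to dev_ioctl(), handles ownership and bridge/VLAN/DLCI commands itself, and for everything else asks the protocol via sock->ops->ioctl(), falling back to dev_ioctl() when that returns -ENOIOCTLCMD. That fallback is how ordinary interface queries work on any socket, as in this sketch (not part of the patch; the interface name "lo" is assumed to exist):

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    struct ifreq ifr;

    if (fd < 0) {
        perror("socket");
        return 1;
    }
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "lo", IFNAMSIZ - 1);

    /* Not a socket-level command, so the protocol returns -ENOIOCTLCMD
     * and sock_ioctl() hands the request down to dev_ioctl(). */
    if (ioctl(fd, SIOCGIFINDEX, &ifr) < 0)
        perror("SIOCGIFINDEX");
    else
        printf("lo has ifindex %d\n", ifr.ifr_ifindex);
    return 0;
}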
++ */ ++ ++static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) ++{ ++ struct socket *sock; ++ struct sock *sk; ++ void __user *argp = (void __user *)arg; ++ int pid, err; ++ struct net *net; ++ ++ sock = file->private_data; ++ sk = sock->sk; ++ net = sk->sk_net; ++ if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { ++ err = dev_ioctl(net, cmd, argp); ++ } else ++#ifdef CONFIG_WIRELESS_EXT ++ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { ++ err = dev_ioctl(net, cmd, argp); ++ } else ++#endif /* CONFIG_WIRELESS_EXT */ ++ switch (cmd) { ++ case FIOSETOWN: ++ case SIOCSPGRP: ++ err = -EFAULT; ++ if (get_user(pid, (int __user *)argp)) ++ break; ++ err = f_setown(sock->file, pid, 1); ++ break; ++ case FIOGETOWN: ++ case SIOCGPGRP: ++ err = put_user(f_getown(sock->file), ++ (int __user *)argp); ++ break; ++ case SIOCGIFBR: ++ case SIOCSIFBR: ++ case SIOCBRADDBR: ++ case SIOCBRDELBR: ++ err = -ENOPKG; ++ if (!br_ioctl_hook) ++ request_module("bridge"); ++ ++ mutex_lock(&br_ioctl_mutex); ++ if (br_ioctl_hook) ++ err = br_ioctl_hook(net, cmd, argp); ++ mutex_unlock(&br_ioctl_mutex); ++ break; ++ case SIOCGIFVLAN: ++ case SIOCSIFVLAN: ++ err = -ENOPKG; ++ if (!vlan_ioctl_hook) ++ request_module("8021q"); ++ ++ mutex_lock(&vlan_ioctl_mutex); ++ if (vlan_ioctl_hook) ++ err = vlan_ioctl_hook(net, argp); ++ mutex_unlock(&vlan_ioctl_mutex); ++ break; ++ case SIOCADDDLCI: ++ case SIOCDELDLCI: ++ err = -ENOPKG; ++ if (!dlci_ioctl_hook) ++ request_module("dlci"); ++ ++ if (dlci_ioctl_hook) { ++ mutex_lock(&dlci_ioctl_mutex); ++ err = dlci_ioctl_hook(cmd, argp); ++ mutex_unlock(&dlci_ioctl_mutex); ++ } ++ break; ++ default: ++ err = sock->ops->ioctl(sock, cmd, arg); ++ ++ /* ++ * If this ioctl is unknown try to hand it down ++ * to the NIC driver. ++ */ ++ if (err == -ENOIOCTLCMD) ++ err = dev_ioctl(net, cmd, argp); ++ break; ++ } ++ return err; ++} ++ ++int sock_create_lite(int family, int type, int protocol, struct socket **res) ++{ ++ int err; ++ struct socket *sock = NULL; ++ ++ err = security_socket_create(family, type, protocol, 1); ++ if (err) ++ goto out; ++ ++ sock = sock_alloc(); ++ if (!sock) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ sock->type = type; ++ err = security_socket_post_create(sock, family, type, protocol, 1); ++ if (err) ++ goto out_release; ++ ++out: ++ *res = sock; ++ return err; ++out_release: ++ sock_release(sock); ++ sock = NULL; ++ goto out; ++} ++ ++/* No kernel lock held - perfect */ ++static unsigned int sock_poll(struct file *file, poll_table *wait) ++{ ++ struct socket *sock; ++ ++ /* ++ * We can't return errors to poll, so it's either yes or no. ++ */ ++ sock = file->private_data; ++ return sock->ops->poll(file, sock, wait); ++} ++ ++static int sock_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ struct socket *sock = file->private_data; ++ ++ return sock->ops->mmap(file, sock, vma); ++} ++ ++static int sock_close(struct inode *inode, struct file *filp) ++{ ++ /* ++ * It was possible the inode is NULL we were ++ * closing an unfinished socket. ++ */ ++ ++ if (!inode) { ++ printk(KERN_DEBUG "sock_close: NULL inode\n"); ++ return 0; ++ } ++ sock_fasync(-1, filp, 0); ++ sock_release(SOCKET_I(inode)); ++ return 0; ++} ++ ++/* ++ * Update the socket async list ++ * ++ * Fasync_list locking strategy. ++ * ++ * 1. fasync_list is modified only under process context socket lock ++ * i.e. under semaphore. ++ * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) ++ * or under socket lock. ++ * 3. 
fasync_list can be used from softirq context, so that ++ * modification under socket lock have to be enhanced with ++ * write_lock_bh(&sk->sk_callback_lock). ++ * --ANK (990710) ++ */ ++ ++static int sock_fasync(int fd, struct file *filp, int on) ++{ ++ struct fasync_struct *fa, *fna = NULL, **prev; ++ struct socket *sock; ++ struct sock *sk; ++ ++ if (on) { ++ fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); ++ if (fna == NULL) ++ return -ENOMEM; ++ } ++ ++ sock = filp->private_data; ++ ++ sk = sock->sk; ++ if (sk == NULL) { ++ kfree(fna); ++ return -EINVAL; ++ } ++ ++ lock_sock(sk); ++ ++ prev = &(sock->fasync_list); ++ ++ for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) ++ if (fa->fa_file == filp) ++ break; ++ ++ if (on) { ++ if (fa != NULL) { ++ write_lock_bh(&sk->sk_callback_lock); ++ fa->fa_fd = fd; ++ write_unlock_bh(&sk->sk_callback_lock); ++ ++ kfree(fna); ++ goto out; ++ } ++ fna->fa_file = filp; ++ fna->fa_fd = fd; ++ fna->magic = FASYNC_MAGIC; ++ fna->fa_next = sock->fasync_list; ++ write_lock_bh(&sk->sk_callback_lock); ++ sock->fasync_list = fna; ++ write_unlock_bh(&sk->sk_callback_lock); ++ } else { ++ if (fa != NULL) { ++ write_lock_bh(&sk->sk_callback_lock); ++ *prev = fa->fa_next; ++ write_unlock_bh(&sk->sk_callback_lock); ++ kfree(fa); ++ } ++ } ++ ++out: ++ release_sock(sock->sk); ++ return 0; ++} ++ ++/* This function may be called only under socket lock or callback_lock */ ++ ++int sock_wake_async(struct socket *sock, int how, int band) ++{ ++ if (!sock || !sock->fasync_list) ++ return -1; ++ switch (how) { ++ case 1: ++ ++ if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) ++ break; ++ goto call_kill; ++ case 2: ++ if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) ++ break; ++ /* fall through */ ++ case 0: ++call_kill: ++ __kill_fasync(sock->fasync_list, SIGIO, band); ++ break; ++ case 3: ++ __kill_fasync(sock->fasync_list, SIGURG, band); ++ } ++ return 0; ++} ++ ++static int __sock_create(struct net *net, int family, int type, int protocol, ++ struct socket **res, int kern) ++{ ++ int err; ++ struct socket *sock; ++ const struct net_proto_family *pf; ++ ++ /* ++ * Check protocol is in range ++ */ ++ if (family < 0 || family >= NPROTO) ++ return -EAFNOSUPPORT; ++ if (type < 0 || type >= SOCK_MAX) ++ return -EINVAL; ++ ++ if (!nx_check(0, VS_ADMIN)) { ++ if (family == PF_INET && !current_nx_info_has_v4()) ++ return -EAFNOSUPPORT; ++ if (family == PF_INET6 && !current_nx_info_has_v6()) ++ return -EAFNOSUPPORT; ++ } ++ ++ /* Compatibility. ++ ++ This uglymoron is moved from INET layer to here to avoid ++ deadlock in module load. ++ */ ++ if (family == PF_INET && type == SOCK_PACKET) { ++ static int warned; ++ if (!warned) { ++ warned = 1; ++ printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", ++ current->comm); ++ } ++ family = PF_PACKET; ++ } ++ ++ err = security_socket_create(family, type, protocol, kern); ++ if (err) ++ return err; ++ ++ /* ++ * Allocate the socket and allow the family to set things up. if ++ * the protocol is 0, the family is instructed to select an appropriate ++ * default. ++ */ ++ sock = sock_alloc(); ++ if (!sock) { ++ if (net_ratelimit()) ++ printk(KERN_WARNING "socket: no more sockets\n"); ++ return -ENFILE; /* Not exactly a match, but its the ++ closest posix thing */ ++ } ++ ++ sock->type = type; ++ ++#if defined(CONFIG_KMOD) ++ /* Attempt to load a protocol module if the find failed. ++ * ++ * 12/09/1996 Marcin: But! 
this makes REALLY only sense, if the user ++ * requested real, full-featured networking support upon configuration. ++ * Otherwise module support will break! ++ */ ++ if (net_families[family] == NULL) ++ request_module("net-pf-%d", family); ++#endif ++ ++ rcu_read_lock(); ++ pf = rcu_dereference(net_families[family]); ++ err = -EAFNOSUPPORT; ++ if (!pf) ++ goto out_release; ++ ++ /* ++ * We will call the ->create function, that possibly is in a loadable ++ * module, so we have to bump that loadable module refcnt first. ++ */ ++ if (!try_module_get(pf->owner)) ++ goto out_release; ++ ++ /* Now protected by module ref count */ ++ rcu_read_unlock(); ++ ++ err = pf->create(net, sock, protocol); ++ if (err < 0) ++ goto out_module_put; ++ ++ /* ++ * Now to bump the refcnt of the [loadable] module that owns this ++ * socket at sock_release time we decrement its refcnt. ++ */ ++ if (!try_module_get(sock->ops->owner)) ++ goto out_module_busy; ++ ++ /* ++ * Now that we're done with the ->create function, the [loadable] ++ * module can have its refcnt decremented ++ */ ++ module_put(pf->owner); ++ err = security_socket_post_create(sock, family, type, protocol, kern); ++ if (err) ++ goto out_sock_release; ++ *res = sock; ++ ++ return 0; ++ ++out_module_busy: ++ err = -EAFNOSUPPORT; ++out_module_put: ++ sock->ops = NULL; ++ module_put(pf->owner); ++out_sock_release: ++ sock_release(sock); ++ return err; ++ ++out_release: ++ rcu_read_unlock(); ++ goto out_sock_release; ++} ++ ++int sock_create(int family, int type, int protocol, struct socket **res) ++{ ++ return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); ++} ++ ++int sock_create_kern(int family, int type, int protocol, struct socket **res) ++{ ++ return __sock_create(&init_net, family, type, protocol, res, 1); ++} ++ ++asmlinkage long sys_socket(int family, int type, int protocol) ++{ ++ int retval; ++ struct socket *sock; ++ ++ retval = sock_create(family, type, protocol, &sock); ++ if (retval < 0) ++ goto out; ++ ++ set_bit(SOCK_USER_SOCKET, &sock->flags); ++ retval = sock_map_fd(sock); ++ if (retval < 0) ++ goto out_release; ++ ++out: ++ /* It may be already another descriptor 8) Not kernel problem. */ ++ return retval; ++ ++out_release: ++ sock_release(sock); ++ return retval; ++} ++ ++/* ++ * Create a pair of connected sockets. ++ */ ++ ++asmlinkage long sys_socketpair(int family, int type, int protocol, ++ int __user *usockvec) ++{ ++ struct socket *sock1, *sock2; ++ int fd1, fd2, err; ++ struct file *newfile1, *newfile2; ++ ++ /* ++ * Obtain the first socket and check if the underlying protocol ++ * supports the socketpair call. 
++ */ ++ ++ err = sock_create(family, type, protocol, &sock1); ++ if (err < 0) ++ goto out; ++ set_bit(SOCK_USER_SOCKET, &sock1->flags); ++ ++ err = sock_create(family, type, protocol, &sock2); ++ if (err < 0) ++ goto out_release_1; ++ set_bit(SOCK_USER_SOCKET, &sock2->flags); ++ ++ err = sock1->ops->socketpair(sock1, sock2); ++ if (err < 0) ++ goto out_release_both; ++ ++ fd1 = sock_alloc_fd(&newfile1); ++ if (unlikely(fd1 < 0)) { ++ err = fd1; ++ goto out_release_both; ++ } ++ ++ fd2 = sock_alloc_fd(&newfile2); ++ if (unlikely(fd2 < 0)) { ++ err = fd2; ++ put_filp(newfile1); ++ put_unused_fd(fd1); ++ goto out_release_both; ++ } ++ ++ err = sock_attach_fd(sock1, newfile1); ++ if (unlikely(err < 0)) { ++ goto out_fd2; ++ } ++ ++ err = sock_attach_fd(sock2, newfile2); ++ if (unlikely(err < 0)) { ++ fput(newfile1); ++ goto out_fd1; ++ } ++ ++ err = audit_fd_pair(fd1, fd2); ++ if (err < 0) { ++ fput(newfile1); ++ fput(newfile2); ++ goto out_fd; ++ } ++ ++ fd_install(fd1, newfile1); ++ fd_install(fd2, newfile2); ++ /* fd1 and fd2 may be already another descriptors. ++ * Not kernel problem. ++ */ ++ ++ err = put_user(fd1, &usockvec[0]); ++ if (!err) ++ err = put_user(fd2, &usockvec[1]); ++ if (!err) ++ return 0; ++ ++ sys_close(fd2); ++ sys_close(fd1); ++ return err; ++ ++out_release_both: ++ sock_release(sock2); ++out_release_1: ++ sock_release(sock1); ++out: ++ return err; ++ ++out_fd2: ++ put_filp(newfile1); ++ sock_release(sock1); ++out_fd1: ++ put_filp(newfile2); ++ sock_release(sock2); ++out_fd: ++ put_unused_fd(fd1); ++ put_unused_fd(fd2); ++ goto out; ++} ++ ++/* ++ * Bind a name to a socket. Nothing much to do here since it's ++ * the protocol's responsibility to handle the local address. ++ * ++ * We move the socket address to kernel space before we call ++ * the protocol layer (having also checked the address is ok). ++ */ ++ ++asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) ++{ ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ int err, fput_needed; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock) { ++ err = move_addr_to_kernel(umyaddr, addrlen, address); ++ if (err >= 0) { ++ err = security_socket_bind(sock, ++ (struct sockaddr *)address, ++ addrlen); ++ if (!err) ++ err = sock->ops->bind(sock, ++ (struct sockaddr *) ++ address, addrlen); ++ } ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* ++ * Perform a listen. Basically, we allow the protocol to do anything ++ * necessary for a listen, and if that works, we mark the socket as ++ * ready for listening. ++ */ ++ ++asmlinkage long sys_listen(int fd, int backlog) ++{ ++ struct socket *sock; ++ int err, fput_needed; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock) { ++ struct net *net = sock->sk->sk_net; ++ if ((unsigned)backlog > net->sysctl_somaxconn) ++ backlog = net->sysctl_somaxconn; ++ ++ err = security_socket_listen(sock, backlog); ++ if (!err) ++ err = sock->ops->listen(sock, backlog); ++ ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* ++ * For accept, we attempt to create a new socket, set up the link ++ * with the client, wake up the client, then return the new ++ * connected fd. We collect the address of the connector in kernel ++ * space and move it to user at the very end. This is unclean because ++ * we open the socket then return an error. ++ * ++ * 1003.1g adds the ability to recvmsg() to query connection pending ++ * status to recvmsg. 
We need to add that support in a way thats ++ * clean when we restucture accept also. ++ */ ++ ++asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, ++ int __user *upeer_addrlen) ++{ ++ struct socket *sock, *newsock; ++ struct file *newfile; ++ int err, len, newfd, fput_needed; ++ char address[MAX_SOCK_ADDR]; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (!sock) ++ goto out; ++ ++ err = -ENFILE; ++ if (!(newsock = sock_alloc())) ++ goto out_put; ++ ++ newsock->type = sock->type; ++ newsock->ops = sock->ops; ++ ++ /* ++ * We don't need try_module_get here, as the listening socket (sock) ++ * has the protocol module (sock->ops->owner) held. ++ */ ++ __module_get(newsock->ops->owner); ++ ++ newfd = sock_alloc_fd(&newfile); ++ if (unlikely(newfd < 0)) { ++ err = newfd; ++ sock_release(newsock); ++ goto out_put; ++ } ++ ++ err = sock_attach_fd(newsock, newfile); ++ if (err < 0) ++ goto out_fd_simple; ++ ++ err = security_socket_accept(sock, newsock); ++ if (err) ++ goto out_fd; ++ ++ err = sock->ops->accept(sock, newsock, sock->file->f_flags); ++ if (err < 0) ++ goto out_fd; ++ ++ if (upeer_sockaddr) { ++ if (newsock->ops->getname(newsock, (struct sockaddr *)address, ++ &len, 2) < 0) { ++ err = -ECONNABORTED; ++ goto out_fd; ++ } ++ err = move_addr_to_user(address, len, upeer_sockaddr, ++ upeer_addrlen); ++ if (err < 0) ++ goto out_fd; ++ } ++ ++ /* File flags are not inherited via accept() unlike another OSes. */ ++ ++ fd_install(newfd, newfile); ++ err = newfd; ++ ++ security_socket_post_accept(sock, newsock); ++ ++out_put: ++ fput_light(sock->file, fput_needed); ++out: ++ return err; ++out_fd_simple: ++ sock_release(newsock); ++ put_filp(newfile); ++ put_unused_fd(newfd); ++ goto out_put; ++out_fd: ++ fput(newfile); ++ put_unused_fd(newfd); ++ goto out_put; ++} ++ ++/* ++ * Attempt to connect to a socket with the server address. The address ++ * is in user space so we verify it is OK and move it to kernel space. ++ * ++ * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to ++ * break bindings ++ * ++ * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and ++ * other SEQPACKET protocols that take time to connect() as it doesn't ++ * include the -EINPROGRESS status for such sockets. ++ */ ++ ++asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, ++ int addrlen) ++{ ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ int err, fput_needed; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (!sock) ++ goto out; ++ err = move_addr_to_kernel(uservaddr, addrlen, address); ++ if (err < 0) ++ goto out_put; ++ ++ err = ++ security_socket_connect(sock, (struct sockaddr *)address, addrlen); ++ if (err) ++ goto out_put; ++ ++ err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, ++ sock->file->f_flags); ++out_put: ++ fput_light(sock->file, fput_needed); ++out: ++ return err; ++} ++ ++/* ++ * Get the local address ('name') of a socket object. Move the obtained ++ * name to user space. 
++ */ ++ ++asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, ++ int __user *usockaddr_len) ++{ ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ int len, err, fput_needed; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (!sock) ++ goto out; ++ ++ err = security_socket_getsockname(sock); ++ if (err) ++ goto out_put; ++ ++ err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); ++ if (err) ++ goto out_put; ++ err = move_addr_to_user(address, len, usockaddr, usockaddr_len); ++ ++out_put: ++ fput_light(sock->file, fput_needed); ++out: ++ return err; ++} ++ ++/* ++ * Get the remote address ('name') of a socket object. Move the obtained ++ * name to user space. ++ */ ++ ++asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, ++ int __user *usockaddr_len) ++{ ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ int len, err, fput_needed; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock != NULL) { ++ err = security_socket_getpeername(sock); ++ if (err) { ++ fput_light(sock->file, fput_needed); ++ return err; ++ } ++ ++ err = ++ sock->ops->getname(sock, (struct sockaddr *)address, &len, ++ 1); ++ if (!err) ++ err = move_addr_to_user(address, len, usockaddr, ++ usockaddr_len); ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* ++ * Send a datagram to a given address. We move the address into kernel ++ * space and check the user space data area is readable before invoking ++ * the protocol. ++ */ ++ ++asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, ++ unsigned flags, struct sockaddr __user *addr, ++ int addr_len) ++{ ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ int err; ++ struct msghdr msg; ++ struct iovec iov; ++ int fput_needed; ++ struct file *sock_file; ++ ++ sock_file = fget_light(fd, &fput_needed); ++ err = -EBADF; ++ if (!sock_file) ++ goto out; ++ ++ sock = sock_from_file(sock_file, &err); ++ if (!sock) ++ goto out_put; ++ iov.iov_base = buff; ++ iov.iov_len = len; ++ msg.msg_name = NULL; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ msg.msg_namelen = 0; ++ if (addr) { ++ err = move_addr_to_kernel(addr, addr_len, address); ++ if (err < 0) ++ goto out_put; ++ msg.msg_name = address; ++ msg.msg_namelen = addr_len; ++ } ++ if (sock->file->f_flags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ msg.msg_flags = flags; ++ err = sock_sendmsg(sock, &msg, len); ++ ++out_put: ++ fput_light(sock_file, fput_needed); ++out: ++ return err; ++} ++ ++/* ++ * Send a datagram down a socket. ++ */ ++ ++asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) ++{ ++ return sys_sendto(fd, buff, len, flags, NULL, 0); ++} ++ ++/* ++ * Receive a frame from the socket and optionally record the address of the ++ * sender. We verify the buffers are writable and if needed move the ++ * sender address from kernel to user space. 
++ */ ++ ++asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, ++ unsigned flags, struct sockaddr __user *addr, ++ int __user *addr_len) ++{ ++ struct socket *sock; ++ struct iovec iov; ++ struct msghdr msg; ++ char address[MAX_SOCK_ADDR]; ++ int err, err2; ++ struct file *sock_file; ++ int fput_needed; ++ ++ sock_file = fget_light(fd, &fput_needed); ++ err = -EBADF; ++ if (!sock_file) ++ goto out; ++ ++ sock = sock_from_file(sock_file, &err); ++ if (!sock) ++ goto out_put; ++ ++ msg.msg_control = NULL; ++ msg.msg_controllen = 0; ++ msg.msg_iovlen = 1; ++ msg.msg_iov = &iov; ++ iov.iov_len = size; ++ iov.iov_base = ubuf; ++ msg.msg_name = address; ++ msg.msg_namelen = MAX_SOCK_ADDR; ++ if (sock->file->f_flags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ err = sock_recvmsg(sock, &msg, size, flags); ++ ++ if (err >= 0 && addr != NULL) { ++ err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); ++ if (err2 < 0) ++ err = err2; ++ } ++out_put: ++ fput_light(sock_file, fput_needed); ++out: ++ return err; ++} ++ ++/* ++ * Receive a datagram from a socket. ++ */ ++ ++asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, ++ unsigned flags) ++{ ++ return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); ++} ++ ++/* ++ * Set a socket option. Because we don't know the option lengths we have ++ * to pass the user mode parameter for the protocols to sort out. ++ */ ++ ++asmlinkage long sys_setsockopt(int fd, int level, int optname, ++ char __user *optval, int optlen) ++{ ++ int err, fput_needed; ++ struct socket *sock; ++ ++ if (optlen < 0) ++ return -EINVAL; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock != NULL) { ++ err = security_socket_setsockopt(sock, level, optname); ++ if (err) ++ goto out_put; ++ ++ if (level == SOL_SOCKET) ++ err = ++ sock_setsockopt(sock, level, optname, optval, ++ optlen); ++ else ++ err = ++ sock->ops->setsockopt(sock, level, optname, optval, ++ optlen); ++out_put: ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* ++ * Get a socket option. Because we don't know the option lengths we have ++ * to pass a user mode parameter for the protocols to sort out. ++ */ ++ ++asmlinkage long sys_getsockopt(int fd, int level, int optname, ++ char __user *optval, int __user *optlen) ++{ ++ int err, fput_needed; ++ struct socket *sock; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock != NULL) { ++ err = security_socket_getsockopt(sock, level, optname); ++ if (err) ++ goto out_put; ++ ++ if (level == SOL_SOCKET) ++ err = ++ sock_getsockopt(sock, level, optname, optval, ++ optlen); ++ else ++ err = ++ sock->ops->getsockopt(sock, level, optname, optval, ++ optlen); ++out_put: ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* ++ * Shutdown a socket. ++ */ ++ ++asmlinkage long sys_shutdown(int fd, int how) ++{ ++ int err, fput_needed; ++ struct socket *sock; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (sock != NULL) { ++ err = security_socket_shutdown(sock, how); ++ if (!err) ++ err = sock->ops->shutdown(sock, how); ++ fput_light(sock->file, fput_needed); ++ } ++ return err; ++} ++ ++/* A couple of helpful macros for getting the address of the 32/64 bit ++ * fields which are the same type (int / unsigned) on our platforms. ++ */ ++#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) ++#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) ++#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) ++ ++/* ++ * BSD sendmsg interface ++ */ ++ ++asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) ++{ ++ struct compat_msghdr __user *msg_compat = ++ (struct compat_msghdr __user *)msg; ++ struct socket *sock; ++ char address[MAX_SOCK_ADDR]; ++ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; ++ unsigned char ctl[sizeof(struct cmsghdr) + 20] ++ __attribute__ ((aligned(sizeof(__kernel_size_t)))); ++ /* 20 is size of ipv6_pktinfo */ ++ unsigned char *ctl_buf = ctl; ++ struct msghdr msg_sys; ++ int err, ctl_len, iov_size, total_len; ++ int fput_needed; ++ ++ err = -EFAULT; ++ if (MSG_CMSG_COMPAT & flags) { ++ if (get_compat_msghdr(&msg_sys, msg_compat)) ++ return -EFAULT; ++ } ++ else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) ++ return -EFAULT; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (!sock) ++ goto out; ++ ++ /* do not move before msg_sys is valid */ ++ err = -EMSGSIZE; ++ if (msg_sys.msg_iovlen > UIO_MAXIOV) ++ goto out_put; ++ ++ /* Check whether to allocate the iovec area */ ++ err = -ENOMEM; ++ iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); ++ if (msg_sys.msg_iovlen > UIO_FASTIOV) { ++ iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); ++ if (!iov) ++ goto out_put; ++ } ++ ++ /* This will also move the address data into kernel space */ ++ if (MSG_CMSG_COMPAT & flags) { ++ err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); ++ } else ++ err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); ++ if (err < 0) ++ goto out_freeiov; ++ total_len = err; ++ ++ err = -ENOBUFS; ++ ++ if (msg_sys.msg_controllen > INT_MAX) ++ goto out_freeiov; ++ ctl_len = msg_sys.msg_controllen; ++ if ((MSG_CMSG_COMPAT & flags) && ctl_len) { ++ err = ++ cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, ++ sizeof(ctl)); ++ if (err) ++ goto out_freeiov; ++ ctl_buf = msg_sys.msg_control; ++ ctl_len = msg_sys.msg_controllen; ++ } else if (ctl_len) { ++ if (ctl_len > sizeof(ctl)) { ++ ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); ++ if (ctl_buf == NULL) ++ goto out_freeiov; ++ } ++ err = -EFAULT; ++ /* ++ * Careful! Before this, msg_sys.msg_control contains a user pointer. ++ * Afterwards, it will be a kernel pointer. Thus the compiler-assisted ++ * checking falls down on this. 
++ */ ++ if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, ++ ctl_len)) ++ goto out_freectl; ++ msg_sys.msg_control = ctl_buf; ++ } ++ msg_sys.msg_flags = flags; ++ ++ if (sock->file->f_flags & O_NONBLOCK) ++ msg_sys.msg_flags |= MSG_DONTWAIT; ++ err = sock_sendmsg(sock, &msg_sys, total_len); ++ ++out_freectl: ++ if (ctl_buf != ctl) ++ sock_kfree_s(sock->sk, ctl_buf, ctl_len); ++out_freeiov: ++ if (iov != iovstack) ++ sock_kfree_s(sock->sk, iov, iov_size); ++out_put: ++ fput_light(sock->file, fput_needed); ++out: ++ return err; ++} ++ ++/* ++ * BSD recvmsg interface ++ */ ++ ++asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, ++ unsigned int flags) ++{ ++ struct compat_msghdr __user *msg_compat = ++ (struct compat_msghdr __user *)msg; ++ struct socket *sock; ++ struct iovec iovstack[UIO_FASTIOV]; ++ struct iovec *iov = iovstack; ++ struct msghdr msg_sys; ++ unsigned long cmsg_ptr; ++ int err, iov_size, total_len, len; ++ int fput_needed; ++ ++ /* kernel mode address */ ++ char addr[MAX_SOCK_ADDR]; ++ ++ /* user mode address pointers */ ++ struct sockaddr __user *uaddr; ++ int __user *uaddr_len; ++ ++ if (MSG_CMSG_COMPAT & flags) { ++ if (get_compat_msghdr(&msg_sys, msg_compat)) ++ return -EFAULT; ++ } ++ else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) ++ return -EFAULT; ++ ++ sock = sockfd_lookup_light(fd, &err, &fput_needed); ++ if (!sock) ++ goto out; ++ ++ err = -EMSGSIZE; ++ if (msg_sys.msg_iovlen > UIO_MAXIOV) ++ goto out_put; ++ ++ /* Check whether to allocate the iovec area */ ++ err = -ENOMEM; ++ iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); ++ if (msg_sys.msg_iovlen > UIO_FASTIOV) { ++ iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); ++ if (!iov) ++ goto out_put; ++ } ++ ++ /* ++ * Save the user-mode address (verify_iovec will change the ++ * kernel msghdr to use the kernel address space) ++ */ ++ ++ uaddr = (void __user *)msg_sys.msg_name; ++ uaddr_len = COMPAT_NAMELEN(msg); ++ if (MSG_CMSG_COMPAT & flags) { ++ err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); ++ } else ++ err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); ++ if (err < 0) ++ goto out_freeiov; ++ total_len = err; ++ ++ cmsg_ptr = (unsigned long)msg_sys.msg_control; ++ msg_sys.msg_flags = 0; ++ if (MSG_CMSG_COMPAT & flags) ++ msg_sys.msg_flags = MSG_CMSG_COMPAT; ++ ++ if (sock->file->f_flags & O_NONBLOCK) ++ flags |= MSG_DONTWAIT; ++ err = sock_recvmsg(sock, &msg_sys, total_len, flags); ++ if (err < 0) ++ goto out_freeiov; ++ len = err; ++ ++ if (uaddr != NULL) { ++ err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, ++ uaddr_len); ++ if (err < 0) ++ goto out_freeiov; ++ } ++ err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), ++ COMPAT_FLAGS(msg)); ++ if (err) ++ goto out_freeiov; ++ if (MSG_CMSG_COMPAT & flags) ++ err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, ++ &msg_compat->msg_controllen); ++ else ++ err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, ++ &msg->msg_controllen); ++ if (err) ++ goto out_freeiov; ++ err = len; ++ ++out_freeiov: ++ if (iov != iovstack) ++ sock_kfree_s(sock->sk, iov, iov_size); ++out_put: ++ fput_light(sock->file, fput_needed); ++out: ++ return err; ++} ++ ++#ifdef __ARCH_WANT_SYS_SOCKETCALL ++ ++/* Argument list sizes for sys_socketcall */ ++#define AL(x) ((x) * sizeof(unsigned long)) ++static const unsigned char nargs[18]={ ++ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), ++ AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), ++ AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) ++}; ++ ++#undef AL ++ ++/* ++ * 
System call vectors. ++ * ++ * Argument checking cleaned up. Saved 20% in size. ++ * This function doesn't need to set the kernel lock because ++ * it is set by the callees. ++ */ ++ ++asmlinkage long sys_socketcall(int call, unsigned long __user *args) ++{ ++ unsigned long a[6]; ++ unsigned long a0, a1; ++ int err; ++ ++ if (call < 1 || call > SYS_RECVMSG) ++ return -EINVAL; ++ ++ /* copy_from_user should be SMP safe. */ ++ if (copy_from_user(a, args, nargs[call])) ++ return -EFAULT; ++ ++ err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); ++ if (err) ++ return err; ++ ++ a0 = a[0]; ++ a1 = a[1]; ++ ++ switch (call) { ++ case SYS_SOCKET: ++ err = sys_socket(a0, a1, a[2]); ++ break; ++ case SYS_BIND: ++ err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); ++ break; ++ case SYS_CONNECT: ++ err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); ++ break; ++ case SYS_LISTEN: ++ err = sys_listen(a0, a1); ++ break; ++ case SYS_ACCEPT: ++ err = ++ sys_accept(a0, (struct sockaddr __user *)a1, ++ (int __user *)a[2]); ++ break; ++ case SYS_GETSOCKNAME: ++ err = ++ sys_getsockname(a0, (struct sockaddr __user *)a1, ++ (int __user *)a[2]); ++ break; ++ case SYS_GETPEERNAME: ++ err = ++ sys_getpeername(a0, (struct sockaddr __user *)a1, ++ (int __user *)a[2]); ++ break; ++ case SYS_SOCKETPAIR: ++ err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); ++ break; ++ case SYS_SEND: ++ err = sys_send(a0, (void __user *)a1, a[2], a[3]); ++ break; ++ case SYS_SENDTO: ++ err = sys_sendto(a0, (void __user *)a1, a[2], a[3], ++ (struct sockaddr __user *)a[4], a[5]); ++ break; ++ case SYS_RECV: ++ err = sys_recv(a0, (void __user *)a1, a[2], a[3]); ++ break; ++ case SYS_RECVFROM: ++ err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], ++ (struct sockaddr __user *)a[4], ++ (int __user *)a[5]); ++ break; ++ case SYS_SHUTDOWN: ++ err = sys_shutdown(a0, a1); ++ break; ++ case SYS_SETSOCKOPT: ++ err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); ++ break; ++ case SYS_GETSOCKOPT: ++ err = ++ sys_getsockopt(a0, a1, a[2], (char __user *)a[3], ++ (int __user *)a[4]); ++ break; ++ case SYS_SENDMSG: ++ err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); ++ break; ++ case SYS_RECVMSG: ++ err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ return err; ++} ++ ++#endif /* __ARCH_WANT_SYS_SOCKETCALL */ ++ ++/** ++ * sock_register - add a socket protocol handler ++ * @ops: description of protocol ++ * ++ * This function is called by a protocol handler that wants to ++ * advertise its address family, and have it linked into the ++ * socket interface. The value ops->family coresponds to the ++ * socket system call protocol family. ++ */ ++int sock_register(const struct net_proto_family *ops) ++{ ++ int err; ++ ++ if (ops->family >= NPROTO) { ++ printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, ++ NPROTO); ++ return -ENOBUFS; ++ } ++ ++ spin_lock(&net_family_lock); ++ if (net_families[ops->family]) ++ err = -EEXIST; ++ else { ++ net_families[ops->family] = ops; ++ err = 0; ++ } ++ spin_unlock(&net_family_lock); ++ ++ printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); ++ return err; ++} ++ ++/** ++ * sock_unregister - remove a protocol handler ++ * @family: protocol family to remove ++ * ++ * This function is called by a protocol handler that wants to ++ * remove its address family, and have it unlinked from the ++ * new socket creation. 
++ * ++ * If protocol handler is a module, then it can use module reference ++ * counts to protect against new references. If protocol handler is not ++ * a module then it needs to provide its own protection in ++ * the ops->create routine. ++ */ ++void sock_unregister(int family) ++{ ++ BUG_ON(family < 0 || family >= NPROTO); ++ ++ spin_lock(&net_family_lock); ++ net_families[family] = NULL; ++ spin_unlock(&net_family_lock); ++ ++ synchronize_rcu(); ++ ++ printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); ++} ++ ++static int sock_pernet_init(struct net *net) ++{ ++ net->sysctl_somaxconn = SOMAXCONN; ++ return 0; ++} ++ ++static struct pernet_operations sock_net_ops = { ++ .init = sock_pernet_init, ++}; ++ ++static int __init sock_init(void) ++{ ++ /* ++ * Initialize sock SLAB cache. ++ */ ++ ++ sk_init(); ++ ++ /* ++ * Initialize skbuff SLAB cache ++ */ ++ skb_init(); ++ ++ /* ++ * Initialize the protocols module. ++ */ ++ ++ init_inodecache(); ++ register_filesystem(&sock_fs_type); ++ sock_mnt = kern_mount(&sock_fs_type); ++ ++ /* The real protocol initialization is performed in later initcalls. ++ */ ++ ++#ifdef CONFIG_NETFILTER ++ netfilter_init(); ++#endif ++ ++ register_pernet_subsys(&sock_net_ops); ++ ++ return 0; ++} ++ ++core_initcall(sock_init); /* early initcall */ ++ ++#ifdef CONFIG_PROC_FS ++void socket_seq_show(struct seq_file *seq) ++{ ++ int cpu; ++ int counter = 0; ++ ++ for_each_possible_cpu(cpu) ++ counter += per_cpu(sockets_in_use, cpu); ++ ++ /* It can be negative, by the way. 8) */ ++ if (counter < 0) ++ counter = 0; ++ ++ seq_printf(seq, "sockets: used %d\n", counter); ++} ++#endif /* CONFIG_PROC_FS */ ++ ++#ifdef CONFIG_COMPAT ++static long compat_sock_ioctl(struct file *file, unsigned cmd, ++ unsigned long arg) ++{ ++ struct socket *sock = file->private_data; ++ int ret = -ENOIOCTLCMD; ++ ++ if (sock->ops->compat_ioctl) ++ ret = sock->ops->compat_ioctl(sock, cmd, arg); ++ ++ return ret; ++} ++#endif ++ ++int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) ++{ ++ return sock->ops->bind(sock, addr, addrlen); ++} ++ ++int kernel_listen(struct socket *sock, int backlog) ++{ ++ return sock->ops->listen(sock, backlog); ++} ++ ++int kernel_accept(struct socket *sock, struct socket **newsock, int flags) ++{ ++ struct sock *sk = sock->sk; ++ int err; ++ ++ err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, ++ newsock); ++ if (err < 0) ++ goto done; ++ ++ err = sock->ops->accept(sock, *newsock, flags); ++ if (err < 0) { ++ sock_release(*newsock); ++ goto done; ++ } ++ ++ (*newsock)->ops = sock->ops; ++ ++done: ++ return err; ++} ++ ++int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, ++ int flags) ++{ ++ return sock->ops->connect(sock, addr, addrlen, flags); ++} ++ ++int kernel_getsockname(struct socket *sock, struct sockaddr *addr, ++ int *addrlen) ++{ ++ return sock->ops->getname(sock, addr, addrlen, 0); ++} ++ ++int kernel_getpeername(struct socket *sock, struct sockaddr *addr, ++ int *addrlen) ++{ ++ return sock->ops->getname(sock, addr, addrlen, 1); ++} ++ ++int kernel_getsockopt(struct socket *sock, int level, int optname, ++ char *optval, int *optlen) ++{ ++ mm_segment_t oldfs = get_fs(); ++ int err; ++ ++ set_fs(KERNEL_DS); ++ if (level == SOL_SOCKET) ++ err = sock_getsockopt(sock, level, optname, optval, optlen); ++ else ++ err = sock->ops->getsockopt(sock, level, optname, optval, ++ optlen); ++ set_fs(oldfs); ++ return err; ++} ++ ++int kernel_setsockopt(struct socket *sock, int level, 
int optname, ++ char *optval, int optlen) ++{ ++ mm_segment_t oldfs = get_fs(); ++ int err; ++ ++ set_fs(KERNEL_DS); ++ if (level == SOL_SOCKET) ++ err = sock_setsockopt(sock, level, optname, optval, optlen); ++ else ++ err = sock->ops->setsockopt(sock, level, optname, optval, ++ optlen); ++ set_fs(oldfs); ++ return err; ++} ++ ++int kernel_sendpage(struct socket *sock, struct page *page, int offset, ++ size_t size, int flags) ++{ ++ if (sock->ops->sendpage) ++ return sock->ops->sendpage(sock, page, offset, size, flags); ++ ++ return sock_no_sendpage(sock, page, offset, size, flags); ++} ++ ++int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) ++{ ++ mm_segment_t oldfs = get_fs(); ++ int err; ++ ++ set_fs(KERNEL_DS); ++ err = sock->ops->ioctl(sock, cmd, arg); ++ set_fs(oldfs); ++ ++ return err; ++} ++ ++/* ABI emulation layers need these two */ ++EXPORT_SYMBOL(move_addr_to_kernel); ++EXPORT_SYMBOL(move_addr_to_user); ++EXPORT_SYMBOL(sock_create); ++EXPORT_SYMBOL(sock_create_kern); ++EXPORT_SYMBOL(sock_create_lite); ++EXPORT_SYMBOL(sock_map_fd); ++EXPORT_SYMBOL(sock_recvmsg); ++EXPORT_SYMBOL(sock_register); ++EXPORT_SYMBOL(sock_release); ++EXPORT_SYMBOL(sock_sendmsg); ++EXPORT_SYMBOL(sock_unregister); ++EXPORT_SYMBOL(sock_wake_async); ++EXPORT_SYMBOL(sockfd_lookup); ++EXPORT_SYMBOL(kernel_sendmsg); ++EXPORT_SYMBOL(kernel_recvmsg); ++EXPORT_SYMBOL(kernel_bind); ++EXPORT_SYMBOL(kernel_listen); ++EXPORT_SYMBOL(kernel_accept); ++EXPORT_SYMBOL(kernel_connect); ++EXPORT_SYMBOL(kernel_getsockname); ++EXPORT_SYMBOL(kernel_getpeername); ++EXPORT_SYMBOL(kernel_getsockopt); ++EXPORT_SYMBOL(kernel_setsockopt); ++EXPORT_SYMBOL(kernel_sendpage); ++EXPORT_SYMBOL(kernel_sock_ioctl);
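
For reference, a minimal sketch of how the kernel_* socket wrappers exported at the end of this file are typically used from kernel code. This is illustrative only and not part of the patch; the listening port, static variable name, and function name below are made up for the example, and error handling is kept to the bare minimum.

/*
 * Illustrative only -- not part of the patch. Shows the intended use of
 * sock_create_kern() and the kernel_bind()/kernel_listen() wrappers
 * exported from net/socket.c above.
 */
#include <linux/net.h>
#include <linux/in.h>
#include <net/sock.h>

static struct socket *example_listen_sock;

static int example_start_listener(void)
{
	struct sockaddr_in addr = {
		.sin_family      = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port        = htons(12345),	/* arbitrary example port */
	};
	int err;

	/* Per this patch, sock_create_kern() creates the socket in &init_net. */
	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP,
			       &example_listen_sock);
	if (err < 0)
		return err;

	err = kernel_bind(example_listen_sock,
			  (struct sockaddr *)&addr, sizeof(addr));
	if (!err)
		err = kernel_listen(example_listen_sock, 5);
	if (err < 0) {
		sock_release(example_listen_sock);
		example_listen_sock = NULL;
	}
	return err;
}
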