From 77122f66710e7f2bd2910e1df7d5d3ae2974d461 Mon Sep 17 00:00:00 2001
From: Marc Fiuczynski
Date: Wed, 16 Jun 2004 18:16:03 +0000
Subject: [PATCH] ckrm-E13

---
 fs/Kconfig               |  56 +++++++++++--
 fs/Makefile              |   2 +
 fs/exec.c                |   3 +
 fs/proc/array.c          |  19 +++++
 fs/proc/base.c           |  18 +++++
 include/linux/sched.h    |  70 +++++++++++++++++
 include/linux/tcp.h      |  37 ++++++++-
 include/net/sock.h       |   1 +
 include/net/tcp.h        | 107 +++++++++++++++++++++++++
 init/Kconfig             |  78 +++++++++++++++++++
 init/main.c              |   4 +
 kernel/Makefile          |   2 +-
 kernel/exit.c            |   5 ++
 kernel/fork.c            |  11 +++
 kernel/sched.c           |  18 +++++
 kernel/sys.c             |  16 ++++
 mm/memory.c              |   9 ++-
 net/ipv4/Kconfig         |  23 ++++++
 net/ipv4/tcp.c           | 164 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_ipv4.c      |  36 ++++++++-
 net/ipv4/tcp_minisocks.c |   7 ++
 net/ipv4/tcp_timer.c     |  24 +++++-
 net/ipv6/tcp_ipv6.c      |  42 +++++++++-
 23 files changed, 730 insertions(+), 22 deletions(-)

diff --git a/fs/Kconfig b/fs/Kconfig
index 20fb92ab6..bbae9c942 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -912,6 +912,56 @@ config RAMFS
 	  To compile this as a module, choose M here: the module will be called
 	  ramfs.
 
+config RELAYFS_FS
+	tristate "Relayfs file system support"
+	---help---
+	  Relayfs is a high-speed data relay filesystem designed to provide
+	  an efficient mechanism for tools and facilities to relay large
+	  amounts of data from kernel space to user space. It's not useful
+	  on its own, and should only be enabled if other facilities that
+	  need it are enabled, such as for example klog or the Linux Trace
+	  Toolkit.
+
+	  See for further
+	  information.
+
+	  This file system is also available as a module ( = code which can be
+	  inserted in and removed from the running kernel whenever you want).
+	  The module is called relayfs. If you want to compile it as a
+	  module, say M here and read .
+
+	  If unsure, say N.
+
+config KLOG_CHANNEL
+	bool "Enable klog debugging support"
+	depends on RELAYFS_FS
+	help
+	  If you say Y to this, a relayfs channel named klog will be created
+	  in the root of the relayfs file system. You can write to the klog
+	  channel using klog() or klog_raw() from within the kernel or
+	  kernel modules, and read from the klog channel by mounting relayfs
+	  and using read(2) to read from it (or using cat). If you're not
+	  sure, say N.
+
+config KLOG_CHANNEL_AUTOENABLE
+	bool "Enable klog logging on startup"
+	depends on KLOG_CHANNEL
+	default y
+	help
+	  If you say Y to this, the klog channel will be automatically enabled
+	  on startup. Otherwise, to turn klog logging on, you need to use
+	  sysctl (fs.relayfs.klog_enabled). This option is used in cases where
+	  you don't actually want the channel to be written to until it's
+	  enabled. If you're not sure, say Y.
+
+config KLOG_CHANNEL_SHIFT
+	depends on KLOG_CHANNEL
+	int "klog debugging channel size (14 => 16KB, 22 => 4MB)"
+	range 14 22
+	default 21
+	help
+	  Select the klog debugging channel size as a power of 2.
+
 endmenu
 
 menu "Miscellaneous filesystems"
@@ -1176,8 +1226,6 @@ config HPFS_FS
 	  To compile this file system support as a module, choose M here: the
 	  module will be called hpfs. If unsure, say N.
 
-
-
 config QNX4FS_FS
 	tristate "QNX4 file system support (read only)"
 	help
@@ -1203,8 +1251,6 @@ config QNX4FS_RW
 
 	  It's currently broken, so for now: answer N.
 
-
-
 config SYSV_FS
 	tristate "System V/Xenix/V7/Coherent file system support"
 	help
@@ -1241,8 +1287,6 @@ config SYSV_FS
 
 	  If you haven't heard about all of this before, it's safe to say N.
- - config UFS_FS tristate "UFS file system support (read only)" help diff --git a/fs/Makefile b/fs/Makefile index a288c0cb3..0166f67e2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_RAMFS) += ramfs/ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ +obj-$(CONFIG_RELAYFS_FS) += relayfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_INTERMEZZO_FS) += intermezzo/ obj-$(CONFIG_MINIX_FS) += minix/ @@ -92,3 +93,4 @@ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ obj-$(CONFIG_AFS_FS) += afs/ obj-$(CONFIG_BEFS_FS) += befs/ +obj-$(CONFIG_RCFS_FS) += rcfs/ diff --git a/fs/exec.c b/fs/exec.c index f73d2c4cc..7ef46fec6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -1143,6 +1144,8 @@ int do_execve(char * filename, if (retval >= 0) { free_arg_pages(&bprm); + ckrm_cb_exec(filename); + /* execve success */ security_bprm_free(&bprm); return retval; diff --git a/fs/proc/array.c b/fs/proc/array.c index 6bdd15bfb..572197df3 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -424,3 +424,22 @@ int proc_pid_statm(struct task_struct *task, char *buffer) return sprintf(buffer,"%d %d %d %d %d %d %d\n", size, resident, shared, text, lib, data, 0); } + + +int proc_pid_delay(struct task_struct *task, char * buffer) +{ + int res; + + res = sprintf(buffer,"%lu %lu %lu %lu %lu %lu %lu\n", + get_delay(task,runs), + get_delay(task,runcpu_total), + get_delay(task,waitcpu_total), + get_delay(task,iowait_total), + get_delay(task,num_iowaits), + get_delay(task,mem_iowait_total), + get_delay(task,num_memwaits) + + ); + return res; +} + diff --git a/fs/proc/base.c b/fs/proc/base.c index 7ff742cec..4d1995e2d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -89,6 +89,10 @@ enum pid_directory_inos { PROC_TID_ATTR_PREV, PROC_TID_ATTR_EXEC, PROC_TID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_DELAY_ACCT + PROC_TID_DELAY_ACCT, + PROC_TGID_DELAY_ACCT, #endif PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; @@ -120,6 +124,9 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif +#ifdef CONFIG_DELAY_ACCT + E(PROC_TGID_DELAY_ACCT,"delay", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_KALLSYMS E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif @@ -142,6 +149,9 @@ static struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif +#ifdef CONFIG_DELAY_ACCT + E(PROC_TGID_DELAY_ACCT,"delay", S_IFREG|S_IRUGO), +#endif #ifdef CONFIG_KALLSYMS E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), #endif @@ -181,6 +191,7 @@ int proc_pid_stat(struct task_struct*,char*); int proc_pid_status(struct task_struct*,char*); int proc_pid_statm(struct task_struct*,char*); int proc_pid_cpu(struct task_struct*,char*); +int proc_pid_delay(struct task_struct*,char*); static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { @@ -1374,6 +1385,13 @@ static struct dentry *proc_pident_lookup(struct inode *dir, inode->i_fop = &proc_info_file_operations; ei->op.proc_read = proc_pid_wchan; break; +#endif +#ifdef CONFIG_DELAY_ACCT + case PROC_TID_DELAY_ACCT: + case PROC_TGID_DELAY_ACCT: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_delay; + break; #endif default: printk("procfs: impossible type (%d)",p->type); diff --git a/include/linux/sched.h b/include/linux/sched.h index 73d7127e3..7bfbdc7cd 100644 --- a/include/linux/sched.h +++ 
b/include/linux/sched.h @@ -133,6 +133,7 @@ struct sched_param { #ifdef __KERNEL__ +#include #include /* @@ -504,6 +505,18 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ + +#ifdef CONFIG_CKRM + spinlock_t ckrm_tsklock; + void *ce_data; +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + // .. Hubertus should change to CONFIG_CKRM_TYPE_TASKCLASS + struct ckrm_task_class *taskclass; + struct list_head taskclass_link; +#endif // CONFIG_CKRM_TYPE_TASKCLASS +#endif // CONFIG_CKRM + + struct task_delay_info delays; }; static inline pid_t process_group(struct task_struct *tsk) @@ -541,6 +554,9 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ +#define PF_MEMIO 0x00400000 /* I am potentially doing I/O for mem */ +#define PF_IOWAIT 0x00800000 /* I am waiting on disk I/O */ + #ifdef CONFIG_SMP extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); #else @@ -944,6 +960,60 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ + +/* API for registering delay info */ +#ifdef CONFIG_DELAY_ACCT + +#define test_delay_flag(tsk,flg) ((tsk)->flags & (flg)) +#define set_delay_flag(tsk,flg) ((tsk)->flags |= (flg)) +#define clear_delay_flag(tsk,flg) ((tsk)->flags &= ~(flg)) + +#define def_delay_var(var) unsigned long long var +#define get_delay(tsk,field) ((tsk)->delays.field) +#define delay_value(x) (((unsigned long)(x))/1000) + +#define start_delay(var) ((var) = sched_clock()) +#define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock()) + +#define inc_delay(tsk,field) (((tsk)->delays.field)++) +#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts))) +#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg)) + +static inline void add_io_delay(unsigned long dstart) +{ + struct task_struct * tsk = current; + unsigned long val = delay_value(sched_clock()-dstart); + if (test_delay_flag(tsk,PF_MEMIO)) { + tsk->delays.mem_iowait_total += val; + tsk->delays.num_memwaits++; + } else { + tsk->delays.iowait_total += val; + tsk->delays.num_iowaits++; + } + clear_delay_flag(tsk,PF_IOWAIT); +} + + +#else + +#define test_delay_flag(tsk,flg) (0) +#define set_delay_flag(tsk,flg) do { } while (0) +#define clear_delay_flag(tsk,flg) do { } while (0) + +#define def_delay_var(var) +#define get_delay(tsk,field) (0) + +#define start_delay(var) do { } while (0) +#define start_delay_set(var,flg) do { } while (0) + +#define inc_delay(tsk,field) do { } while (0) +#define add_delay_ts(tsk,field,start_ts,now) do { } while (0) +#define add_delay_clear(tsk,field,start_ts,flg) do { } while (0) +#define add_io_delay(dstart) do { } while (0) +#endif + + + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b6511999f..960b8537b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -128,6 +128,10 @@ enum { #define TCP_INFO 11 /* Information about this connection. 
*/ #define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#ifdef CONFIG_ACCEPT_QUEUES +#define TCP_ACCEPTQ_SHARE 13 /* Set accept queue share */ +#endif + #define TCPI_OPT_TIMESTAMPS 1 #define TCPI_OPT_SACK 2 #define TCPI_OPT_WSCALE 4 @@ -185,6 +189,18 @@ struct tcp_info __u32 tcpi_reordering; }; +#ifdef CONFIG_ACCEPT_QUEUES + +#define NUM_ACCEPT_QUEUES 8 /* Must be power of 2 */ + +struct tcp_acceptq_info { + unsigned char acceptq_shares; + unsigned long acceptq_wait_time; + unsigned int acceptq_qcount; + unsigned int acceptq_count; +}; +#endif + #ifdef __KERNEL__ #include @@ -366,8 +382,9 @@ struct tcp_opt { /* FIFO of established children */ struct open_request *accept_queue; - struct open_request *accept_queue_tail; - +#ifndef CONFIG_ACCEPT_QUEUES + struct open_request *accept_queue_tail; +#endif int write_pending; /* A write to socket waits to start. */ unsigned int keepalive_time; /* time before keep alive takes place */ @@ -407,6 +424,22 @@ struct tcp_opt { __u32 last_max_cwnd; /* last maximium snd_cwnd */ __u32 last_cwnd; /* the last snd_cwnd */ } bictcp; + +#ifdef CONFIG_ACCEPT_QUEUES + /* move to listen opt... */ + char class_index; + struct { + struct open_request *aq_head; + struct open_request *aq_tail; + unsigned int aq_cnt; + unsigned int aq_ratio; + unsigned int aq_count; + unsigned int aq_qcount; + unsigned int aq_backlog; + unsigned int aq_wait_time; + int aq_valid; + } acceptq[NUM_ACCEPT_QUEUES]; +#endif }; /* WARNING: don't change the layout of the members in tcp_sock! */ diff --git a/include/net/sock.h b/include/net/sock.h index e01e61768..401a0446c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -245,6 +245,7 @@ struct sock { struct timeval sk_stamp; struct socket *sk_socket; void *sk_user_data; + void *sk_ns; // For use by CKRM struct module *sk_owner; void *sk_security; void (*sk_state_change)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index cba1f701e..ec69ae6ed 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,6 +669,10 @@ struct open_request { struct tcp_v6_open_req v6_req; #endif } af; +#ifdef CONFIG_ACCEPT_QUEUES + unsigned long acceptq_time_stamp; + int acceptq_class; +#endif }; /* SLAB cache for open requests. 
*/ @@ -1789,6 +1793,69 @@ static inline int tcp_full_space( struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } +#ifdef CONFIG_ACCEPT_QUEUES +static inline void tcp_acceptq_removed(struct sock *sk, int class) +{ + tcp_sk(sk)->acceptq[class].aq_backlog--; +} + +static inline void tcp_acceptq_added(struct sock *sk, int class) +{ + tcp_sk(sk)->acceptq[class].aq_backlog++; +} + +static inline int tcp_acceptq_is_full(struct sock *sk, int class) +{ + return tcp_sk(sk)->acceptq[class].aq_backlog > + sk->sk_max_ack_backlog; +} + +static inline void tcp_set_acceptq(struct tcp_opt *tp, struct open_request *req) +{ + int class = req->acceptq_class; + int prev_class; + + if (!tp->acceptq[class].aq_ratio) { + req->acceptq_class = 0; + class = 0; + } + + tp->acceptq[class].aq_qcount++; + req->acceptq_time_stamp = jiffies; + + if (tp->acceptq[class].aq_tail) { + req->dl_next = tp->acceptq[class].aq_tail->dl_next; + tp->acceptq[class].aq_tail->dl_next = req; + tp->acceptq[class].aq_tail = req; + } else { /* if first request in the class */ + tp->acceptq[class].aq_head = req; + tp->acceptq[class].aq_tail = req; + + prev_class = class - 1; + while (prev_class >= 0) { + if (tp->acceptq[prev_class].aq_tail) + break; + prev_class--; + } + if (prev_class < 0) { + req->dl_next = tp->accept_queue; + tp->accept_queue = req; + } + else { + req->dl_next = tp->acceptq[prev_class].aq_tail->dl_next; + tp->acceptq[prev_class].aq_tail->dl_next = req; + } + } +} +static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, + struct sock *child) +{ + tcp_set_acceptq(tcp_sk(sk),req); + req->sk = child; + tcp_acceptq_added(sk,req->acceptq_class); +} + +#else static inline void tcp_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; @@ -1821,16 +1888,55 @@ static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, req->dl_next = NULL; } +#endif + struct tcp_listen_opt { u8 max_qlen_log; /* log_2 of maximal queued SYNs */ int qlen; +#ifdef CONFIG_ACCEPT_QUEUES + int qlen_young[NUM_ACCEPT_QUEUES]; +#else int qlen_young; +#endif int clock_hand; u32 hash_rnd; struct open_request *syn_table[TCP_SYNQ_HSIZE]; }; +#ifdef CONFIG_ACCEPT_QUEUES +static inline void +tcp_synq_removed(struct sock *sk, struct open_request *req) +{ + struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; + + if (--lopt->qlen == 0) + tcp_delete_keepalive_timer(sk); + if (req->retrans == 0) + lopt->qlen_young[req->acceptq_class]--; +} + +static inline void tcp_synq_added(struct sock *sk, struct open_request *req) +{ + struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; + + if (lopt->qlen++ == 0) + tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + lopt->qlen_young[req->acceptq_class]++; +} + +static inline int tcp_synq_len(struct sock *sk) +{ + return tcp_sk(sk)->listen_opt->qlen; +} + +static inline int tcp_synq_young(struct sock *sk, int class) +{ + return tcp_sk(sk)->listen_opt->qlen_young[class]; +} + +#else + static inline void tcp_synq_removed(struct sock *sk, struct open_request *req) { @@ -1860,6 +1966,7 @@ static inline int tcp_synq_young(struct sock *sk) { return tcp_sk(sk)->listen_opt->qlen_young; } +#endif static inline int tcp_synq_is_full(struct sock *sk) { diff --git a/init/Kconfig b/init/Kconfig index 337feb102..f2bbec792 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -121,6 +121,75 @@ config BSD_PROCESS_ACCT up to the user level program to do useful things with this information. This is generally a good idea, so say Y. 
+menu "Class Based Kernel Resource Management" + +config CKRM + bool "Class Based Kernel Resource Management Core" + depends on EXPERIMENTAL + help + Class-based Kernel Resource Management is a framework for controlling + and monitoring resource allocation of user-defined groups of tasks or + incoming socket connections. For more information, please visit + http://ckrm.sf.net. + + If you say Y here, enable the Resource Class File System and atleast + one of the resource controllers below. Say N if you are unsure. + +config RCFS_FS + tristate "Resource Class File System (User API)" + depends on CKRM + help + RCFS is the filesystem API for CKRM. This separate configuration + option is provided only for debugging and will eventually disappear + since rcfs will be automounted whenever CKRM is configured. + + Say N if unsure, Y if you've enabled CKRM, M to debug rcfs + initialization. + +config CKRM_TYPE_TASKCLASS + bool "Class Manager for Task Groups" + depends on CKRM + help + TASKCLASS provides the extensions for CKRM to track task classes + This is the base to enable task class based resource control for + cpu, memory and disk I/O. + + Say N if unsure + +config CKRM_RES_NUMTASKS + tristate "Number of Tasks Resource Manager" + depends on CKRM_TYPE_TASKCLASS + default m + help + Provides a Resource Controller for CKRM that allows limiting no of + tasks a task class can have. + + Say N if unsure, Y to use the feature. + +config CKRM_TYPE_SOCKETCLASS + bool "Class Manager for socket groups" + depends on CKRM + help + SOCKET provides the extensions for CKRM to track per socket + classes. This is the base to enable socket based resource + control for inbound connection control, bandwidth control etc. + + Say N if unsure. + +config CKRM_RES_LISTENAQ + tristate "Multiple Accept Queues Resource Manager" + depends on CKRM_TYPE_SOCKETCLASS && ACCEPT_QUEUES + default m + help + Provides a resource controller for CKRM to prioritize inbound + connection requests. See inbound control description for + "IP: TCP Multiple accept queues support". If you choose that + option choose this option to control the queue weights. + + If unsure, say N. + +endmenu + config SYSCTL bool "Sysctl support" ---help--- @@ -227,6 +296,15 @@ menuconfig EMBEDDED environments which can tolerate a "non-standard" kernel. Only use this if you really know what you are doing. +config DELAY_ACCT + bool "Enable delay accounting (EXPERIMENTAL)" + help + In addition to counting frequency the total delay in ns is also + recorded. CPU delays are specified as cpu-wait and cpu-run. Memory + delay is recorded for minor and major faults. Information is + accessible through /proc//delay. + + config KALLSYMS bool "Load all symbols for debugging/kksymoops" if EMBEDDED default y diff --git a/init/main.c b/init/main.c index 77e962511..173997886 100644 --- a/init/main.c +++ b/init/main.c @@ -46,6 +46,8 @@ #include #include +#include + /* * This is one of the first .c files built. Error out early * if we have compiler trouble.. 
@@ -428,6 +430,7 @@ asmlinkage void __init start_kernel(void) rcu_init(); init_IRQ(); pidhash_init(); + ckrm_init(); sched_init(); softirq_init(); time_init(); @@ -475,6 +478,7 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_PROC_FS proc_root_init(); #endif + check_bugs(); printk("POSIX conformance testing by UNIFIX\n"); diff --git a/kernel/Makefile b/kernel/Makefile index 238c65f60..107df8eb1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o + kthread.o ckrm/ obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o diff --git a/kernel/exit.c b/kernel/exit.c index 96d7394cc..b5e3aea7c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -637,6 +639,8 @@ static void exit_notify(struct task_struct *tsk) int state; struct task_struct *t; + ckrm_cb_exit(tsk); + if (signal_pending(tsk) && !tsk->signal->group_exit && !thread_group_empty(tsk)) { /* @@ -799,6 +803,7 @@ asmlinkage NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; + numtasks_put_ref(tsk->taskclass); exit_notify(tsk); schedule(); BUG(); diff --git a/kernel/fork.c b/kernel/fork.c index 68597bc34..676eaa1cf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -260,6 +262,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->thread_info = ti; ti->task = tsk; + ckrm_cb_newtask(tsk); /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); return tsk; @@ -1157,6 +1160,10 @@ long do_fork(unsigned long clone_flags, clone_flags |= CLONE_PTRACE; } + if (numtasks_get_ref(current->taskclass, 0) == 0) { + return -ENOMEM; + } + p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr); /* * Do this prior waking up the new thread - the thread pointer @@ -1167,6 +1174,8 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; + ckrm_cb_fork(p); + if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); @@ -1201,6 +1210,8 @@ long do_fork(unsigned long clone_flags, * COW overhead when the child exec()s afterwards. 
*/ set_need_resched(); + } else { + numtasks_put_ref(current->taskclass); } return pid; } diff --git a/kernel/sched.c b/kernel/sched.c index 1493acff5..b05d1b6e6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1706,9 +1706,12 @@ switch_tasks: if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev))) prev->interactive_credit--; } + add_delay_ts(prev,runcpu_total,prev->timestamp,now); prev->timestamp = now; if (likely(prev != next)) { + add_delay_ts(next,waitcpu_total,next->timestamp,now); + inc_delay(next,runs); next->timestamp = now; rq->nr_switches++; rq->curr = next; @@ -2457,10 +2460,13 @@ EXPORT_SYMBOL(yield); void __sched io_schedule(void) { struct runqueue *rq = this_rq(); + def_delay_var(dstart); + start_delay_set(dstart,PF_IOWAIT); atomic_inc(&rq->nr_iowait); schedule(); atomic_dec(&rq->nr_iowait); + add_io_delay(dstart); } EXPORT_SYMBOL(io_schedule); @@ -2469,10 +2475,13 @@ long __sched io_schedule_timeout(long timeout) { struct runqueue *rq = this_rq(); long ret; + def_delay_var(dstart); + start_delay_set(dstart,PF_IOWAIT); atomic_inc(&rq->nr_iowait); ret = schedule_timeout(timeout); atomic_dec(&rq->nr_iowait); + add_io_delay(dstart); return ret; } @@ -3053,3 +3062,12 @@ void __sched __preempt_write_lock(rwlock_t *lock) EXPORT_SYMBOL(__preempt_write_lock); #endif /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) */ + +#ifdef CONFIG_DELAY_ACCT +int task_running_sys(struct task_struct *p) +{ + return task_running(task_rq(p),p); +} +EXPORT_SYMBOL(task_running_sys); +#endif + diff --git a/kernel/sys.c b/kernel/sys.c index 4d414d925..bf057b71d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -593,6 +594,9 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->fsgid = new_egid; current->egid = new_egid; current->gid = new_rgid; + + ckrm_cb_gid(); + return 0; } @@ -630,6 +634,9 @@ asmlinkage long sys_setgid(gid_t gid) } else return -EPERM; + + ckrm_cb_gid(); + return 0; } @@ -718,6 +725,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->suid = current->euid; current->fsuid = current->euid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -763,6 +772,8 @@ asmlinkage long sys_setuid(uid_t uid) current->fsuid = current->euid = uid; current->suid = new_suid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -809,6 +820,8 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (suid != (uid_t) -1) current->suid = suid; + ckrm_cb_uid(); + return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -858,6 +871,9 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->gid = rgid; if (sgid != (gid_t) -1) current->sgid = sgid; + + ckrm_cb_gid(); + return 0; } diff --git a/mm/memory.c b/mm/memory.c index 5ae7c99ae..78865fb45 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1695,15 +1695,20 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. 
*/ + set_delay_flag(current,PF_MEMIO); spin_lock(&mm->page_table_lock); pmd = pmd_alloc(mm, pgd, address); if (pmd) { pte_t * pte = pte_alloc_map(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte, pmd); + if (pte) { + int rc = handle_pte_fault(mm, vma, address, write_access, pte, pmd); + clear_delay_flag(current,PF_MEMIO); + return rc; + } } spin_unlock(&mm->page_table_lock); + clear_delay_flag(current,PF_MEMIO); return VM_FAULT_OOM; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 0bb1b8808..d59797c7e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -360,5 +360,28 @@ config INET_IPCOMP If unsure, say Y. +config ACCEPT_QUEUES + bool "IP: TCP Multiple accept queues support" + depends on INET && NETFILTER + ---help--- + Support multiple accept queues per listening socket. If you say Y + here, multiple accept queues will be configured per listening + socket. + + Each queue is mapped to a priority class. Incoming connection + requests can be classified (see iptables(8), MARK target), depending + on the packet's src/dest address or other parameters, into one of + the priority classes. The requests are then queued to the relevant + accept queue. + + Each of the queues can be assigned a weight. The accept()ance + of packets is then scheduled in accordance with the weight + assigned to the priority class. + + Be sure to enable "Network packet filtering" if you wish + to use this feature. + + If unsure, say N. + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f5d991b37..b2e0fd592 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -256,6 +256,7 @@ #include #include #include +#include #include #include @@ -534,13 +535,34 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { +#ifdef CONFIG_ACCEPT_QUEUES + int i = 0; +#endif struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_listen_opt *lopt; sk->sk_max_ack_backlog = 0; sk->sk_ack_backlog = 0; - tp->accept_queue = tp->accept_queue_tail = NULL; + tp->accept_queue = NULL; +#ifdef CONFIG_ACCEPT_QUEUES + tp->class_index = 0; + for (i=0; i < NUM_ACCEPT_QUEUES; i++) { + tp->acceptq[i].aq_tail = NULL; + tp->acceptq[i].aq_head = NULL; + tp->acceptq[i].aq_wait_time = 0; + tp->acceptq[i].aq_qcount = 0; + tp->acceptq[i].aq_count = 0; + if (i == 0) { + tp->acceptq[i].aq_valid = 1; + tp->acceptq[i].aq_ratio = 1; + } + else { + tp->acceptq[i].aq_valid = 0; + tp->acceptq[i].aq_ratio = 0; + } + } +#endif tp->syn_wait_lock = RW_LOCK_UNLOCKED; tcp_delack_init(tp); @@ -570,6 +592,10 @@ int tcp_listen_start(struct sock *sk) sk_dst_reset(sk); sk->sk_prot->hash(sk); +#ifdef CONFIG_CKRM + ckrm_cb_listen_start(sk); +#endif + return 0; } @@ -600,7 +626,18 @@ static void tcp_listen_stop (struct sock *sk) write_lock_bh(&tp->syn_wait_lock); tp->listen_opt = NULL; write_unlock_bh(&tp->syn_wait_lock); - tp->accept_queue = tp->accept_queue_tail = NULL; + +#ifdef CONFIG_CKRM + ckrm_cb_listen_stop(sk); +#endif + +#ifdef CONFIG_ACCEPT_QUEUES + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) + tp->acceptq[i].aq_head = tp->acceptq[i].aq_tail = NULL; +#else + tp->accept_queue_tail = NULL; +#endif + tp->accept_queue = NULL; if (lopt->qlen) { for (i = 0; i < TCP_SYNQ_HSIZE; i++) { @@ -646,7 +683,11 @@ static void tcp_listen_stop (struct sock *sk) local_bh_enable(); sock_put(child); +#ifdef CONFIG_ACCEPT_QUEUES + tcp_acceptq_removed(sk, req->acceptq_class); +#else tcp_acceptq_removed(sk); +#endif tcp_openreq_fastfree(req); } 
BUG_TRAP(!sk->sk_ack_backlog); @@ -2227,6 +2268,10 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) struct open_request *req; struct sock *newsk; int error; +#ifdef CONFIG_ACCEPT_QUEUES + int prev_class = 0; + int first; +#endif lock_sock(sk); @@ -2240,7 +2285,6 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) /* Find already established connection */ if (!tp->accept_queue) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - /* If this is a non blocking socket don't sleep */ error = -EAGAIN; if (!timeo) @@ -2251,12 +2295,46 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out; } +#ifndef CONFIG_ACCEPT_QUEUES req = tp->accept_queue; if ((tp->accept_queue = req->dl_next) == NULL) tp->accept_queue_tail = NULL; - newsk = req->sk; tcp_acceptq_removed(sk); +#else + first = tp->class_index; + /* We should always have request queued here. The accept_queue + * is already checked for NULL above. + */ + while(!tp->acceptq[first].aq_head) { + tp->acceptq[first].aq_cnt = 0; + first = (first+1) & ~NUM_ACCEPT_QUEUES; + } + req = tp->acceptq[first].aq_head; + tp->acceptq[first].aq_qcount--; + tp->acceptq[first].aq_count++; + tp->acceptq[first].aq_wait_time+=(jiffies - req->acceptq_time_stamp); + + for (prev_class= first-1 ; prev_class >=0; prev_class--) + if (tp->acceptq[prev_class].aq_tail) + break; + if (prev_class>=0) + tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next; + else + tp->accept_queue = req->dl_next; + + if (req == tp->acceptq[first].aq_tail) + tp->acceptq[first].aq_head = tp->acceptq[first].aq_tail = NULL; + else + tp->acceptq[first].aq_head = req->dl_next; + + if((++(tp->acceptq[first].aq_cnt)) >= tp->acceptq[first].aq_ratio){ + tp->acceptq[first].aq_cnt = 0; + tp->class_index = ++first & ~NUM_ACCEPT_QUEUES; + } + tcp_acceptq_removed(sk, req->acceptq_class); +#endif + newsk = req->sk; tcp_openreq_fastfree(req); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); release_sock(sk); @@ -2426,6 +2504,49 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, } } break; + +#ifdef CONFIG_ACCEPT_QUEUES + case TCP_ACCEPTQ_SHARE: + { + char share_wt[NUM_ACCEPT_QUEUES]; + int i,j; + + if (sk->sk_state != TCP_LISTEN) + return -EOPNOTSUPP; + + if (copy_from_user(share_wt,optval, optlen)) { + err = -EFAULT; + break; + } + j = 0; + for (i = 0; i < NUM_ACCEPT_QUEUES; i++) { + if (share_wt[i]) { + if (!j) + j = share_wt[i]; + else if (share_wt[i] < j) { + j = share_wt[i]; + } + tp->acceptq[i].aq_valid = 1; + } + else + tp->acceptq[i].aq_valid = 0; + + } + if (j == 0) { + /* Class 0 is always valid. If nothing is + * specified set class 0 as 1. 
+ */ + share_wt[0] = 1; + tp->acceptq[0].aq_valid = 1; + j = 1; + } + for (i=0; i < NUM_ACCEPT_QUEUES; i++) { + tp->acceptq[i].aq_ratio = share_wt[i]/j; + tp->acceptq[i].aq_cnt = 0; + } + } + break; +#endif default: err = -ENOPROTOOPT; @@ -2552,6 +2673,41 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, case TCP_QUICKACK: val = !tp->ack.pingpong; break; + +#ifdef CONFIG_ACCEPT_QUEUES + case TCP_ACCEPTQ_SHARE: { + struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES]; + int i; + + if (sk->sk_state != TCP_LISTEN) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + memset(tinfo, 0, sizeof(tinfo)); + + for(i=0; i < NUM_ACCEPT_QUEUES; i++) { + tinfo[i].acceptq_wait_time = + tp->acceptq[i].aq_wait_time/(HZ/USER_HZ); + tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount; + tinfo[i].acceptq_count = tp->acceptq[i].aq_count; + if (tp->acceptq[i].aq_valid) + tinfo[i].acceptq_shares=tp->acceptq[i].aq_ratio; + else + tinfo[i].acceptq_shares = 0; + } + + len = min_t(unsigned int, len, sizeof(tinfo)); + if (put_user(len, optlen)) + return -EFAULT; + + if (copy_to_user(optval, (char *)tinfo, len)) + return -EFAULT; + + return 0; + } +#endif default: return -ENOPROTOOPT; }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ac9211504..fb4448fe9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -458,7 +458,6 @@ inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, head = &tcp_listening_hash[tcp_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_opt *inet = inet_sk((sk = __sk_head(head))); - if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && @@ -916,7 +915,11 @@ static void tcp_v4_synq_add(struct sock *sk, struct open_request *req) lopt->syn_table[h] = req; write_unlock(&tp->syn_wait_lock); +#ifdef CONFIG_ACCEPT_QUEUES + tcp_synq_added(sk, req); +#else tcp_synq_added(sk); +#endif } @@ -1413,6 +1416,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __u32 daddr = skb->nh.iph->daddr; __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; +#ifdef CONFIG_ACCEPT_QUEUES + int class = 0; +#endif #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1437,12 +1443,31 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } +#ifdef CONFIG_ACCEPT_QUEUES + class = (skb->nfmark <= 0) ? 0 : + ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark); + /* + * Accept only if the class has shares set or if the default class + * i.e. class 0 has shares + */ + if (!(tcp_sk(sk)->acceptq[class].aq_valid)) { + if (tcp_sk(sk)->acceptq[0].aq_valid) + class = 0; + else + goto drop; + } +#endif + /* Accept backlog is full. If we have already queued enough * of warm entries in syn queue, drop request. It is better than * clogging syn queue with openreqs with exponentially increasing * timeout. 
*/ +#ifdef CONFIG_ACCEPT_QUEUES + if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1) +#else if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) +#endif goto drop; req = tcp_openreq_alloc(); @@ -1472,7 +1497,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tp.tstamp_ok = tp.saw_tstamp; tcp_openreq_init(req, &tp, skb); - +#ifdef CONFIG_ACCEPT_QUEUES + req->acceptq_class = class; + req->acceptq_time_stamp = jiffies; +#endif req->af.v4_req.loc_addr = daddr; req->af.v4_req.rmt_addr = saddr; req->af.v4_req.opt = tcp_v4_save_options(sk, skb); @@ -1567,7 +1595,11 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct tcp_opt *newtp; struct sock *newsk; +#ifdef CONFIG_ACCEPT_QUEUES + if (tcp_acceptq_is_full(sk, req->acceptq_class)) +#else if (tcp_acceptq_is_full(sk)) +#endif goto exit_overflow; if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d4c0d84d1..6a4578035 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -790,7 +790,14 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->num_sacks = 0; newtp->urg_data = 0; newtp->listen_opt = NULL; +#ifdef CONFIG_ACCEPT_QUEUES + newtp->accept_queue = NULL; + memset(newtp->acceptq, 0,sizeof(newtp->acceptq)); + newtp->class_index = 0; + +#else newtp->accept_queue = newtp->accept_queue_tail = NULL; +#endif /* Deinitialize syn_wait_lock to trap illegal accesses. */ memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 58796c6c9..695f8befa 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -498,7 +498,16 @@ static void tcp_synack_timer(struct sock *sk) * ones are about to clog our table. 
*/ if (lopt->qlen>>(lopt->max_qlen_log-1)) { +#ifdef CONFIG_ACCEPT_QUEUES + int young = 0; + + for(i=0; i < NUM_ACCEPT_QUEUES; i++) + young += lopt->qlen_young[i]; + + young <<= 1; +#else int young = (lopt->qlen_young<<1); +#endif while (thresh > 2) { if (lopt->qlen < young) @@ -524,9 +533,12 @@ static void tcp_synack_timer(struct sock *sk) unsigned long timeo; if (req->retrans++ == 0) - lopt->qlen_young--; - timeo = min((TCP_TIMEOUT_INIT << req->retrans), - TCP_RTO_MAX); +#ifdef CONFIG_ACCEPT_QUEUES + lopt->qlen_young[req->acceptq_class]--; +#else + lopt->qlen_young--; +#endif + timeo = min((TCP_TIMEOUT_INIT << req->retrans), TCP_RTO_MAX); req->expires = now + timeo; reqp = &req->dl_next; continue; @@ -538,7 +550,11 @@ static void tcp_synack_timer(struct sock *sk) write_unlock(&tp->syn_wait_lock); lopt->qlen--; if (req->retrans == 0) - lopt->qlen_young--; +#ifdef CONFIG_ACCEPT_QUEUES + lopt->qlen_young[req->acceptq_class]--; +#else + lopt->qlen_young--; +#endif tcp_openreq_free(req); continue; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2949cd143..d2a34fd98 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1154,7 +1154,11 @@ static void tcp_v6_synq_add(struct sock *sk, struct open_request *req) lopt->syn_table[h] = req; write_unlock(&tp->syn_wait_lock); +#ifdef CONFIG_ACCEPT_QUEUES + tcp_synq_added(sk, req); +#else tcp_synq_added(sk); +#endif } @@ -1167,6 +1171,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_opt tmptp, *tp = tcp_sk(sk); struct open_request *req = NULL; __u32 isn = TCP_SKB_CB(skb)->when; +#ifdef CONFIG_ACCEPT_QUEUES + int class = 0; +#endif if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -1174,6 +1181,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; + /* * There are no SYN attacks on IPv6, yet... */ @@ -1183,9 +1191,34 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } +#ifdef CONFIG_ACCEPT_QUEUES + class = (skb->nfmark <= 0) ? 0 : + ((skb->nfmark >= NUM_ACCEPT_QUEUES) ? 0: skb->nfmark); + /* + * Accept only if the class has shares set or if the default class + * i.e. class 0 has shares + */ + if (!(tcp_sk(sk)->acceptq[class].aq_valid)) { + if (tcp_sk(sk)->acceptq[0].aq_valid) + class = 0; + else + goto drop; + } +#endif + + /* Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. 
+ */ +#ifdef CONFIG_ACCEPT_QUEUES + if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1) +#else if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) +#endif goto drop; + req = tcp_openreq_alloc(); if (req == NULL) goto drop; @@ -1198,7 +1231,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tmptp.tstamp_ok = tmptp.saw_tstamp; tcp_openreq_init(req, &tmptp, skb); - +#ifdef CONFIG_ACCEPT_QUEUES + req->acceptq_class = class; + req->acceptq_time_stamp = jiffies; +#endif req->class = &or_ipv6; ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr); ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr); @@ -1300,7 +1336,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, opt = np->opt; +#ifdef CONFIG_ACCEPT_QUEUES + if (tcp_acceptq_is_full(sk, req->acceptq_class)) +#else if (tcp_acceptq_is_full(sk)) +#endif goto out_overflow; if (np->rxopt.bits.srcrt == 2 && -- 2.43.0
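Usage sketch (illustrative, not part of the patch): with CONFIG_ACCEPT_QUEUES, incoming connection requests are classified by skb->nfmark -- typically set with the iptables MARK target, as the net/ipv4/Kconfig help above notes -- into one of NUM_ACCEPT_QUEUES (8) per-listener queues; marks outside 1..7, or marks whose class has no share configured, fall back to class 0. The new TCP_ACCEPTQ_SHARE option (value 13) sets the per-class weights on a socket in TCP_LISTEN state and, on read, returns per-class statistics as an array of struct tcp_acceptq_info. The sketch below mirrors the constants and the struct from the patched <linux/tcp.h> rather than relying on installed headers; the port number and share values are arbitrary examples.

/* Illustrative sketch -- assumes a kernel with this patch applied. */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define TCP_ACCEPTQ_SHARE 13		/* as added to include/linux/tcp.h */
#define NUM_ACCEPT_QUEUES 8

struct tcp_acceptq_info {		/* as defined in the patched linux/tcp.h */
	unsigned char acceptq_shares;
	unsigned long acceptq_wait_time;
	unsigned int acceptq_qcount;
	unsigned int acceptq_count;
};

int main(void)
{
	int lsk = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr = { .sin_family = AF_INET,
				    .sin_port = htons(8080),
				    .sin_addr.s_addr = INADDR_ANY };
	char shares[NUM_ACCEPT_QUEUES] = { 1, 0, 4, 0, 0, 0, 0, 0 };
	struct tcp_acceptq_info info[NUM_ACCEPT_QUEUES];
	socklen_t len = sizeof(info);
	int i;

	if (lsk < 0 || bind(lsk, (struct sockaddr *)&addr, sizeof(addr)) ||
	    listen(lsk, 128)) {
		perror("listen socket");
		return 1;
	}

	/* Class 0 keeps weight 1, class 2 gets weight 4: roughly four class-2
	 * connections are accepted for every class-0 connection when both
	 * queues are backlogged.  The option is only valid in TCP_LISTEN. */
	if (setsockopt(lsk, IPPROTO_TCP, TCP_ACCEPTQ_SHARE,
		       shares, sizeof(shares)))
		perror("TCP_ACCEPTQ_SHARE");

	/* Reading the same option returns the per-class statistics. */
	if (getsockopt(lsk, IPPROTO_TCP, TCP_ACCEPTQ_SHARE, info, &len) == 0)
		for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
			printf("class %d: share=%u queued=%u accepted=%u wait=%lu\n",
			       i, info[i].acceptq_shares, info[i].acceptq_qcount,
			       info[i].acceptq_count, info[i].acceptq_wait_time);

	close(lsk);
	return 0;
}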
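The accept-side scheduling added to tcp_accept() services the classes in weighted round-robin order: aq_ratio is each class's share divided by the smallest configured share, and class_index advances once aq_cnt requests have been taken from the current class. A toy user-space model of that rotation follows; it is a simplification (it assumes every configured class stays backlogged and wraps with %, whereas the patch advances class_index with a bit mask) and is not code from the patch.

/* Toy model of the weighted round-robin in tcp_accept() -- illustrative only. */
#include <stdio.h>

#define NUM_ACCEPT_QUEUES 8

int main(void)
{
	/* aq_ratio as computed by TCP_ACCEPTQ_SHARE: share / smallest non-zero share */
	unsigned int aq_ratio[NUM_ACCEPT_QUEUES] = { 1, 0, 4, 0, 0, 0, 2, 0 };
	unsigned int aq_cnt[NUM_ACCEPT_QUEUES] = { 0 };
	int class_index = 0, accepted;

	for (accepted = 0; accepted < 14; accepted++) {
		/* Skip classes with no configured share; in the kernel the walk
		 * also skips classes whose queue is currently empty. */
		while (aq_ratio[class_index] == 0) {
			aq_cnt[class_index] = 0;
			class_index = (class_index + 1) % NUM_ACCEPT_QUEUES;
		}
		printf("accept #%2d served from class %d\n", accepted, class_index);
		if (++aq_cnt[class_index] >= aq_ratio[class_index]) {
			aq_cnt[class_index] = 0;
			class_index = (class_index + 1) % NUM_ACCEPT_QUEUES;
		}
	}
	return 0;
}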