From fea0f430d41789c732e3c2f684e385a337d20cbf Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Fri, 21 Jan 2005 03:34:26 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'after-bindmountpatch-merge'. --- Documentation/ckrm/block_io | 154 -- Documentation/ckrm/ckrm_basics | 66 - Documentation/ckrm/core_usage | 72 - Documentation/ckrm/crbce | 33 - Documentation/ckrm/installation | 70 - Documentation/ckrm/mem_rc.design | 134 -- Documentation/ckrm/mem_rc.usage | 72 - Documentation/ckrm/rbce_basics | 67 - Documentation/ckrm/rbce_usage | 98 - Makefile | 2 +- arch/i386/kernel/entry.S | 2 - arch/ppc/kernel/misc.S | 2 - ...kernel-2.6.8-i686-planetlab-desktop.config | 1750 ----------------- configs/kernel-2.6.8-i686-planetlab.config | 33 +- drivers/block/Makefile | 3 +- drivers/block/cfq-iosched.c | 1061 ++-------- drivers/block/ckrm-io.c | 163 +- drivers/block/ckrm-iostub.c | 4 +- drivers/block/elevator.c | 10 +- drivers/block/ll_rw_blk.c | 45 +- drivers/char/hangcheck-timer.c | 2 +- fs/exec.c | 13 - fs/ext2/acl.c | 4 - fs/ext2/inode.c | 2 +- fs/ext2/ioctl.c | 8 +- fs/ext3/acl.c | 4 - fs/ext3/inode.c | 2 +- fs/ext3/ioctl.c | 8 +- fs/ioctl.c | 13 - fs/namei.c | 29 +- fs/rcfs/dir.c | 2 +- fs/rcfs/magic.c | 67 +- fs/rcfs/rootdir.c | 2 +- fs/rcfs/socket_fs.c | 6 - fs/rcfs/super.c | 2 +- fs/rcfs/tc_magic.c | 13 +- fs/reiserfs/xattr.c | 4 - include/asm-i386/unistd.h | 4 +- include/asm-ppc/unistd.h | 4 +- include/asm-x86_64/unistd.h | 6 +- include/linux/ckrm-io.h | 7 +- include/linux/ckrm.h | 11 +- include/linux/ckrm_ce.h | 13 +- include/linux/ckrm_classqueue.h | 5 +- include/linux/ckrm_mem.h | 23 +- include/linux/ckrm_mem_inline.h | 110 +- include/linux/ckrm_rc.h | 8 +- include/linux/ckrm_sched.h | 482 +---- include/linux/crbce.h | 175 -- include/linux/elevator.h | 5 - include/linux/ext2_fs.h | 5 - include/linux/ext3_fs.h | 5 - include/linux/fs.h | 14 +- include/linux/init_task.h | 1 - include/linux/mm.h | 3 - include/linux/mm_inline.h | 7 - .../linux/netfilter_ipv4/ip_conntrack_pptp.h | 310 --- .../netfilter_ipv4/ip_conntrack_proto_gre.h | 123 -- include/linux/netfilter_ipv4/ip_nat_pptp.h | 11 - include/linux/page-flags.h | 1 - include/linux/rbce.h | 127 -- include/linux/rcfs.h | 1 - include/linux/sched.h | 152 +- include/linux/socket.h | 3 - include/linux/taskdelays.h | 4 +- include/linux/tcp.h | 1 + include/linux/vserver/inode.h | 7 - include/net/sock.h | 4 +- init/Kconfig | 74 +- init/main.c | 4 - kernel/Makefile | 5 +- kernel/ckrm/Makefile | 10 +- kernel/ckrm/ckrm.c | 43 +- kernel/ckrm/ckrm_cpu_class.c | 145 +- kernel/ckrm/ckrm_cpu_monitor.c | 828 ++------ kernel/ckrm/ckrm_laq.c | 495 ----- kernel/ckrm/ckrm_mem.c | 204 +- kernel/ckrm/ckrm_sockc.c | 14 +- kernel/ckrm/{ckrm_numtasks.c => ckrm_tasks.c} | 49 +- ...ckrm_numtasks_stub.c => ckrm_tasks_stub.c} | 0 kernel/ckrm/ckrm_tc.c | 97 +- kernel/ckrm/ckrmutils.c | 19 + kernel/ckrm/rbce/bitvector.h | 6 +- kernel/ckrm/rbce/info.h | 6 + kernel/ckrm/rbce/rbce_fs.c | 51 +- kernel/ckrm/rbce/rbcemod.c | 168 +- kernel/ckrm/rbce/rbcemod_ext.c | 35 +- kernel/ckrm/rbce/token.c | 25 +- kernel/ckrm_classqueue.c | 49 +- kernel/ckrm_sched.c | 213 +- kernel/exit.c | 10 +- kernel/exit.c.orig | 1192 +++++++++++ kernel/fork.c | 21 - kernel/itimer.c | 4 +- kernel/panic.c | 4 +- kernel/sched.c | 954 ++++----- kernel/signal.c | 22 +- kernel/vserver/inode.c | 31 - kernel/vserver/sysctl.c | 2 - mm/Makefile | 4 +- mm/memory.c | 5 +- mm/oom_panic.c | 51 - mm/page_alloc.c | 7 - mm/vmscan.c | 173 +- net/core/sock.c | 12 - 
net/ipv4/netfilter/ip_conntrack_core.c | 2 - net/ipv4/netfilter/ip_conntrack_pptp.c | 712 ------- net/ipv4/netfilter/ip_conntrack_pptp_priv.h | 24 - net/ipv4/netfilter/ip_conntrack_proto_gre.c | 349 ---- net/ipv4/netfilter/ip_conntrack_standalone.c | 4 +- net/ipv4/netfilter/ip_nat_pptp.c | 477 ----- net/ipv4/netfilter/ip_nat_proto_gre.c | 210 -- net/ipv4/tcp_ipv4.c | 2 +- net/packet/af_packet.c | 2 +- scripts/kernel-2.6-planetlab.spec | 33 +- 115 files changed, 2706 insertions(+), 9795 deletions(-) delete mode 100644 Documentation/ckrm/block_io delete mode 100644 Documentation/ckrm/ckrm_basics delete mode 100644 Documentation/ckrm/core_usage delete mode 100644 Documentation/ckrm/crbce delete mode 100644 Documentation/ckrm/installation delete mode 100644 Documentation/ckrm/mem_rc.design delete mode 100644 Documentation/ckrm/mem_rc.usage delete mode 100644 Documentation/ckrm/rbce_basics delete mode 100644 Documentation/ckrm/rbce_usage delete mode 100644 configs/kernel-2.6.8-i686-planetlab-desktop.config delete mode 100644 include/linux/crbce.h delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_pptp.h delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h delete mode 100644 include/linux/netfilter_ipv4/ip_nat_pptp.h delete mode 100644 include/linux/rbce.h delete mode 100644 kernel/ckrm/ckrm_laq.c rename kernel/ckrm/{ckrm_numtasks.c => ckrm_tasks.c} (90%) rename kernel/ckrm/{ckrm_numtasks_stub.c => ckrm_tasks_stub.c} (100%) create mode 100644 kernel/exit.c.orig delete mode 100644 mm/oom_panic.c delete mode 100644 net/ipv4/netfilter/ip_conntrack_pptp.c delete mode 100644 net/ipv4/netfilter/ip_conntrack_pptp_priv.h delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_gre.c delete mode 100644 net/ipv4/netfilter/ip_nat_pptp.c delete mode 100644 net/ipv4/netfilter/ip_nat_proto_gre.c diff --git a/Documentation/ckrm/block_io b/Documentation/ckrm/block_io deleted file mode 100644 index e4a0b8b95..000000000 --- a/Documentation/ckrm/block_io +++ /dev/null @@ -1,154 +0,0 @@ -CKRM I/O controller - -Last updated: Sep 21, 2004 - - -Intro ------ - -CKRM's I/O scheduler is developed as a delta over a modified version of -the Complete Fair Queuing scheduler (CFQ) that implements I/O priorities. -The latter's original posting can be found at: - http://www.ussg.iu.edu/hypermail/linux/kernel/0311.1/0019.html - -Please note that this is not the CFQ version currently in the linus kernel -(2.6.8.1 at time of writing) which provides equal, not prioritized, -bandwidth allocation amongst processes. Since the CFQ in the kernel is likely -to eventually move towards I/O priority implementation, CKRM has not renamed -the underlying I/O scheduler and simply replaces drivers/block/cfq-iosched.c -with the modified version. - -Installation ------------- - -1. Configure "Disk I/O Resource Controller" under CKRM (see -Documentation/ckrm/installation) - -2. After booting into the new kernel, load ckrm-io - # modprobe ckrm-io - -3. Verify that reading /rcfs/taskclass/shares displays values for the -I/O controller (res=cki). - -4. Mount sysfs for monitoring bandwidth received (temporary solution till -a userlevel tool is developed) - # mount -t sysfs none /sys - - -Usage ------ - -For brevity, we assume we are in the /rcfs/taskclass directory for all the -code snippets below. - -Initially, the systemwide default class gets 100% of the I/O bandwidth. - - $ cat stats - - - 20 total ioprio - 20 unused/default ioprio - -The first value is the share of a class, as a parent. 
The second is the share -of its default subclass. Initially the two are equal. As named subclasses get -created and assigned shares, the default subclass' share (which equals the -"unused" portion of the parent's allocation) dwindles. - - -CFQ assigns one of 20 I/O priorities to all I/O requests. Each priority level -gets a fixed proportion of the total bandwidth in increments of 5%. e.g. - ioprio=1 gets 5%, - ioprio=2 gets 10%..... - all the way through ioprio=19 getting 95% - -ioprio=0 gets bandwidth only if no other priority level submits I/O i.e. it can -get starved. -ioprio=20 is considered realtime I/O and always gets priority. - -CKRM's I/O scheduler distributes these 20 priority levels amongst the hierarchy -of classes according to the relative share of each class. Thus, root starts out -with the total allocation of 20 initially. As children get created and shares -assigned to them, root's allocation reduces. At any time, the sum of absolute -share values of all classes equals 20. - - - -Class creation --------------- - - $ mkdir a - -Its initial share is zero. The parent's share values will be unchanged. Note -that even classes with zero share get unused bandwidth under CFQ. - -Setting a new class share -------------------------- - - $ echo "res=cki,guarantee=20" > /rcfs/taskclass/a/shares - Set cki shares to 20 -1 -1 -1 - - $ echo a/shares - - res=cki,guarantee=20,limit=100,total_guarantee=100,max_limit=100 - -The limit and max_limit fields can be ignored as they are not implemented. -The absolute share of a is 20% of parent's absolute total (20) and can be seen -through - $ echo a/stats - - - 4 total ioprio - 4 unused/default ioprio - -Since a gets 4, parent's default's share diminishes accordingly. Thus - - $ echo stats - - - 20 total ioprio - 16 unused/default ioprio - - -Monitoring ----------- - -Each priority level's request service rate can be viewed through sysfs (mounted -during installation). To view the servicing of priority 4's requests, - - $ while : ; echo /sys/block//queue/iosched/p4 ; sleep 1 ; done - rq (10,15) sec (20,30) q (40,50) - - - -where - rq = cumulative I/O requests received (10) and serviced (15) - sec = cumulative sectors requested (20) and served (30) - q = cumulative number of times the queue was created(40)/destroyed (50) - -The rate at which requests or sectors are serviced should differ for different -priority levels. The difference in received and serviced values indicates queue -depth - with insufficient depth, differentiation between I/O priority levels -will not be observed. - -The rate of q creation is not significant for CKRM. - - -Caveats -------- - -CFQ's I/O differentiation is still being worked upon so its better to choose -widely separated share values to observe differences in delivered I/O -bandwidth. - -CFQ, and consequently CKRM, does not provide limits yet. So it is not possible -to completely limit an I/O hog process by putting it in a class with a low I/O -share. Only if the competing classes maintain sufficient queue depth (i.e a -high I/O issue rate) will they get preferential treatment. However, they may -still see latency degradation due to seeks caused by servicing of the low -priority class. - -When limits are implemented, this behaviour will be rectified. - -Please post questions on the CKRM I/O scheduler on ckrm-tech@lists.sf.net. 
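To recap the share arithmetic used in the stats examples above: a class's absolute ioprio allocation is its parent's absolute total scaled by guarantee/total_guarantee. A minimal sketch with the same numbers as the example (the shell variables are illustrative only and are not part of the rcfs interface):

    # Root starts with 20 absolute ioprio units; a child with
    # guarantee=20 out of total_guarantee=100 therefore receives
    # 20 * 20 / 100 = 4 units, and the parent's unused/default
    # share drops from 20 to 16, as shown in the stats output.
    parent_total=20
    guarantee=20
    total_guarantee=100
    child=$(( parent_total * guarantee / total_guarantee ))
    echo "child=$child default=$(( parent_total - child ))"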
-
-
diff --git a/Documentation/ckrm/ckrm_basics b/Documentation/ckrm/ckrm_basics
deleted file mode 100644
index cfd9a9256..000000000
--- a/Documentation/ckrm/ckrm_basics
+++ /dev/null
@@ -1,66 +0,0 @@
-CKRM Basics
-------------
-A brief review of CKRM concepts and terminology will help make installation
-and testing easier. For more details, please visit http://ckrm.sf.net.
-
-Currently there are two class types, taskclass and socketclass, for grouping,
-regulating and monitoring tasks and sockets respectively.
-
-To avoid repeating instructions for each classtype, this document assumes a
-task to be the kernel object being grouped. By and large, one can replace task
-with socket and taskclass with socketclass.
-
-RCFS depicts a CKRM class as a directory. A hierarchy of classes can be
-created in which the children of a class share the resources allotted to
-the parent. Tasks can be classified to any class at any level.
-There is no correlation between the parent-child relationship of tasks and
-the parent-child relationship of the classes they belong to.
-
-Without a Classification Engine, a task inherits its class. A privileged
-user can reassign a task to a class as described below, after which all
-the child tasks under that task will be assigned to that class, unless the
-user reassigns any of them.
-
-A Classification Engine (CE), if one exists, will be used by CKRM to
-classify a task to a class. The rule-based classification engine uses some
-of the attributes of the task to classify it. When a CE is present,
-a class is not inherited by a task.
-
-Characteristics of a class can be accessed/changed through the following magic
-files under the directory representing the class:
-
-shares: allows changing the shares of the different resources managed by the
-        class
-stats: allows seeing the statistics associated with each resource managed
-        by the class
-target: allows assigning a task to a class. If a CE is present, assigning
-        a task to a class through this interface will prevent the CE from
-        reassigning the task to any class during reclassification.
-members: allows seeing which tasks have been assigned to a class
-config: allows viewing and modifying configuration information of the
-        different resources in a class.
-
-Resource allocations for a class are controlled by the parameters:
-
-guarantee: specifies how much of a resource is guaranteed to a class. The
-        special value DONT_CARE (-2) means that no specific guarantee is
-        specified; this class may not get any resources if the system is
-        running short of resources.
-limit: specifies the maximum amount of a resource that is allowed to be
-        allocated by a class. The special value DONT_CARE (-2) means that
-        no specific limit is specified; this class can get all the
-        resources available.
-total_guarantee: the total guarantee allowed among the children of this
-        class. In other words, the sum of the "guarantee"s of all children
-        of this class cannot exceed this number.
-max_limit: the maximum "limit" allowed for any of this class's children. In
-        other words, the "limit" of any child of this class cannot exceed
-        this value.
-
-None of these parameters are absolute or have any units associated with
-them. They are just numbers (relative to the parent's) that are used to
-calculate the absolute amount of a resource available to a specific
-class.
-
-Note: The root class has an absolute number of resource units associated with it.
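To make the relative nature of these numbers concrete, here is a minimal sketch of the conversion and of the children-sum constraint, consistent with the share arithmetic shown in the block_io and memory controller documents; the 200-unit figure and the class names are purely hypothetical:

    # absolute share of a child = parent's absolute units * guarantee / total_guarantee
    parent_units=200          # hypothetical absolute units owned by the parent class
    total_guarantee=100       # parent's total_guarantee
    c1_guarantee=60
    c2_guarantee=25
    echo "c1 gets $(( parent_units * c1_guarantee / total_guarantee )) units"   # 120
    echo "c2 gets $(( parent_units * c2_guarantee / total_guarantee )) units"   # 50
    # The sum of the children's guarantees (60 + 25 = 85) must stay <= the
    # parent's total_guarantee (100); the remainder stays with the parent's
    # default subclass.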
- diff --git a/Documentation/ckrm/core_usage b/Documentation/ckrm/core_usage deleted file mode 100644 index 6b5d808c3..000000000 --- a/Documentation/ckrm/core_usage +++ /dev/null @@ -1,72 +0,0 @@ -Usage of CKRM without a classification engine ------------------------------------------------ - -1. Create a class - - # mkdir /rcfs/taskclass/c1 - creates a taskclass named c1 , while - # mkdir /rcfs/socket_class/s1 - creates a socketclass named s1 - -The newly created class directory is automatically populated by magic files -shares, stats, members, target and config. - -2. View default shares - - # cat /rcfs/taskclass/c1/shares - - "guarantee=-2,limit=-2,total_guarantee=100,max_limit=100" is the default - value set for resources that have controllers registered with CKRM. - -3. change shares of a - - One or more of the following fields can/must be specified - res= #mandatory - guarantee= - limit= - total_guarantee= - max_limit= - e.g. - # echo "res=numtasks,limit=20" > /rcfs/taskclass/c1 - - If any of these parameters are not specified, the current value will be - retained. - -4. Reclassify a task (listening socket) - - write the pid of the process to the destination class' target file - # echo 1004 > /rcfs/taskclass/c1/target - - write the "\" string to the destination class' target file - # echo "0.0.0.0\32770" > /rcfs/taskclass/c1/target - -5. Get a list of tasks (sockets) assigned to a taskclass (socketclass) - - # cat /rcfs/taskclass/c1/members - lists pids of tasks belonging to c1 - - # cat /rcfs/socket_class/s1/members - lists the ipaddress\port of all listening sockets in s1 - -6. Get the statictics of different resources of a class - - # cat /rcfs/tasksclass/c1/stats - shows c1's statistics for each resource with a registered resource - controller. - - # cat /rcfs/socket_class/s1/stats - show's s1's stats for the listenaq controller. - -7. View the configuration values of the resources associated with a class - - # cat /rcfs/taskclass/c1/config - shows per-controller config values for c1. - -8. Change the configuration values of resources associated with a class - Configuration values are different for different resources. the comman - field "res=" must always be specified. - - # echo "res=numtasks,parameter=value" > /rcfs/taskclass/c1/config - to change (without any effect), the value associated with . - - diff --git a/Documentation/ckrm/crbce b/Documentation/ckrm/crbce deleted file mode 100644 index dfb4b1e96..000000000 --- a/Documentation/ckrm/crbce +++ /dev/null @@ -1,33 +0,0 @@ -CRBCE ----------- - -crbce is a superset of rbce. In addition to providing automatic -classification, the crbce module -- monitors per-process delay data that is collected by the delay -accounting patch -- collects data on significant kernel events where reclassification -could occur e.g. fork/exec/setuid/setgid etc., and -- uses relayfs to supply both these datapoints to userspace - -To illustrate the utility of the data gathered by crbce, we provide a -userspace daemon called crbcedmn that prints the header info received -from the records sent by the crbce module. - -0. Ensure that a CKRM-enabled kernel with following options configured - has been compiled. At a minimum, core, rcfs, atleast one classtype, - delay-accounting patch and relayfs. For testing, it is recommended - all classtypes and resource controllers be compiled as modules. - -1. Ensure that the Makefile's BUILD_CRBCE=1 and KDIR points to the - kernel of step 1 and call make. - This also builds the userspace daemon, crbcedmn. 
- -2..9 Same as rbce installation and testing instructions, - except replacing rbce.ko with crbce.ko - -10. Read the pseudo daemon help file - # ./crbcedmn -h - -11. Run the crbcedmn to display all records being processed - # ./crbcedmn - diff --git a/Documentation/ckrm/installation b/Documentation/ckrm/installation deleted file mode 100644 index 0c9033891..000000000 --- a/Documentation/ckrm/installation +++ /dev/null @@ -1,70 +0,0 @@ -Kernel installation ------------------------------- - - = version of mainline Linux kernel - = version of CKRM - -Note: It is expected that CKRM versions will change fairly rapidly. Hence once -a CKRM version has been released for some , it will only be made -available for future 's until the next CKRM version is released. - -1. Patch - - Apply ckrm/kernel//ckrm-.patch to a mainline kernel - tree with version . - - If CRBCE will be used, additionally apply the following patches, in order: - delayacctg-.patch - relayfs-.patch - - -2. Configure - -Select appropriate configuration options: - -a. for taskclasses - - General Setup-->Class Based Kernel Resource Management - - [*] Class Based Kernel Resource Management - Resource Class File System (User API) - [*] Class Manager for Task Groups - Number of Tasks Resource Manager - -b. To test socket_classes and multiple accept queue controller - - General Setup-->Class Based Kernel Resource Management - [*] Class Based Kernel Resource Management - Resource Class File System (User API) - [*] Class Manager for socket groups - Multiple Accept Queues Resource Manager - - Device Drivers-->Networking Support-->Networking options--> - [*] Network packet filtering (replaces ipchains) - [*] IP: TCP Multiple accept queues support - -c. To test CRBCE later (requires 2a.) - - File Systems-->Pseudo filesystems--> - Relayfs filesystem support - (enable all sub fields) - - General Setup--> - [*] Enable delay accounting - - -3. Build, boot into kernel - -4. Enable rcfs - - # insmod /fs/rcfs/rcfs.ko - # mount -t rcfs rcfs /rcfs - - This will create the directories /rcfs/taskclass and - /rcfs/socketclass which are the "roots" of subtrees for creating - taskclasses and socketclasses respectively. - -5. Load numtasks and listenaq controllers - - # insmod /kernel/ckrm/ckrm_tasks.ko - # insmod /kernel/ckrm/ckrm_listenaq.ko diff --git a/Documentation/ckrm/mem_rc.design b/Documentation/ckrm/mem_rc.design deleted file mode 100644 index bc565c6a0..000000000 --- a/Documentation/ckrm/mem_rc.design +++ /dev/null @@ -1,134 +0,0 @@ -0. Lifecycle of a LRU Page: ----------------------------- -These are the events in a page's lifecycle: - - allocation of the page - there are multiple high level page alloc functions; __alloc_pages() - is the lowest level function that does the real allocation. - - get into LRU list (active list or inactive list) - - get out of LRU list - - freeing the page - there are multiple high level page free functions; free_pages_bulk() - is the lowest level function that does the real free. - -When the memory subsystem runs low on LRU pages, pages are reclaimed by - - moving pages from active list to inactive list (refill_inactive_zone()) - - freeing pages from the inactive list (shrink_zone) -depending on the recent usage of the page(approximately). - -1. Introduction ---------------- -Memory resource controller controls the number of lru physical pages -(active and inactive list) a class uses. 
It does not restrict any -other physical pages (slabs etc.,) - -For simplicity, this document will always refer lru physical pages as -physical pages or simply pages. - -There are two parameters(that are set by the user) that affect the number -of pages a class is allowed to have in active/inactive list. -They are - - guarantee - specifies the number of pages a class is - guaranteed to get. In other words, if a class is using less than - 'guarantee' number of pages, its pages will not be freed when the - memory subsystem tries to free some pages. - - limit - specifies the maximum number of pages a class can get; - 'limit' in essence can be considered as the 'hard limit' - -Rest of this document details how these two parameters are used in the -memory allocation logic. - -Note that the numbers that are specified in the shares file, doesn't -directly correspond to the number of pages. But, the user can make -it so by making the total_guarantee and max_limit of the default class -(/rcfs/taskclass) to be the total number of pages(given in config file) -available in the system. - - for example: - # cd /rcfs/taskclass - # cat config - res=mem;tot_pages=239778,active=60473,inactive=135285,free=44555 - # cat shares - res=mem,guarantee=-2,limit=-2,total_guarantee=100,max_limit=100 - - "tot_pages=239778" above mean there are 239778 lru pages in - the system. - - By making total_guarantee and max_limit to be same as this number at - this level (/rcfs/taskclass), one can make guarantee and limit in all - classes refer to the number of pages. - - # echo 'res=mem,total_guarantee=239778,max_limit=239778' > shares - # cat shares - res=mem,guarantee=-2,limit=-2,total_guarantee=239778,max_limit=239778 - - -The number of pages a class can use be anywhere between its guarantee and -limit. CKRM memory controller springs into action when the system needs -to choose a victim page to swap out. While the number of pages a class can -have allocated may be anywhere between its guarantee and limit, victim -pages will be choosen from classes that are above their guarantee. - -Pages will be freed from classes that are close to their "limit" before -freeing pages from the classes that are close to their guarantee. Pages -belonging to classes that are below their guarantee will not be chosen as -a victim. - -2. Core Design --------------------------- - -CKRM memory resource controller taps at appropriate low level memory -management functions to associate a page with a class and to charge -a class that brings the page to the LRU list. - -2.1 Changes in page allocation function(__alloc_pages()) --------------------------------------------------------- -- If the class that the current task belong to is over 110% of its 'limit', - allocation of page(s) fail. -- After succesful allocation of a page, the page is attached with the class - to which the current task belongs to. -- Note that the class is _not_ charged for the page(s) here. - -2.2 Changes in page free(free_pages_bulk()) -------------------------------------------- -- page is freed from the class it belongs to. - -2.3 Adding/Deleting page to active/inactive list -------------------------------------------------- -When a page is added to the active or inactive list, the class that the -page belongs to is charged for the page usage. - -When a page is deleted from the active or inactive list, the class that the -page belongs to is credited back. 
- -If a class uses upto its limit, attempt is made to shrink the class's usage -to 90% of its limit, in order to help the class stay within its limit. -But, if the class is aggressive, and keep getting over the class's limit -often(more than 10 shrink events in 10 seconds), then the memory resource -controller gives up on the class and doesn't try to shrink the class, which -will eventually lead the class to reach its 110% of its limit and then the -page allocations will start failing. - -2.4 Chages in the page reclaimation path (refill_inactive_zone and shrink_zone) -------------------------------------------------------------------------------- -Pages will be moved from active to inactive list(refill_inactive_zone) and -pages from inactive list will be freed in the following order: -(range is calculated by subtracting 'guarantee' from 'limit') - - Classes that are over 110% of their range - - Classes that are over 100% of their range - - Classes that are over 75% of their range - - Classes that are over 50% of their range - - Classes that are over 25% of their range - - Classes whose parent is over 110% of its range - - Classes that are over their guarantee - -2.5 Handling of Shared pages ----------------------------- -Even if a mm is shared by tasks, the pages that belong to the mm will be -charged against the individual tasks that bring the page into LRU. - -But, when any task that is using a mm moves to a different class or exits, -then all pages that belong to the mm will be charged against the richest -class among the tasks that are using the mm. - -Note: Shared page handling need to be improved with a better policy. - diff --git a/Documentation/ckrm/mem_rc.usage b/Documentation/ckrm/mem_rc.usage deleted file mode 100644 index faddbf84e..000000000 --- a/Documentation/ckrm/mem_rc.usage +++ /dev/null @@ -1,72 +0,0 @@ -Installation ------------- - -1. Configure "Class based physical memory controller" under CKRM (see - Documentation/ckrm/installation) - -2. Reboot the system with the new kernel. - -3. Verify that the memory controller is present by reading the file - /rcfs/taskclass/config (should show a line with res=mem) - -Usage ------ - -For brevity, unless otherwise specified all the following commands are -executed in the default class (/rcfs/taskclass). - -Initially, the systemwide default class gets 100% of the LRU pages, and the -config file displays the total number of physical pages. - - # cd /rcfs/taskclass - # cat config - res=mem;tot_pages=239778,active=60473,inactive=135285,free=44555 - # cat shares - res=mem,guarantee=-2,limit=-2,total_guarantee=100,max_limit=100 - - tot_pages - total number of pages - active - number of pages in the active list ( sum of all zones) - inactive - number of pages in the inactive list ( sum of all zones ) - free - number of free pages (sum of all pages) - - By making total_guarantee and max_limit to be same as tot_pages, one make - make the numbers in shares file be same as the number of pages for a - class. - - # echo 'res=mem,total_guarantee=239778,max_limit=239778' > shares - # cat shares - res=mem,guarantee=-2,limit=-2,total_guarantee=239778,max_limit=239778 - - -Class creation --------------- - - # mkdir c1 - -Its initial share is don't care. The parent's share values will be unchanged. 
- -Setting a new class share -------------------------- - - # echo 'res=mem,guarantee=25000,limit=50000' > c1/shares - - # cat c1/shares - res=mem,guarantee=25000,limit=50000,total_guarantee=100,max_limit=100 - - 'guarantee' specifies the number of pages this class entitled to get - 'limit' is the maximum number of pages this class can get. - -Monitoring ----------- - -stats file shows statistics of the page usage of a class - # cat stats - ----------- Memory Resource stats start ----------- - Number of pages used(including pages lent to children): 196654 - Number of pages guaranteed: 239778 - Maximum limit of pages: 239778 - Total number of pages available(after serving guarantees to children): 214778 - Number of pages lent to children: 0 - Number of pages borrowed from the parent: 0 - ----------- Memory Resource stats end ----------- - diff --git a/Documentation/ckrm/rbce_basics b/Documentation/ckrm/rbce_basics deleted file mode 100644 index fd66ef2fb..000000000 --- a/Documentation/ckrm/rbce_basics +++ /dev/null @@ -1,67 +0,0 @@ -Rule-based Classification Engine (RBCE) -------------------------------------------- - -The ckrm/rbce directory contains the sources for two classification engines -called rbce and crbce. Both are optional, built as kernel modules and share much -of their codebase. Only one classification engine (CE) can be loaded at a time -in CKRM. - - -With RBCE, user can specify rules for how tasks are classified to a -class. Rules are specified by one or more attribute-value pairs and -an associated class. The tasks that match all the attr-value pairs -will get classified to the class attached with the rule. - -The file rbce_info under /rcfs/ce directory details the functionality -of different files available under the directory and also details -about attributes that can are used to define rules. - -order: When multiple rules are defined the rules are executed - according to the order of a rule. Order can be specified - while defining a rule. If order is not specified, the - highest order will be assigned to the rule(i.e, the new - rule will be executed after all the previously defined - evaluate false). So, order of rules is important as that - will decide, which class a task will get assigned to. For - example, if we have the two following rules: r1: - uid=1004,order=10,class=/rcfs/taskclass/c1 r2: - uid=1004,cmd=grep,order=20,class=/rcfs/taskclass/c2 then, - the task "grep" executed by user 1004 will always be - assigned to class /rcfs/taskclass/c1, as rule r1 will be - executed before r2 and the task successfully matched the - rule's attr-value pairs. Rule r2 will never be consulted - for the command. Note: The order in which the rules are - displayed(by ls) has no correlation with the order of the - rule. - -dependency: Rules can be defined to be depend on another rule. i.e a - rule can be dependent on one rule and has its own - additional attr-value pairs. the dependent rule will - evaluate true only if all the attr-value pairs of both - rules are satisfied. ex: r1: gid=502,class=/rcfs/taskclass - r2: depend=r1,cmd=grep,class=rcfstaskclass/c1 r2 is a - dependent rule that depends on r1, a task will be assigned - to /rcfs/taskclass/c1 if its gid is 502 and the executable - command name is "grep". If a task's gid is 502 but the - command name is _not_ "grep" then it will be assigned to - /rcfs/taskclass - - Note: The order of dependent rule must be _lesser_ than the - rule it depends on, so that it is evaluated _before the - base rule is evaluated. 
Otherwise the base rule will - evaluate true and the task will be assigned to the class of - that rule without the dependent rule ever getting - evaluated. In the example above, order of r2 must be lesser - than order of r1. - -app_tag: a task can be attached with a tag(ascii string), that becomes - an attribute of that task and rules can be defined with the - tag value. - -state: states are at two levels in RBCE. The entire RBCE can be - enabled or disabled which writing 1 or 0 to the file - rbce_state under /rcfs/ce. Disabling RBCE, would mean that - the rules defined in RBCE will not be utilized for - classifying a task to a class. A specific rule can be - enabled/disabled by changing the state of that rule. Once - it is disabled, the rule will not be evaluated. diff --git a/Documentation/ckrm/rbce_usage b/Documentation/ckrm/rbce_usage deleted file mode 100644 index 6d1592646..000000000 --- a/Documentation/ckrm/rbce_usage +++ /dev/null @@ -1,98 +0,0 @@ -Usage of CKRM with RBCE --------------------------- - -0. Ensure that a CKRM-enabled kernel with following options configured - has been compiled. At a minimum, core, rcfs and atleast one - classtype. For testing, it is recommended all classtypes and - resource controllers be compiled as modules. - -1. Change ckrm/rbce/Makefile's KDIR to point to this compiled kernel's source - tree and call make - -2. Load rbce module. - # insmod ckrm/rbce/rbce.ko - Note that /rcfs has to be mounted before this. - Note: this command should populate the directory /rcfs/ce with files - rbce_reclassify, rbce_tag, rbce_info, rbce_state and a directory - rules. - - Note2: If these are not created automatically, just create them by - using the commands touch and mkdir.(bug that needs to be fixed) - -3. Defining a rule - Rules are defined by creating(by writing) to a file under the - /rcfs/ce/rules directory by concatinating multiple attribute value - pairs. - - Note that the classes must be defined before defining rules that - uses the classes. eg: the command # echo - "uid=1004,class=/rcfs/taskclass/c1" > /rcfs/ce/rules/r1 will define - a rule r1 that classifies all tasks belong to user id 1004 to class - /rcfs/taskclass/c1 - -4. Viewing a rule - read the corresponding file. - to read rule r1, issue the command: - # cat /rcfs/ce/rules/r1 - -5. Changing a rule - - Changing a rule is done the same way as defining a rule, the new - rule will include the old set of attr-value pairs slapped with new - attr-value pairs. eg: if the current r2 is - uid=1004,depend=r1,class=/rcfs/taskclass/c1 - (r1 as defined in step 3) - - the command: - # echo gid=502 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1004,gid=502,depend=r1,class=/rcfs/taskclass/c1 - - the command: - # echo uid=1005 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1005,class=/rcfs/taskclass/c1 - - the command: - # echo class=/rcfs/taskclass/c2 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1004,depend=r1,class=/rcfs/taskclass/c2 - - the command: - # echo depend=r4 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1004,depend=r4,class=/rcfs/taskclass/c2 - - the command: - # echo +depend=r4 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1004,depend=r1,depend=r4,class=/rcfs/taskclass/c2 - - the command: - # echo -depend=r1 > /rcfs/ce/rules/r1 - will change the rule to - r1: uid=1004,class=/rcfs/taskclass/c2 - -6. 
Checking the state of RBCE - State(enabled/disabled) of RBCE can be checked by reading the file - /rcfs/ce/rbce_state, it will show 1(enabled) or 0(disabled). - By default, RBCE is enabled(1). - ex: # cat /rcfs/ce/rbce_state - -7. Changing the state of RBCE - State of RBCE can be changed by writing 1(enable) or 0(disable). - ex: # echo 1 > cat /rcfs/ce/rbce_state - -8. Checking the state of a rule - State of a rule is displayed in the rule. Rule can be viewed by - reading the rule file. ex: # cat /rcfs/ce/rules/r1 - -9. Changing the state of a rule - - State of a rule can be changed by writing "state=1"(enable) or - "state=0"(disable) to the corresponding rule file. By defeault, the - rule is enabled when defined. ex: to disable an existing rule r1, - issue the command - # echo "state=0" > /rcfs/ce/rules/r1 - - diff --git a/Makefile b/Makefile index 4d94580e0..c23dcfbea 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 8 -EXTRAVERSION = -1.521.2.5.planetlab +EXTRAVERSION = -1.planetlab NAME=Zonked Quokka # *DOCUMENTATION* diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3ac74183c..bb91de327 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -1030,7 +1030,5 @@ ENTRY(sys_call_table) .long sys_mq_notify .long sys_mq_getsetattr .long sys_ni_syscall /* reserved for kexec */ - .long sys_ioprio_set - .long sys_ioprio_get /* 285 */ syscall_table_size=(.-sys_call_table) diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 81a72414a..017da4476 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -1450,5 +1450,3 @@ _GLOBAL(sys_call_table) .long sys_mq_notify .long sys_mq_getsetattr .long sys_ni_syscall /* 268 reserved for sys_kexec_load */ - .long sys_ioprio_set - .long sys_ioprio_get diff --git a/configs/kernel-2.6.8-i686-planetlab-desktop.config b/configs/kernel-2.6.8-i686-planetlab-desktop.config deleted file mode 100644 index 9426fb0c2..000000000 --- a/configs/kernel-2.6.8-i686-planetlab-desktop.config +++ /dev/null @@ -1,1750 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_X86=y -CONFIG_MMU=y -CONFIG_UID16=y -CONFIG_GENERIC_ISA_DMA=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y -CONFIG_BROKEN_ON_SMP=y - -# -# General setup -# -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -# CONFIG_BSD_PROCESS_ACCT_V3 is not set - -# -# Class Based Kernel Resource Management -# -CONFIG_CKRM=y -CONFIG_RCFS_FS=y -CONFIG_CKRM_TYPE_TASKCLASS=y -CONFIG_CKRM_RES_NUMTASKS=y -CONFIG_CKRM_CPU_SCHEDULE=y -CONFIG_CKRM_RES_BLKIO=y -CONFIG_CKRM_RES_MEM=y -# CONFIG_CKRM_MEM_LRUORDER_CHANGE is not set -# CONFIG_CKRM_TYPE_SOCKETCLASS is not set -CONFIG_CKRM_RBCE=y -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -CONFIG_LOG_BUF_SHIFT=17 -# CONFIG_HOTPLUG is not set -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_OOM_PANIC=y -# CONFIG_EMBEDDED is not set -# CONFIG_DELAY_ACCT is not set -CONFIG_KALLSYMS=y -# CONFIG_KALLSYMS_ALL is not set -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_FUTEX=y -CONFIG_EPOLL=y -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_MODULE_FORCE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SIG is not set -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_X86_PC=y -# CONFIG_X86_ELAN is 
not set -# CONFIG_X86_VOYAGER is not set -# CONFIG_X86_NUMAQ is not set -# CONFIG_X86_SUMMIT is not set -# CONFIG_X86_BIGSMP is not set -# CONFIG_X86_VISWS is not set -# CONFIG_X86_GENERICARCH is not set -# CONFIG_X86_ES7000 is not set -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -# CONFIG_M686 is not set -# CONFIG_MPENTIUMII is not set -CONFIG_MPENTIUMIII=y -# CONFIG_MPENTIUMM is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MK8 is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_MVIAC3_2 is not set -CONFIG_X86_GENERIC=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_INTEL_USERCOPY=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_4G=y -CONFIG_X86_SWITCH_PAGETABLES=y -CONFIG_X86_4G_VM_LAYOUT=y -CONFIG_X86_UACCESS_INDIRECT=y -CONFIG_X86_HIGH_ENTRY=y -CONFIG_HPET_TIMER=y -CONFIG_HPET_EMULATE_RTC=y -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_X86_UP_APIC is not set -CONFIG_X86_TSC=y -CONFIG_X86_MCE=y -# CONFIG_X86_MCE_NONFATAL is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m - -# -# Firmware Drivers -# -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -# CONFIG_EFI is not set -CONFIG_REGPARM=y - -# -# Power management options (ACPI, APM) -# -CONFIG_PM=y -# CONFIG_SOFTWARE_SUSPEND is not set -# CONFIG_PM_DISK is not set - -# -# ACPI (Advanced Configuration and Power Interface) Support -# -CONFIG_ACPI=y -CONFIG_ACPI_BOOT=y -CONFIG_ACPI_INTERPRETER=y -CONFIG_ACPI_SLEEP=y -CONFIG_ACPI_SLEEP_PROC_FS=y -CONFIG_ACPI_AC=m -CONFIG_ACPI_BATTERY=m -CONFIG_ACPI_BUTTON=m -CONFIG_ACPI_FAN=y -CONFIG_ACPI_PROCESSOR=y -CONFIG_ACPI_THERMAL=y -CONFIG_ACPI_ASUS=m -CONFIG_ACPI_TOSHIBA=m -# CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_BUS=y -CONFIG_ACPI_EC=y -CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y -CONFIG_ACPI_SYSTEM=y -CONFIG_X86_PM_TIMER=y - -# -# APM (Advanced Power Management) BIOS Support -# -CONFIG_APM=m -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -# CONFIG_CPU_FREQ_PROC_INTF is not set -# CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set -CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y -CONFIG_CPU_FREQ_GOV_PERFORMANCE=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=m -CONFIG_CPU_FREQ_GOV_USERSPACE=y -# CONFIG_CPU_FREQ_24_API is not set -CONFIG_CPU_FREQ_TABLE=y - -# -# CPUFreq processor drivers -# -CONFIG_X86_ACPI_CPUFREQ=m -# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set -CONFIG_X86_POWERNOW_K6=m -CONFIG_X86_POWERNOW_K7=m -CONFIG_X86_POWERNOW_K8=m -# CONFIG_X86_GX_SUSPMOD is not set -CONFIG_X86_SPEEDSTEP_CENTRINO=m -CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y -CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI=y -CONFIG_X86_SPEEDSTEP_ICH=m -CONFIG_X86_SPEEDSTEP_SMI=m -CONFIG_X86_P4_CLOCKMOD=m -CONFIG_X86_SPEEDSTEP_LIB=m -# 
CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK is not set -CONFIG_X86_LONGRUN=m -CONFIG_X86_LONGHAUL=m - -# -# Bus options (PCI, PCMCIA, EISA, MCA, ISA) -# -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GOMMCONFIG is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_MMCONFIG=y -CONFIG_PCI_LEGACY_PROC=y -# CONFIG_PCI_NAMES is not set -CONFIG_ISA=y -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_SCx200 is not set - -# -# Executable file formats -# -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_AOUT is not set -CONFIG_BINFMT_MISC=y - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_DEBUG_DRIVER is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=m -# CONFIG_MTD_DEBUG is not set -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CONCAT=m -CONFIG_MTD_REDBOOT_PARTS=m -# CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED is not set -# CONFIG_MTD_REDBOOT_PARTS_READONLY is not set -CONFIG_MTD_CMDLINE_PARTS=y - -# -# User Modules And Translation Layers -# -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m - -# -# RAM/ROM/Flash chip drivers -# -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_GEN_PROBE=m -# CONFIG_MTD_CFI_ADV_OPTIONS is not set -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set -# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set -# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -# CONFIG_MTD_CFI_I4 is not set -# CONFIG_MTD_CFI_I8 is not set -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_AMDSTD_RETRY=3 -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_CFI_UTIL=m -CONFIG_MTD_RAM=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m - -# -# Mapping drivers for chip access -# -CONFIG_MTD_COMPLEX_MAPPINGS=y -# CONFIG_MTD_PHYSMAP is not set -# CONFIG_MTD_PNC2000 is not set -CONFIG_MTD_SC520CDP=m -CONFIG_MTD_NETSC520=m -CONFIG_MTD_SBC_GXX=m -CONFIG_MTD_ELAN_104NC=m -CONFIG_MTD_SCx200_DOCFLASH=m -# CONFIG_MTD_AMD76XROM is not set -# CONFIG_MTD_ICHXROM is not set -CONFIG_MTD_SCB2_FLASH=m -# CONFIG_MTD_NETtel is not set -# CONFIG_MTD_DILNETPC is not set -# CONFIG_MTD_L440GX is not set -CONFIG_MTD_PCI=m - -# -# Self-contained MTD device drivers -# -CONFIG_MTD_PMC551=m -# CONFIG_MTD_PMC551_BUGFIX is not set -# CONFIG_MTD_PMC551_DEBUG is not set -# CONFIG_MTD_SLRAM is not set -# CONFIG_MTD_PHRAM is not set -CONFIG_MTD_MTDRAM=m -CONFIG_MTDRAM_TOTAL_SIZE=4096 -CONFIG_MTDRAM_ERASE_SIZE=128 -# CONFIG_MTD_BLKMTD is not set - -# -# Disk-On-Chip Device Drivers -# -CONFIG_MTD_DOC2000=m -# CONFIG_MTD_DOC2001 is not set -CONFIG_MTD_DOC2001PLUS=m -CONFIG_MTD_DOCPROBE=m -CONFIG_MTD_DOCECC=m -# CONFIG_MTD_DOCPROBE_ADVANCED is not set -CONFIG_MTD_DOCPROBE_ADDRESS=0 - -# -# NAND Flash Device Drivers -# -CONFIG_MTD_NAND=m -# CONFIG_MTD_NAND_VERIFY_WRITE is not set -CONFIG_MTD_NAND_IDS=m -# CONFIG_MTD_NAND_DISKONCHIP is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# -CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set - -# -# Protocols -# -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=m -# CONFIG_BLK_DEV_XD is not set -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -# CONFIG_BLK_DEV_CRYPTOLOOP is not set 
-CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_SX8=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_BLK_DEV_INITRD=y -CONFIG_LBD=y - -# -# ATA/ATAPI/MFM/RLL support -# -CONFIG_IDE=y -CONFIG_BLK_DEV_IDE=y - -# -# Please see Documentation/ide.txt for help/info on IDE drives -# -# CONFIG_BLK_DEV_IDE_SATA is not set -# CONFIG_BLK_DEV_HD_IDE is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -CONFIG_BLK_DEV_IDECD=y -# CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -# CONFIG_IDE_TASKFILE_IO is not set - -# -# IDE chipset support/bugfixes -# -CONFIG_IDE_GENERIC=y -# CONFIG_BLK_DEV_CMD640 is not set -CONFIG_BLK_DEV_IDEPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -# CONFIG_BLK_DEV_OFFBOARD is not set -CONFIG_BLK_DEV_GENERIC=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_ATIIXP=y -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5520=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_NS87415 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_ARM is not set -# CONFIG_IDE_CHIPSETS is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_IVB is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_BLK_DEV_HD is not set - -# -# SCSI device support -# -CONFIG_SCSI=m -CONFIG_SCSI_PROC_FS=y - -# -# SCSI support type (disk, tape, CD-ROM) -# -CONFIG_BLK_DEV_SD=m -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=m - -# -# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs -# -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI Transport Attributes -# -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=m - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_3W_9XXX=m -# CONFIG_SCSI_7000FASST is not set -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=4 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_AIC7XXX_DEBUG_ENABLE is not set -CONFIG_AIC7XXX_DEBUG_MASK=0 -# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=4 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -# CONFIG_AIC79XX_ENABLE_RD_STRM is not set -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set -# CONFIG_SCSI_DPT_I2O is not set -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_SATA=y -CONFIG_SCSI_SATA_SVW=m -CONFIG_SCSI_ATA_PIIX=m -CONFIG_SCSI_SATA_NV=m -CONFIG_SCSI_SATA_PROMISE=m -CONFIG_SCSI_SATA_SX4=m -CONFIG_SCSI_SATA_SIL=m -CONFIG_SCSI_SATA_SIS=m -CONFIG_SCSI_SATA_VIA=m -CONFIG_SCSI_SATA_VITESSE=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_PIO is not set -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INIA100=m -# CONFIG_SCSI_NCR53C406A is not set -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -# CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PSI240I is not set -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -# CONFIG_SCSI_QLOGIC_FC is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_QLA2XXX=m -CONFIG_SCSI_QLA21XX=m -CONFIG_SCSI_QLA22XX=m -CONFIG_SCSI_QLA2300=m -CONFIG_SCSI_QLA2322=m -CONFIG_SCSI_QLA6312=m -CONFIG_SCSI_QLA6322=m -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC395x is not set -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -# CONFIG_SCSI_NSP32 is not set -# CONFIG_SCSI_DEBUG is not set - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_RAID6=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=m -# CONFIG_DM_CRYPT is not set -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m - -# -# IEEE 1394 (FireWire) support -# -CONFIG_IEEE1394=m - -# -# Subsystem Options -# -# CONFIG_IEEE1394_VERBOSEDEBUG is not set -CONFIG_IEEE1394_OUI_DB=y -# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set - -# -# Device Drivers -# -# CONFIG_IEEE1394_PCILYNX is not set -CONFIG_IEEE1394_OHCI1394=m - -# -# Protocol Drivers -# -# CONFIG_IEEE1394_VIDEO1394 is not set -CONFIG_IEEE1394_SBP2=m -# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set -# CONFIG_IEEE1394_ETH1394 
is not set -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_CONFIG=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_ACCEPT_QUEUES is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -CONFIG_ICMP_IPOD=y -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_TFTP=m -CONFIG_IP_NF_AMANDA=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_IPRANGE=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_SAME=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_CLASSIFY=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -# CONFIG_IP_NF_COMPAT_IPCHAINS is not set -# CONFIG_IP_NF_COMPAT_IPFWADM is not set -CONFIG_IP_NF_TARGET_NOTRACK=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_REALM=m - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CLK_JIFFIES=y -# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set -# CONFIG_NET_SCH_CLK_CPU is not set -# CONFIG_NET_SCH_CBQ is not set -CONFIG_NET_SCH_HTB=m -# CONFIG_NET_SCH_HFSC is not set -# CONFIG_NET_SCH_PRIO is not set -# CONFIG_NET_SCH_RED is not set -# CONFIG_NET_SCH_SFQ is not set -# CONFIG_NET_SCH_TEQL is not set -# CONFIG_NET_SCH_TBF is not set -# CONFIG_NET_SCH_GRED is not set -# CONFIG_NET_SCH_DSMARK is not set -# CONFIG_NET_SCH_NETEM is not set -# 
CONFIG_NET_SCH_INGRESS is not set -# CONFIG_NET_QOS is not set -CONFIG_NET_CLS=y -# CONFIG_NET_CLS_TCINDEX is not set -# CONFIG_NET_CLS_ROUTE4 is not set -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -# CONFIG_NET_CLS_U32 is not set -# CONFIG_NET_CLS_IND is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_TUX is not set -CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=m -# CONFIG_NET_SB1000 is not set - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=m -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -CONFIG_VORTEX=m -CONFIG_TYPHOON=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -CONFIG_ULTRA=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -# CONFIG_NI5010 is not set -CONFIG_NI52=m -CONFIG_NI65=m - -# -# Tulip family network device support -# -CONFIG_NET_TULIP=y -CONFIG_DE2104X=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -# CONFIG_TULIP_NAPI is not set -CONFIG_DE4X5=m -CONFIG_WINBOND_840=m -CONFIG_DM9102=m -# CONFIG_AT1700 is not set -CONFIG_DEPCA=m -CONFIG_HP100=m -# CONFIG_NET_ISA is not set -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_AMD8111E_NAPI=y -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_ADAPTEC_STARFIRE_NAPI=y -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_B44=m -CONFIG_FORCEDETH=m -CONFIG_CS89x0=m -CONFIG_DGRS=m -CONFIG_EEPRO100=m -# CONFIG_EEPRO100_PIO is not set -CONFIG_E100=m -CONFIG_E100_NAPI=y -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -CONFIG_8139TOO_PIO=y -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -# CONFIG_SUNDANCE_MMIO is not set -CONFIG_TLAN=m -CONFIG_VIA_RHINE=m -CONFIG_VIA_RHINE_MMIO=y -CONFIG_VIA_VELOCITY=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000_NAPI=y -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m - -# -# Ethernet (10000 Mbit) -# -CONFIG_IXGB=m -CONFIG_IXGB_NAPI=y -CONFIG_S2IO=m -CONFIG_S2IO_NAPI=y - -# -# Token Ring devices -# -# CONFIG_TR is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_NET_FC is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y - -# -# Userland interfaces -# -CONFIG_INPUT_MOUSEDEV=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input I/O drivers -# -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y -CONFIG_SERIO=y 
-CONFIG_SERIO_I8042=y -# CONFIG_SERIO_SERPORT is not set -# CONFIG_SERIO_CT82C710 is not set -# CONFIG_SERIO_PCIPS2 is not set - -# -# Input Device Drivers -# -CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set -# CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set -# CONFIG_KEYBOARD_NEWTON is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_INPORT is not set -# CONFIG_MOUSE_LOGIBM is not set -# CONFIG_MOUSE_PC110PAD is not set -# CONFIG_MOUSE_VSXXXAA is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Character devices -# -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_HW_CONSOLE=y -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_ACPI is not set -CONFIG_SERIAL_8250_NR_UARTS=4 -CONFIG_SERIAL_8250_EXTENDED=y -# CONFIG_SERIAL_8250_MANY_PORTS is not set -CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_MULTIPORT=y -CONFIG_SERIAL_8250_RSA=y - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -# CONFIG_CRASH is not set -CONFIG_LEGACY_PTY_COUNT=256 -# CONFIG_QIC02_TAPE is not set - -# -# IPMI -# -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -CONFIG_HW_RANDOM=m -CONFIG_NVRAM=m -CONFIG_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_ALI=m -CONFIG_AGP_ATI=m -CONFIG_AGP_AMD=m -CONFIG_AGP_AMD64=m -CONFIG_AGP_INTEL=m -CONFIG_AGP_INTEL_MCH=m -CONFIG_AGP_NVIDIA=m -CONFIG_AGP_SIS=m -CONFIG_AGP_SWORKS=m -CONFIG_AGP_VIA=m -CONFIG_AGP_EFFICEON=m -CONFIG_DRM=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_GAMMA=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m -CONFIG_MWAVE=m -# CONFIG_RAW_DRIVER is not set -# CONFIG_HPET is not set -CONFIG_HANGCHECK_TIMER=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_CHARDEV=m - -# -# I2C Algorithms -# -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_ALGOPCF=m - -# -# I2C Hardware Bus support -# -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI1563=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_AMD756=m -CONFIG_I2C_AMD8111=m -# CONFIG_I2C_ELEKTOR is not set -# CONFIG_I2C_I801 is not set -CONFIG_I2C_I810=m -CONFIG_I2C_ISA=m -CONFIG_I2C_NFORCE2=m -# CONFIG_I2C_PARPORT_LIGHT is not set -CONFIG_I2C_PIIX4=m -CONFIG_I2C_PROSAVAGE=m -CONFIG_I2C_SAVAGE4=m -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_SIS5595=m -CONFIG_I2C_SIS630=m -CONFIG_I2C_SIS96X=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m - -# -# Hardware Sensors Chip support -# -CONFIG_I2C_SENSOR=m -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM1031=m -CONFIG_SENSORS_ASB100=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCHER=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM77=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM83=m -CONFIG_SENSORS_LM85=m -CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83627HF=m - -# -# Other I2C Chip 
support -# -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m -CONFIG_SENSORS_RTC8564=m -# CONFIG_I2C_DEBUG_CORE is not set -# CONFIG_I2C_DEBUG_ALGO is not set -# CONFIG_I2C_DEBUG_BUS is not set -# CONFIG_I2C_DEBUG_CHIP is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Misc devices -# -CONFIG_IBM_ASM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# - -# -# Video Adapters -# -# CONFIG_VIDEO_BT848 is not set -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_CPIA=m -# CONFIG_VIDEO_CPIA_USB is not set -CONFIG_VIDEO_SAA5246A=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_DC30=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZORAN_LML33R10=m -CONFIG_VIDEO_SAA7134=m -CONFIG_VIDEO_MXB=m -CONFIG_VIDEO_DPC=m -CONFIG_VIDEO_HEXIUM_ORION=m -CONFIG_VIDEO_HEXIUM_GEMINI=m -CONFIG_VIDEO_CX88=m -CONFIG_VIDEO_OVCAMCHIP=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set -CONFIG_VIDEO_SAA7146=m -CONFIG_VIDEO_SAA7146_VV=m -CONFIG_VIDEO_VIDEOBUF=m -CONFIG_VIDEO_TUNER=m -CONFIG_VIDEO_BUF=m -CONFIG_VIDEO_BTCX=m -CONFIG_VIDEO_IR=m - -# -# Graphics support -# -CONFIG_FB=y -CONFIG_FB_CIRRUS=m -# CONFIG_FB_PM2 is not set -# CONFIG_FB_CYBER2000 is not set -# CONFIG_FB_ASILIANT is not set -# CONFIG_FB_IMSTT is not set -CONFIG_FB_VGA16=m -CONFIG_FB_VESA=y -CONFIG_VIDEO_SELECT=y -CONFIG_FB_HGA=m -CONFIG_FB_HGA_ACCEL=y -CONFIG_FB_RIVA=m -# CONFIG_FB_RIVA_I2C is not set -# CONFIG_FB_RIVA_DEBUG is not set -CONFIG_FB_I810=m -CONFIG_FB_I810_GTF=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G450=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -CONFIG_FB_MATROX_MULTIHEAD=y -# CONFIG_FB_RADEON_OLD is not set -CONFIG_FB_RADEON=m -CONFIG_FB_RADEON_I2C=y -# CONFIG_FB_RADEON_DEBUG is not set -CONFIG_FB_ATY128=m -CONFIG_FB_ATY=m -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_GX=y -# CONFIG_FB_ATY_XL_INIT is not set -# CONFIG_FB_SIS is not set -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_KYRO=m -CONFIG_FB_3DFX=m -CONFIG_FB_3DFX_ACCEL=y -CONFIG_FB_VOODOO1=m -CONFIG_FB_TRIDENT=m -CONFIG_FB_TRIDENT_ACCEL=y -# CONFIG_FB_VIRTUAL is not set - -# -# Console display driver support -# -CONFIG_VGA_CONSOLE=y -CONFIG_MDA_CONSOLE=m -CONFIG_DUMMY_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Logo configuration -# -# CONFIG_LOGO is not set - -# -# Sound -# -CONFIG_SOUND=m - -# -# Advanced Linux Sound Architecture -# -CONFIG_SND=m -CONFIG_SND_TIMER=m -CONFIG_SND_PCM=m -CONFIG_SND_HWDEP=m -CONFIG_SND_RAWMIDI=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_OSSEMUL=y -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_RTCTIMER=m -# CONFIG_SND_VERBOSE_PRINTK is not set -# CONFIG_SND_DEBUG is not set - -# -# Generic devices -# -CONFIG_SND_MPU401_UART=m -CONFIG_SND_OPL3_LIB=m -CONFIG_SND_OPL4_LIB=m -CONFIG_SND_VX_LIB=m -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_MTPAV=m -# 
CONFIG_SND_SERIAL_U16550 is not set -CONFIG_SND_MPU401=m - -# -# ISA devices -# -CONFIG_SND_AD1816A=m -CONFIG_SND_AD1848=m -CONFIG_SND_CS4231=m -CONFIG_SND_CS4232=m -CONFIG_SND_CS4236=m -CONFIG_SND_ES968=m -CONFIG_SND_ES1688=m -CONFIG_SND_ES18XX=m -CONFIG_SND_GUSCLASSIC=m -CONFIG_SND_GUSEXTREME=m -CONFIG_SND_GUSMAX=m -CONFIG_SND_INTERWAVE=m -CONFIG_SND_INTERWAVE_STB=m -CONFIG_SND_OPTI92X_AD1848=m -CONFIG_SND_OPTI92X_CS4231=m -CONFIG_SND_OPTI93X=m -CONFIG_SND_SB8=m -CONFIG_SND_SB16=m -CONFIG_SND_SBAWE=m -CONFIG_SND_SB16_CSP=y -# CONFIG_SND_WAVEFRONT is not set -CONFIG_SND_ALS100=m -CONFIG_SND_AZT2320=m -CONFIG_SND_CMI8330=m -CONFIG_SND_DT019X=m -CONFIG_SND_OPL3SA2=m -CONFIG_SND_SGALAXY=m -CONFIG_SND_SSCAPE=m - -# -# PCI devices -# -CONFIG_SND_AC97_CODEC=m -CONFIG_SND_ALI5451=m -CONFIG_SND_ATIIXP=m -CONFIG_SND_AU8810=m -CONFIG_SND_AU8820=m -CONFIG_SND_AU8830=m -CONFIG_SND_AZT3328=m -CONFIG_SND_BT87X=m -CONFIG_SND_CS46XX=m -CONFIG_SND_CS46XX_NEW_DSP=y -CONFIG_SND_CS4281=m -CONFIG_SND_EMU10K1=m -CONFIG_SND_KORG1212=m -CONFIG_SND_MIXART=m -CONFIG_SND_NM256=m -CONFIG_SND_RME32=m -CONFIG_SND_RME96=m -CONFIG_SND_RME9652=m -CONFIG_SND_HDSP=m -CONFIG_SND_TRIDENT=m -CONFIG_SND_YMFPCI=m -CONFIG_SND_ALS4000=m -CONFIG_SND_CMIPCI=m -CONFIG_SND_ENS1370=m -CONFIG_SND_ENS1371=m -CONFIG_SND_ES1938=m -CONFIG_SND_ES1968=m -CONFIG_SND_MAESTRO3=m -CONFIG_SND_FM801=m -CONFIG_SND_FM801_TEA575X=m -CONFIG_SND_ICE1712=m -CONFIG_SND_ICE1724=m -CONFIG_SND_INTEL8X0=m -CONFIG_SND_INTEL8X0M=m -CONFIG_SND_SONICVIBES=m -CONFIG_SND_VIA82XX=m -CONFIG_SND_VX222=m - -# -# ALSA USB devices -# -# CONFIG_SND_USB_AUDIO is not set - -# -# Open Sound System -# -# CONFIG_SOUND_PRIME is not set - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set - -# -# Miscellaneous USB options -# -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -# CONFIG_USB_DYNAMIC_MINORS is not set - -# -# USB Host Controller Drivers -# -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_SPLIT_ISO=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_OHCI_HCD=m -CONFIG_USB_UHCI_HCD=m - -# -# USB Device Class drivers -# -# CONFIG_USB_AUDIO is not set -# CONFIG_USB_BLUETOOTH_TTY is not set -# CONFIG_USB_MIDI is not set -# CONFIG_USB_ACM is not set -# CONFIG_USB_PRINTER is not set -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_RW_DETECT=y -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y - -# -# USB Human Interface Devices (HID) -# -# CONFIG_USB_HID is not set - -# -# USB HID Boot Protocol drivers -# -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# CONFIG_USB_AIPTEK is not set -# CONFIG_USB_WACOM is not set -# CONFIG_USB_KBTAB is not set -# CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_EGALAX is not set -# CONFIG_USB_XPAD is not set -# CONFIG_USB_ATI_REMOTE is not set - -# -# USB Imaging devices -# -# CONFIG_USB_MDC800 is not set -# CONFIG_USB_MICROTEK is not set -# CONFIG_USB_HPUSBSCSI is not set - -# -# USB Multimedia devices -# -# CONFIG_USB_DABUSB is not set -# CONFIG_USB_VICAM is not set -# CONFIG_USB_DSBR is not set -# CONFIG_USB_IBMCAM is not set -# CONFIG_USB_KONICAWC is not set -# CONFIG_USB_OV511 is not set -# CONFIG_USB_PWC is not set -# CONFIG_USB_SE401 is not set -# CONFIG_USB_SN9C102 is not set -# CONFIG_USB_STV680 is not set -# CONFIG_USB_W9968CF is not set - -# -# USB Network adaptors -# 
-CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_USBNET=m - -# -# USB Host-to-Host Cables -# -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_BELKIN=y -CONFIG_USB_GENESYS=y -CONFIG_USB_NET1080=y -CONFIG_USB_PL2301=y - -# -# Intelligent USB Devices/Gadgets -# -CONFIG_USB_ARMLINUX=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_ZAURUS=y -CONFIG_USB_CDCETHER=y - -# -# USB Network Adapters -# -CONFIG_USB_AX8817X=y - -# -# USB port drivers -# - -# -# USB Serial Converter support -# -# CONFIG_USB_SERIAL is not set - -# -# USB Miscellaneous drivers -# -# CONFIG_USB_EMI62 is not set -# CONFIG_USB_EMI26 is not set -# CONFIG_USB_TIGL is not set -# CONFIG_USB_AUERSWALD is not set -# CONFIG_USB_RIO500 is not set -# CONFIG_USB_LEGOTOWER is not set -# CONFIG_USB_LCD is not set -# CONFIG_USB_LED is not set -# CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGETSERVO is not set -# CONFIG_USB_TEST is not set - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -CONFIG_QUOTACTL=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m - -# -# CD-ROM/DVD Filesystems -# -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_ZISOFS_FS=y -CONFIG_UDF_FS=m -CONFIG_UDF_NLS=y - -# -# DOS/FAT/NT Filesystems -# -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_CODEPAGE=437 -CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -CONFIG_DEVPTS_FS_XATTR=y -CONFIG_DEVPTS_FS_SECURITY=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_HUGETLB_PAGE=y -CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -# CONFIG_NFS_FS is not set -# CONFIG_NFSD is not set -# CONFIG_EXPORTFS is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -# CONFIG_NLS_CODEPAGE_850 is not set -# CONFIG_NLS_CODEPAGE_852 is not set -# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# 
CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -# CONFIG_NLS_ASCII is not set -CONFIG_NLS_ISO8859_1=m -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -CONFIG_NLS_UTF8=m - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -CONFIG_CRASH_DUMP=y -CONFIG_CRASH_DUMP_BLOCKDEV=y -# CONFIG_CRASH_DUMP_NETDEV is not set -# CONFIG_CRASH_DUMP_MEMDEV is not set -# CONFIG_CRASH_DUMP_COMPRESS_RLE is not set -# CONFIG_CRASH_DUMP_COMPRESS_GZIP is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_EARLY_PRINTK=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_STACK_USAGE is not set -CONFIG_DEBUG_SLAB=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_SPINLOCK=y -# CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_DEBUG_HIGHMEM=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_FRAME_POINTER is not set - -# -# Linux VServer -# -CONFIG_VSERVER_LEGACY=y -# CONFIG_VSERVER_PROC_SECURE is not set -# CONFIG_VSERVER_HARDCPU is not set -# CONFIG_INOXID_NONE is not set -# CONFIG_INOXID_UID16 is not set -# CONFIG_INOXID_GID16 is not set -CONFIG_INOXID_UGID24=y -# CONFIG_INOXID_INTERN is not set -# CONFIG_INOXID_RUNTIME is not set -CONFIG_VSERVER_DEBUG=y - -# -# Security options -# -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Library routines -# -CONFIG_CRC_CCITT=m -CONFIG_CRC32=y -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=y -CONFIG_X86_BIOS_REBOOT=y -CONFIG_PC=y diff --git a/configs/kernel-2.6.8-i686-planetlab.config b/configs/kernel-2.6.8-i686-planetlab.config index ea66387e5..7d9936db6 100644 --- a/configs/kernel-2.6.8-i686-planetlab.config +++ b/configs/kernel-2.6.8-i686-planetlab.config @@ -20,27 +20,18 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y -# CONFIG_BSD_PROCESS_ACCT_V3 is not set # # Class Based Kernel Resource Management # -CONFIG_CKRM=y -CONFIG_RCFS_FS=y -CONFIG_CKRM_TYPE_TASKCLASS=y -CONFIG_CKRM_RES_NUMTASKS=y -CONFIG_CKRM_CPU_SCHEDULE=y -CONFIG_CKRM_RES_BLKIO=y -# CONFIG_CKRM_RES_MEM is not set -# CONFIG_CKRM_TYPE_SOCKETCLASS is not set -CONFIG_CKRM_RBCE=y +# CONFIG_CKRM is not set +# CONFIG_BSD_PROCESS_ACCT_V3 is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set CONFIG_LOG_BUF_SHIFT=17 # CONFIG_HOTPLUG is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_OOM_PANIC=y # CONFIG_EMBEDDED is not set # CONFIG_DELAY_ACCT is not set CONFIG_KALLSYMS=y @@ -579,13 +570,8 @@ CONFIG_NET_SCH_HTB=m # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_QOS is not set -CONFIG_NET_CLS=y -# CONFIG_NET_CLS_TCINDEX is not set -# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS is not set CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -# CONFIG_NET_CLS_U32 is not set -# CONFIG_NET_CLS_IND is not set # # Network testing @@ -849,7 +835,7 @@ CONFIG_UNIX98_PTYS=y # 
CONFIG_DRM is not set # CONFIG_MWAVE is not set # CONFIG_RAW_DRIVER is not set -CONFIG_HANGCHECK_TIMER=y +CONFIG_HANGCHECK_TIMER=m # # I2C support @@ -1053,7 +1039,12 @@ CONFIG_NLS_UTF8=m # # Kernel hacking # -# CONFIG_CRASH_DUMP is not set +CONFIG_CRASH_DUMP=y +CONFIG_CRASH_DUMP_BLOCKDEV=y +# CONFIG_CRASH_DUMP_NETDEV is not set +# CONFIG_CRASH_DUMP_MEMDEV is not set +# CONFIG_CRASH_DUMP_COMPRESS_RLE is not set +# CONFIG_CRASH_DUMP_COMPRESS_GZIP is not set CONFIG_DEBUG_KERNEL=y CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y @@ -1072,14 +1063,14 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_VSERVER_LEGACY=y # CONFIG_VSERVER_PROC_SECURE is not set -# CONFIG_VSERVER_HARDCPU is not set +CONFIG_VSERVER_HARDCPU=y # CONFIG_INOXID_NONE is not set # CONFIG_INOXID_UID16 is not set # CONFIG_INOXID_GID16 is not set CONFIG_INOXID_UGID24=y # CONFIG_INOXID_INTERN is not set # CONFIG_INOXID_RUNTIME is not set -# CONFIG_VSERVER_DEBUG is not set +CONFIG_VSERVER_DEBUG=y # # Security options diff --git a/drivers/block/Makefile b/drivers/block/Makefile index c66498bad..2654b5b76 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -13,13 +13,12 @@ # kblockd threads # -obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o ckrm-iostub.o +obj-y := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_AS) += as-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o -obj-$(CONFIG_CKRM_RES_BLKIO) += ckrm-io.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o obj-$(CONFIG_BLK_DEV_FD98) += floppy98.o diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index 7b45a805d..068f4eae0 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -6,18 +6,6 @@ * Based on ideas from a previously unfinished io * scheduler (round robin per-process disk scheduling) and Andrea Arcangeli. * - * IO priorities are supported, from 0% to 100% in 5% increments. Both of - * those values have special meaning - 0% class is allowed to do io if - * noone else wants to use the disk. 100% is considered real-time io, and - * always get priority. Default process io rate is 95%. In absence of other - * io, a class may consume 100% disk bandwidth regardless. Withing a class, - * bandwidth is distributed equally among the citizens. 
- * - * TODO: - * - cfq_select_requests() needs some work for 5-95% io - * - barriers not supported - * - export grace periods in ms, not jiffies - * * Copyright (C) 2003 Jens Axboe */ #include @@ -33,186 +21,78 @@ #include #include #include -#include - -#if IOPRIO_NR > BITS_PER_LONG -#error Cannot support this many io priority levels -#endif - -#define LIMIT_DEBUG 1 /* * tunables */ -static int cfq_quantum = 6; -static int cfq_quantum_io = 256; -static int cfq_idle_quantum = 1; -static int cfq_idle_quantum_io = 64; -static int cfq_queued = 4; -static int cfq_grace_rt = HZ / 100 ?: 1; -static int cfq_grace_idle = HZ / 10; +static int cfq_quantum = 4; +static int cfq_queued = 8; #define CFQ_QHASH_SHIFT 6 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT) -#define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash) +#define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash) #define CFQ_MHASH_SHIFT 8 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3) #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT) #define CFQ_MHASH_FN(sec) (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT)) +#define ON_MHASH(crq) !list_empty(&(crq)->hash) #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) +#define list_entry_hash(ptr) list_entry((ptr), struct cfq_rq, hash) #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) -#define list_entry_prio(ptr) list_entry((ptr), struct cfq_rq, prio_list) - -#define cfq_account_io(crq) \ - ((crq)->ioprio != IOPRIO_IDLE && (crq)->ioprio != IOPRIO_RT) - -/* define to be 50 ms for now; make tunable later */ -#define CFQ_EPOCH 50000 -/* Needs to be made tunable right away, in MiB/s */ -#define CFQ_DISKBW 10 -/* Temporary global limit, as percent of available b/w, for each "class" */ -#define CFQ_TEMPLIM 10 - -/* - * defines how we distribute bandwidth (can be tgid, uid, etc) - */ - -/* FIXME: change hash_key to be sizeof(void *) rather than sizeof(int) - * otherwise the cast of cki_tsk_icls will not work reliably on 64-bit arches. - * OR, change cki_tsk_icls to return ints (will need another id space to be - * managed) - */ - -#if defined(CONFIG_CKRM_RES_BLKIO) || defined(CONFIG_CKRM_RES_BLKIO_MODULE) -extern inline void *cki_hash_key(struct task_struct *tsk); -extern inline int cki_ioprio(struct task_struct *tsk); -#define cfq_hash_key(current) ((int)cki_hash_key((current))) -#define cfq_ioprio(current) (cki_ioprio((current))) - -#else -#define cfq_hash_key(current) ((current)->tgid) - -/* - * move to io_context - */ -#define cfq_ioprio(current) ((current)->ioprio) -#endif -#define CFQ_WAIT_RT 0 -#define CFQ_WAIT_NORM 1 +#define RQ_DATA(rq) ((struct cfq_rq *) (rq)->elevator_private) static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static mempool_t *cfq_mpool; -/* - * defines an io priority level - */ -struct io_prio_data { - struct list_head rr_list; - int busy_queues; - int busy_rq; - unsigned long busy_sectors; - - /* requests, sectors and queues - * added(in),dispatched/deleted(out) - * at this priority level. 
- */ - atomic_t cum_rq_in,cum_rq_out; - atomic_t cum_sectors_in,cum_sectors_out; - atomic_t cum_queues_in,cum_queues_out; - -#ifdef LIMIT_DEBUG - int nskip; - unsigned long navsec; - unsigned long csectorate; - unsigned long lsectorate; -#endif - - struct list_head prio_list; - int last_rq; - int last_sectors; -}; - -/* - * per-request queue structure - */ struct cfq_data { struct list_head rr_list; struct list_head *dispatch; - struct hlist_head *cfq_hash; - struct hlist_head *crq_hash; - mempool_t *crq_pool; + struct list_head *cfq_hash; - struct io_prio_data cid[IOPRIO_NR]; + struct list_head *crq_hash; - /* - * total number of busy queues and requests - */ - int busy_rq; - int busy_queues; - unsigned long busy_sectors; + unsigned int busy_queues; + unsigned int max_queued; + mempool_t *crq_pool; request_queue_t *queue; - unsigned long rq_starved_mask; - - /* - * grace period handling - */ - struct timer_list timer; - unsigned long wait_end; - unsigned long flags; - struct work_struct work; /* * tunables */ unsigned int cfq_quantum; - unsigned int cfq_quantum_io; - unsigned int cfq_idle_quantum; - unsigned int cfq_idle_quantum_io; unsigned int cfq_queued; - unsigned int cfq_grace_rt; - unsigned int cfq_grace_idle; - - unsigned long cfq_epoch; /* duration for limit enforcement */ - unsigned long cfq_epochsectors; /* max sectors dispatchable/epoch */ }; -/* - * per-class structure - */ struct cfq_queue { + struct list_head cfq_hash; struct list_head cfq_list; - struct hlist_node cfq_hash; - int hash_key; struct rb_root sort_list; + int pid; int queued[2]; - int ioprio; - - unsigned long avsec; /* avg sectors dispatched/epoch */ - unsigned long long lastime; /* timestamp of last request served */ - unsigned long sectorate; /* limit for sectors served/epoch */ - int skipped; /* queue skipped at last dispatch ? */ +#if 0 + /* + * with a simple addition like this, we can do io priorities. almost. + * does need a split request free list, too. + */ + int io_prio +#endif }; -/* - * per-request structure - */ struct cfq_rq { - struct cfq_queue *cfq_queue; struct rb_node rb_node; - struct hlist_node hash; sector_t rb_key; struct request *request; - struct list_head prio_list; - unsigned long nr_sectors; - int ioprio; + + struct cfq_queue *cfq_queue; + + struct list_head hash; }; static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq); @@ -223,13 +103,18 @@ static void cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * lots of deadline iosched dupes, can be abstracted later... 
*/ +static inline void __cfq_del_crq_hash(struct cfq_rq *crq) +{ + list_del_init(&crq->hash); +} + static inline void cfq_del_crq_hash(struct cfq_rq *crq) { - hlist_del_init(&crq->hash); + if (ON_MHASH(crq)) + __cfq_del_crq_hash(crq); } -static inline void -cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) +static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) { cfq_del_crq_hash(crq); @@ -240,26 +125,27 @@ cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) { struct request *rq = crq->request; - const int hash_idx = CFQ_MHASH_FN(rq_hash_key(rq)); - BUG_ON(!hlist_unhashed(&crq->hash)); - - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); + BUG_ON(ON_MHASH(crq)); + + list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]); } static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) { - struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; - struct hlist_node *entry, *next; + struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)]; + struct list_head *entry, *next = hash_list->next; - hlist_for_each_safe(entry, next, hash_list) { + while ((entry = next) != hash_list) { struct cfq_rq *crq = list_entry_hash(entry); struct request *__rq = crq->request; - BUG_ON(hlist_unhashed(&crq->hash)); + next = entry->next; + + BUG_ON(!ON_MHASH(crq)); if (!rq_mergeable(__rq)) { - cfq_del_crq_hash(crq); + __cfq_del_crq_hash(crq); continue; } @@ -273,27 +159,20 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) /* * rb tree support functions */ -#define RB_EMPTY(node) ((node)->rb_node == NULL) +#define RB_NONE (2) +#define RB_EMPTY(node) ((node)->rb_node == NULL) +#define RB_CLEAR(node) ((node)->rb_color = RB_NONE) +#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) +#define ON_RB(node) ((node)->rb_color != RB_NONE) #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) #define rq_rb_key(rq) (rq)->sector -static void -cfq_del_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq) +static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) { - if (crq->cfq_queue) { - crq->cfq_queue = NULL; - - if (cfq_account_io(crq)) { - cfqd->busy_rq--; - cfqd->busy_sectors -= crq->nr_sectors; - cfqd->cid[crq->ioprio].busy_rq--; - cfqd->cid[crq->ioprio].busy_sectors -= crq->nr_sectors; - } - atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_out)); - atomic_add(crq->nr_sectors, - &(cfqd->cid[crq->ioprio].cum_sectors_out)); + if (ON_RB(&crq->rb_node)) { cfqq->queued[rq_data_dir(crq->request)]--; rb_erase(&crq->rb_node, &cfqq->sort_list); + crq->cfq_queue = NULL; } } @@ -326,22 +205,12 @@ cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq) struct request *rq = crq->request; struct cfq_rq *__alias; - + crq->rb_key = rq_rb_key(rq); cfqq->queued[rq_data_dir(rq)]++; - if (cfq_account_io(crq)) { - cfqd->busy_rq++; - cfqd->busy_sectors += crq->nr_sectors; - cfqd->cid[crq->ioprio].busy_rq++; - cfqd->cid[crq->ioprio].busy_sectors += crq->nr_sectors; - } - atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_in)); - atomic_add(crq->nr_sectors, - &(cfqd->cid[crq->ioprio].cum_sectors_in)); retry: __alias = __cfq_add_crq_rb(cfqq, crq); if (!__alias) { rb_insert_color(&crq->rb_node, &cfqq->sort_list); - crq->rb_key = rq_rb_key(rq); crq->cfq_queue = cfqq; return; } @@ -353,7 +222,7 @@ retry: static struct request * cfq_find_rq_rb(struct cfq_data *cfqd, 
sector_t sector) { - struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current)); + struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid); struct rb_node *n; if (!cfqq) @@ -378,31 +247,16 @@ out: static void cfq_remove_request(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator.elevator_data; - struct cfq_rq *crq = RQ_ELV_DATA(rq); + struct cfq_rq *crq = RQ_DATA(rq); if (crq) { + struct cfq_queue *cfqq = crq->cfq_queue; cfq_remove_merge_hints(q, crq); - list_del_init(&crq->prio_list); list_del_init(&rq->queuelist); - /* - * set a grace period timer to allow realtime io to make real - * progress, if we release an rt request. for normal request, - * set timer so idle io doesn't interfere with other io - */ - if (crq->ioprio == IOPRIO_RT) { - set_bit(CFQ_WAIT_RT, &cfqd->flags); - cfqd->wait_end = jiffies + cfqd->cfq_grace_rt; - } else if (crq->ioprio != IOPRIO_IDLE) { - set_bit(CFQ_WAIT_NORM, &cfqd->flags); - cfqd->wait_end = jiffies + cfqd->cfq_grace_idle; - } - - if (crq->cfq_queue) { - struct cfq_queue *cfqq = crq->cfq_queue; - - cfq_del_crq_rb(cfqd, cfqq, crq); + if (cfqq) { + cfq_del_crq_rb(cfqq, crq); if (RB_EMPTY(&cfqq->sort_list)) cfq_put_queue(cfqd, cfqq); @@ -452,26 +306,18 @@ out_insert: static void cfq_merged_request(request_queue_t *q, struct request *req) { struct cfq_data *cfqd = q->elevator.elevator_data; - struct cfq_rq *crq = RQ_ELV_DATA(req); - int tmp; + struct cfq_rq *crq = RQ_DATA(req); cfq_del_crq_hash(crq); cfq_add_crq_hash(cfqd, crq); - if (crq->cfq_queue && (rq_rb_key(req) != crq->rb_key)) { + if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) { struct cfq_queue *cfqq = crq->cfq_queue; - cfq_del_crq_rb(cfqd, cfqq, crq); + cfq_del_crq_rb(cfqq, crq); cfq_add_crq_rb(cfqd, cfqq, crq); } - tmp = req->hard_nr_sectors - crq->nr_sectors; - cfqd->busy_sectors += tmp; - cfqd->cid[crq->ioprio].busy_sectors += tmp; - atomic_add(tmp,&(cfqd->cid[crq->ioprio].cum_sectors_in)); - - crq->nr_sectors = req->hard_nr_sectors; - q->last_merge = req; } @@ -483,9 +329,6 @@ cfq_merged_requests(request_queue_t *q, struct request *req, cfq_remove_request(q, next); } -/* - * sort into dispatch list, in optimal ascending order - */ static void cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq) @@ -493,7 +336,7 @@ cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct list_head *head = cfqd->dispatch, *entry = head; struct request *__rq; - cfq_del_crq_rb(cfqd, cfqq, crq); + cfq_del_crq_rb(cfqq, crq); cfq_remove_merge_hints(cfqd->queue, crq); if (!list_empty(head)) { @@ -516,219 +359,47 @@ link: list_add_tail(&crq->request->queuelist, entry); } -/* - * remove from io scheduler core and put on dispatch list for service - */ -static inline int +static inline void __cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_rq *crq; - unsigned long long ts, gap; - unsigned long newavsec; - - crq = rb_entry_crq(rb_first(&cfqq->sort_list)); - -#if 1 - /* Determine if queue should be skipped for being overshare */ - ts = sched_clock(); - gap = ts - cfqq->lastime; -#ifdef LIMIT_DEBUG - cfqq->sectorate = (cfqd->cfq_epochsectors - * CFQ_TEMPLIM)/100; - -#endif - if ((gap >= cfqd->cfq_epoch) || (gap < 0)) { - cfqq->avsec = crq->nr_sectors ; - cfqq->lastime = ts; - } else { - u64 tmp; - /* Age old average and accumalate request to be served */ - -// tmp = (u64) (cfqq->avsec * gap) ; -// do_div(tmp, cfqd->cfq_epoch); - newavsec = (unsigned long)(cfqq->avsec 
>> 1) + crq->nr_sectors; -// if (crq->ioprio >= 0 && crq->ioprio <= 20) -// cfqd->cid[crq->ioprio].lsectorate = newavsec; -// atomic_set(&(cfqd->cid[crq->ioprio].lsectorate), -// newavsec); - - if ((newavsec < cfqq->sectorate) || cfqq->skipped) { - cfqq->avsec = newavsec ; - cfqq->lastime = ts; - cfqq->skipped = 0; - } else { - /* queue over share ; skip once */ - cfqq->skipped = 1; -#ifdef LIMIT_DEBUG -// atomic_inc(&(cfqd->cid[crq->ioprio].nskip)); -// if (crq->ioprio >= 0 && crq->ioprio <= 20) -// cfqd->cid[crq->ioprio].nskip++; -#endif - return 0; - } - } -#endif - -#ifdef LIMIT_DEBUG -// if (crq->ioprio >= 0 && crq->ioprio <= 20) { -// cfqd->cid[crq->ioprio].navsec = cfqq->avsec; -// cfqd->cid[crq->ioprio].csectorate = cfqq->sectorate; -// } + struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list)); -// atomic_set(&(cfqd->cid[crq->ioprio].navsec),cfqq->avsec); -// atomic_set(&(cfqd->cid[crq->ioprio].csectorate),cfqq->sectorate); -#endif cfq_dispatch_sort(cfqd, cfqq, crq); - - /* - * technically, for IOPRIO_RT we don't need to add it to the list. - */ - list_add_tail(&crq->prio_list, &cfqd->cid[cfqq->ioprio].prio_list); - return crq->nr_sectors; } -static int -cfq_dispatch_requests(request_queue_t *q, int prio, int max_rq, int max_sectors) +static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd) { - struct cfq_data *cfqd = q->elevator.elevator_data; - struct list_head *plist = &cfqd->cid[prio].rr_list; - struct list_head *entry, *nxt; - int q_rq, q_io; - int ret ; + struct cfq_queue *cfqq; + struct list_head *entry, *tmp; + int ret, queued, good_queues; - /* - * for each queue at this prio level, dispatch a request - */ - q_rq = q_io = 0; - list_for_each_safe(entry, nxt, plist) { - struct cfq_queue *cfqq = list_entry_cfqq(entry); + if (list_empty(&cfqd->rr_list)) + return 0; + + queued = ret = 0; +restart: + good_queues = 0; + list_for_each_safe(entry, tmp, &cfqd->rr_list) { + cfqq = list_entry_cfqq(cfqd->rr_list.next); BUG_ON(RB_EMPTY(&cfqq->sort_list)); - ret = __cfq_dispatch_requests(q, cfqd, cfqq); - if (ret <= 0) { - continue; /* skip queue */ - /* can optimize more by moving q to end of plist ? */ - } - q_io += ret ; - q_rq++ ; + __cfq_dispatch_requests(q, cfqd, cfqq); if (RB_EMPTY(&cfqq->sort_list)) cfq_put_queue(cfqd, cfqq); - /* - * if we hit the queue limit, put the string of serviced - * queues at the back of the pending list - */ - if (q_io >= max_sectors || q_rq >= max_rq) { - struct list_head *prv = nxt->prev; - - if (prv != plist) { - list_del(plist); - list_add(plist, prv); - } - break; - } - } - - cfqd->cid[prio].last_rq = q_rq; - cfqd->cid[prio].last_sectors = q_io; - return q_rq; -} - -/* - * try to move some requests to the dispatch list. return 0 on success - */ -static int cfq_select_requests(request_queue_t *q, struct cfq_data *cfqd) -{ - int queued, busy_rq, busy_sectors, i; - - /* - * if there's any realtime io, only schedule that - */ - if (cfq_dispatch_requests(q, IOPRIO_RT, cfqd->cfq_quantum, cfqd->cfq_quantum_io)) - return 1; - - /* - * if RT io was last serviced and grace time hasn't expired, - * arm the timer to restart queueing if no other RT io has been - * submitted in the mean time - */ - if (test_bit(CFQ_WAIT_RT, &cfqd->flags)) { - if (time_before(jiffies, cfqd->wait_end)) { - mod_timer(&cfqd->timer, cfqd->wait_end); - return 0; - } - clear_bit(CFQ_WAIT_RT, &cfqd->flags); - } - - /* - * for each priority level, calculate number of requests we - * are allowed to put into service. 
- */ - queued = 0; - busy_rq = cfqd->busy_rq; - busy_sectors = cfqd->busy_sectors; - for (i = IOPRIO_RT - 1; i > IOPRIO_IDLE; i--) { - const int o_rq = busy_rq - cfqd->cid[i].busy_rq; - const int o_sectors = busy_sectors - cfqd->cid[i].busy_sectors; - int q_rq = cfqd->cfq_quantum * (i + 1) / IOPRIO_NR; - int q_io = cfqd->cfq_quantum_io * (i + 1) / IOPRIO_NR; - - /* - * no need to keep iterating the list, if there are no - * requests pending anymore - */ - if (!cfqd->busy_rq) - break; - - /* - * find out how many requests and sectors we are allowed to - * service - */ - if (o_rq) - q_rq = o_sectors * (i + 1) / IOPRIO_NR; - if (q_rq > cfqd->cfq_quantum) - q_rq = cfqd->cfq_quantum; - - if (o_sectors) - q_io = o_sectors * (i + 1) / IOPRIO_NR; - if (q_io > cfqd->cfq_quantum_io) - q_io = cfqd->cfq_quantum_io; - - /* - * average with last dispatched for fairness - */ - if (cfqd->cid[i].last_rq != -1) - q_rq = (cfqd->cid[i].last_rq + q_rq) / 2; - if (cfqd->cid[i].last_sectors != -1) - q_io = (cfqd->cid[i].last_sectors + q_io) / 2; - - queued += cfq_dispatch_requests(q, i, q_rq, q_io); - } - - if (queued) - return 1; + else + good_queues++; - /* - * only allow dispatch of idle io, if the queue has been idle from - * servicing RT or normal io for the grace period - */ - if (test_bit(CFQ_WAIT_NORM, &cfqd->flags)) { - if (time_before(jiffies, cfqd->wait_end)) { - mod_timer(&cfqd->timer, cfqd->wait_end); - return 0; - } - clear_bit(CFQ_WAIT_NORM, &cfqd->flags); + queued++; + ret = 1; } - /* - * if we found nothing to do, allow idle io to be serviced - */ - if (cfq_dispatch_requests(q, IOPRIO_IDLE, cfqd->cfq_idle_quantum, cfqd->cfq_idle_quantum_io)) - return 1; + if ((queued < cfqd->cfq_quantum) && good_queues) + goto restart; - return 0; + return ret; } static struct request *cfq_next_request(request_queue_t *q) @@ -739,82 +410,61 @@ static struct request *cfq_next_request(request_queue_t *q) if (!list_empty(cfqd->dispatch)) { struct cfq_rq *crq; dispatch: - /* - * end grace period, we are servicing a request - */ - del_timer(&cfqd->timer); - clear_bit(CFQ_WAIT_RT, &cfqd->flags); - clear_bit(CFQ_WAIT_NORM, &cfqd->flags); - - BUG_ON(list_empty(cfqd->dispatch)); rq = list_entry_rq(cfqd->dispatch->next); - BUG_ON(q->last_merge == rq); - crq = RQ_ELV_DATA(rq); - if (crq) { - BUG_ON(!hlist_unhashed(&crq->hash)); - list_del_init(&crq->prio_list); - } + crq = RQ_DATA(rq); + if (crq) + cfq_remove_merge_hints(q, crq); return rq; } - /* - * we moved requests to dispatch list, go back end serve one - */ - if (cfq_select_requests(q, cfqd)) + if (cfq_dispatch_requests(q, cfqd)) goto dispatch; return NULL; } static inline struct cfq_queue * -__cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey, const int hashval) +__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval) { - struct hlist_head *hash_list = &cfqd->cfq_hash[hashval]; - struct hlist_node *entry; + struct list_head *hash_list = &cfqd->cfq_hash[hashval]; + struct list_head *entry; - hlist_for_each(entry, hash_list) { + list_for_each(entry, hash_list) { struct cfq_queue *__cfqq = list_entry_qhash(entry); - if (__cfqq->hash_key == hashkey) + if (__cfqq->pid == pid) return __cfqq; } return NULL; } - -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey) +static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid) { - const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT); + const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT); - return __cfq_find_cfq_hash(cfqd, hashkey, hashval); + return 
__cfq_find_cfq_hash(cfqd, pid, hashval); } static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { cfqd->busy_queues--; - WARN_ON(cfqd->busy_queues < 0); - - cfqd->cid[cfqq->ioprio].busy_queues--; - WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues < 0); - atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out)); - list_del(&cfqq->cfq_list); - hlist_del(&cfqq->cfq_hash); + list_del(&cfqq->cfq_hash); mempool_free(cfqq, cfq_mpool); } -static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int hashkey, +static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int pid, int gfp_mask) { - const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT); + const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT); struct cfq_queue *cfqq, *new_cfqq = NULL; request_queue_t *q = cfqd->queue; retry: - cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval); + cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval); if (!cfqq) { if (new_cfqq) { @@ -828,15 +478,13 @@ retry: } else return NULL; - memset(cfqq, 0, sizeof(*cfqq)); - INIT_HLIST_NODE(&cfqq->cfq_hash); + INIT_LIST_HEAD(&cfqq->cfq_hash); INIT_LIST_HEAD(&cfqq->cfq_list); - cfqq->hash_key = cfq_hash_key(current); - cfqq->ioprio = cfq_ioprio(current); - cfqq->avsec = 0 ; - cfqq->lastime = sched_clock(); - cfqq->sectorate = (cfqd->cfq_epochsectors * CFQ_TEMPLIM)/100; - hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); + RB_CLEAR_ROOT(&cfqq->sort_list); + + cfqq->pid = pid; + cfqq->queued[0] = cfqq->queued[1] = 0; + list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]); } if (new_cfqq) @@ -845,63 +493,31 @@ retry: return cfqq; } -static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int hashkey, +static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid, int gfp_mask) { request_queue_t *q = cfqd->queue; struct cfq_queue *cfqq; spin_lock_irq(q->queue_lock); - cfqq = __cfq_get_queue(cfqd, hashkey, gfp_mask); + cfqq = __cfq_get_queue(cfqd, pid, gfp_mask); spin_unlock_irq(q->queue_lock); return cfqq; } -static void -__cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq) +static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq) { - const int prio = crq->ioprio; struct cfq_queue *cfqq; - cfqq = __cfq_get_queue(cfqd, cfq_hash_key(current), GFP_ATOMIC); + cfqq = __cfq_get_queue(cfqd, current->tgid, GFP_ATOMIC); if (cfqq) { - - /* - * not too good... 
- */ - if (prio > cfqq->ioprio) { - printk("prio hash collision %d %d\n", - prio, cfqq->ioprio); - if (!list_empty(&cfqq->cfq_list)) { - cfqd->cid[cfqq->ioprio].busy_queues--; - WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues<0); - atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out)); - cfqd->cid[prio].busy_queues++; - atomic_inc(&(cfqd->cid[prio].cum_queues_in)); - list_move_tail(&cfqq->cfq_list, - &cfqd->cid[prio].rr_list); - } - cfqq->ioprio = prio; - } - cfq_add_crq_rb(cfqd, cfqq, crq); if (list_empty(&cfqq->cfq_list)) { - list_add_tail(&cfqq->cfq_list, - &cfqd->cid[prio].rr_list); - cfqd->cid[prio].busy_queues++; - atomic_inc(&(cfqd->cid[prio].cum_queues_in)); + list_add(&cfqq->cfq_list, &cfqd->rr_list); cfqd->busy_queues++; } - - if (rq_mergeable(crq->request)) { - cfq_add_crq_hash(cfqd, crq); - - if (!q->last_merge) - q->last_merge = crq->request; - } - } else { /* * should can only happen if the request wasn't allocated @@ -912,57 +528,16 @@ __cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq) } } -static void cfq_reenqueue(request_queue_t *q, struct cfq_data *cfqd, int prio) -{ - struct list_head *prio_list = &cfqd->cid[prio].prio_list; - struct list_head *entry, *tmp; - - list_for_each_safe(entry, tmp, prio_list) { - struct cfq_rq *crq = list_entry_prio(entry); - - list_del_init(entry); - list_del_init(&crq->request->queuelist); - __cfq_enqueue(q, cfqd, crq); - } -} - -static void -cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq) -{ - const int prio = cfq_ioprio(current); - - crq->ioprio = prio; - crq->nr_sectors = crq->request->hard_nr_sectors; - __cfq_enqueue(q, cfqd, crq); - - if (prio == IOPRIO_RT) { - int i; - - /* - * realtime io gets priority, move all other io back - */ - for (i = IOPRIO_IDLE; i < IOPRIO_RT; i++) - cfq_reenqueue(q, cfqd, i); - } else if (prio != IOPRIO_IDLE) { - /* - * check if we need to move idle io back into queue - */ - cfq_reenqueue(q, cfqd, IOPRIO_IDLE); - } -} - static void cfq_insert_request(request_queue_t *q, struct request *rq, int where) { struct cfq_data *cfqd = q->elevator.elevator_data; - struct cfq_rq *crq = RQ_ELV_DATA(rq); + struct cfq_rq *crq = RQ_DATA(rq); switch (where) { case ELEVATOR_INSERT_BACK: -#if 0 while (cfq_dispatch_requests(q, cfqd)) ; -#endif list_add_tail(&rq->queuelist, cfqd->dispatch); break; case ELEVATOR_INSERT_FRONT: @@ -970,20 +545,26 @@ cfq_insert_request(request_queue_t *q, struct request *rq, int where) break; case ELEVATOR_INSERT_SORT: BUG_ON(!blk_fs_request(rq)); - cfq_enqueue(q, cfqd, crq); + cfq_enqueue(cfqd, crq); break; default: - printk("%s: bad insert point %d\n", - __FUNCTION__,where); + printk("%s: bad insert point %d\n", __FUNCTION__,where); return; } + + if (rq_mergeable(rq)) { + cfq_add_crq_hash(cfqd, crq); + + if (!q->last_merge) + q->last_merge = rq; + } } static int cfq_queue_empty(request_queue_t *q) { struct cfq_data *cfqd = q->elevator.elevator_data; - if (list_empty(cfqd->dispatch) && !cfqd->busy_queues) + if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list)) return 1; return 0; @@ -992,7 +573,7 @@ static int cfq_queue_empty(request_queue_t *q) static struct request * cfq_former_request(request_queue_t *q, struct request *rq) { - struct cfq_rq *crq = RQ_ELV_DATA(rq); + struct cfq_rq *crq = RQ_DATA(rq); struct rb_node *rbprev = rb_prev(&crq->rb_node); if (rbprev) @@ -1004,7 +585,7 @@ cfq_former_request(request_queue_t *q, struct request *rq) static struct request * cfq_latter_request(request_queue_t *q, struct request *rq) { - struct cfq_rq 
*crq = RQ_ELV_DATA(rq); + struct cfq_rq *crq = RQ_DATA(rq); struct rb_node *rbnext = rb_next(&crq->rb_node); if (rbnext) @@ -1013,46 +594,27 @@ cfq_latter_request(request_queue_t *q, struct request *rq) return NULL; } -static void cfq_queue_congested(request_queue_t *q) -{ - struct cfq_data *cfqd = q->elevator.elevator_data; - - set_bit(cfq_ioprio(current), &cfqd->rq_starved_mask); -} - static int cfq_may_queue(request_queue_t *q, int rw) { struct cfq_data *cfqd = q->elevator.elevator_data; struct cfq_queue *cfqq; - const int prio = cfq_ioprio(current); - int limit, ret = 1; + int ret = 1; if (!cfqd->busy_queues) goto out; - cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current)); - if (!cfqq) - goto out; - - cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current)); - if (!cfqq) - goto out; - - /* - * if higher or equal prio io is sleeping waiting for a request, don't - * allow this one to allocate one. as long as ll_rw_blk does fifo - * waitqueue wakeups this should work... - */ - if (cfqd->rq_starved_mask & ~((1 << prio) - 1)) - goto out; + cfqq = cfq_find_cfq_hash(cfqd, current->tgid); + if (cfqq) { + int limit = (q->nr_requests - cfqd->cfq_queued) / cfqd->busy_queues; - if (cfqq->queued[rw] < cfqd->cfq_queued || !cfqd->cid[prio].busy_queues) - goto out; + if (limit < 3) + limit = 3; + else if (limit > cfqd->max_queued) + limit = cfqd->max_queued; - limit = q->nr_requests * (prio + 1) / IOPRIO_NR; - limit /= cfqd->cid[prio].busy_queues; - if (cfqq->queued[rw] > limit) - ret = 0; + if (cfqq->queued[rw] > limit) + ret = 0; + } out: return ret; } @@ -1060,13 +622,13 @@ out: static void cfq_put_request(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator.elevator_data; - struct cfq_rq *crq = RQ_ELV_DATA(rq); + struct cfq_rq *crq = RQ_DATA(rq); struct request_list *rl; int other_rw; if (crq) { BUG_ON(q->last_merge == rq); - BUG_ON(!hlist_unhashed(&crq->hash)); + BUG_ON(ON_MHASH(crq)); mempool_free(crq, cfqd->crq_pool); rq->elevator_private = NULL; @@ -1099,21 +661,17 @@ static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask) /* * prepare a queue up front, so cfq_enqueue() doesn't have to */ - cfqq = cfq_get_queue(cfqd, cfq_hash_key(current), gfp_mask); + cfqq = cfq_get_queue(cfqd, current->tgid, gfp_mask); if (!cfqq) return 1; crq = mempool_alloc(cfqd->crq_pool, gfp_mask); if (crq) { - /* - * process now has one request - */ - clear_bit(cfq_ioprio(current), &cfqd->rq_starved_mask); - memset(crq, 0, sizeof(*crq)); + RB_CLEAR(&crq->rb_node); crq->request = rq; - INIT_HLIST_NODE(&crq->hash); - INIT_LIST_HEAD(&crq->prio_list); + crq->cfq_queue = NULL; + INIT_LIST_HEAD(&crq->hash); rq->elevator_private = crq; return 0; } @@ -1132,26 +690,6 @@ static void cfq_exit(request_queue_t *q, elevator_t *e) kfree(cfqd); } -static void cfq_timer(unsigned long data) -{ - struct cfq_data *cfqd = (struct cfq_data *) data; - - clear_bit(CFQ_WAIT_RT, &cfqd->flags); - clear_bit(CFQ_WAIT_NORM, &cfqd->flags); - kblockd_schedule_work(&cfqd->work); -} - -static void cfq_work(void *data) -{ - request_queue_t *q = data; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (cfq_next_request(q)) - q->request_fn(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - static int cfq_init(request_queue_t *q, elevator_t *e) { struct cfq_data *cfqd; @@ -1162,75 +700,38 @@ static int cfq_init(request_queue_t *q, elevator_t *e) return -ENOMEM; memset(cfqd, 0, sizeof(*cfqd)); - init_timer(&cfqd->timer); - cfqd->timer.function = cfq_timer; - cfqd->timer.data = 
(unsigned long) cfqd; - - INIT_WORK(&cfqd->work, cfq_work, q); - - for (i = 0; i < IOPRIO_NR; i++) { - struct io_prio_data *cid = &cfqd->cid[i]; - - INIT_LIST_HEAD(&cid->rr_list); - INIT_LIST_HEAD(&cid->prio_list); - cid->last_rq = -1; - cid->last_sectors = -1; - - atomic_set(&cid->cum_rq_in,0); - atomic_set(&cid->cum_rq_out,0); - atomic_set(&cid->cum_sectors_in,0); - atomic_set(&cid->cum_sectors_out,0); - atomic_set(&cid->cum_queues_in,0); - atomic_set(&cid->cum_queues_out,0); -#if 0 - atomic_set(&cid->nskip,0); - atomic_set(&cid->navsec,0); - atomic_set(&cid->csectorate,0); - atomic_set(&cid->lsectorate,0); -#endif - } + INIT_LIST_HEAD(&cfqd->rr_list); - cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, - GFP_KERNEL); + cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL); if (!cfqd->crq_hash) goto out_crqhash; - cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, - GFP_KERNEL); + cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL); if (!cfqd->cfq_hash) goto out_cfqhash; - cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, crq_pool); + cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool); if (!cfqd->crq_pool) goto out_crqpool; for (i = 0; i < CFQ_MHASH_ENTRIES; i++) - INIT_HLIST_HEAD(&cfqd->crq_hash[i]); + INIT_LIST_HEAD(&cfqd->crq_hash[i]); for (i = 0; i < CFQ_QHASH_ENTRIES; i++) - INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); - - cfqd->cfq_queued = cfq_queued; - cfqd->cfq_quantum = cfq_quantum; - cfqd->cfq_quantum_io = cfq_quantum_io; - cfqd->cfq_idle_quantum = cfq_idle_quantum; - cfqd->cfq_idle_quantum_io = cfq_idle_quantum_io; - cfqd->cfq_grace_rt = cfq_grace_rt; - cfqd->cfq_grace_idle = cfq_grace_idle; - - q->nr_requests <<= 2; + INIT_LIST_HEAD(&cfqd->cfq_hash[i]); cfqd->dispatch = &q->queue_head; e->elevator_data = cfqd; cfqd->queue = q; - cfqd->cfq_epoch = CFQ_EPOCH; - if (q->hardsect_size) - cfqd->cfq_epochsectors = ((CFQ_DISKBW * 1000000)/ - q->hardsect_size)* (1000000 / CFQ_EPOCH); - else - cfqd->cfq_epochsectors = ((CFQ_DISKBW * 1000000)/512) - * (1000000 / CFQ_EPOCH) ; + /* + * just set it to some high value, we want anyone to be able to queue + * some requests. 
fairness is handled differently + */ + cfqd->max_queued = q->nr_requests; + q->nr_requests = 8192; + + cfqd->cfq_queued = cfq_queued; + cfqd->cfq_quantum = cfq_quantum; return 0; out_crqpool: @@ -1296,12 +797,7 @@ static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \ return cfq_var_show(__VAR, (page)); \ } SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum); -SHOW_FUNCTION(cfq_quantum_io_show, cfqd->cfq_quantum_io); -SHOW_FUNCTION(cfq_idle_quantum_show, cfqd->cfq_idle_quantum); -SHOW_FUNCTION(cfq_idle_quantum_io_show, cfqd->cfq_idle_quantum_io); SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued); -SHOW_FUNCTION(cfq_grace_rt_show, cfqd->cfq_grace_rt); -SHOW_FUNCTION(cfq_grace_idle_show, cfqd->cfq_grace_idle); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ @@ -1315,271 +811,23 @@ static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \ return ret; \ } STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, INT_MAX); -STORE_FUNCTION(cfq_quantum_io_store, &cfqd->cfq_quantum_io, 4, INT_MAX); -STORE_FUNCTION(cfq_idle_quantum_store, &cfqd->cfq_idle_quantum, 1, INT_MAX); -STORE_FUNCTION(cfq_idle_quantum_io_store, &cfqd->cfq_idle_quantum_io, 4, INT_MAX); STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX); -STORE_FUNCTION(cfq_grace_rt_store, &cfqd->cfq_grace_rt, 0, INT_MAX); -STORE_FUNCTION(cfq_grace_idle_store, &cfqd->cfq_grace_idle, 0, INT_MAX); #undef STORE_FUNCTION - -static ssize_t cfq_epoch_show(struct cfq_data *cfqd, char *page) -{ - return sprintf(page, "%lu\n", cfqd->cfq_epoch); -} - -static ssize_t cfq_epoch_store(struct cfq_data *cfqd, const char *page, size_t count) -{ - char *p = (char *) page; - cfqd->cfq_epoch = simple_strtoul(p, &p, 10); - return count; -} - -static ssize_t cfq_epochsectors_show(struct cfq_data *cfqd, char *page) -{ - return sprintf(page, "%lu\n", cfqd->cfq_epochsectors); -} - -static ssize_t -cfq_epochsectors_store(struct cfq_data *cfqd, const char *page, size_t count) -{ - char *p = (char *) page; - cfqd->cfq_epochsectors = simple_strtoul(p, &p, 10); - return count; -} - -/* Additional entries to get priority level data */ -static ssize_t -cfq_prio_show(struct cfq_data *cfqd, char *page, unsigned int priolvl) -{ - int r1,r2,s1,s2,q1,q2; - - if (!(priolvl >= IOPRIO_IDLE && priolvl <= IOPRIO_RT)) - return 0; - - r1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_in)); - r2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_out)); - s1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_in)); - s2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_out)); - q1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_in)); - q2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_out)); - - return sprintf(page,"skip %d avsec %lu rate %lu new %lu" - "rq (%d,%d) sec (%d,%d) q (%d,%d)\n", - cfqd->cid[priolvl].nskip, - cfqd->cid[priolvl].navsec, - cfqd->cid[priolvl].csectorate, - cfqd->cid[priolvl].lsectorate, -// atomic_read(&cfqd->cid[priolvl].nskip), -// atomic_read(&cfqd->cid[priolvl].navsec), -// atomic_read(&cfqd->cid[priolvl].csectorate), -// atomic_read(&cfqd->cid[priolvl].lsectorate), - r1,r2, - s1,s2, - q1,q2); -} - -#define SHOW_PRIO_DATA(__PRIOLVL) \ -static ssize_t cfq_prio_##__PRIOLVL##_show(struct cfq_data *cfqd, char *page) \ -{ \ - return cfq_prio_show(cfqd,page,__PRIOLVL); \ -} -SHOW_PRIO_DATA(0); -SHOW_PRIO_DATA(1); -SHOW_PRIO_DATA(2); -SHOW_PRIO_DATA(3); -SHOW_PRIO_DATA(4); -SHOW_PRIO_DATA(5); -SHOW_PRIO_DATA(6); -SHOW_PRIO_DATA(7); -SHOW_PRIO_DATA(8); -SHOW_PRIO_DATA(9); -SHOW_PRIO_DATA(10); 
-SHOW_PRIO_DATA(11); -SHOW_PRIO_DATA(12); -SHOW_PRIO_DATA(13); -SHOW_PRIO_DATA(14); -SHOW_PRIO_DATA(15); -SHOW_PRIO_DATA(16); -SHOW_PRIO_DATA(17); -SHOW_PRIO_DATA(18); -SHOW_PRIO_DATA(19); -SHOW_PRIO_DATA(20); -#undef SHOW_PRIO_DATA - - -static ssize_t cfq_prio_store(struct cfq_data *cfqd, const char *page, size_t count, int priolvl) -{ - atomic_set(&(cfqd->cid[priolvl].cum_rq_in),0); - atomic_set(&(cfqd->cid[priolvl].cum_rq_out),0); - atomic_set(&(cfqd->cid[priolvl].cum_sectors_in),0); - atomic_set(&(cfqd->cid[priolvl].cum_sectors_out),0); - atomic_set(&(cfqd->cid[priolvl].cum_queues_in),0); - atomic_set(&(cfqd->cid[priolvl].cum_queues_out),0); - - return count; -} - - -#define STORE_PRIO_DATA(__PRIOLVL) \ -static ssize_t cfq_prio_##__PRIOLVL##_store(struct cfq_data *cfqd, const char *page, size_t count) \ -{ \ - return cfq_prio_store(cfqd,page,count,__PRIOLVL); \ -} -STORE_PRIO_DATA(0); -STORE_PRIO_DATA(1); -STORE_PRIO_DATA(2); -STORE_PRIO_DATA(3); -STORE_PRIO_DATA(4); -STORE_PRIO_DATA(5); -STORE_PRIO_DATA(6); -STORE_PRIO_DATA(7); -STORE_PRIO_DATA(8); -STORE_PRIO_DATA(9); -STORE_PRIO_DATA(10); -STORE_PRIO_DATA(11); -STORE_PRIO_DATA(12); -STORE_PRIO_DATA(13); -STORE_PRIO_DATA(14); -STORE_PRIO_DATA(15); -STORE_PRIO_DATA(16); -STORE_PRIO_DATA(17); -STORE_PRIO_DATA(18); -STORE_PRIO_DATA(19); -STORE_PRIO_DATA(20); -#undef STORE_PRIO_DATA - - static struct cfq_fs_entry cfq_quantum_entry = { .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR }, .show = cfq_quantum_show, .store = cfq_quantum_store, }; -static struct cfq_fs_entry cfq_quantum_io_entry = { - .attr = {.name = "quantum_io", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_quantum_io_show, - .store = cfq_quantum_io_store, -}; -static struct cfq_fs_entry cfq_idle_quantum_entry = { - .attr = {.name = "idle_quantum", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_idle_quantum_show, - .store = cfq_idle_quantum_store, -}; -static struct cfq_fs_entry cfq_idle_quantum_io_entry = { - .attr = {.name = "idle_quantum_io", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_idle_quantum_io_show, - .store = cfq_idle_quantum_io_store, -}; static struct cfq_fs_entry cfq_queued_entry = { .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR }, .show = cfq_queued_show, .store = cfq_queued_store, }; -static struct cfq_fs_entry cfq_grace_rt_entry = { - .attr = {.name = "grace_rt", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_grace_rt_show, - .store = cfq_grace_rt_store, -}; -static struct cfq_fs_entry cfq_grace_idle_entry = { - .attr = {.name = "grace_idle", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_grace_idle_show, - .store = cfq_grace_idle_store, -}; -static struct cfq_fs_entry cfq_epoch_entry = { - .attr = {.name = "epoch", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_epoch_show, - .store = cfq_epoch_store, -}; -static struct cfq_fs_entry cfq_epochsectors_entry = { - .attr = {.name = "epochsectors", .mode = S_IRUGO | S_IWUSR }, - .show = cfq_epochsectors_show, - .store = cfq_epochsectors_store, -}; - -#define P_0_STR "p0" -#define P_1_STR "p1" -#define P_2_STR "p2" -#define P_3_STR "p3" -#define P_4_STR "p4" -#define P_5_STR "p5" -#define P_6_STR "p6" -#define P_7_STR "p7" -#define P_8_STR "p8" -#define P_9_STR "p9" -#define P_10_STR "p10" -#define P_11_STR "p11" -#define P_12_STR "p12" -#define P_13_STR "p13" -#define P_14_STR "p14" -#define P_15_STR "p15" -#define P_16_STR "p16" -#define P_17_STR "p17" -#define P_18_STR "p18" -#define P_19_STR "p19" -#define P_20_STR "p20" - - -#define CFQ_PRIO_SYSFS_ENTRY(__PRIOLVL) \ -static struct cfq_fs_entry 
cfq_prio_##__PRIOLVL##_entry = { \ - .attr = {.name = P_##__PRIOLVL##_STR, .mode = S_IRUGO | S_IWUSR }, \ - .show = cfq_prio_##__PRIOLVL##_show, \ - .store = cfq_prio_##__PRIOLVL##_store, \ -}; -CFQ_PRIO_SYSFS_ENTRY(0); -CFQ_PRIO_SYSFS_ENTRY(1); -CFQ_PRIO_SYSFS_ENTRY(2); -CFQ_PRIO_SYSFS_ENTRY(3); -CFQ_PRIO_SYSFS_ENTRY(4); -CFQ_PRIO_SYSFS_ENTRY(5); -CFQ_PRIO_SYSFS_ENTRY(6); -CFQ_PRIO_SYSFS_ENTRY(7); -CFQ_PRIO_SYSFS_ENTRY(8); -CFQ_PRIO_SYSFS_ENTRY(9); -CFQ_PRIO_SYSFS_ENTRY(10); -CFQ_PRIO_SYSFS_ENTRY(11); -CFQ_PRIO_SYSFS_ENTRY(12); -CFQ_PRIO_SYSFS_ENTRY(13); -CFQ_PRIO_SYSFS_ENTRY(14); -CFQ_PRIO_SYSFS_ENTRY(15); -CFQ_PRIO_SYSFS_ENTRY(16); -CFQ_PRIO_SYSFS_ENTRY(17); -CFQ_PRIO_SYSFS_ENTRY(18); -CFQ_PRIO_SYSFS_ENTRY(19); -CFQ_PRIO_SYSFS_ENTRY(20); -#undef CFQ_PRIO_SYSFS_ENTRY static struct attribute *default_attrs[] = { &cfq_quantum_entry.attr, - &cfq_quantum_io_entry.attr, - &cfq_idle_quantum_entry.attr, - &cfq_idle_quantum_io_entry.attr, &cfq_queued_entry.attr, - &cfq_grace_rt_entry.attr, - &cfq_grace_idle_entry.attr, - &cfq_epoch_entry.attr, - &cfq_epochsectors_entry.attr, - &cfq_prio_0_entry.attr, - &cfq_prio_1_entry.attr, - &cfq_prio_2_entry.attr, - &cfq_prio_3_entry.attr, - &cfq_prio_4_entry.attr, - &cfq_prio_5_entry.attr, - &cfq_prio_6_entry.attr, - &cfq_prio_7_entry.attr, - &cfq_prio_8_entry.attr, - &cfq_prio_9_entry.attr, - &cfq_prio_10_entry.attr, - &cfq_prio_11_entry.attr, - &cfq_prio_12_entry.attr, - &cfq_prio_13_entry.attr, - &cfq_prio_14_entry.attr, - &cfq_prio_15_entry.attr, - &cfq_prio_16_entry.attr, - &cfq_prio_17_entry.attr, - &cfq_prio_18_entry.attr, - &cfq_prio_19_entry.attr, - &cfq_prio_20_entry.attr, NULL, }; @@ -1635,7 +883,6 @@ elevator_t iosched_cfq = { .elevator_set_req_fn = cfq_set_request, .elevator_put_req_fn = cfq_put_request, .elevator_may_queue_fn = cfq_may_queue, - .elevator_set_congested_fn = cfq_queue_congested, .elevator_init_fn = cfq_init, .elevator_exit_fn = cfq_exit, }; diff --git a/drivers/block/ckrm-io.c b/drivers/block/ckrm-io.c index 7edfce727..ce166e855 100644 --- a/drivers/block/ckrm-io.c +++ b/drivers/block/ckrm-io.c @@ -74,10 +74,10 @@ typedef struct ckrm_io_class { /* Absolute shares of this class * in local units. */ - - int cnt_guarantee; /* Allocation as parent */ - int cnt_unused; /* Allocation to default subclass */ - + + int ioprio; + int unused; + /* Statistics, for class and default subclass */ cki_stats_t stats; cki_stats_t mystats; @@ -90,12 +90,13 @@ typedef struct ckrm_io_class { static inline void cki_reset_stats(cki_stats_t *usg); static inline void init_icls_one(cki_icls_t *icls); static inline int cki_div(int *a, int b, int c); -//static inline int cki_recalc(cki_icls_t *icls, int rel2abs); -static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres); +static inline int cki_recalc(cki_icls_t *icls, int rel2abs); +#ifdef DOES_NOT_WORK_AND_NOT_NEEDED /* External functions e.g. 
interface to ioscheduler */ -void *cki_tsk_icls (struct task_struct *tsk); -int cki_tsk_ioprio (struct task_struct *tsk); +inline void *cki_tsk_icls(struct task_struct *tsk); +inline int cki_tsk_ioprio(struct task_struct *tsk); +#endif extern void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio); @@ -139,13 +140,9 @@ static inline void init_icls_stats(cki_icls_t *icls) static inline void init_icls_one(cki_icls_t *icls) { - // Assign zero as initial guarantee otherwise creations - // could fail due to inadequate share - - //icls->shares.my_guarantee = - // (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) / - // CKI_IOPRIO_DIV ; - icls->shares.my_guarantee = 0; + icls->shares.my_guarantee = + (CKI_IOPRIO_MIN * CKRM_SHARE_DFLT_TOTAL_GUARANTEE) / + CKI_IOPRIO_DIV ; icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; @@ -155,11 +152,8 @@ static inline void init_icls_one(cki_icls_t *icls) icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - icls->cnt_guarantee = icls->cnt_unused = IOPRIO_IDLE; - - //Same rationale icls->ioprio = CKI_IOPRIO_MIN; - //IOPRIO_IDLE equivalence to zero my_guarantee (set above) relies - //on former being zero. + icls->ioprio = CKI_IOPRIO_MIN; + icls->unused = 0 ; init_icls_stats(icls); } @@ -180,55 +174,6 @@ static inline int cki_div(int *a, int b, int c) * Caller should have a lock on icls */ -static void cki_recalc_propagate(cki_icls_t *res, cki_icls_t *parres) -{ - - ckrm_core_class_t *child = NULL; - cki_icls_t *childres; - int resid = cki_rcbs.resid; - - if (parres) { - struct ckrm_shares *par = &parres->shares; - struct ckrm_shares *self = &res->shares; - - - - if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { - res->cnt_guarantee = CKRM_SHARE_DONTCARE; - } else if (par->total_guarantee) { - u64 temp = (u64) self->my_guarantee * - parres->cnt_guarantee; - do_div(temp, par->total_guarantee); - res->cnt_guarantee = (int) temp; - } else { - res->cnt_guarantee = 0; - } - - if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) { - res->cnt_unused = CKRM_SHARE_DONTCARE; - } else if (self->total_guarantee) { - u64 temp = (u64) self->unused_guarantee * - res->cnt_guarantee; - do_div(temp, self->total_guarantee); - res->cnt_unused = (int) temp; - } else { - res->cnt_unused = 0; - } - } - // propagate to children - ckrm_lock_hier(res->core); - while ((child = ckrm_get_next_child(res->core,child)) != NULL){ - childres = ckrm_get_res_class(child, resid, - cki_icls_t); - - spin_lock(&childres->shares_lock); - cki_recalc_propagate(childres, res); - spin_unlock(&childres->shares_lock); - } - ckrm_unlock_hier(res->core); -} - -#if 0 static inline int cki_recalc(cki_icls_t *icls, int rel2abs) { u64 temp; @@ -239,10 +184,8 @@ static inline int cki_recalc(cki_icls_t *icls, int rel2abs) temp = icls->shares.my_guarantee * (IOPRIO_NR-1); do_div(temp, icls->shares.total_guarantee); - icls->total = IOPRIO_NR-1; icls->ioprio = temp ; - icls->unused = icls->total - icls->ioprio; -// icls->unused = (IOPRIO_NR-1)-icls->ioprio; + icls->unused = (IOPRIO_NR-1)-icls->ioprio; } else { cki_icls_t *parres; @@ -257,9 +200,9 @@ static inline int cki_recalc(cki_icls_t *icls, int rel2abs) return -EINVAL; } + partot = parres->ioprio + parres->unused; - temp = (icls->shares.my_guarantee * - parres->total); + temp = (icls->shares.my_guarantee * (parres->ioprio + parres->unused)); do_div(temp, parres->shares.total_guarantee); icls->ioprio = temp; @@ -270,19 +213,19 
@@ static inline int cki_recalc(cki_icls_t *icls, int rel2abs) return 0; } -#endif -void *cki_tsk_icls(struct task_struct *tsk) + +inline void *cki_icls_tsk(struct task_struct *tsk) { return (void *) ckrm_get_res_class(class_core(tsk->taskclass), cki_rcbs.resid, cki_icls_t); } -int cki_tsk_ioprio(struct task_struct *tsk) +inline int cki_icls_ioprio(struct task_struct *tsk) { cki_icls_t *icls = ckrm_get_res_class(class_core(tsk->taskclass), cki_rcbs.resid, cki_icls_t); - return icls->cnt_unused; + return icls->ioprio; } static void *cki_alloc(struct ckrm_core_class *core, @@ -302,13 +245,15 @@ static void *cki_alloc(struct ckrm_core_class *core, icls->shares_lock = SPIN_LOCK_UNLOCKED; if (parent == NULL) { + u64 temp; /* Root class gets same as "normal" CFQ priorities to * retain compatibility of behaviour in the absence of * other classes */ - icls->cnt_guarantee = icls->cnt_unused = IOPRIO_NR-1; + icls->ioprio = IOPRIO_NORM; + icls->unused = (IOPRIO_NR-1)-IOPRIO_NORM; /* Default gets normal, not minimum */ //icls->unused = IOPRIO_NORM; @@ -317,27 +262,24 @@ static void *cki_alloc(struct ckrm_core_class *core, /* Compute shares in abstract units */ icls->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + temp = (u64) icls->ioprio * icls->shares.total_guarantee; + do_div(temp, CKI_IOPRIO_DIV); + icls->shares.my_guarantee = (int) temp; - // my_guarantee for root is meaningless. Set to default - icls->shares.my_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - - icls->shares.unused_guarantee = - CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - - //temp = (u64) icls->cnt_unused * icls->shares.total_guarantee; - //do_div(temp, CKI_IOPRIO_DIV); - // temp now has root's default's share - //icls->shares.unused_guarantee = - // icls->shares.total_guarantee - temp; - + //icls->shares.my_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + //icls->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; icls->shares.my_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; icls->shares.max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + + + icls->shares.unused_guarantee = + icls->shares.total_guarantee - + icls->shares.my_guarantee; + //icls->shares.cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; icls->shares.cur_max_limit = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; } else { init_icls_one(icls); - /* No propagation to parent needed if icls' - initial share is zero */ } try_module_get(THIS_MODULE); return icls; @@ -373,7 +315,7 @@ static void cki_free(void *res) /* Update parent's shares */ spin_lock(&parres->shares_lock); child_guarantee_changed(&parres->shares, icls->shares.my_guarantee, 0); - parres->cnt_unused += icls->cnt_guarantee; + parres->unused += icls->ioprio; spin_unlock(&parres->shares_lock); kfree(res); @@ -398,7 +340,9 @@ static int cki_setshare(void *res, struct ckrm_shares *new) /* limits not supported */ if ((new->max_limit != CKRM_SHARE_UNCHANGED) || (new->my_limit != CKRM_SHARE_UNCHANGED)) { - printk(KERN_ERR "limits not supported\n"); + printk(KERN_ERR "limits changed max_limit %d my_limit %d\n", + new->max_limit, new->my_limit); + return -EINVAL; } @@ -420,32 +364,17 @@ static int cki_setshare(void *res, struct ckrm_shares *new) } rc = set_shares(new, cur, par); + printk(KERN_ERR "rc from set_shares %d\n", rc); - if ((!rc) && parres) { - - if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { - parres->cnt_unused = CKRM_SHARE_DONTCARE; - } else if (par->total_guarantee) { - u64 temp = (u64) par->unused_guarantee * - parres->cnt_guarantee; - do_div(temp, par->total_guarantee); - parres->cnt_unused = (int) temp; - } else { - parres->cnt_unused = 0; - 
} - cki_recalc_propagate(res, parres); - -#if 0 + if (!rc) { int old = icls->ioprio; - rc = cki_recalc(icls,0); if (!rc && parres) { int raise_tot = icls->ioprio - old ; - parres->unused -= raise_tot ; + parres->unused += raise_tot ; } -#endif } spin_unlock(&icls->shares_lock); if (icls->parent) { @@ -478,8 +407,8 @@ static int cki_getstats(void *res, struct seq_file *sfile) seq_printf(sfile, "%d total_write\n",atomic_read(&icls->stats.blkwr)); */ - seq_printf(sfile, "%d total ioprio\n",icls->cnt_guarantee); - seq_printf(sfile, "%d unused/default ioprio\n",icls->cnt_unused); + seq_printf(sfile, "%d ioprio\n",icls->ioprio); + seq_printf(sfile, "%d unused\n",icls->unused); return 0; } @@ -523,7 +452,7 @@ static void cki_chgcls(void *tsk, void *oldres, void *newres) struct ckrm_res_ctlr cki_rcbs = { - .res_name = "io", + .res_name = "cki", .res_hdepth = 1, .resid = -1, .res_alloc = cki_alloc, @@ -554,7 +483,7 @@ int __init cki_init(void) resid = ckrm_register_res_ctlr(clstype, &cki_rcbs); if (resid != -1) { cki_rcbs.classtype = clstype; - cki_cfq_set(cki_tsk_icls,cki_tsk_ioprio); + cki_cfq_set(cki_icls_tsk,cki_icls_ioprio); } } diff --git a/drivers/block/ckrm-iostub.c b/drivers/block/ckrm-iostub.c index c325d8e8d..63beff3e3 100644 --- a/drivers/block/ckrm-iostub.c +++ b/drivers/block/ckrm-iostub.c @@ -35,7 +35,7 @@ void cki_cfq_set(icls_tsk_t tskicls, icls_ioprio_t tskioprio) spin_unlock(&stub_lock); } -void *cki_hash_key(struct task_struct *tsk) +inline void *cki_hash_key(struct task_struct *tsk) { void *ret; spin_lock(&stub_lock); @@ -47,7 +47,7 @@ void *cki_hash_key(struct task_struct *tsk) return ret; } -int cki_ioprio(struct task_struct *tsk) +inline int cki_ioprio(struct task_struct *tsk) { int ret; spin_lock(&stub_lock); diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c index 950eb9923..35c9385ac 100644 --- a/drivers/block/elevator.c +++ b/drivers/block/elevator.c @@ -339,14 +339,6 @@ void elv_put_request(request_queue_t *q, struct request *rq) e->elevator_put_req_fn(q, rq); } -void elv_set_congested(request_queue_t *q) -{ - elevator_t *e = &q->elevator; - - if (e->elevator_set_congested_fn) - e->elevator_set_congested_fn(q); -} - int elv_may_queue(request_queue_t *q, int rw) { elevator_t *e = &q->elevator; @@ -354,7 +346,7 @@ int elv_may_queue(request_queue_t *q, int rw) if (e->elevator_may_queue_fn) return e->elevator_may_queue_fn(q, rw); - return 1; + return 0; } void elv_completed_request(request_queue_t *q, struct request *rq) diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 49ff5e0b7..5a570baa6 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -1594,10 +1594,6 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) struct io_context *ioc = get_io_context(gfp_mask); spin_lock_irq(q->queue_lock); - - if (!elv_may_queue(q, rw)) - goto out_lock; - if (rl->count[rw]+1 >= q->nr_requests) { /* * The queue will fill after this allocation, so set it as @@ -1611,12 +1607,15 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) } } - /* - * The queue is full and the allocating process is not a - * "batcher", and not exempted by the IO scheduler - */ - if (blk_queue_full(q, rw) && !ioc_batching(ioc)) - goto out_lock; + if (blk_queue_full(q, rw) + && !ioc_batching(ioc) && !elv_may_queue(q, rw)) { + /* + * The queue is full and the allocating process is not a + * "batcher", and not exempted by the IO scheduler + */ + spin_unlock_irq(q->queue_lock); + goto out; + } rl->count[rw]++; 
if (rl->count[rw] >= queue_congestion_on_threshold(q)) @@ -1634,7 +1633,8 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) */ spin_lock_irq(q->queue_lock); freed_request(q, rw); - goto out_lock; + spin_unlock_irq(q->queue_lock); + goto out; } if (ioc_batching(ioc)) @@ -1664,11 +1664,6 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask) out: put_io_context(ioc); return rq; -out_lock: - if (!rq) - elv_set_congested(q); - spin_unlock_irq(q->queue_lock); - goto out; } /* @@ -3173,21 +3168,3 @@ void blk_unregister_queue(struct gendisk *disk) kobject_put(&disk->kobj); } } - -asmlinkage int sys_ioprio_set(int ioprio) -{ - if (ioprio < IOPRIO_IDLE || ioprio > IOPRIO_RT) - return -EINVAL; - if (ioprio == IOPRIO_RT && !capable(CAP_SYS_ADMIN)) - return -EACCES; - - printk("%s: set ioprio %d\n", current->comm, ioprio); - current->ioprio = ioprio; - return 0; -} - -asmlinkage int sys_ioprio_get(void) -{ - return current->ioprio; -} - diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c index 764c6538e..83d6b37b3 100644 --- a/drivers/char/hangcheck-timer.c +++ b/drivers/char/hangcheck-timer.c @@ -55,7 +55,7 @@ static int hangcheck_tick = DEFAULT_IOFENCE_TICK; static int hangcheck_margin = DEFAULT_IOFENCE_MARGIN; -static int hangcheck_reboot = 1; /* Defaults to reboot */ +static int hangcheck_reboot; /* Defaults to not reboot */ /* Driver options */ module_param(hangcheck_tick, int, 0); diff --git a/fs/exec.c b/fs/exec.c index 90580ec70..bca37d6c0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -559,18 +558,6 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); task_unlock(tsk); arch_pick_mmap_layout(mm); -#ifdef CONFIG_CKRM_RES_MEM - if (old_mm) { - spin_lock(&old_mm->peertask_lock); - list_del(&tsk->mm_peers); - ckrm_mem_evaluate_mm(old_mm); - spin_unlock(&old_mm->peertask_lock); - } - spin_lock(&mm->peertask_lock); - list_add_tail(&tsk->mm_peers, &mm->tasklist); - ckrm_mem_evaluate_mm(mm); - spin_unlock(&mm->peertask_lock); -#endif if (old_mm) { if (active_mm != old_mm) BUG(); mmput(old_mm); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index d232026b4..74acc7846 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -292,9 +291,6 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; - /* Prevent vservers from escaping chroot() barriers */ - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - return -EACCES; /* Nobody gets write access to a read-only fs */ if ((mask & MAY_WRITE) && (IS_RDONLY(inode) || (nd && MNT_IS_RDONLY(nd->mnt))) && diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index fe9c6a13b..1ef02bccb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1030,7 +1030,7 @@ void ext2_set_inode_flags(struct inode *inode) { unsigned int flags = EXT2_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_BARRIER|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT2_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT2_APPEND_FL) diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 594c16c80..f6043a6e2 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -50,11 +50,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * * This test looks nicer. 
Thanks to Pauline Middelink */ - if (((oldflags & EXT2_IMMUTABLE_FL) || + if ((oldflags & EXT2_IMMUTABLE_FL) || ((flags ^ oldflags) & - (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) - && !capable(CAP_LINUX_IMMUTABLE)) { - return -EPERM; + (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; } flags = flags & EXT2_FL_USER_MODIFIABLE; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e89cb306c..cc26948d5 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "xattr.h" #include "acl.h" @@ -297,9 +296,6 @@ ext3_permission(struct inode *inode, int mask, struct nameidata *nd) { int mode = inode->i_mode; - /* Prevent vservers from escaping chroot() barriers */ - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - return -EACCES; /* Nobody gets write access to a read-only fs */ if ((mask & MAY_WRITE) && (IS_RDONLY(inode) || (nd && nd->mnt && MNT_IS_RDONLY(nd->mnt))) && diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 7bc33d5f5..962aef215 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2474,7 +2474,7 @@ void ext3_set_inode_flags(struct inode *inode) { unsigned int flags = EXT3_I(inode)->i_flags; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_IUNLINK|S_BARRIER|S_NOATIME|S_DIRSYNC); + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT3_SYNC_FL) inode->i_flags |= S_SYNC; if (flags & EXT3_APPEND_FL) diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index f58d49736..37bd4509d 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -59,11 +59,11 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * * This test looks nicer. Thanks to Pauline Middelink */ - if (((oldflags & EXT3_IMMUTABLE_FL) || + if ((oldflags & EXT3_IMMUTABLE_FL) || ((flags ^ oldflags) & - (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) - && !capable(CAP_LINUX_IMMUTABLE)) { - return -EPERM; + (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; } /* diff --git a/fs/ioctl.c b/fs/ioctl.c index 6404b0c10..96a1b601e 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -173,19 +173,6 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) error = vx_proc_ioctl(filp->f_dentry->d_inode, filp, cmd, arg); break; #endif - case FIOC_SETIATTR: - case FIOC_GETIATTR: - /* - * Verify that this filp is a file object, - * not (say) a socket. - */ - error = -ENOTTY; - if (S_ISREG(filp->f_dentry->d_inode->i_mode) || - S_ISDIR(filp->f_dentry->d_inode->i_mode)) - error = vc_iattr_ioctl(filp->f_dentry, - cmd, arg); - break; - default: error = -ENOTTY; if (S_ISREG(filp->f_dentry->d_inode->i_mode)) diff --git a/fs/namei.c b/fs/namei.c index 656430d6b..34da5b453 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -165,10 +165,6 @@ int vfs_permission(struct inode * inode, int mask) { umode_t mode = inode->i_mode; - /* Prevent vservers from escaping chroot() barriers */ - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - return -EACCES; - if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. 
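(A minimal sketch, not part of the patch: the ext2 and ext3 ioctl hunks above both converge on the same flag-change gate once EXT2_IUNLINK_FL is dropped, namely that touching an already-immutable inode, or toggling the APPEND/IMMUTABLE bits, requires CAP_LINUX_IMMUTABLE. The flag values below are the conventional ext2 ones and has_linux_immutable stands in for capable(CAP_LINUX_IMMUTABLE); both are assumptions of the sketch, not code from the patch.)

#include <stdbool.h>

/* Conventional ext2 flag bits, assumed here for illustration only. */
#define EXT2_IMMUTABLE_FL 0x00000010
#define EXT2_APPEND_FL    0x00000020

/*
 * Mirror of the restructured ioctl check: a flag change is permitted
 * unless the inode is already immutable or the change toggles the
 * APPEND/IMMUTABLE bits, in which case CAP_LINUX_IMMUTABLE is needed.
 */
bool flag_change_permitted(unsigned int oldflags, unsigned int newflags,
                           bool has_linux_immutable)
{
        if ((oldflags & EXT2_IMMUTABLE_FL) ||
            ((newflags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)))
                return has_linux_immutable;
        return true;
}

(For example, setting EXT2_APPEND_FL without the capability is refused, while changes that leave both special bits untouched pass regardless.)
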
@@ -214,6 +210,20 @@ int vfs_permission(struct inode * inode, int mask) return -EACCES; } +static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (inode->i_xid == 0) + return 0; + if (vx_check(inode->i_xid, VX_ADMIN|VX_WATCH|VX_IDENT)) + return 0; +/* + printk("VSW: xid=%d denied access to %p[#%d,%lu] »%*s«.\n", + vx_current_xid(), inode, inode->i_xid, inode->i_ino, + nd->dentry->d_name.len, nd->dentry->d_name.name); +*/ + return -EACCES; +} + int permission(struct inode * inode,int mask, struct nameidata *nd) { int retval; @@ -227,6 +237,8 @@ int permission(struct inode * inode,int mask, struct nameidata *nd) (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; + if ((retval = xid_permission(inode, mask, nd))) + return retval; if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, submask, nd); else @@ -2013,13 +2025,8 @@ asmlinkage long sys_link(const char __user * oldname, const char __user * newnam error = path_lookup(to, LOOKUP_PARENT, &nd); if (error) goto out; - /* - * We allow hard-links to be created to a bind-mount as long - * as the bind-mount is not read-only. Checking for cross-dev - * links is subsumed by the superblock check in vfs_link(). - */ - error = -EROFS; - if (MNT_IS_RDONLY(old_nd.mnt)) + error = -EXDEV; + if (old_nd.mnt != nd.mnt) goto out_release; new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); diff --git a/fs/rcfs/dir.c b/fs/rcfs/dir.c index 545500e6d..a72c75448 100644 --- a/fs/rcfs/dir.c +++ b/fs/rcfs/dir.c @@ -162,7 +162,7 @@ int rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) // create the default set of magic files clstype = (RCFS_I(dentry->d_inode))->core->classtype; rcfs_create_magic(dentry, &(((struct rcfs_magf *)clstype->mfdesc)[1]), - clstype->mfcount - 3); + clstype->mfcount - 2); return retval; diff --git a/fs/rcfs/magic.c b/fs/rcfs/magic.c index 1cada33e5..043df6e2d 100644 --- a/fs/rcfs/magic.c +++ b/fs/rcfs/magic.c @@ -100,7 +100,7 @@ FUNC ## _parse(char *options, char **resstr, char **otherstr) \ *resstr = NULL; \ \ if (!options) \ - return 0; \ + return -EINVAL; \ \ while ((p = strsep(&options, ",")) != NULL) { \ substring_t args[MAX_OPT_ARGS]; \ @@ -113,28 +113,17 @@ FUNC ## _parse(char *options, char **resstr, char **otherstr) \ switch (token) { \ case FUNC ## _res_type: \ *resstr = match_strdup(args); \ - if (!strcmp(#FUNC, "config")) { \ - char *str = p + strlen(p) + 1; \ - *otherstr = kmalloc(strlen(str) + 1, \ - GFP_KERNEL); \ - if (*otherstr == NULL) { \ - kfree(*resstr); \ - *resstr = NULL; \ - return 0; \ - } else { \ - strcpy(*otherstr, str); \ - return 1; \ - } \ - } \ break; \ case FUNC ## _str: \ *otherstr = match_strdup(args); \ break; \ default: \ - return 0; \ + return -EINVAL; \ } \ } \ - return (*resstr != NULL); \ + if (*resstr) \ + return 0; \ + return -EINVAL; \ } #define MAGIC_WRITE(FUNC,CLSTYPEFUN) \ @@ -210,16 +199,17 @@ struct file_operations FUNC ## _fileops = { \ EXPORT_SYMBOL(FUNC ## _fileops); /****************************************************************************** - * Shared function used by Target / Reclassify + * Target * + * pseudo file for manually reclassifying members to a class * *****************************************************************************/ #define TARGET_MAX_INPUT_SIZE 100 static ssize_t -target_reclassify_write(struct file *file, const char __user * buf, - size_t count, loff_t * ppos, int manual) +target_write(struct file *file, const char __user * buf, + size_t count, loff_t 
* ppos) { struct rcfs_inode_info *ri = RCFS_I(file->f_dentry->d_inode); char *optbuf; @@ -241,7 +231,7 @@ target_reclassify_write(struct file *file, const char __user * buf, clstype = ri->core->classtype; if (clstype->forced_reclassify) - rc = (*clstype->forced_reclassify) (manual ? ri->core: NULL, optbuf); + rc = (*clstype->forced_reclassify) (ri->core, optbuf); up(&(ri->vfs_inode.i_sem)); kfree(optbuf); @@ -249,46 +239,12 @@ target_reclassify_write(struct file *file, const char __user * buf, } -/****************************************************************************** - * Target - * - * pseudo file for manually reclassifying members to a class - * - *****************************************************************************/ - -static ssize_t -target_write(struct file *file, const char __user * buf, - size_t count, loff_t * ppos) -{ - return target_reclassify_write(file,buf,count,ppos,1); -} - struct file_operations target_fileops = { .write = target_write, }; EXPORT_SYMBOL(target_fileops); -/****************************************************************************** - * Reclassify - * - * pseudo file for reclassification of an object through CE - * - *****************************************************************************/ - -static ssize_t -reclassify_write(struct file *file, const char __user * buf, - size_t count, loff_t * ppos) -{ - return target_reclassify_write(file,buf,count,ppos,0); -} - -struct file_operations reclassify_fileops = { - .write = reclassify_write, -}; - -EXPORT_SYMBOL(reclassify_fileops); - /****************************************************************************** * Config * @@ -308,6 +264,7 @@ enum config_token_t { static match_table_t config_tokens = { {config_res_type, "res=%s"}, + {config_str, "config=%s"}, {config_err, NULL}, }; @@ -504,7 +461,7 @@ shares_write(struct file *file, const char __user * buf, } } - printk(KERN_DEBUG "Set %s shares to %d %d %d %d\n", + printk(KERN_ERR "Set %s shares to %d %d %d %d\n", resname, newshares.my_guarantee, newshares.my_limit, diff --git a/fs/rcfs/rootdir.c b/fs/rcfs/rootdir.c index d827db662..6da575ed6 100644 --- a/fs/rcfs/rootdir.c +++ b/fs/rcfs/rootdir.c @@ -91,7 +91,7 @@ int rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde) return -EINVAL; rootdesc = &mfdesc[0]; - printk(KERN_DEBUG "allocating classtype root <%s>\n", rootdesc->name); + printk("allocating classtype root <%s>\n", rootdesc->name); dentry = rcfs_create_internal(rcfs_rootde, rootdesc, 0); if (!dentry) { diff --git a/fs/rcfs/socket_fs.c b/fs/rcfs/socket_fs.c index f1c089921..9d9ba5241 100644 --- a/fs/rcfs/socket_fs.c +++ b/fs/rcfs/socket_fs.c @@ -113,12 +113,6 @@ struct rcfs_magf sock_rootdesc[] = { .i_op = &my_iops, .i_fop = &target_fileops, }, - { - .name = "reclassify", - .mode = RCFS_DEFAULT_FILE_MODE, - .i_op = &my_iops, - .i_fop = &reclassify_fileops, - }, }; struct rcfs_magf sock_magf[] = { diff --git a/fs/rcfs/super.c b/fs/rcfs/super.c index f013df226..871b7fb17 100644 --- a/fs/rcfs/super.c +++ b/fs/rcfs/super.c @@ -164,7 +164,7 @@ static int rcfs_fill_super(struct super_block *sb, void *data, int silent) clstype = ckrm_classtypes[i]; if (clstype == NULL) continue; - printk(KERN_DEBUG "A non null classtype\n"); + printk("A non null classtype\n"); if ((rc = rcfs_register_classtype(clstype))) continue; // could return with an error too diff --git a/fs/rcfs/tc_magic.c b/fs/rcfs/tc_magic.c index 9ef6d4d18..1a9f69729 100644 --- a/fs/rcfs/tc_magic.c +++ b/fs/rcfs/tc_magic.c @@ -43,7 +43,7 @@ #define 
TC_FILE_MODE (S_IFREG | S_IRUGO | S_IWUSR) -#define NR_TCROOTMF 7 +#define NR_TCROOTMF 6 struct rcfs_magf tc_rootdesc[NR_TCROOTMF] = { /* First entry must be root */ { @@ -77,15 +77,8 @@ struct rcfs_magf tc_rootdesc[NR_TCROOTMF] = { .i_fop = &shares_fileops, .i_op = &rcfs_file_inode_operations, }, - // Reclassify and Config should be made available only at the - // root level. Make sure they are the last two entries, as - // rcfs_mkdir depends on it - { - .name = "reclassify", - .mode = TC_FILE_MODE, - .i_fop = &reclassify_fileops, - .i_op = &rcfs_file_inode_operations, - }, + // Config should be made available only at the root level + // Make sure this is the last entry, as rcfs_mkdir depends on it { .name = "config", .mode = TC_FILE_MODE, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index a70801f35..f8babe603 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -1338,10 +1338,6 @@ __reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, { umode_t mode = inode->i_mode; - /* Prevent vservers from escaping chroot() barriers */ - if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN)) - return -EACCES; - if (mask & MAY_WRITE) { /* * Nobody gets write access to a read-only fs. diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index d27db1931..72b388bf5 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -289,10 +289,8 @@ #define __NR_mq_notify (__NR_mq_open+4) #define __NR_mq_getsetattr (__NR_mq_open+5) #define __NR_sys_kexec_load 283 -#define __NR_ioprio_set 284 -#define __NR_ioprio_get 285 -#define NR_syscalls 286 +#define NR_syscalls 284 #ifndef __KERNEL_SYSCALLS_NO_ERRNO__ /* user-visible error numbers are in the range -1 - -124: see */ diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h index 64e443d47..21774ed93 100644 --- a/include/asm-ppc/unistd.h +++ b/include/asm-ppc/unistd.h @@ -273,10 +273,8 @@ #define __NR_mq_notify 266 #define __NR_mq_getsetattr 267 #define __NR_kexec_load 268 -#define __NR_ioprio_set 269 -#define __NR_ioprio_get 270 -#define __NR_syscalls 271 +#define __NR_syscalls 269 #define __NR(n) #n diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 81e4e85ba..311e25a4f 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -552,12 +552,8 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify) __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) #define __NR_kexec_load 246 __SYSCALL(__NR_kexec_load, sys_ni_syscall) -#define __NR_ioprio_set 247 -__SYSCALL(__NR_ioprio_set, sys_ioprio_set); -#define __NR_ioprio_get 248 -__SYSCALL(__NR_ioprio_get, sys_ioprio_get); -#define __NR_syscall_max __NR_ioprio_get +#define __NR_syscall_max __NR_kexec_load #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ diff --git a/include/linux/ckrm-io.h b/include/linux/ckrm-io.h index 36040b930..6d6e12749 100644 --- a/include/linux/ckrm-io.h +++ b/include/linux/ckrm-io.h @@ -30,10 +30,13 @@ typedef void *(*icls_tsk_t) (struct task_struct *tsk); typedef int (*icls_ioprio_t) (struct task_struct *tsk); + #ifdef CONFIG_CKRM_RES_BLKIO -extern void *cki_tsk_icls (struct task_struct *tsk); -extern int cki_tsk_ioprio (struct task_struct *tsk); +#ifdef DOES_NOT_WORK_AND_NOT_NEEDED +extern inline icls_tsk_t cki_tsk_icls; +extern inline icls_ioprio_t cki_tsk_ioprio; +#endif #endif /* CONFIG_CKRM_RES_BLKIO */ diff --git a/include/linux/ckrm.h b/include/linux/ckrm.h index a29bf282a..04f4ec00f 100644 --- a/include/linux/ckrm.h +++ b/include/linux/ckrm.h @@ -9,13 +9,10 
@@ * * Latest version, more details at http://ckrm.sf.net * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * */ diff --git a/include/linux/ckrm_ce.h b/include/linux/ckrm_ce.h index f4e91e91d..f3cbd9132 100644 --- a/include/linux/ckrm_ce.h +++ b/include/linux/ckrm_ce.h @@ -9,13 +9,10 @@ * * Latest version, more details at http://ckrm.sf.net * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * */ @@ -32,7 +29,7 @@ #ifdef CONFIG_CKRM -#include // getting the event names +#include "ckrm.h" // getting the event names /* Action parameters identifying the cause of a task<->class notify callback * these can perculate up to user daemon consuming records send by the diff --git a/include/linux/ckrm_classqueue.h b/include/linux/ckrm_classqueue.h index 3041c8179..1bdf9b775 100644 --- a/include/linux/ckrm_classqueue.h +++ b/include/linux/ckrm_classqueue.h @@ -28,8 +28,7 @@ #include -#define CLASSQUEUE_SIZE 1024 // acb: changed from 128 -//#define CLASSQUEUE_SIZE 128 +#define CLASSQUEUE_SIZE 128 #define CQ_BITMAP_SIZE ((((CLASSQUEUE_SIZE+1+7)/8)+sizeof(long)-1)/sizeof(long)) /** @@ -117,7 +116,7 @@ void classqueue_update_prio(struct classqueue_struct *cq, cq_node_t * node, int cq_node_t *classqueue_get_head(struct classqueue_struct *cq); /*update the base priority of the classqueue*/ -void classqueue_update_base(struct classqueue_struct *cq); +void classqueue_update_base(struct classqueue_struct *cq, int new_base); /** * class_compare_prio: compare the priority of this two nodes diff --git a/include/linux/ckrm_mem.h b/include/linux/ckrm_mem.h index 4efebb993..52dc949ec 100644 --- a/include/linux/ckrm_mem.h +++ b/include/linux/ckrm_mem.h @@ -49,7 +49,6 @@ typedef struct ckrm_mem_res { // more than this is needed. 
int nr_active[MAX_NR_ZONES]; int nr_inactive[MAX_NR_ZONES]; - int tmp_cnt; int shrink_count; unsigned long last_shrink; int over_limit_failures; @@ -67,19 +66,17 @@ extern struct ckrm_res_ctlr mem_rcbs; // used to fill reclaim_flags, used only when memory is low in the system #define CLS_CLEAR (0) // class under its guarantee #define CLS_OVER_GUAR (1 << 0) // class is over its guarantee -#define CLS_PARENT_OVER (1 << 1) // parent is over 110% mark over limit -#define CLS_OVER_25 (1 << 2) // class over 25% mark bet guar(0) & limit(100) -#define CLS_OVER_50 (1 << 3) // class over 50% mark bet guar(0) & limit(100) -#define CLS_OVER_75 (1 << 4) // class over 75% mark bet guar(0) & limit(100) -#define CLS_OVER_100 (1 << 5) // class over its limit -#define CLS_OVER_110 (1 << 6) // class over 110% mark over limit -#define CLS_FLAGS_ALL ( CLS_OVER_GUAR | CLS_PARENT_OVER | CLS_OVER_25 | \ - CLS_OVER_50 | CLS_OVER_75 | CLS_OVER_100 | CLS_OVER_110 ) +#define CLS_PARENT_OVER (1 << 1) // parent is over 120% mark over limit +#define CLS_OVER_75 (1 << 2) // class over 75% mark bet guar(0) & limit(100) +#define CLS_OVER_100 (1 << 3) // class over its limit +#define CLS_OVER_110 (1 << 4) // class over 110% mark over limit +#define CLS_FLAGS_ALL ( CLS_OVER_GUAR | CLS_PARENT_OVER | CLS_OVER_75 | \ + CLS_OVER_100 | CLS_OVER_110 ) #define CLS_SHRINK_BIT (31) // used to both lock and set the bit #define CLS_SHRINK (1 << CLS_SHRINK_BIT) // shrink the given class // used in flags. set when a class is more than 90% of its maxlimit -#define MEM_AT_LIMIT 1 +#define MEM_NEAR_LIMIT 1 extern void ckrm_set_aggressive(ckrm_mem_res_t *); extern unsigned int ckrm_setup_reclamation(void); @@ -87,14 +84,16 @@ extern void ckrm_teardown_reclamation(void); extern void ckrm_get_reclaim_bits(unsigned int *, unsigned int *); extern void ckrm_init_mm_to_task(struct mm_struct *, struct task_struct *); extern void ckrm_mem_evaluate_mm(struct mm_struct *); -extern void ckrm_at_limit(ckrm_mem_res_t *); -extern int ckrm_memclass_valid(ckrm_mem_res_t *); +extern void ckrm_mem_evaluate_page_byadd(struct page *, struct mm_struct *); +extern void ckrm_near_limit(ckrm_mem_res_t *); #define ckrm_get_reclaim_flags(cls) ((cls)->reclaim_flags) #else #define ckrm_init_mm_to_current(a) do {} while (0) #define ckrm_mem_evaluate_mm(a) do {} while (0) +#define ckrm_mem_evaluate_page_byadd(a,b) do {} while (0) +#define page_class(page) (NULL) #define ckrm_get_reclaim_flags(a) (0) #define ckrm_setup_reclamation() (0) #define ckrm_teardown_reclamation() do {} while (0) diff --git a/include/linux/ckrm_mem_inline.h b/include/linux/ckrm_mem_inline.h index 221f93601..0eb4e49c0 100644 --- a/include/linux/ckrm_mem_inline.h +++ b/include/linux/ckrm_mem_inline.h @@ -56,10 +56,6 @@ ckrm_mem_share_compare(ckrm_mem_res_t *a, ckrm_mem_res_t *b) return -(b != NULL) ; if (b == NULL) return 0; - if (a->pg_guar == CKRM_SHARE_DONTCARE) - return 1; - if (b->pg_guar == CKRM_SHARE_DONTCARE) - return -1; return (a->pg_unused - b->pg_unused); } @@ -73,45 +69,34 @@ mem_class_get(ckrm_mem_res_t *cls) static inline void mem_class_put(ckrm_mem_res_t *cls) { - const char *name; - if (cls && atomic_dec_and_test(&(cls->nr_users)) ) { - if (cls->core == NULL) { - name = "unknown"; - } else { - name = cls->core->name; - } - printk(KERN_DEBUG "freeing memclass %p of \n", cls, name); - - // BUG_ON(ckrm_memclass_valid(cls)); - // kfree(cls); + printk("freeing memclass %p of \n", cls, cls->core->name); + //kfree(cls); } } -static inline void +static inline int 
incr_use_count(ckrm_mem_res_t *cls, int borrow) { + int over_limit; + atomic_inc(&cls->pg_total); + over_limit = (atomic_read(&cls->pg_total) > ((9 * cls->pg_limit) / 10)); if (borrow) cls->pg_lent++; - if ((cls->pg_guar == CKRM_SHARE_DONTCARE) || + if ((cls->pg_guar != CKRM_SHARE_DONTCARE) && (atomic_read(&cls->pg_total) > cls->pg_unused)) { ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent, mem_rcbs.resid, ckrm_mem_res_t); if (parcls) { - incr_use_count(parcls, 1); + over_limit |= incr_use_count(parcls, 1); cls->pg_borrowed++; + return over_limit; } - } else { - atomic_inc(&ckrm_mem_real_count); } - if ((cls->pg_limit != CKRM_SHARE_DONTCARE) && - (atomic_read(&cls->pg_total) >= cls->pg_limit) && - ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT)) { - ckrm_at_limit(cls); - } - return; + atomic_inc(&ckrm_mem_real_count); + return over_limit; } static inline void @@ -174,26 +159,10 @@ ckrm_clear_pages_class(struct page *pages, int numpages) } static inline void -ckrm_change_page_class(struct page *page, ckrm_mem_res_t *newcls) +ckrm_change_page_class(struct page *page, ckrm_mem_res_t *cls) { - ckrm_mem_res_t *oldcls = page_class(page); - - if (!newcls || oldcls == newcls) - return; - ckrm_clear_page_class(page); - ckrm_set_page_class(page, newcls); - if (test_bit(PG_ckrm_account, &page->flags)) { - decr_use_count(oldcls, 0); - incr_use_count(newcls, 0); - if (PageActive(page)) { - oldcls->nr_active[page_zonenum(page)]--; - newcls->nr_active[page_zonenum(page)]++; - } else { - oldcls->nr_inactive[page_zonenum(page)]--; - newcls->nr_inactive[page_zonenum(page)]++; - } - } + ckrm_set_page_class(page, cls); } static inline void @@ -209,61 +178,42 @@ ckrm_change_pages_class(struct page *pages, int numpages, static inline void ckrm_mem_inc_active(struct page *page) { - ckrm_mem_res_t *cls = page_class(page), *curcls; - if (unlikely(!cls)) { - return; - } - BUG_ON(test_bit(PG_ckrm_account, &page->flags)); - if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) { - cls = curcls; - ckrm_change_page_class(page, cls); - } + ckrm_mem_res_t *cls = page_class(page); + BUG_ON(cls == NULL); cls->nr_active[page_zonenum(page)]++; - incr_use_count(cls, 0); - set_bit(PG_ckrm_account, &page->flags); + if (incr_use_count(cls, 0)) { + ckrm_near_limit(cls); + } } static inline void ckrm_mem_dec_active(struct page *page) { ckrm_mem_res_t *cls = page_class(page); - if (unlikely(!cls)) { - return; - } - BUG_ON(!test_bit(PG_ckrm_account, &page->flags)); + BUG_ON(cls == NULL); cls->nr_active[page_zonenum(page)]--; decr_use_count(cls, 0); - clear_bit(PG_ckrm_account, &page->flags); } static inline void ckrm_mem_inc_inactive(struct page *page) { - ckrm_mem_res_t *cls = page_class(page), *curcls; - if (unlikely(!cls)) { - return; - } - BUG_ON(test_bit(PG_ckrm_account, &page->flags)); - if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) { - cls = curcls; - ckrm_change_page_class(page, cls); - } + ckrm_mem_res_t *cls = page_class(page); + BUG_ON(cls == NULL); cls->nr_inactive[page_zonenum(page)]++; - incr_use_count(cls, 0); - set_bit(PG_ckrm_account, &page->flags); + if (incr_use_count(cls, 0) && + ((cls->flags & MEM_NEAR_LIMIT) != MEM_NEAR_LIMIT)) { + ckrm_near_limit(cls); + } } static inline void ckrm_mem_dec_inactive(struct page *page) { ckrm_mem_res_t *cls = page_class(page); - if (unlikely(!cls)) { - return; - } - BUG_ON(!test_bit(PG_ckrm_account, &page->flags)); + BUG_ON(cls == NULL); cls->nr_inactive[page_zonenum(page)]--; decr_use_count(cls, 0); - clear_bit(PG_ckrm_account, &page->flags); } static inline 
int @@ -282,13 +232,7 @@ ckrm_class_limit_ok(ckrm_mem_res_t *cls) if ((mem_rcbs.resid == -1) || !cls) { return 1; } - if (cls->pg_limit == CKRM_SHARE_DONTCARE) { - ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent, - mem_rcbs.resid, ckrm_mem_res_t); - return (!parcls ?: ckrm_class_limit_ok(parcls)); - } else { - return (atomic_read(&cls->pg_total) <= (11 * cls->pg_limit) / 10); - } + return (atomic_read(&cls->pg_total) <= (11 * cls->pg_limit) / 10); } #else // !CONFIG_CKRM_RES_MEM diff --git a/include/linux/ckrm_rc.h b/include/linux/ckrm_rc.h index 1bf2d07b5..b46cfd9f3 100644 --- a/include/linux/ckrm_rc.h +++ b/include/linux/ckrm_rc.h @@ -132,7 +132,7 @@ typedef struct ckrm_classtype { int num_classes; /* state about my ce interaction */ - atomic_t ce_regd; // if CE registered + int ce_regd; // if CE registered int ce_cb_active; // if Callbacks active atomic_t ce_nr_users; // number of active transient calls struct ckrm_eng_callback ce_callbacks; // callback engine @@ -223,11 +223,7 @@ typedef struct ckrm_core_class { * OTHER ******************************************************************************/ -#define ckrm_get_res_class(rescls, resid, type) \ - ((type*) (((resid != -1) && ((rescls) != NULL) \ - && ((rescls) != (void *)-1)) ? \ - ((struct ckrm_core_class *)(rescls))->res_class[resid] : NULL)) - +#define ckrm_get_res_class(rescls,resid,type) ((type*)((rescls)->res_class[resid])) extern int ckrm_register_res_ctlr(struct ckrm_classtype *, ckrm_res_ctlr_t *); extern int ckrm_unregister_res_ctlr(ckrm_res_ctlr_t *); diff --git a/include/linux/ckrm_sched.h b/include/linux/ckrm_sched.h index 3611c2d3e..9d82214fb 100644 --- a/include/linux/ckrm_sched.h +++ b/include/linux/ckrm_sched.h @@ -15,34 +15,30 @@ #ifndef _CKRM_SCHED_H #define _CKRM_SCHED_H +#define CC_BUG_ON_DO(cond,action) do { if (cond) action; BUG_ON(cond); } while(0) +#define CC_BUG_ON(cond) BUG_ON(cond) + #include #include #include -#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) +//update every second +#define CVT_UPDATE_TICK (1*HZ/1 ?: 1) +#define CLASS_BONUS_RATE 22 // shift from ns to increase class bonus +#define PRIORITY_BONUS_RATE 0 // ?? 
Hubertus +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) struct prio_array { - unsigned int nr_active; + int nr_active; unsigned long bitmap[BITMAP_SIZE]; struct list_head queue[MAX_PRIO]; }; -#ifdef CONFIG_CKRM_CPU_SCHEDULE -#define rq_active(p,rq) (get_task_lrq(p)->active) -#define rq_expired(p,rq) (get_task_lrq(p)->expired) -int __init init_ckrm_sched_res(void); -#else -#define rq_active(p,rq) (rq->active) -#define rq_expired(p,rq) (rq->expired) -static inline void init_ckrm_sched_res(void) {} -static inline int ckrm_cpu_monitor_init(void) {return 0;} -#endif //CONFIG_CKRM_CPU_SCHEDULE - -#ifdef CONFIG_CKRM_CPU_SCHEDULE -struct ckrm_runqueue { +struct ckrm_local_runqueue { cq_node_t classqueue_linkobj; /*links in classqueue */ struct ckrm_cpu_class *cpu_class; // class it belongs to struct classqueue_struct *classqueue; // classqueue it belongs tow + CVT_t uncounted_cvt; unsigned long long uncounted_ns; prio_array_t *active, *expired, arrays[2]; @@ -59,25 +55,19 @@ struct ckrm_runqueue { * updated on enqueue, dequeue */ int top_priority; - CVT_t local_cvt; - - unsigned long lrq_load; - int local_weight; - - - /* - * unused CPU time accumulated while thoe class - * is inactive goes to savings - * - * initialized to be 0 - * a class can't accumulate more than SAVING_THRESHOLD of savings - */ - unsigned long long savings; - + CVT_t local_cvt; // snapshot of local_cvt, update on every loadbalance unsigned long magic; //for debugging }; -typedef struct ckrm_runqueue ckrm_lrq_t; +/** + * @last_sleep: the last time it sleeps, last_sleep = 0 when not sleeping + */ +struct ckrm_cpu_class_local_stat { + unsigned long long run; + unsigned long long total; + unsigned long long last_sleep; + unsigned long cpu_demand; /*estimated cpu demand */ +}; /** * ckrm_cpu_class_stat - cpu usage statistics maintained for each class @@ -88,35 +78,22 @@ struct ckrm_cpu_class_stat { unsigned long long total_ns; /*how much nano-secs it has consumed */ - struct ckrm_cpu_demand_stat local_stats[NR_CPUS]; - - /* - * - */ - unsigned long max_demand; /* the maximun a class can consume */ - int egrt,megrt; /*effective guarantee*/ - int ehl,mehl; /*effective hard limit, my effective hard limit*/ + struct ckrm_cpu_class_local_stat local_stats[NR_CPUS]; + unsigned long cpu_demand; + /*temp stat used by cpu monitor */ + int effective_guarantee; + int effective_limit; + int glut; //true or false /* - * eshare: for both default class and its children - * meshare: just for the default class + * effective_share: for both default class and its children + * self_effective_share: just for the default class */ - int eshare; - int meshare; + int effective_share; + int self_effective_share; }; -#define CKRM_CPU_CLASS_MAGIC 0x7af2abe3 - -#define USAGE_SAMPLE_FREQ HZ //sample every 1 seconds -#define NS_PER_SAMPLE (USAGE_SAMPLE_FREQ*(NSEC_PER_SEC/HZ)) -#define USAGE_WINDOW_SIZE 60 //keep the last 60 sample - -struct ckrm_usage { - unsigned long samples[USAGE_WINDOW_SIZE]; //record usages - unsigned long sample_pointer; //pointer for the sliding window - unsigned long long last_ns; //ns for last sample - long long last_sample_jiffies; //in number of jiffies -}; +typedef struct ckrm_cpu_class_stat ckrm_stat_t; /* * manages the class status @@ -127,224 +104,72 @@ struct ckrm_cpu_class { struct ckrm_core_class *parent; struct ckrm_shares shares; spinlock_t cnt_lock; // always grab parent's lock first and then child's + CVT_t global_cvt; // total cummulative virtual time struct ckrm_cpu_class_stat stat; struct 
list_head links; // for linking up in cpu classes - ckrm_lrq_t local_queues[NR_CPUS]; // runqueues - struct ckrm_usage usage; - unsigned long magic; //for debugging + struct ckrm_local_runqueue local_queues[NR_CPUS]; // runqueues }; -#define cpu_class_weight(cls) (cls->stat.meshare) -#define local_class_weight(lrq) (lrq->local_weight) - -static inline int valid_cpu_class(struct ckrm_cpu_class * cls) -{ - return (cls && cls->magic == CKRM_CPU_CLASS_MAGIC); -} - -struct classqueue_struct *get_cpu_classqueue(int cpu); -struct ckrm_cpu_class * get_default_cpu_class(void); - - -static inline void ckrm_usage_init(struct ckrm_usage* usage) -{ - int i; - - for (i=0; i < USAGE_WINDOW_SIZE; i++) - usage->samples[i] = 0; - usage->sample_pointer = 0; - usage->last_ns = 0; - usage->last_sample_jiffies = 0; -} - -/* - * this function can be called at any frequency - * it's self-contained - */ -static inline void ckrm_sample_usage(struct ckrm_cpu_class* clsptr) -{ - struct ckrm_usage* usage = &clsptr->usage; - unsigned long long cur_sample; - int duration = jiffies - usage->last_sample_jiffies; - - //jiffies wasn't start from 0 - //so it need to be properly handled - if (unlikely(!usage->last_sample_jiffies)) - usage->last_sample_jiffies = jiffies; - - //called too frequenctly - if (duration < USAGE_SAMPLE_FREQ) - return; - - usage->last_sample_jiffies = jiffies; - - cur_sample = clsptr->stat.total_ns - usage->last_ns; - usage->last_ns = clsptr->stat.total_ns; +#if CONFIG_CKRM_CPU_SCHEDULE +#define rq_active(p,rq) (get_task_class_queue(p)->active) +#define rq_expired(p,rq) (get_task_class_queue(p)->expired) +#else +#define rq_active(p,rq) (rq->active) +#define rq_expired(p,rq) (rq->expired) +#endif - //scale it based on the sample duration - cur_sample *= ((USAGE_SAMPLE_FREQ<< 15)/duration); - cur_sample >>= 15; - usage->samples[usage->sample_pointer] = cur_sample; - // printk("sample = %llu jiffies=%lu \n",cur_sample, jiffies); +//#define cpu_class_weight(cls) (cls->shares.my_guarantee) +#define cpu_class_weight(cls) (cls->stat.self_effective_share) - usage->sample_pointer ++; - if (usage->sample_pointer >= USAGE_WINDOW_SIZE) - usage->sample_pointer = 0; -} +#define bpt_queue(cpu) (& (cpu_rq(cpu)->classqueue) ) +CVT_t get_min_cvt(int cpu); -//duration is specified in number of jiffies -//return the usage in percentage -static inline int get_ckrm_usage(struct ckrm_cpu_class* clsptr, int duration) -{ - int nr_samples = duration/USAGE_SAMPLE_FREQ?:1; - struct ckrm_usage* usage = &clsptr->usage; - unsigned long long total = 0; - int i, idx; - - if (nr_samples > USAGE_WINDOW_SIZE) - nr_samples = USAGE_WINDOW_SIZE; - - idx = usage->sample_pointer; - for (i = 0; i< nr_samples; i++) { - if (! 
idx) - idx = USAGE_WINDOW_SIZE; - idx --; - total += usage->samples[idx]; - } - total *= 100; - do_div(total,nr_samples); - do_div(total,NS_PER_SAMPLE); - do_div(total,cpus_weight(cpu_online_map)); - return total; -} +struct classqueue_struct *get_cpu_classqueue(int cpu); +extern struct ckrm_cpu_class default_cpu_class_obj; +#define default_cpu_class (&default_cpu_class_obj) -#define lrq_nr_running(lrq) \ - (lrq->active->nr_active + lrq->expired->nr_active) +#define local_queue_nr_running(local_queue) \ + (local_queue->active->nr_active + local_queue->expired->nr_active) -static inline ckrm_lrq_t * -get_ckrm_lrq(struct ckrm_cpu_class*cls, int cpu) +static inline struct ckrm_local_runqueue * +get_ckrm_local_runqueue(struct ckrm_cpu_class*cls, int cpu) { return &(cls->local_queues[cpu]); } -static inline ckrm_lrq_t *get_task_lrq(struct task_struct *p) +static inline struct ckrm_local_runqueue *get_task_class_queue(struct task_struct *p) { return &(p->cpu_class->local_queues[task_cpu(p)]); } #define task_list_entry(list) list_entry(list,struct task_struct,run_list) -#define class_list_entry(list) list_entry(list,struct ckrm_runqueue,classqueue_linkobj) +#define class_list_entry(list) list_entry(list,struct ckrm_local_runqueue,classqueue_linkobj) /* some additional interfaces exported from sched.c */ struct runqueue; +void dequeue_task(struct task_struct *p, prio_array_t * array); +void enqueue_task(struct task_struct *p, prio_array_t * array); +struct runqueue *task_rq_lock(task_t * p, unsigned long *flags); +void task_rq_unlock(struct runqueue *rq, unsigned long *flags); +extern spinlock_t cvt_lock; extern rwlock_t class_list_lock; extern struct list_head active_cpu_classes; -unsigned int task_timeslice(task_t *p); -void _ckrm_cpu_change_class(task_t *task, struct ckrm_cpu_class *newcls); +/*functions exported by ckrm_cpu_class.c*/ +int __init init_ckrm_sched_res(void); void init_cpu_classes(void); -void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares); -void ckrm_cpu_change_class(void *task, void *old, void *new); - +/*functions exported by ckrm_cpu_monitor.c*/ +void ckrm_cpu_monitor(void); +void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat); #define CPU_DEMAND_ENQUEUE 0 #define CPU_DEMAND_DEQUEUE 1 #define CPU_DEMAND_DESCHEDULE 2 -#define CPU_DEMAND_INIT 3 - -/*functions exported by ckrm_cpu_monitor.c*/ -void ckrm_cpu_monitor(int check_min); -int ckrm_cpu_monitor_init(void); -void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat); -void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len); -void adjust_local_weight(void); - -#define get_task_lrq_stat(p) (&(p)->cpu_class->stat.local_stats[task_cpu(p)]) -#define get_cls_local_stat(cls,cpu) (&(cls)->stat.local_stats[cpu]) -#define get_rq_local_stat(lrq,cpu) (get_cls_local_stat((lrq)->cpu_class,cpu)) - -/******************************************************************** - * Parameters that determine how quickly CVT's progress and how - * priority can impact a LRQ's runqueue position. See also - * get_effective_prio(). These parameters need to adjusted - * in accordance to the following example and understanding. - * - * CLASS_QUANTIZER: - * - * A class with 50% share, can execute 500 ms / per sec ~ 2^29 ns. - * It's share will be set to 512 = 2^9. The globl CLASSQUEUE_SIZE is set to 2^7. - * With CLASS_QUANTIZER=16, the local_cvt of this class will increase - * by 2^29/2^9 = 2^20 = 1024K. - * Setting CLASS_QUANTIZER to 16, 2^(20-16) = 16 slots / per second. 
- * Do the same math, a class with any share value, will cover 16 slots / per second. - * So 2^8 total slots is good track for 8 seconds of system execution - * - * PRIORITY_QUANTIZER: - * - * How much can top priorities of class impact slot bonus. - * There are 40 nice priorities, range from -20 to 19, with default nice = 0 - * "2" will allow upto 5 slots improvement - * when certain task within the class has a nice value of -20 - * in the RQ thus for 50% class it can perform ~300 msec starvation. - * - *******************************************************************/ - -#define CLASS_QUANTIZER 16 //shift from ns to increase class bonus -#define PRIORITY_QUANTIZER 2 //controls how much a high prio task can borrow - -#define CKRM_SHARE_ACCURACY 13 -#define NSEC_PER_MS 1000000 -#define NSEC_PER_JIFFIES (NSEC_PER_SEC/HZ) - - -#define MAX_SAVINGS_ABSOLUTE (10LLU*NSEC_PER_SEC) // 10 seconds - -#define CVT_UPDATE_TICK ((HZ/2)?:1) - -// ABSOLUTE_CKRM_TUNING determines whether classes can make up -// lost time in absolute time or in relative values - -#define ABSOLUTE_CKRM_TUNING // preferred due to more predictable behavior - -#ifdef ABSOLUTE_CKRM_TUNING - -#define MAX_SAVINGS MAX_SAVINGS_ABSOLUTE -//an absolute bonus of 200ms for classes when reactivated -#define INTERACTIVE_BONUS(lrq) ((200*NSEC_PER_MS)/local_class_weight(lrq)) -#define SAVINGS_LEAK_SPEED (CVT_UPDATE_TICK/10*NSEC_PER_JIFFIES) - -#define scale_cvt(val,lrq) ((val)*local_class_weight(lrq)) -#define unscale_cvt(val,lrq) (do_div(val,local_class_weight(lrq))) - -#else - -#define MAX_SAVINGS (MAX_SAVINGS_ABSOLUTE >> CKRM_SHARE_ACCURACY) -/* - * to improve system responsiveness - * an inactive class is put a little bit ahead of the current class when it wakes up - * the amount is set in normalized term to simplify the calculation - * for class with 100% share, it can be 2s ahead - * while for class with 10% share, it can be 200ms ahead - */ -#define INTERACTIVE_BONUS(lrq) (2*NSEC_PER_MS) - -/* - * normalized savings can't be more than MAX_NORMALIZED_SAVINGS - * based on the current configuration - * this means that a class with share 100% will accumulate 10s at most - * while a class with 1% of the share can only accumulate 100ms - */ - -//a class with share 100% can get 100ms every 500ms -//while a class with share 10% can only get 10ms every 500ms -#define SAVINGS_LEAK_SPEED ((CVT_UPDATE_TICK/5*NSEC_PER_JIFFIES) >> CKRM_SHARE_ACCURACY) - -#define scale_cvt(val,lrq) (val) -#define unscale_cvt(val,lrq) (val) - -#endif +void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len); +#define get_task_local_stat(p) (&(p)->cpu_class->stat.local_stats[task_cpu(p)]) +#define get_rq_local_stat(lrq,cpu) (&(lrq)->cpu_class->stat.local_stats[cpu]) /** * get_effective_prio: return the effective priority of a class local queue @@ -356,22 +181,18 @@ void adjust_local_weight(void); * currently, prio increases by 1 if either: top_priority increase by one * or, local_cvt increases by 4ms */ -static inline int get_effective_prio(ckrm_lrq_t * lrq) +static inline int get_effective_prio(struct ckrm_local_runqueue * lcq) { int prio; - prio = lrq->local_cvt >> CLASS_QUANTIZER; // cumulative usage -#ifndef URGENCY_SUPPORT -#warning "ACB removing urgency calculation from get_effective_prio" -#else - prio += lrq->top_priority >> PRIORITY_QUANTIZER; // queue urgency -#endif + // cumulative usage + prio = lcq->local_cvt >> CLASS_BONUS_RATE; + // queue urgency + prio += lcq->top_priority >> PRIORITY_BONUS_RATE; return 
prio; } -CVT_t get_local_cur_cvt(int cpu); - /** * update_class_priority: * @@ -385,8 +206,9 @@ CVT_t get_local_cur_cvt(int cpu); * -- rq_get_next_task (queue switch) * -- update_local_cvt * -- schedule + * -- update_global_cvt */ -static inline void update_class_priority(ckrm_lrq_t *local_rq) +static inline void update_class_priority(struct ckrm_local_runqueue *local_rq) { int effective_prio = get_effective_prio(local_rq); classqueue_update_prio(local_rq->classqueue, @@ -398,80 +220,42 @@ static inline void update_class_priority(ckrm_lrq_t *local_rq) * set the new top priority and reposition the queue * called when: task enqueue/dequeue and queue switch */ -static inline void set_top_priority(ckrm_lrq_t *lrq, +static inline void set_top_priority(struct ckrm_local_runqueue *class_queue, int new_priority) { - lrq->top_priority = new_priority; - update_class_priority(lrq); -} - -/* - * task_load: how much load this task counts - */ -static inline unsigned long task_load(struct task_struct* p) -{ - return (task_timeslice(p) * p->demand_stat.cpu_demand); -} - -/* - * runqueue load is the local_weight of all the classes on this cpu - * must be called with class_list_lock held - */ -static inline unsigned long ckrm_cpu_load(int cpu) -{ - struct ckrm_cpu_class *clsptr; - ckrm_lrq_t* lrq; - struct ckrm_cpu_demand_stat* l_stat; - int total_load = 0; - int load; - - list_for_each_entry(clsptr,&active_cpu_classes,links) { - lrq = get_ckrm_lrq(clsptr,cpu); - l_stat = get_cls_local_stat(clsptr,cpu); - load = lrq->local_weight; - if (l_stat->cpu_demand < load) - load = l_stat->cpu_demand; - total_load += load; - } - return total_load; + class_queue->top_priority = new_priority; + update_class_priority(class_queue); } static inline void class_enqueue_task(struct task_struct *p, prio_array_t * array) { - ckrm_lrq_t *lrq; + struct ckrm_local_runqueue *queue; int effective_prio; - lrq = get_task_lrq(p); - - cpu_demand_event(&p->demand_stat,CPU_DEMAND_ENQUEUE,0); - lrq->lrq_load += task_load(p); + queue = get_task_class_queue(p); - if ((p->prio < lrq->top_priority) && (array == lrq->active)) - set_top_priority(lrq, p->prio); - - if (! cls_in_classqueue(&lrq->classqueue_linkobj)) { - cpu_demand_event(get_task_lrq_stat(p),CPU_DEMAND_ENQUEUE,0); - effective_prio = get_effective_prio(lrq); - classqueue_enqueue(lrq->classqueue, &lrq->classqueue_linkobj, effective_prio); + if (! 
cls_in_classqueue(&queue->classqueue_linkobj)) { + cpu_demand_event(get_task_local_stat(p),CPU_DEMAND_ENQUEUE,0); + /*make sure the cvt of this class is up to date*/ + queue->local_cvt = get_min_cvt(task_cpu(p)); + effective_prio = get_effective_prio(queue); + classqueue_enqueue(queue->classqueue, &queue->classqueue_linkobj, effective_prio); } + + if ((p->prio < queue->top_priority) && (array == queue->active)) + set_top_priority(queue, p->prio); } static inline void class_dequeue_task(struct task_struct *p, prio_array_t * array) { - ckrm_lrq_t *lrq = get_task_lrq(p); - unsigned long load = task_load(p); + struct ckrm_local_runqueue *queue = get_task_class_queue(p); - BUG_ON(lrq->lrq_load < load); - lrq->lrq_load -= load; - - cpu_demand_event(&p->demand_stat,CPU_DEMAND_DEQUEUE,0); - - if ((array == lrq->active) && (p->prio == lrq->top_priority) + if ((array == queue->active) && (p->prio == queue->top_priority) && list_empty(&(array->queue[p->prio]))) - set_top_priority(lrq, + set_top_priority(queue, find_next_bit(array->bitmap, MAX_PRIO, p->prio)); } @@ -482,82 +266,32 @@ static inline void class_dequeue_task(struct task_struct *p, */ static inline void update_local_cvt(struct task_struct *p, unsigned long nsec) { - ckrm_lrq_t * lrq = get_task_lrq(p); - - unsigned long cvt_inc = nsec / local_class_weight(lrq); - - lrq->local_cvt += cvt_inc; - lrq->uncounted_ns += nsec; + struct ckrm_local_runqueue *class_queue = get_task_class_queue(p); + struct ckrm_cpu_class *cls = class_queue->cpu_class; - update_class_priority(lrq); -} + unsigned long cvt_inc = nsec / cpu_class_weight(cls); -static inline int class_preempts_curr(struct task_struct * p, struct task_struct* curr) -{ - struct cq_node_struct* node1 = &(get_task_lrq(p)->classqueue_linkobj); - struct cq_node_struct* node2 = &(get_task_lrq(curr)->classqueue_linkobj); + class_queue->local_cvt += cvt_inc; + class_queue->uncounted_cvt += cvt_inc; - return (class_compare_prio(node1,node2) < 0); + class_queue->uncounted_ns += nsec; + update_class_priority(class_queue); } /* - * return a random value with range [0, (val-1)] + * called during loadbalancing + * to charge the class with locally accumulated cvt */ -static inline int get_ckrm_rand(unsigned long val) -{ - int rand; - static int last_rand[NR_CPUS]; - int cpu = smp_processor_id(); - - rand = last_rand[cpu]; - rand ++; - if (rand >= val) - rand = 0; - - last_rand[cpu] = rand; - return rand; -} - -void update_class_cputime(int this_cpu); +void update_global_cvts(int this_cpu); -/**********************************************/ -/* PID_LOAD_BALANCING */ -/**********************************************/ -struct ckrm_load_struct { - unsigned long load_p; /*propotional*/ - unsigned long load_i; /*integral */ - long load_d; /*derivative */ -}; - -typedef struct ckrm_load_struct ckrm_load_t; - -static inline void ckrm_load_init(ckrm_load_t* ckrm_load) { - ckrm_load->load_p = 0; - ckrm_load->load_i = 0; - ckrm_load->load_d = 0; -} - -void ckrm_load_sample(ckrm_load_t* ckrm_load,int cpu); -long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group); -#define rq_ckrm_load(rq) (&((rq)->ckrm_load)) - -static inline void ckrm_sched_tick(unsigned long j,int this_cpu,struct ckrm_load_struct* ckrm_load) +/** + * + */ +static inline int class_preempts_curr(struct task_struct * p, struct task_struct* curr) { - read_lock(&class_list_lock); - -#ifdef CONFIG_SMP - ckrm_load_sample(ckrm_load,this_cpu); -#endif - - if (! 
(j % CVT_UPDATE_TICK)) { - // printk("ckrm_sched j=%lu\n",j); - classqueue_update_base(get_cpu_classqueue(this_cpu)); - update_class_cputime(this_cpu); - } + struct cq_node_struct* node1 = &(get_task_class_queue(p)->classqueue_linkobj); + struct cq_node_struct* node2 = &(get_task_class_queue(curr)->classqueue_linkobj); - read_unlock(&class_list_lock); + return (class_compare_prio(node1,node2) < 0); } - -#endif //CONFIG_CKRM_CPU_SCHEDULE - #endif diff --git a/include/linux/crbce.h b/include/linux/crbce.h deleted file mode 100644 index 6a2190dd8..000000000 --- a/include/linux/crbce.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * crbce.h - * - * Copyright (C) Hubertus Franke, IBM Corp. 2003 - * - * This files contains the type definition of the record - * created by the CRBCE CKRM classification engine - * - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * - */ - - -/* - * Changes - * - * 2003-11-11 Created by H.Franke - * 2003-12-01 Sanitized for Delivery by H.Franke - * - */ - -#ifndef CRBCE_RECORDS_H -#define CRBCE_RECORDS_H - -#ifdef __KERNEL__ -#include -#else -#define CONFIG_CKRM -#define CONFIG_CRBCE -#define CONFIG_DELAY_ACCT -#endif - -#include -#include -#include - -#define CRBCE_UKCC_NAME "crbce_ukcc" -#define CRBCE_UKCC_PATH "/mnt/relayfs" - -#define CRBCE_UKCC_PATH_NAME CRBCE_UKCC_PATH"/"CRBCE_UKCC_NAME - -#define CRBCE_MAX_CLASS_NAME_LEN 256 - -/**************************************************************** - * - * CRBCE EVENT SET is and extension to the standard CKRM_EVENTS - * - ****************************************************************/ -enum { - - /* we use the standard CKRM_EVENT_<..> - * to identify reclassification cause actions - * and extend by additional ones we need - */ - - /* up event flow */ - - CRBCE_REC_EXIT = CKRM_NUM_EVENTS, - CRBCE_REC_DATA_DELIMITER, - CRBCE_REC_SAMPLE, - CRBCE_REC_TASKINFO, - CRBCE_REC_SYS_INFO, - CRBCE_REC_CLASS_INFO, - CRBCE_REC_KERNEL_CMD_DONE, - CRBCE_REC_UKCC_FULL, - - /* down command issueance */ - CRBCE_REC_KERNEL_CMD, - - CRBCE_NUM_EVENTS -}; - -struct task_sample_info { - uint32_t cpu_running; - uint32_t cpu_waiting; - uint32_t io_delayed; - uint32_t memio_delayed; -}; - -/********************************************* - * KERNEL -> USER records * - *********************************************/ - -/* we have records with either a time stamp or not */ -struct crbce_hdr { - int type; - pid_t pid; -}; - -struct crbce_hdr_ts { - int type; - pid_t pid; - uint32_t jiffies; - uint64_t cls; -}; - -/* individual records */ - -struct crbce_rec_fork { - struct crbce_hdr_ts hdr; - pid_t ppid; -}; - -struct crbce_rec_data_delim { - struct crbce_hdr_ts hdr; - int is_stop; /* 0 start, 1 stop */ -}; - -struct crbce_rec_task_data { - struct crbce_hdr_ts hdr; - struct task_sample_info sample; - struct task_delay_info delay; -}; - -struct crbce_ukcc_full { - struct crbce_hdr_ts hdr; -}; - -struct crbce_class_info { - struct crbce_hdr_ts hdr; - int action; - int namelen; - char name[CRBCE_MAX_CLASS_NAME_LEN]; -}; - -/********************************************* - * USER -> KERNEL records * - *********************************************/ 
- -enum crbce_kernel_cmd { - CRBCE_CMD_START, - CRBCE_CMD_STOP, - CRBCE_CMD_SET_TIMER, - CRBCE_CMD_SEND_DATA, -}; - -struct crbce_command { - int type; /* we need this for the K->U reflection */ - int cmd; - uint32_t len; /* added in the kernel for reflection */ -}; - -#define set_cmd_hdr(rec,tok) \ -((rec).hdr.type=CRBCE_REC_KERNEL_CMD,(rec).hdr.cmd=(tok)) - -struct crbce_cmd_done { - struct crbce_command hdr; - int rc; -}; - -struct crbce_cmd { - struct crbce_command hdr; -}; - -struct crbce_cmd_send_data { - struct crbce_command hdr; - int delta_mode; -}; - -struct crbce_cmd_settimer { - struct crbce_command hdr; - uint32_t interval; /* in msec .. 0 means stop */ -}; - -#endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b42a9c4e2..27e8183f4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -17,7 +17,6 @@ typedef void (elevator_requeue_req_fn) (request_queue_t *, struct request *); typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *); typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *); typedef int (elevator_may_queue_fn) (request_queue_t *, int); -typedef void (elevator_set_congested_fn) (request_queue_t *); typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, int); typedef void (elevator_put_req_fn) (request_queue_t *, struct request *); @@ -46,7 +45,6 @@ struct elevator_s elevator_put_req_fn *elevator_put_req_fn; elevator_may_queue_fn *elevator_may_queue_fn; - elevator_set_congested_fn *elevator_set_congested_fn; elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; @@ -76,7 +74,6 @@ extern struct request *elv_latter_request(request_queue_t *, struct request *); extern int elv_register_queue(request_queue_t *q); extern void elv_unregister_queue(request_queue_t *q); extern int elv_may_queue(request_queue_t *, int); -extern void elv_set_congested(request_queue_t *); extern void elv_completed_request(request_queue_t *, struct request *); extern int elv_set_request(request_queue_t *, struct request *, int); extern void elv_put_request(request_queue_t *, struct request *); @@ -122,6 +119,4 @@ extern int elv_try_last_merge(request_queue_t *, struct bio *); #define ELEVATOR_INSERT_BACK 2 #define ELEVATOR_INSERT_SORT 3 -#define RQ_ELV_DATA(rq) (rq)->elevator_private - #endif diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index cd252c8eb..7c6f650c9 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -196,13 +196,8 @@ struct ext2_group_desc #define EXT2_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ -#ifdef CONFIG_VSERVER_LEGACY -#define EXT2_FL_USER_VISIBLE 0x0C03DFFF /* User visible flags */ -#define EXT2_FL_USER_MODIFIABLE 0x0C0380FF /* User modifiable flags */ -#else #define EXT2_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT2_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -#endif /* * ioctl commands diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 7fe32d0be..100fba908 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -189,13 +189,8 @@ struct ext3_group_desc #define EXT3_IUNLINK_FL 0x08000000 /* Immutable unlink */ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -#ifdef CONFIG_VSERVER_LEGACY -#define EXT3_FL_USER_VISIBLE 0x0C03DFFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x0C0380FF /* User modifiable flags */ -#else #define EXT3_FL_USER_VISIBLE 
0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -#endif /* * Inode dynamic state flags diff --git a/include/linux/fs.h b/include/linux/fs.h index ece31a727..e83d8e4dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -42,7 +42,7 @@ struct vfsmount; /* Fixed constants first: */ #undef NR_OPEN #define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ -#define INR_OPEN 4096 /* Initial setting for nfile rlimits */ +#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< static inline void add_page_to_active_list(struct zone *zone, struct page *page) { list_add(&page->lru, &zone->active_list); zone->nr_active++; - ckrm_mem_inc_active(page); } static inline void @@ -13,7 +11,6 @@ add_page_to_inactive_list(struct zone *zone, struct page *page) { list_add(&page->lru, &zone->inactive_list); zone->nr_inactive++; - ckrm_mem_inc_inactive(page); } static inline void @@ -21,7 +18,6 @@ del_page_from_active_list(struct zone *zone, struct page *page) { list_del(&page->lru); zone->nr_active--; - ckrm_mem_dec_active(page); } static inline void @@ -29,7 +25,6 @@ del_page_from_inactive_list(struct zone *zone, struct page *page) { list_del(&page->lru); zone->nr_inactive--; - ckrm_mem_dec_inactive(page); } static inline void @@ -39,9 +34,7 @@ del_page_from_lru(struct zone *zone, struct page *page) if (PageActive(page)) { ClearPageActive(page); zone->nr_active--; - ckrm_mem_dec_active(page); } else { zone->nr_inactive--; - ckrm_mem_dec_inactive(page); } } diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h deleted file mode 100644 index 0fbec884a..000000000 --- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ /dev/null @@ -1,310 +0,0 @@ -/* PPTP constants and structs */ -#ifndef _CONNTRACK_PPTP_H -#define _CONNTRACK_PPTP_H - -/* state of the control session */ -enum pptp_ctrlsess_state { - PPTP_SESSION_NONE, /* no session present */ - PPTP_SESSION_ERROR, /* some session error */ - PPTP_SESSION_STOPREQ, /* stop_sess request seen */ - PPTP_SESSION_REQUESTED, /* start_sess request seen */ - PPTP_SESSION_CONFIRMED, /* session established */ -}; - -/* state of the call inside the control session */ -enum pptp_ctrlcall_state { - PPTP_CALL_NONE, - PPTP_CALL_ERROR, - PPTP_CALL_OUT_REQ, - PPTP_CALL_OUT_CONF, - PPTP_CALL_IN_REQ, - PPTP_CALL_IN_REP, - PPTP_CALL_IN_CONF, - PPTP_CALL_CLEAR_REQ, -}; - - -/* conntrack private data */ -struct ip_ct_pptp_master { - enum pptp_ctrlsess_state sstate; /* session state */ - - /* everything below is going to be per-expectation in newnat, - * since there could be more than one call within one session */ - enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC, host byte order */ - u_int16_t pns_call_id; /* call id of PNS, host byte order */ -}; - -/* conntrack_expect private member */ -struct ip_ct_pptp_expect { - enum pptp_ctrlcall_state cstate; /* call state */ - u_int16_t pac_call_id; /* call id of PAC */ - u_int16_t pns_call_id; /* call id of PNS */ -}; - - -#ifdef __KERNEL__ - -#include -DECLARE_LOCK_EXTERN(ip_pptp_lock); - -#define IP_CONNTR_PPTP PPTP_CONTROL_PORT - -#define PPTP_CONTROL_PORT 1723 - -#define PPTP_PACKET_CONTROL 1 -#define PPTP_PACKET_MGMT 2 - -#define PPTP_MAGIC_COOKIE 0x1a2b3c4d - -struct pptp_pkt_hdr { - __u16 packetLength; - __u16 packetType; - __u32 magicCookie; -}; - -/* PptpControlMessageType values */ -#define 
PPTP_START_SESSION_REQUEST 1 -#define PPTP_START_SESSION_REPLY 2 -#define PPTP_STOP_SESSION_REQUEST 3 -#define PPTP_STOP_SESSION_REPLY 4 -#define PPTP_ECHO_REQUEST 5 -#define PPTP_ECHO_REPLY 6 -#define PPTP_OUT_CALL_REQUEST 7 -#define PPTP_OUT_CALL_REPLY 8 -#define PPTP_IN_CALL_REQUEST 9 -#define PPTP_IN_CALL_REPLY 10 -#define PPTP_IN_CALL_CONNECT 11 -#define PPTP_CALL_CLEAR_REQUEST 12 -#define PPTP_CALL_DISCONNECT_NOTIFY 13 -#define PPTP_WAN_ERROR_NOTIFY 14 -#define PPTP_SET_LINK_INFO 15 - -#define PPTP_MSG_MAX 15 - -/* PptpGeneralError values */ -#define PPTP_ERROR_CODE_NONE 0 -#define PPTP_NOT_CONNECTED 1 -#define PPTP_BAD_FORMAT 2 -#define PPTP_BAD_VALUE 3 -#define PPTP_NO_RESOURCE 4 -#define PPTP_BAD_CALLID 5 -#define PPTP_REMOVE_DEVICE_ERROR 6 - -struct PptpControlHeader { - __u16 messageType; - __u16 reserved; -}; - -/* FramingCapability Bitmap Values */ -#define PPTP_FRAME_CAP_ASYNC 0x1 -#define PPTP_FRAME_CAP_SYNC 0x2 - -/* BearerCapability Bitmap Values */ -#define PPTP_BEARER_CAP_ANALOG 0x1 -#define PPTP_BEARER_CAP_DIGITAL 0x2 - -struct PptpStartSessionRequest { - __u16 protocolVersion; - __u8 reserved1; - __u8 reserved2; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; - __u8 hostName[64]; - __u8 vendorString[64]; -}; - -/* PptpStartSessionResultCode Values */ -#define PPTP_START_OK 1 -#define PPTP_START_GENERAL_ERROR 2 -#define PPTP_START_ALREADY_CONNECTED 3 -#define PPTP_START_NOT_AUTHORIZED 4 -#define PPTP_START_UNKNOWN_PROTOCOL 5 - -struct PptpStartSessionReply { - __u16 protocolVersion; - __u8 resultCode; - __u8 generalErrorCode; - __u32 framingCapability; - __u32 bearerCapability; - __u16 maxChannels; - __u16 firmwareRevision; - __u8 hostName[64]; - __u8 vendorString[64]; -}; - -/* PptpStopReasons */ -#define PPTP_STOP_NONE 1 -#define PPTP_STOP_PROTOCOL 2 -#define PPTP_STOP_LOCAL_SHUTDOWN 3 - -struct PptpStopSessionRequest { - __u8 reason; -}; - -/* PptpStopSessionResultCode */ -#define PPTP_STOP_OK 1 -#define PPTP_STOP_GENERAL_ERROR 2 - -struct PptpStopSessionReply { - __u8 resultCode; - __u8 generalErrorCode; -}; - -struct PptpEchoRequest { - __u32 identNumber; -}; - -/* PptpEchoReplyResultCode */ -#define PPTP_ECHO_OK 1 -#define PPTP_ECHO_GENERAL_ERROR 2 - -struct PptpEchoReply { - __u32 identNumber; - __u8 resultCode; - __u8 generalErrorCode; - __u16 reserved; -}; - -/* PptpFramingType */ -#define PPTP_ASYNC_FRAMING 1 -#define PPTP_SYNC_FRAMING 2 -#define PPTP_DONT_CARE_FRAMING 3 - -/* PptpCallBearerType */ -#define PPTP_ANALOG_TYPE 1 -#define PPTP_DIGITAL_TYPE 2 -#define PPTP_DONT_CARE_BEARER_TYPE 3 - -struct PptpOutCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 minBPS; - __u32 maxBPS; - __u32 bearerType; - __u32 framingType; - __u16 packetWindow; - __u16 packetProcDelay; - __u16 reserved1; - __u16 phoneNumberLength; - __u16 reserved2; - __u8 phoneNumber[64]; - __u8 subAddress[64]; -}; - -/* PptpCallResultCode */ -#define PPTP_OUTCALL_CONNECT 1 -#define PPTP_OUTCALL_GENERAL_ERROR 2 -#define PPTP_OUTCALL_NO_CARRIER 3 -#define PPTP_OUTCALL_BUSY 4 -#define PPTP_OUTCALL_NO_DIAL_TONE 5 -#define PPTP_OUTCALL_TIMEOUT 6 -#define PPTP_OUTCALL_DONT_ACCEPT 7 - -struct PptpOutCallReply { - __u16 callID; - __u16 peersCallID; - __u8 resultCode; - __u8 generalErrorCode; - __u16 causeCode; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 physChannelID; -}; - -struct PptpInCallRequest { - __u16 callID; - __u16 callSerialNumber; - __u32 callBearerType; - __u32 physChannelID; - __u16 
dialedNumberLength; - __u16 dialingNumberLength; - __u8 dialedNumber[64]; - __u8 dialingNumber[64]; - __u8 subAddress[64]; -}; - -/* PptpInCallResultCode */ -#define PPTP_INCALL_ACCEPT 1 -#define PPTP_INCALL_GENERAL_ERROR 2 -#define PPTP_INCALL_DONT_ACCEPT 3 - -struct PptpInCallReply { - __u16 callID; - __u16 peersCallID; - __u8 resultCode; - __u8 generalErrorCode; - __u16 packetWindow; - __u16 packetProcDelay; - __u16 reserved; -}; - -struct PptpInCallConnected { - __u16 peersCallID; - __u16 reserved; - __u32 connectSpeed; - __u16 packetWindow; - __u16 packetProcDelay; - __u32 callFramingType; -}; - -struct PptpClearCallRequest { - __u16 callID; - __u16 reserved; -}; - -struct PptpCallDisconnectNotify { - __u16 callID; - __u8 resultCode; - __u8 generalErrorCode; - __u16 causeCode; - __u16 reserved; - __u8 callStatistics[128]; -}; - -struct PptpWanErrorNotify { - __u16 peersCallID; - __u16 reserved; - __u32 crcErrors; - __u32 framingErrors; - __u32 hardwareOverRuns; - __u32 bufferOverRuns; - __u32 timeoutErrors; - __u32 alignmentErrors; -}; - -struct PptpSetLinkInfo { - __u16 peersCallID; - __u16 reserved; - __u32 sendAccm; - __u32 recvAccm; -}; - - -struct pptp_priv_data { - __u16 call_id; - __u16 mcall_id; - __u16 pcall_id; -}; - -union pptp_ctrl_union { - struct PptpStartSessionRequest sreq; - struct PptpStartSessionReply srep; - struct PptpStopSessionRequest streq; - struct PptpStopSessionReply strep; - struct PptpOutCallRequest ocreq; - struct PptpOutCallReply ocack; - struct PptpInCallRequest icreq; - struct PptpInCallReply icack; - struct PptpInCallConnected iccon; - struct PptpClearCallRequest clrreq; - struct PptpCallDisconnectNotify disc; - struct PptpWanErrorNotify wanerr; - struct PptpSetLinkInfo setlink; -}; - -#endif /* __KERNEL__ */ -#endif /* _CONNTRACK_PPTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h deleted file mode 100644 index 07646857c..000000000 --- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h +++ /dev/null @@ -1,123 +0,0 @@ -#ifndef _CONNTRACK_PROTO_GRE_H -#define _CONNTRACK_PROTO_GRE_H -#include - -/* GRE PROTOCOL HEADER */ - -/* GRE Version field */ -#define GRE_VERSION_1701 0x0 -#define GRE_VERSION_PPTP 0x1 - -/* GRE Protocol field */ -#define GRE_PROTOCOL_PPTP 0x880B - -/* GRE Flags */ -#define GRE_FLAG_C 0x80 -#define GRE_FLAG_R 0x40 -#define GRE_FLAG_K 0x20 -#define GRE_FLAG_S 0x10 -#define GRE_FLAG_A 0x80 - -#define GRE_IS_C(f) ((f)&GRE_FLAG_C) -#define GRE_IS_R(f) ((f)&GRE_FLAG_R) -#define GRE_IS_K(f) ((f)&GRE_FLAG_K) -#define GRE_IS_S(f) ((f)&GRE_FLAG_S) -#define GRE_IS_A(f) ((f)&GRE_FLAG_A) - -/* GRE is a mess: Four different standards */ -struct gre_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 rec:3, - srr:1, - seq:1, - key:1, - routing:1, - csum:1, - version:3, - reserved:4, - ack:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 csum:1, - routing:1, - key:1, - seq:1, - srr:1, - rec:3, - ack:1, - reserved:4, - version:3; -#else -#error "Adjust your defines" -#endif - __u16 protocol; -}; - -/* modified GRE header for PPTP */ -struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __u16 payload_len; /* size of ppp payload, not inc. gre header */ - __u16 call_id; /* peer's call_id for this session */ - __u32 seq; /* sequence number. 
Present if S==1 */ - __u32 ack; /* seq number of highest packet recieved by */ - /* sender in this session */ -}; - - -/* this is part of ip_conntrack */ -struct ip_ct_gre { - unsigned int stream_timeout; - unsigned int timeout; -}; - -/* this is part of ip_conntrack_expect */ -struct ip_ct_gre_expect { - struct ip_ct_gre_keymap *keymap_orig, *keymap_reply; -}; - -#ifdef __KERNEL__ -struct ip_conntrack_expect; - -/* structure for original <-> reply keymap */ -struct ip_ct_gre_keymap { - struct list_head list; - - struct ip_conntrack_tuple tuple; -}; - - -/* add new tuple->key_reply pair to keymap */ -int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp, - struct ip_conntrack_tuple *t, - int reply); - -/* change an existing keymap entry */ -void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km, - struct ip_conntrack_tuple *t); - -/* delete keymap entries */ -void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp); - - -/* get pointer to gre key, if present */ -static inline u_int32_t *gre_key(struct gre_hdr *greh) -{ - if (!greh->key) - return NULL; - if (greh->csum || greh->routing) - return (u_int32_t *) (greh+sizeof(*greh)+4); - return (u_int32_t *) (greh+sizeof(*greh)); -} - -/* get pointer ot gre csum, if present */ -static inline u_int16_t *gre_csum(struct gre_hdr *greh) -{ - if (!greh->csum) - return NULL; - return (u_int16_t *) (greh+sizeof(*greh)); -} - -#endif /* __KERNEL__ */ - -#endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h deleted file mode 100644 index eaf66c2e8..000000000 --- a/include/linux/netfilter_ipv4/ip_nat_pptp.h +++ /dev/null @@ -1,11 +0,0 @@ -/* PPTP constants and structs */ -#ifndef _NAT_PPTP_H -#define _NAT_PPTP_H - -/* conntrack private data */ -struct ip_nat_pptp { - u_int16_t pns_call_id; /* NAT'ed PNS call id */ - u_int16_t pac_call_id; /* NAT'ed PAC call id */ -}; - -#endif /* _NAT_PPTP_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c70f46a4e..c6f5063f0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -77,7 +77,6 @@ #define PG_compound 19 /* Part of a compound page */ #define PG_anon 20 /* Anonymous: anon_vma in mapping */ -#define PG_ckrm_account 21 /* This page is accounted by CKRM */ /* diff --git a/include/linux/rbce.h b/include/linux/rbce.h deleted file mode 100644 index 91afba9ba..000000000 --- a/include/linux/rbce.h +++ /dev/null @@ -1,127 +0,0 @@ -/* Rule-based Classification Engine (RBCE) module - * - * Copyright (C) Hubertus Franke, IBM Corp. 2003 - * (C) Chandra Seetharaman, IBM Corp. 2003 - * - * Module for loading of classification policies and providing - * a user API for Class-based Kernel Resource Management (CKRM) - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2.1 of the GNU Lesser General Public License - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - * - */ - -/* Changes - * - * 25 Mar 2004 - * Integrate RBCE and CRBE into a single module - * - */ - -#ifndef RBCE_H -#define RBCE_H - -// data types defined in main rbcemod.c -struct rbce_private_data; -struct rbce_class; -struct ckrm_core_class; - -#ifndef RBCE_EXTENSION - -/**************************************************************************** - * - * RBCE STANDALONE VERSION, NO CHOICE FOR DATA COLLECTION - * - ****************************************************************************/ - -#ifdef RBCE_SHOW_INCL -#warning " ... RBCE .." -#endif - -#define RBCE_MOD_DESCR "Rule Based Classification Engine Module for CKRM" -#define RBCE_MOD_NAME "rbce" - -/* extension to private data: NONE */ -struct rbce_ext_private_data { - /* empty data */ -}; -static inline void init_ext_private_data(struct rbce_private_data *dst) -{ -} - -/* sending notification to user: NONE */ - -static void notify_class_action(struct rbce_class *cls, int action) -{ -} -static inline void send_fork_notification(struct task_struct *tsk, - struct ckrm_core_class *cls) -{ -} -static inline void send_exit_notification(struct task_struct *tsk) -{ -} -static inline void send_manual_notification(struct task_struct *tsk) -{ -} - -/* extension initialization and destruction at module init and exit */ -static inline int init_rbce_ext_pre(void) -{ - return 0; -} -static inline int init_rbce_ext_post(void) -{ - return 0; -} -static inline void exit_rbce_ext(void) -{ -} - -#else - -/*************************************************************************** - * - * RBCE with User Level Notification - * - ***************************************************************************/ - -#ifdef RBCE_SHOW_INCL -#warning " ... CRBCE .." -#ifdef RBCE_DO_SAMPLE -#warning " ... CRBCE doing sampling ..." -#endif -#ifdef RBCE_DO_DELAY -#warning " ... CRBCE doing delay ..." 
-#endif -#endif - -#define RBCE_MOD_DESCR "Rule Based Classification Engine Module" \ - "with Data Sampling/Delivery for CKRM" -#define RBCE_MOD_NAME "crbce" - -#include - -struct rbce_ext_private_data { - struct task_sample_info sample; -}; - -static void notify_class_action(struct rbce_class *cls, int action); -#if 0 -static void send_fork_notification(struct task_struct *tsk, - struct ckrm_core_class *cls); -static void send_exit_notification(struct task_struct *tsk); -static void send_manual_notification(struct task_struct *tsk); -#endif - -#endif - -#endif // RBCE_H diff --git a/include/linux/rcfs.h b/include/linux/rcfs.h index 13aa5a7d2..232d58ef1 100644 --- a/include/linux/rcfs.h +++ b/include/linux/rcfs.h @@ -71,7 +71,6 @@ extern struct file_operations shares_fileops; extern struct file_operations stats_fileops; extern struct file_operations config_fileops; extern struct file_operations members_fileops; -extern struct file_operations reclassify_fileops; extern struct file_operations rcfs_file_operations; // Callbacks into rcfs from ckrm diff --git a/include/linux/sched.h b/include/linux/sched.h index dd5005295..93f3c3230 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -94,7 +94,7 @@ extern unsigned long avenrun[]; /* Load averages */ extern int nr_threads; extern int last_pid; DECLARE_PER_CPU(unsigned long, process_counts); -// DECLARE_PER_CPU(struct runqueue, runqueues); -- removed after ckrm cpu v7 merge +DECLARE_PER_CPU(struct runqueue, runqueues); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); @@ -264,11 +264,6 @@ struct mm_struct { struct kioctx *ioctx_list; struct kioctx default_kioctx; -#ifdef CONFIG_CKRM_RES_MEM - struct ckrm_mem_res *memclass; - struct list_head tasklist; /* list of all tasks sharing this address space */ - spinlock_t peertask_lock; /* protect above tasklist */ -#endif }; extern int mmlist_nr; @@ -429,25 +424,6 @@ int set_current_groups(struct group_info *group_info); struct audit_context; /* See audit.c */ struct mempolicy; -#ifdef CONFIG_CKRM_CPU_SCHEDULE -/** - * ckrm_cpu_demand_stat - used to track the cpu demand of a task/class - * @run: how much time it has been running since the counter started - * @total: total time since the counter started - * @last_sleep: the last time it sleeps, last_sleep = 0 when not sleeping - * @recalc_interval: how often do we recalculate the cpu_demand - * @cpu_demand: moving average of run/total - */ -struct ckrm_cpu_demand_stat { - unsigned long long run; - unsigned long long total; - unsigned long long last_sleep; - unsigned long long recalc_interval; - unsigned long cpu_demand; /*estimated cpu demand */ -}; -#endif - - struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ struct thread_info *thread_info; @@ -547,6 +523,7 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; + sigset_t blocked, real_blocked; struct sigpending pending; @@ -593,8 +570,6 @@ struct task_struct { struct io_context *io_context; - int ioprio; - unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. 
*/ @@ -612,14 +587,10 @@ struct task_struct { struct list_head taskclass_link; #ifdef CONFIG_CKRM_CPU_SCHEDULE struct ckrm_cpu_class *cpu_class; - //track cpu demand of this task - struct ckrm_cpu_demand_stat demand_stat; -#endif //CONFIG_CKRM_CPU_SCHEDULE +#endif #endif // CONFIG_CKRM_TYPE_TASKCLASS -#ifdef CONFIG_CKRM_RES_MEM - struct list_head mm_peers; // list of tasks using same mm_struct -#endif // CONFIG_CKRM_RES_MEM #endif // CONFIG_CKRM + struct task_delay_info delays; }; @@ -801,6 +772,83 @@ extern int idle_cpu(int cpu); void yield(void); +/* + * These are the runqueue data structures: + */ +typedef struct runqueue runqueue_t; + +#ifdef CONFIG_CKRM_CPU_SCHEDULE +#include +#endif + +#ifdef CONFIG_CKRM_CPU_SCHEDULE + +/** + * if belong to different class, compare class priority + * otherwise compare task priority + */ +#define TASK_PREEMPTS_CURR(p, rq) \ + (((p)->cpu_class != (rq)->curr->cpu_class) && ((rq)->curr != (rq)->idle))? class_preempts_curr((p),(rq)->curr) : ((p)->prio < (rq)->curr->prio) +#else +#define BITMAP_SIZE ((((MAX_PRIO+1+7)/8)+sizeof(long)-1)/sizeof(long)) +struct prio_array { + unsigned int nr_active; + unsigned long bitmap[BITMAP_SIZE]; + struct list_head queue[MAX_PRIO]; +}; +#define rq_active(p,rq) (rq->active) +#define rq_expired(p,rq) (rq->expired) +#define ckrm_rebalance_tick(j,this_cpu) do {} while (0) +#define TASK_PREEMPTS_CURR(p, rq) \ + ((p)->prio < (rq)->curr->prio) +#endif + +/* + * This is the main, per-CPU runqueue data structure. + * + * Locking rule: those places that want to lock multiple runqueues + * (such as the load balancing or the thread migration code), lock + * acquire operations must be ordered by ascending &runqueue. + */ +struct runqueue { + spinlock_t lock; + + /* + * nr_running and cpu_load should be in the same cacheline because + * remote CPUs use both these fields when doing load calculation. + */ + unsigned long nr_running; +#if defined(CONFIG_SMP) + unsigned long cpu_load; +#endif + unsigned long long nr_switches, nr_preempt; + unsigned long expired_timestamp, nr_uninterruptible; + unsigned long long timestamp_last_tick; + task_t *curr, *idle; + struct mm_struct *prev_mm; +#ifdef CONFIG_CKRM_CPU_SCHEDULE + unsigned long ckrm_cpu_load; + struct classqueue_struct classqueue; +#else + prio_array_t *active, *expired, arrays[2]; +#endif + int best_expired_prio; + atomic_t nr_iowait; + +#ifdef CONFIG_SMP + struct sched_domain *sd; + + /* For active balancing */ + int active_balance; + int push_cpu; + + task_t *migration_thread; + struct list_head migration_queue; +#endif + struct list_head hold_queue; + int idle_tokens; +}; + /* * The default (Linux) execution domain. 
*/ @@ -837,7 +885,6 @@ static inline struct user_struct *get_uid(struct user_struct *u) atomic_inc(&u->__count); return u; } - extern void free_uid(struct user_struct *); extern void switch_uid(struct user_struct *); @@ -943,7 +990,6 @@ static inline int capable(int cap) } #endif - /* * Routines for handling mm_structs */ @@ -1077,7 +1123,7 @@ static inline struct mm_struct * get_task_mm(struct task_struct * task) return mm; } - + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ @@ -1201,43 +1247,19 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #define def_delay_var(var) unsigned long long var #define get_delay(tsk,field) ((tsk)->delays.field) +#define delay_value(x) (((unsigned long)(x))/1000) #define start_delay(var) ((var) = sched_clock()) #define start_delay_set(var,flg) (set_delay_flag(current,flg),(var) = sched_clock()) #define inc_delay(tsk,field) (((tsk)->delays.field)++) +#define add_delay_ts(tsk,field,start_ts,end_ts) ((tsk)->delays.field += delay_value((end_ts)-(start_ts))) +#define add_delay_clear(tsk,field,start_ts,flg) (add_delay_ts(tsk,field,start_ts,sched_clock()),clear_delay_flag(tsk,flg)) -/* because of hardware timer drifts in SMPs and task continue on different cpu - * then where the start_ts was taken there is a possibility that - * end_ts < start_ts by some usecs. In this case we ignore the diff - * and add nothing to the total. - */ -#ifdef CONFIG_SMP -#define test_ts_integrity(start_ts,end_ts) (likely((end_ts) > (start_ts))) -#else -#define test_ts_integrity(start_ts,end_ts) (1) -#endif - -#define add_delay_ts(tsk,field,start_ts,end_ts) \ - do { if (test_ts_integrity(start_ts,end_ts)) (tsk)->delays.field += ((end_ts)-(start_ts)); } while (0) - -#define add_delay_clear(tsk,field,start_ts,flg) \ - do { \ - unsigned long long now = sched_clock();\ - add_delay_ts(tsk,field,start_ts,now); \ - clear_delay_flag(tsk,flg); \ - } while (0) - -static inline void add_io_delay(unsigned long long dstart) +static inline void add_io_delay(unsigned long dstart) { struct task_struct * tsk = current; - unsigned long long now = sched_clock(); - unsigned long long val; - - if (test_ts_integrity(dstart,now)) - val = now - dstart; - else - val = 0; + unsigned long val = delay_value(sched_clock()-dstart); if (test_delay_flag(tsk,PF_MEMIO)) { tsk->delays.mem_iowait_total += val; tsk->delays.num_memwaits++; diff --git a/include/linux/socket.h b/include/linux/socket.h index 602d03b5d..4cd4850d7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -269,9 +269,6 @@ struct ucred { #define SOL_NETBEUI 267 #define SOL_LLC 268 -/* PlanetLab PL2525: reset the context ID of an existing socket */ -#define SO_SETXID SO_PEERCRED - /* IPX options */ #define IPX_TYPE 1 diff --git a/include/linux/taskdelays.h b/include/linux/taskdelays.h index e5682d805..eafb1e77f 100644 --- a/include/linux/taskdelays.h +++ b/include/linux/taskdelays.h @@ -5,7 +5,7 @@ #include struct task_delay_info { -#if defined CONFIG_DELAY_ACCT +#ifdef CONFIG_DELAY_ACCT /* delay statistics in usecs */ uint64_t waitcpu_total; uint64_t runcpu_total; @@ -14,7 +14,7 @@ struct task_delay_info { uint32_t runs; uint32_t num_iowaits; uint32_t num_memwaits; -#endif +#endif }; #endif // _LINUX_TASKDELAYS_H diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9ed5fac6c..9cdf6963e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -387,6 +387,7 @@ struct tcp_opt { #ifndef CONFIG_ACCEPT_QUEUES struct open_request 
*accept_queue_tail; #endif + unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/include/linux/vserver/inode.h b/include/linux/vserver/inode.h index e19632d08..fc49aba6d 100644 --- a/include/linux/vserver/inode.h +++ b/include/linux/vserver/inode.h @@ -57,10 +57,6 @@ extern int vc_set_iattr_v0(uint32_t, void __user *); extern int vc_get_iattr(uint32_t, void __user *); extern int vc_set_iattr(uint32_t, void __user *); -extern int vc_iattr_ioctl(struct dentry *de, - unsigned int cmd, - unsigned long arg); - #endif /* __KERNEL__ */ /* inode ioctls */ @@ -68,7 +64,4 @@ extern int vc_iattr_ioctl(struct dentry *de, #define FIOC_GETXFLG _IOR('x', 5, long) #define FIOC_SETXFLG _IOW('x', 6, long) -#define FIOC_GETIATTR _IOR('x', 7, long) -#define FIOC_SETIATTR _IOR('x', 8, long) - #endif /* _VX_INODE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a487663e0..a2aba080f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1086,10 +1086,8 @@ static inline int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * packet. */ if (inet_stream_ops.bind != inet_bind && - (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid) { - err = -EPERM; + (int) sk->sk_xid >= 0 && sk->sk_xid != skb->xid) goto out; - } /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK diff --git a/init/Kconfig b/init/Kconfig index 64ca2fcb7..89ec58c3f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -115,18 +115,6 @@ config BSD_PROCESS_ACCT up to the user level program to do useful things with this information. This is generally a good idea, so say Y. -config BSD_PROCESS_ACCT_V3 - bool "BSD Process Accounting version 3 file format" - depends on BSD_PROCESS_ACCT - default n - help - If you say Y here, the process accounting information is written - in a new file format that also logs the process IDs of each - process and it's parent. Note that this file format is incompatible - with previous v0/v1/v2 file formats, so you will need updated tools - for processing it. A preliminary version of these tools is available - at . - menu "Class Based Kernel Resource Management" config CKRM @@ -175,45 +163,21 @@ config CKRM_RES_NUMTASKS config CKRM_CPU_SCHEDULE bool "CKRM CPU scheduler" depends on CKRM_TYPE_TASKCLASS - default y + default m help Use CKRM CPU scheduler instead of Linux Scheduler Say N if unsure, Y to use the feature. -config CKRM_RES_BLKIO - tristate " Disk I/O Resource Controller" - depends on CKRM_TYPE_TASKCLASS && IOSCHED_CFQ +config CKRM_CPU_MONITOR + bool "CKRM CPU Resoure Monitor" + depends on CKRM_CPU_SCHEDULE default m help - Provides a resource controller for best-effort block I/O - bandwidth control. The controller attempts this by proportional - servicing of requests in the I/O scheduler. However, seek - optimizations and reordering by device drivers/disk controllers may - alter the actual bandwidth delivered to a class. + Monitor CPU Resource Usage of the classes Say N if unsure, Y to use the feature. -config CKRM_RES_MEM - bool "Class based physical memory controller" - default y - depends on CKRM - help - Provide the basic support for collecting physical memory usage information - among classes. Say Y if you want to know the memory usage of each class. 
- -config CKRM_MEM_LRUORDER_CHANGE - bool "Change the LRU ordering of scanned pages" - default n - depends on CKRM_RES_MEM - help - While trying to free pages, by default(n), scanned pages are left were they - are found if they belong to relatively under-used class. In this case the - LRU ordering of the memory subsystemis left intact. If this option is chosen, - then the scanned pages are moved to the tail of the list(active or inactive). - Changing this to yes reduces the checking overhead but violates the approximate - LRU order that is maintained by the paging subsystem. - config CKRM_TYPE_SOCKETCLASS bool "Class Manager for socket groups" depends on CKRM @@ -262,6 +226,18 @@ config CKRM_CRBCE endmenu +config BSD_PROCESS_ACCT_V3 + bool "BSD Process Accounting version 3 file format" + depends on BSD_PROCESS_ACCT + default n + help + If you say Y here, the process accounting information is written + in a new file format that also logs the process IDs of each + process and it's parent. Note that this file format is incompatible + with previous v0/v1/v2 file formats, so you will need updated tools + for processing it. A preliminary version of these tools is available + at . + config SYSCTL bool "Sysctl support" ---help--- @@ -353,22 +329,6 @@ config IKCONFIG_PROC This option enables access to the kernel configuration file through /proc/config.gz. -config OOM_PANIC - bool "OOM Panic" - default y - ---help--- - This option enables panic() to be called when a system is out of - memory. This feature along with /proc/sys/kernel/panic allows a - different behavior on out-of-memory conditions when the standard - behavior (killing processes in an attempt to recover) does not - make sense. - - If unsure, say N. - -config OOM_KILL - bool - depends on !OOM_PANIC - default y menuconfig EMBEDDED bool "Configure standard kernel features (for small systems)" diff --git a/init/main.c b/init/main.c index 6416eab8d..e93d25685 100644 --- a/init/main.c +++ b/init/main.c @@ -55,7 +55,6 @@ int __init init_ckrm_sched_res(void); #else #define init_ckrm_sched_res() ((void)0) #endif -//#include /* * This is one of the first .c files built. Error out early @@ -477,7 +476,6 @@ asmlinkage void __init start_kernel(void) * printk() and can access its per-cpu storage. */ smp_prepare_boot_cpu(); - /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() @@ -697,9 +695,7 @@ static int init(void * unused) * firmware files. 
*/ populate_rootfs(); - do_basic_setup(); - init_ckrm_sched_res(); sched_init_smp(); diff --git a/kernel/Makefile b/kernel/Makefile index ec5001052..905f3c59d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -27,9 +27,12 @@ obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKCONFIG_PROC) += configs.o obj-$(CONFIG_STOP_MACHINE) += stop_machine.o -obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_classqueue.o ckrm_sched.o +obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_classqueue.o +obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_sched.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o +obj-$(CONFIG_KGDB) += kgdbstub.o + ifneq ($(CONFIG_IA64),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/ckrm/Makefile b/kernel/ckrm/Makefile index b32530977..3da88775d 100644 --- a/kernel/ckrm/Makefile +++ b/kernel/ckrm/Makefile @@ -3,11 +3,11 @@ # ifeq ($(CONFIG_CKRM),y) - obj-y = ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/ + obj-y = ckrm.o ckrmutils.o ckrm_tasks_stub.o rbce/ endif obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o - obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o + obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_tasks.o obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o - obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_laq.o - obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o - obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o + obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o + obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o + obj-$(CONFIG_CKRM_CPU_MONITOR) += ckrm_cpu_monitor.o diff --git a/kernel/ckrm/ckrm.c b/kernel/ckrm/ckrm.c index f1cfb268c..5217ea003 100644 --- a/kernel/ckrm/ckrm.c +++ b/kernel/ckrm/ckrm.c @@ -142,7 +142,7 @@ EXPORT_SYMBOL(ckrm_classobj); static inline void set_callbacks_active(struct ckrm_classtype *ctype) { - ctype->ce_cb_active = ((atomic_read(&ctype->ce_regd) > 0) && + ctype->ce_cb_active = ((atomic_read(&ctype->ce_nr_users) > 0) && (ctype->ce_callbacks.always_callback || (ctype->num_classes > 1))); } @@ -176,11 +176,10 @@ int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs) if (ctype == NULL) return (-ENOENT); - atomic_inc(&ctype->ce_regd); - - /* another engine registered or trying to register ? */ - if (atomic_read(&ctype->ce_regd) != 1) { - atomic_dec(&ctype->ce_regd); + ce_protect(ctype); + if (atomic_read(&ctype->ce_nr_users) != 1) { + // Some engine is acive, deregister it first. + ce_release(ctype); return (-EBUSY); } @@ -193,10 +192,17 @@ int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs) if (!(((ecbs->classify) && (ecbs->class_delete)) || (ecbs->notify)) || (ecbs->c_interest && ecbs->classify == NULL) || (ecbs->n_interest && ecbs->notify == NULL)) { - atomic_dec(&ctype->ce_regd); + ce_release(ctype); return (-EINVAL); } + /* Is any other engine registered for this classtype ? */ + if (ctype->ce_regd) { + ce_release(ctype); + return (-EINVAL); + } + + ctype->ce_regd = 1; ctype->ce_callbacks = *ecbs; set_callbacks_active(ctype); @@ -229,12 +235,13 @@ int ckrm_unregister_engine(const char *typename) ctype->ce_cb_active = 0; - if (atomic_read(&ctype->ce_nr_users) > 1) { + if (atomic_dec_and_test(&ctype->ce_nr_users) != 1) { // Somebody is currently using the engine, cannot deregister. 
- return (-EAGAIN); + atomic_inc(&ctype->ce_nr_users); + return (-EBUSY); } - atomic_set(&ctype->ce_regd, 0); + ctype->ce_regd = 0; memset(&ctype->ce_callbacks, 0, sizeof(ckrm_eng_callback_t)); return 0; } @@ -444,7 +451,7 @@ ckrm_init_core_class(struct ckrm_classtype *clstype, CLS_DEBUG("name %s => %p\n", name ? name : "default", dcore); if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){ - printk(KERN_DEBUG "error not a valid parent %p\n", parent); + printk("error not a valid parent %p\n", parent); return -EINVAL; } #if 0 @@ -456,7 +463,7 @@ ckrm_init_core_class(struct ckrm_classtype *clstype, (void **)kmalloc(clstype->max_resid * sizeof(void *), GFP_KERNEL); if (dcore->res_class == NULL) { - printk(KERN_DEBUG "error no mem\n"); + printk("error no mem\n"); return -ENOMEM; } } @@ -532,10 +539,10 @@ void ckrm_free_core_class(struct ckrm_core_class *core) parent->name); if (core->delayed) { /* this core was marked as late */ - printk(KERN_DEBUG "class <%s> finally deleted %lu\n", core->name, jiffies); + printk("class <%s> finally deleted %lu\n", core->name, jiffies); } if (ckrm_remove_child(core) == 0) { - printk(KERN_DEBUG "Core class removal failed. Chilren present\n"); + printk("Core class removal failed. Chilren present\n"); } for (i = 0; i < clstype->max_resid; i++) { @@ -656,7 +663,7 @@ ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t * rcbs) */ read_lock(&ckrm_class_lock); list_for_each_entry(core, &clstype->classes, clslist) { - printk(KERN_INFO "CKRM .. create res clsobj for resouce <%s>" + printk("CKRM .. create res clsobj for resouce <%s>" "class <%s> par=%p\n", rcbs->res_name, core->name, core->hnode.parent); ckrm_alloc_res_class(core, core->hnode.parent, resid); @@ -833,7 +840,7 @@ int ckrm_unregister_event_set(struct ckrm_event_spec especs[]) } #define ECC_PRINTK(fmt, args...) 
\ -// printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args) +// printk("%s: " fmt, __FUNCTION__ , ## args) void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg) { @@ -978,7 +985,7 @@ void ckrm_cb_exit(struct task_struct *tsk) void __init ckrm_init(void) { - printk(KERN_DEBUG "CKRM Initialization\n"); + printk("CKRM Initialization\n"); // register/initialize the Metatypes @@ -996,7 +1003,7 @@ void __init ckrm_init(void) #endif // prepare init_task and then rely on inheritance of properties ckrm_cb_newtask(&init_task); - printk(KERN_DEBUG "CKRM Initialization done\n"); + printk("CKRM Initialization done\n"); } EXPORT_SYMBOL(ckrm_register_engine); diff --git a/kernel/ckrm/ckrm_cpu_class.c b/kernel/ckrm/ckrm_cpu_class.c index 917875b18..0ded7f3c6 100644 --- a/kernel/ckrm/ckrm_cpu_class.c +++ b/kernel/ckrm/ckrm_cpu_class.c @@ -23,32 +23,17 @@ #include #include -struct ckrm_res_ctlr cpu_rcbs; -/** - * insert_cpu_class - insert a class to active_cpu_class list - * - * insert the class in decreasing order of class weight - */ -static inline void insert_cpu_class(struct ckrm_cpu_class *cls) -{ - list_add(&cls->links,&active_cpu_classes); -} +struct ckrm_res_ctlr cpu_rcbs; /* * initialize a class object and its local queues */ -void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) + static void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) { int i,j,k; prio_array_t *array; - ckrm_lrq_t* queue; - - cls->shares = *shares; - cls->cnt_lock = SPIN_LOCK_UNLOCKED; - ckrm_cpu_stat_init(&cls->stat); - ckrm_usage_init(&cls->usage); - cls->magic = CKRM_CPU_CLASS_MAGIC; + struct ckrm_local_runqueue* queue; for (i = 0 ; i < NR_CPUS ; i++) { queue = &cls->local_queues[i]; @@ -73,37 +58,34 @@ void init_cpu_class(struct ckrm_cpu_class *cls,ckrm_shares_t* shares) queue->top_priority = MAX_PRIO; cq_node_init(&queue->classqueue_linkobj); queue->local_cvt = 0; - queue->lrq_load = 0; - queue->local_weight = cpu_class_weight(cls); + queue->uncounted_cvt = 0; queue->uncounted_ns = 0; - queue->savings = 0; queue->magic = 0x43FF43D7; } + cls->shares = *shares; + cls->global_cvt = 0; + cls->cnt_lock = SPIN_LOCK_UNLOCKED; + ckrm_cpu_stat_init(&cls->stat); + // add to class list write_lock(&class_list_lock); - insert_cpu_class(cls); + list_add(&cls->links,&active_cpu_classes); write_unlock(&class_list_lock); } static inline void set_default_share(ckrm_shares_t *shares) { shares->my_guarantee = 0; - shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; shares->my_limit = CKRM_SHARE_DFLT_MAX_LIMIT; + shares->total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; shares->max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; - shares->cur_max_limit = 0; + shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; + shares->cur_max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; } -struct ckrm_cpu_class * ckrm_get_cpu_class(struct ckrm_core_class *core) -{ - struct ckrm_cpu_class * cls; - cls = ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class); - if (valid_cpu_class(cls)) - return cls; - else - return NULL; +struct ckrm_cpu_class * ckrm_get_cpu_class(struct ckrm_core_class *core) { + return ckrm_get_res_class(core, cpu_rcbs.resid, struct ckrm_cpu_class); } @@ -112,7 +94,7 @@ void* ckrm_alloc_cpu_class(struct ckrm_core_class *core, struct ckrm_core_class struct ckrm_cpu_class *cls; if (! 
parent) /*root class*/ - cls = get_default_cpu_class(); + cls = default_cpu_class; else cls = (struct ckrm_cpu_class *) kmalloc(sizeof(struct ckrm_cpu_class),GFP_ATOMIC); @@ -131,7 +113,7 @@ void* ckrm_alloc_cpu_class(struct ckrm_core_class *core, struct ckrm_core_class cls->parent = parent; } } else - printk(KERN_ERR"alloc_cpu_class failed\n"); + printk("alloc_cpu_class failed GFP_ATOMIC\n"); return cls; } @@ -150,7 +132,7 @@ static void ckrm_free_cpu_class(void *my_res) return; /*the default class can't be freed*/ - if (cls == get_default_cpu_class()) + if (cls == default_cpu_class) return; // Assuming there will be no children when this function is called @@ -180,9 +162,6 @@ static void ckrm_free_cpu_class(void *my_res) write_unlock(&class_list_lock); kfree(cls); - - //call ckrm_cpu_monitor after class removed - ckrm_cpu_monitor(0); } /* @@ -208,28 +187,18 @@ int ckrm_cpu_set_share(void *my_res, struct ckrm_shares *new_share) parres = NULL; } - /* - * hzheng: CKRM_SHARE_DONTCARE should be handled - */ - if (new_share->my_guarantee == CKRM_SHARE_DONTCARE) - new_share->my_guarantee = 0; - rc = set_shares(new_share, cur, par); - if (cur->my_limit == CKRM_SHARE_DONTCARE) - cur->my_limit = cur->max_limit; - spin_unlock(&cls->cnt_lock); if (cls->parent) { spin_unlock(&parres->cnt_lock); } - - //call ckrm_cpu_monitor after changes are changed - ckrm_cpu_monitor(0); - return rc; } +/* + * translate the global_CVT to ticks + */ static int ckrm_cpu_get_share(void *my_res, struct ckrm_shares *shares) { @@ -244,59 +213,64 @@ static int ckrm_cpu_get_share(void *my_res, int ckrm_cpu_get_stats(void *my_res, struct seq_file * sfile) { struct ckrm_cpu_class *cls = my_res; - struct ckrm_cpu_class_stat* stat = &cls->stat; - ckrm_lrq_t* lrq; - int i; if (!cls) return -EINVAL; seq_printf(sfile, "-------- CPU Class Status Start---------\n"); - seq_printf(sfile, "Share:\n\tgrt= %d limit= %d total_grt= %d max_limit= %d\n", + seq_printf(sfile, " gua= %d limit= %d\n", cls->shares.my_guarantee, - cls->shares.my_limit, + cls->shares.my_limit); + seq_printf(sfile, " total_gua= %d limit= %d\n", cls->shares.total_guarantee, cls->shares.max_limit); - seq_printf(sfile, "\tunused_grt= %d cur_max_limit= %d\n", + seq_printf(sfile, " used_gua= %d cur_limit= %d\n", cls->shares.unused_guarantee, cls->shares.cur_max_limit); - seq_printf(sfile, "Effective:\n\tegrt= %d\n",stat->egrt); - seq_printf(sfile, "\tmegrt= %d\n",stat->megrt); - seq_printf(sfile, "\tehl= %d\n",stat->ehl); - seq_printf(sfile, "\tmehl= %d\n",stat->mehl); - seq_printf(sfile, "\teshare= %d\n",stat->eshare); - seq_printf(sfile, "\tmeshare= %d\n",cpu_class_weight(cls)); - seq_printf(sfile, "\tmax_demand= %lu\n",stat->max_demand); - seq_printf(sfile, "\ttotal_ns= %llu\n",stat->total_ns); - seq_printf(sfile, "\tusage(2,10,60)= %d %d %d\n", - get_ckrm_usage(cls,2*HZ), - get_ckrm_usage(cls,10*HZ), - get_ckrm_usage(cls,60*HZ) - ); - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(cls,i); - seq_printf(sfile, "\tlrq %d demand= %lu weight= %d lrq_load= %lu cvt= %llu sav= %llu\n",i,stat->local_stats[i].cpu_demand,local_class_weight(lrq),lrq->lrq_load,lrq->local_cvt,lrq->savings); - } - + seq_printf(sfile, " Share= %d\n",cpu_class_weight(cls)); + seq_printf(sfile, " cvt= %llu\n",cls->local_queues[0].local_cvt); + seq_printf(sfile, " total_ns= %llu\n",cls->stat.total_ns); + seq_printf(sfile, " prio= %d\n",cls->local_queues[0].classqueue_linkobj.prio); + seq_printf(sfile, " index= %d\n",cls->local_queues[0].classqueue_linkobj.index); + seq_printf(sfile, " run= 
%llu\n",cls->stat.local_stats[0].run); + seq_printf(sfile, " total= %llu\n",cls->stat.local_stats[0].total); + seq_printf(sfile, " cpu_demand= %lu\n",cls->stat.cpu_demand); + + seq_printf(sfile, " effective_guarantee= %d\n",cls->stat.effective_guarantee); + seq_printf(sfile, " effective_limit= %d\n",cls->stat.effective_limit); + seq_printf(sfile, " effective_share= %d\n",cls->stat.effective_share); seq_printf(sfile, "-------- CPU Class Status END ---------\n"); + return 0; } /* * task will remain in the same cpu but on a different local runqueue */ -void ckrm_cpu_change_class(void *task, void *old, void *new) +static void ckrm_cpu_change_class(void *task, void *old, void *new) { struct task_struct *tsk = task; struct ckrm_cpu_class *newcls = new; + unsigned long flags; + struct runqueue *rq; + prio_array_t *array; /*sanity checking*/ if (!task || ! old || !new) return; - _ckrm_cpu_change_class(tsk,newcls); + rq = task_rq_lock(tsk,&flags); + array = tsk->array; + if (array) { + dequeue_task(tsk,array); + tsk->cpu_class = newcls; + enqueue_task(tsk,rq_active(tsk,rq)); + } else { + tsk->cpu_class = newcls; + } + task_rq_unlock(rq,&flags); } /*dummy function, not used*/ @@ -318,12 +292,12 @@ static int ckrm_cpu_set_config(void *my_res, const char *cfgstr) if (!cls) return -EINVAL; - printk(KERN_DEBUG "ckrm_cpu config='%s'\n",cfgstr); + printk("ckrm_cpu config='%s'\n",cfgstr); return 0; } struct ckrm_res_ctlr cpu_rcbs = { - .res_name = "cpu", + .res_name = "CKRM CPU Class", .res_hdepth = 1, .resid = -1, .res_alloc = ckrm_alloc_cpu_class, @@ -349,7 +323,7 @@ int __init init_ckrm_sched_res(void) if (resid == -1) { /*not registered */ resid = ckrm_register_res_ctlr(clstype,&cpu_rcbs); - printk(KERN_DEBUG "........init_ckrm_sched_res , resid= %d\n",resid); + printk("........init_ckrm_sched_res , resid= %d\n",resid); } return 0; } @@ -365,11 +339,10 @@ void init_cpu_classes(void) //init classqueues for each processor for (i=0; i < NR_CPUS; i++) classqueue_init(get_cpu_classqueue(i)); - - /* - * hzheng: initialize the default cpu class - * required for E14/E15 since ckrm_init is called after sched_init - */ +/* + * hzheng: initialize the default cpu class + * required for E14 since ckrm_init is called after sched_init + */ ckrm_alloc_cpu_class(NULL,NULL); } diff --git a/kernel/ckrm/ckrm_cpu_monitor.c b/kernel/ckrm/ckrm_cpu_monitor.c index d8c199a20..674ee6e50 100644 --- a/kernel/ckrm/ckrm_cpu_monitor.c +++ b/kernel/ckrm/ckrm_cpu_monitor.c @@ -28,84 +28,36 @@ #include #include -#define CPU_MONITOR_INTERVAL (HZ) /*how often do we adjust the shares*/ +#define CPU_MONITOR_INTERVAL (4*HZ) /*how often do we adjust the shares*/ +#define CKRM_SHARE_ACCURACY 7 #define CKRM_SHARE_MAX (1<shares.my_limit; -} - -static inline int get_mysoft_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - -static inline int get_hard_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - -static inline int get_myhard_limit(struct ckrm_cpu_class *cls) -{ - return cls->shares.total_guarantee; -} - - -static inline void cpu_demand_stat_init(struct ckrm_cpu_demand_stat* local_stat, int type) -{ - unsigned long long now = sched_clock(); - - local_stat->run = 0; - local_stat->total = 0; - local_stat->last_sleep = now; - switch (type) { - case CPU_DEMAND_TP_CLASS: - local_stat->recalc_interval = CPU_DEMAND_CLASS_RECALC; - local_stat->cpu_demand = 0; - break; - case CPU_DEMAND_TP_TASK: - local_stat->recalc_interval = CPU_DEMAND_TASK_RECALC; - //for task, the init cpu_demand is copied 
from its parent - break; - default: - BUG(); - } -} void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat) { int i; + struct ckrm_cpu_class_local_stat* local_stat; + unsigned long long now = sched_clock(); stat->stat_lock = SPIN_LOCK_UNLOCKED; stat->total_ns = 0; - stat->max_demand = 0; + stat->cpu_demand = 0; for (i=0; i< NR_CPUS; i++) { - cpu_demand_stat_init(&stat->local_stats[i],CPU_DEMAND_TP_CLASS); + local_stat = &stat->local_stats[i]; + local_stat->run = 0; + local_stat->total = 0; + local_stat->last_sleep = now; + local_stat->cpu_demand = 0; } - stat->egrt = 0; - stat->megrt = 0; - stat->ehl = CKRM_SHARE_MAX; /*default: no limit*/ - stat->mehl = CKRM_SHARE_MAX; /*default: no limit */ - - stat->eshare = CKRM_SHARE_MAX; - stat->meshare = CKRM_SHARE_MAX; + stat->effective_guarantee = 0; + stat->effective_limit = 0; + stat->glut = 0; + stat->effective_share = 100; + stat->self_effective_share = 100; } - /**********************************************/ /* cpu demand */ /**********************************************/ @@ -125,42 +77,52 @@ void ckrm_cpu_stat_init(struct ckrm_cpu_class_stat *stat) */ /** - * update_cpu_demand_stat - + * update_cpu_demand - update a state change * - * should be called whenever the state of a task/task local queue changes + * should be called whenever the state of a local queue changes * -- when deschedule : report how much run * -- when enqueue: report how much sleep * - * how often should we recalculate the cpu demand - * the number is in ns + * to deal with excessive long run/sleep state + * -- whenever the the ckrm_cpu_monitor is called, check if the class is in sleep state, if yes, then update sleep record */ -static inline void update_cpu_demand_stat(struct ckrm_cpu_demand_stat* local_stat,int state, unsigned long long len) +#define CKRM_CPU_DEMAND_RUN 0 +#define CKRM_CPU_DEMAND_SLEEP 1 +//how often should we recalculate the cpu demand, in ns +#define CPU_DEMAND_CAL_THRESHOLD (1000000000LL) +static inline void update_local_cpu_demand(struct ckrm_cpu_class_local_stat* local_stat,int state, unsigned long long len) { local_stat->total += len; if (state == CKRM_CPU_DEMAND_RUN) local_stat->run += len; - if (local_stat->total >= local_stat->recalc_interval) { + if (local_stat->total >= CPU_DEMAND_CAL_THRESHOLD) { local_stat->total >>= CKRM_SHARE_ACCURACY; - if (unlikely(local_stat->run > 0xFFFFFFFF)) - local_stat->run = 0xFFFFFFFF; - - if (local_stat->total > 0xFFFFFFFF) + if (local_stat->total > 0xFFFFFFFF) local_stat->total = 0xFFFFFFFF; - - do_div(local_stat->run,(unsigned long)local_stat->total); - if (local_stat->total > 0xFFFFFFFF) //happens after very long sleep - local_stat->cpu_demand = local_stat->run; - else { - local_stat->cpu_demand += local_stat->run; - local_stat->cpu_demand >>= 1; - } + do_div(local_stat->run,(unsigned long)local_stat->total); + local_stat->cpu_demand +=local_stat->run; + local_stat->cpu_demand >>= 1; local_stat->total = 0; local_stat->run = 0; } } +static inline void cpu_demand_update_run(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len) +{ + update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_RUN,len); +} + +static inline void cpu_demand_update_sleep(struct ckrm_cpu_class_local_stat* local_stat, unsigned long long len) +{ + update_local_cpu_demand(local_stat,CKRM_CPU_DEMAND_SLEEP,len); +} + +#define CPU_DEMAND_ENQUEUE 0 +#define CPU_DEMAND_DEQUEUE 1 +#define CPU_DEMAND_DESCHEDULE 2 + /** * cpu_demand_event - and cpu_demand event occured * @event: one of the following three events: @@ -169,24 +131,19 @@ 
static inline void update_cpu_demand_stat(struct ckrm_cpu_demand_stat* local_sta * CPU_DEMAND_DESCHEDULE: one task belong a certain local class deschedule * @len: valid only for CPU_DEMAND_DESCHEDULE, how long the task has been run */ -void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsigned long long len) +void cpu_demand_event(struct ckrm_cpu_class_local_stat* local_stat, int event, unsigned long long len) { switch (event) { case CPU_DEMAND_ENQUEUE: len = sched_clock() - local_stat->last_sleep; local_stat->last_sleep = 0; - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,len); + cpu_demand_update_sleep(local_stat,len); break; case CPU_DEMAND_DEQUEUE: - if (! local_stat->last_sleep) { - local_stat->last_sleep = sched_clock(); - } + local_stat->last_sleep = sched_clock(); break; case CPU_DEMAND_DESCHEDULE: - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_RUN,len); - break; - case CPU_DEMAND_INIT: //for task init only - cpu_demand_stat_init(local_stat,CPU_DEMAND_TP_TASK); + cpu_demand_update_run(local_stat,len); break; default: BUG(); @@ -195,19 +152,18 @@ void cpu_demand_event(struct ckrm_cpu_demand_stat* local_stat, int event, unsign /** * check all the class local queue - * - * to deal with excessive long run/sleep state - * -- whenever the the ckrm_cpu_monitor is called, check if the class is in sleep state, if yes, then update sleep record + * if local queueu is not in runqueue, then it's in sleep state + * if compare to last sleep, */ static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int cpu) { - struct ckrm_cpu_demand_stat * local_stat = &stat->local_stats[cpu]; + struct ckrm_cpu_class_local_stat * local_stat = &stat->local_stats[cpu]; unsigned long long sleep,now; if (local_stat->last_sleep) { now = sched_clock(); sleep = now - local_stat->last_sleep; local_stat->last_sleep = now; - update_cpu_demand_stat(local_stat,CKRM_CPU_DEMAND_SLEEP,sleep); + cpu_demand_update_sleep(local_stat,sleep); } } @@ -216,72 +172,51 @@ static inline void cpu_demand_check_sleep(struct ckrm_cpu_class_stat *stat, int * * self_cpu_demand = sum(cpu demand of all local queues) */ -static inline unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat *stat) +static unsigned long get_self_cpu_demand(struct ckrm_cpu_class_stat + *stat) { int cpu_demand = 0; int i; - int cpuonline = 0; for_each_online_cpu(i) { cpu_demand_check_sleep(stat,i); cpu_demand += stat->local_stats[i].cpu_demand; - cpuonline ++; } - return (cpu_demand/cpuonline); + if (cpu_demand > CKRM_SHARE_MAX) + cpu_demand = CKRM_SHARE_MAX; + return cpu_demand; } /* - * my max demand = min(cpu_demand, my effective hard limit) + * update effective cpu demand for each class + * assume the root_core->parent == NULL */ -static inline unsigned long get_mmax_demand(struct ckrm_cpu_class_stat* stat) -{ - unsigned long mmax_demand = get_self_cpu_demand(stat); - if (mmax_demand > stat->mehl) - mmax_demand = stat->mehl; - - return mmax_demand; -} - -/** - * update_max_demand: update effective cpu demand for each class - * return -1 on error - * - * Assume: the root_core->parent == NULL - */ -static int update_max_demand(struct ckrm_core_class *root_core) +static void update_cpu_demand(struct ckrm_core_class *root_core) { struct ckrm_core_class *cur_core, *child_core; - struct ckrm_cpu_class *cls,*c_cls; - int ret = -1; + struct ckrm_cpu_class *cls; cur_core = root_core; child_core = NULL; - - repeat: - if (!cur_core) { //normal exit - ret = 0; - goto out; - } + /* + * iterate the tree + * 
update cpu_demand of each node + */ + repeat: + if (!cur_core) + return; cls = ckrm_get_cpu_class(cur_core); - if (! cls) //invalid c_cls, abort - goto out; - if (!child_core) //first child - cls->stat.max_demand = get_mmax_demand(&cls->stat); + cls->stat.cpu_demand = get_self_cpu_demand(&cls->stat); else { - c_cls = ckrm_get_cpu_class(child_core); - if (c_cls) - cls->stat.max_demand += c_cls->stat.max_demand; - else //invalid c_cls, abort - goto out; + cls->stat.cpu_demand += + ckrm_get_cpu_class(child_core)->stat.cpu_demand; + if (cls->stat.cpu_demand > CKRM_SHARE_MAX) + cls->stat.cpu_demand = CKRM_SHARE_MAX; } - //check class hard limit - if (cls->stat.max_demand > cls->stat.ehl) - cls->stat.max_demand = cls->stat.ehl; - //next child child_core = ckrm_get_next_child(cur_core, child_core); if (child_core) { @@ -294,123 +229,78 @@ static int update_max_demand(struct ckrm_core_class *root_core) cur_core = child_core->hnode.parent; } goto repeat; - out: - return ret; } /**********************************************/ /* effective guarantee & limit */ /**********************************************/ -static inline void set_eshare(struct ckrm_cpu_class_stat *stat, +static inline void set_effective_share(struct ckrm_cpu_class_stat *stat, int new_share) { if (!new_share) new_share = 1; - - BUG_ON(new_share < 0); - stat->eshare = new_share; + stat->effective_share = new_share; } -static inline void set_meshare(struct ckrm_cpu_class_stat *stat, +static inline void set_self_effective_share(struct ckrm_cpu_class_stat *stat, int new_share) { if (!new_share) new_share = 1; - - BUG_ON(new_share < 0); - stat->meshare = new_share; + stat->self_effective_share = new_share; } -/** - *update_child_effective - update egrt, ehl, mehl for all children of parent - *@parent: the parent node - *return -1 if anything wrong - * - */ -static int update_child_effective(struct ckrm_core_class *parent) +static inline void update_child_effective(struct ckrm_core_class *parent) { struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); - struct ckrm_core_class *child_core; - int ret = -1; - - if (! p_cls) - return ret; + struct ckrm_core_class *child_core = ckrm_get_next_child(parent, NULL); - child_core = ckrm_get_next_child(parent, NULL); while (child_core) { struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); - if (! 
c_cls) - return ret; - c_cls->stat.egrt = - p_cls->stat.egrt * + c_cls->stat.effective_guarantee = + p_cls->stat.effective_guarantee * c_cls->shares.my_guarantee / p_cls->shares.total_guarantee; - - c_cls->stat.megrt = c_cls->stat.egrt * c_cls->shares.unused_guarantee - / c_cls->shares.total_guarantee; - - c_cls->stat.ehl = - p_cls->stat.ehl * - get_hard_limit(c_cls) / p_cls->shares.total_guarantee; - - c_cls->stat.mehl = - c_cls->stat.ehl * - get_myhard_limit(c_cls) / c_cls->shares.total_guarantee; - - set_eshare(&c_cls->stat,c_cls->stat.egrt); - set_meshare(&c_cls->stat,c_cls->stat.megrt); - + c_cls->stat.effective_limit = + p_cls->stat.effective_guarantee * c_cls->shares.my_limit / + p_cls->shares.total_guarantee; child_core = ckrm_get_next_child(parent, child_core); }; - return 0; + } -/** - * update_effectives: update egrt, ehl, mehl for the whole tree +/* + * update effective guarantee and effective limit + * -- effective share = parent->effective->share * share/parent->total_share + * -- effective limit = parent->effective->share * limit/parent->total_share * should be called only when class structure changed - * - * return -1 if anything wrong happened (eg: the structure changed during the process) */ -static int update_effectives(struct ckrm_core_class *root_core) +static void update_effective_guarantee_limit(struct ckrm_core_class *root_core) { - struct ckrm_core_class *cur_core, *child_core; + struct ckrm_core_class *cur_core, *child_core = NULL; struct ckrm_cpu_class *cls; - int ret = -1; cur_core = root_core; - child_core = NULL; cls = ckrm_get_cpu_class(cur_core); + cls->stat.effective_guarantee = CKRM_SHARE_MAX; + cls->stat.effective_limit = cls->stat.effective_guarantee; - //initialize the effectives for root - cls->stat.egrt = CKRM_SHARE_MAX; /*egrt of the root is always 100% */ - cls->stat.megrt = cls->stat.egrt * cls->shares.unused_guarantee - / cls->shares.total_guarantee; - cls->stat.ehl = CKRM_SHARE_MAX * get_hard_limit(cls) - / cls->shares.total_guarantee; - cls->stat.mehl = cls->stat.ehl * get_myhard_limit(cls) - / cls->shares.total_guarantee; - set_eshare(&cls->stat,cls->stat.egrt); - set_meshare(&cls->stat,cls->stat.megrt); - - repeat: + repeat: //check exit if (!cur_core) - return 0; + return; - //visit this node only once - if (! 
child_core) - if (update_child_effective(cur_core) < 0) - return ret; //invalid cur_core node - + //visit this node + update_child_effective(cur_core); //next child child_core = ckrm_get_next_child(cur_core, child_core); - if (child_core) { - //go down to the next hier + //go down cur_core = child_core; child_core = NULL; - } else { //no more child, go back + goto repeat; + } else { //no more child, go back child_core = cur_core; cur_core = child_core->hnode.parent; } @@ -422,12 +312,12 @@ static int update_effectives(struct ckrm_core_class *root_core) /**********************************************/ /* - * surplus = egrt - demand + * surplus = my_effective_share - demand * if surplus < 0, surplus = 0 */ static inline int get_node_surplus(struct ckrm_cpu_class *cls) { - int surplus = cls->stat.egrt - cls->stat.max_demand; + int surplus = cls->stat.effective_guarantee - cls->stat.cpu_demand; if (surplus < 0) surplus = 0; @@ -435,254 +325,122 @@ static inline int get_node_surplus(struct ckrm_cpu_class *cls) return surplus; } -static inline int get_my_node_surplus(struct ckrm_cpu_class *cls) -{ - int surplus = cls->stat.megrt - get_mmax_demand(&cls->stat); - - if (surplus < 0) - surplus = 0; - - return surplus; -} - -/** - * consume_surplus: decides how much surplus a node can consume - * @ckeck_sl: if check_sl is set, then check soft_limitx +/* + * consume the surplus * return how much consumed - * - * implements all the CKRM Scheduling Requirement - * assume c_cls is valid + * set glut when necessary */ -static inline int consume_surplus(int surplus, - struct ckrm_cpu_class *c_cls, - struct ckrm_cpu_class *p_cls, - int check_sl - ) +static inline int node_surplus_consume(int old_surplus, + struct ckrm_core_class *child_core, + struct ckrm_cpu_class *p_cls) { int consumed = 0; int inc_limit; - int total_grt = p_cls->shares.total_guarantee; - BUG_ON(surplus < 0); + struct ckrm_cpu_class *c_cls = ckrm_get_cpu_class(child_core); - /*can't consume more than demand or hard limit*/ - if (c_cls->stat.eshare >= c_cls->stat.max_demand) + if (c_cls->stat.glut) goto out; - //the surplus allocation is propotional to grt - consumed = - surplus * c_cls->shares.my_guarantee / total_grt; - - if (! consumed) //no more share + //check demand + if (c_cls->stat.effective_share >= c_cls->stat.cpu_demand) { + c_cls->stat.glut = 1; goto out; - - //hard limit and demand limit - inc_limit = c_cls->stat.max_demand - c_cls->stat.eshare; - - if (check_sl) { - int esl = p_cls->stat.eshare * get_soft_limit(c_cls) - /total_grt; - if (esl < c_cls->stat.max_demand) - inc_limit = esl - c_cls->stat.eshare; } - if (consumed > inc_limit) - consumed = inc_limit; - - BUG_ON(consumed < 0); - out: - return consumed; -} - -/* - * how much a node can consume for itself? - */ -static inline int consume_self_surplus(int surplus, - struct ckrm_cpu_class *p_cls, - int check_sl - ) -{ - int consumed = 0; - int inc_limit; - int total_grt = p_cls->shares.total_guarantee; - int max_demand = get_mmax_demand(&p_cls->stat); - - BUG_ON(surplus < 0); - - /*can't consume more than demand or hard limit*/ - if (p_cls->stat.meshare >= max_demand) - goto out; - - //the surplus allocation is propotional to grt consumed = - surplus * p_cls->shares.unused_guarantee / total_grt; - - if (! 
consumed) //no more share - goto out; - - //hard limit and demand limit - inc_limit = max_demand - p_cls->stat.meshare; + old_surplus * c_cls->shares.my_guarantee / + p_cls->shares.total_guarantee; - if (check_sl) { - int mesl = p_cls->stat.eshare * get_mysoft_limit(p_cls) - /total_grt; - if (mesl < max_demand) - inc_limit = mesl - p_cls->stat.meshare; - } - - if (consumed > inc_limit) + //check limit + inc_limit = c_cls->stat.effective_limit - c_cls->stat.effective_share; + if (inc_limit <= consumed) { + c_cls->stat.glut = 1; consumed = inc_limit; + } - BUG_ON(consumed < 0); - out: + c_cls->stat.effective_share += consumed; + out: return consumed; } - /* - * allocate surplus to all its children and also its default class - */ -static int alloc_surplus_single_round( - int surplus, - struct ckrm_core_class *parent, - struct ckrm_cpu_class *p_cls, - int check_sl) -{ - struct ckrm_cpu_class *c_cls; - struct ckrm_core_class *child_core = NULL; - int total_consumed = 0,consumed; - - //first allocate to the default class - consumed = - consume_self_surplus(surplus,p_cls,check_sl); - - if (consumed > 0) { - set_meshare(&p_cls->stat,p_cls->stat.meshare + consumed); - total_consumed += consumed; - } - - do { - child_core = ckrm_get_next_child(parent, child_core); - if (child_core) { - c_cls = ckrm_get_cpu_class(child_core); - if (! c_cls) - return -1; - - consumed = - consume_surplus(surplus, c_cls, - p_cls,check_sl); - if (consumed > 0) { - set_eshare(&c_cls->stat,c_cls->stat.eshare + consumed); - total_consumed += consumed; - } - } - } while (child_core); - - return total_consumed; -} - -/** - * alloc_surplus_node: re-allocate the shares for children under parent - * @parent: parent node - * return the remaining surplus - * + * re-allocate the shares for all the childs under this node * task: * 1. get total surplus * 2. allocate surplus * 3. set the effective_share of each node */ -static int alloc_surplus_node(struct ckrm_core_class *parent) +static void alloc_surplus_node(struct ckrm_core_class *parent) { - struct ckrm_cpu_class *p_cls,*c_cls; - int total_surplus,consumed; - int check_sl; - int ret = -1; + int total_surplus = 0, old_surplus = 0; + struct ckrm_cpu_class *p_cls = ckrm_get_cpu_class(parent); struct ckrm_core_class *child_core = NULL; - - p_cls = ckrm_get_cpu_class(parent); - if (! p_cls) - goto realloc_out; + int self_share; /* - * get total surplus + * calculate surplus + * total_surplus = sum(child_surplus) + * reset glut flag + * initialize effective_share */ - total_surplus = p_cls->stat.eshare - p_cls->stat.egrt; - BUG_ON(total_surplus < 0); - total_surplus += get_my_node_surplus(p_cls); - do { child_core = ckrm_get_next_child(parent, child_core); if (child_core) { - c_cls = ckrm_get_cpu_class(child_core); - if (! c_cls) - goto realloc_out; + struct ckrm_cpu_class *c_cls = + ckrm_get_cpu_class(child_core); + ckrm_stat_t *stat = &c_cls->stat; total_surplus += get_node_surplus(c_cls); + stat->glut = 0; + set_effective_share(stat, stat->effective_guarantee); } } while (child_core); - - if (! 
total_surplus) { - ret = 0; - goto realloc_out; - } - - /* - * distributing the surplus - * first with the check_sl enabled - * once all the tasks has research the soft limit, disable check_sl and try again - */ - - check_sl = 1; + /*distribute the surplus */ + child_core = NULL; do { - consumed = alloc_surplus_single_round(total_surplus,parent,p_cls,check_sl); - if (consumed < 0) //something is wrong - goto realloc_out; + if (!child_core) //keep the surplus of last round + old_surplus = total_surplus; - if (! consumed) - check_sl = 0; - else - total_surplus -= consumed; + child_core = ckrm_get_next_child(parent, child_core); + if (child_core) { + total_surplus -= + node_surplus_consume(old_surplus, child_core, + p_cls); + } + //start a new round if something is allocated in the last round + } while (child_core || (total_surplus != old_surplus)); - } while ((total_surplus > 0) && (consumed || check_sl) ); + //any remaining surplus goes to the default class + self_share = p_cls->stat.effective_share * + p_cls->shares.unused_guarantee / p_cls->shares.total_guarantee; + self_share += total_surplus; - ret = 0; - - realloc_out: - return ret; + set_self_effective_share(&p_cls->stat, self_share); } /** * alloc_surplus - reallocate unused shares * * class A's usused share should be allocated to its siblings - * the re-allocation goes downward from the top */ -static int alloc_surplus(struct ckrm_core_class *root_core) +static void alloc_surplus(struct ckrm_core_class *root_core) { - struct ckrm_core_class *cur_core, *child_core; - // struct ckrm_cpu_class *cls; - int ret = -1; + struct ckrm_core_class *cur_core, *child_core = NULL; + struct ckrm_cpu_class *cls; - /*initialize*/ cur_core = root_core; - child_core = NULL; - // cls = ckrm_get_cpu_class(cur_core); - - /*the ckrm idle tasks get all what's remaining*/ - /*hzheng: uncomment the following like for hard limit support */ - // update_ckrm_idle(CKRM_SHARE_MAX - cls->stat.max_demand); - - repeat: + cls = ckrm_get_cpu_class(cur_core); + cls->stat.glut = 0; + set_effective_share(&cls->stat, cls->stat.effective_guarantee); + repeat: //check exit if (!cur_core) - return 0; - - //visit this node only once - if (! 
child_core) - if ( alloc_surplus_node(cur_core) < 0 ) - return ret; + return; + //visit this node + alloc_surplus_node(cur_core); //next child child_core = ckrm_get_next_child(cur_core, child_core); if (child_core) { @@ -697,250 +455,22 @@ static int alloc_surplus(struct ckrm_core_class *root_core) goto repeat; } -/**********************************************/ -/* CKRM Idle Tasks */ -/**********************************************/ -struct ckrm_cpu_class ckrm_idle_class_obj, *ckrm_idle_class; -struct task_struct* ckrm_idle_tasks[NR_CPUS]; - -/*how many ckrm idle tasks should I wakeup*/ -static inline int get_nr_idle(unsigned long surplus) -{ - int cpu_online = cpus_weight(cpu_online_map); - int nr_idle = 0; - - nr_idle = surplus * cpu_online; - nr_idle >>= CKRM_SHARE_ACCURACY; - - if (surplus) - nr_idle ++; - - if (nr_idle > cpu_online) - nr_idle = cpu_online; - - return nr_idle; -} - -/** - * update_ckrm_idle: update the status of the idle class according to the new surplus - * surplus: new system surplus - * - * Task: - * -- update share of the idle class - * -- wakeup idle tasks according to surplus - */ -void update_ckrm_idle(unsigned long surplus) -{ - int nr_idle = get_nr_idle(surplus); - int i; - struct task_struct* idle_task; - - set_eshare(&ckrm_idle_class->stat,surplus); - set_meshare(&ckrm_idle_class->stat,surplus); - /*wake up nr_idle idle tasks*/ - for_each_online_cpu(i) { - idle_task = ckrm_idle_tasks[i]; - if (unlikely(idle_task->cpu_class != ckrm_idle_class)) { - ckrm_cpu_change_class(idle_task, - idle_task->cpu_class, - ckrm_idle_class); - } - if (! idle_task) - continue; - if (i < nr_idle) { - //activate it - wake_up_process(idle_task); - } else { - //deactivate it - idle_task->state = TASK_INTERRUPTIBLE; - set_tsk_need_resched(idle_task); - } - } -} - -static int ckrm_cpu_idled(void *nothing) -{ - set_user_nice(current,19); - daemonize("ckrm_idle_task"); - - //deactivate it, it will be awakened by ckrm_cpu_monitor - current->state = TASK_INTERRUPTIBLE; - schedule(); - - /*similar to cpu_idle */ - while (1) { - while (!need_resched()) { - ckrm_cpu_monitor(1); - if (current_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) { - set_tsk_need_resched(current); - safe_halt(); - } else - local_irq_enable(); - } - } - schedule(); - } - return 0; -} - -/** - * ckrm_start_ckrm_idle: - * create the ckrm_idle_class and starts the idle tasks - * - */ -void ckrm_start_ckrm_idle(void) -{ - int i; - int ret; - ckrm_shares_t shares; - - ckrm_idle_class = &ckrm_idle_class_obj; - memset(ckrm_idle_class,0,sizeof(shares)); - /*don't care about the shares */ - init_cpu_class(ckrm_idle_class,&shares); - printk(KERN_INFO"ckrm idle class %x created\n",(int)ckrm_idle_class); - - for_each_online_cpu(i) { - ret = kernel_thread(ckrm_cpu_idled, 0, CLONE_KERNEL); - - /*warn on error, but the system should still work without it*/ - if (ret < 0) - printk(KERN_ERR"Warn: can't start ckrm idle tasks\n"); - else { - ckrm_idle_tasks[i] = find_task_by_pid(ret); - if (!ckrm_idle_tasks[i]) - printk(KERN_ERR"Warn: can't find ckrm idle tasks %d\n",ret); - } - } -} - -/**********************************************/ -/* Local Weight */ -/**********************************************/ -/** - * adjust_class_local_weight: adjust the local weight for each cpu - * - * lrq->weight = lpr->pressure * class->weight / total_pressure - */ -static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online) -{ - unsigned long total_pressure = 0; - ckrm_lrq_t* lrq; - int i; - unsigned long 
class_weight; - unsigned long long lw; - - //get total pressure - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(clsptr,i); - total_pressure += lrq->lrq_load; - } - - if (! total_pressure) - return; - - class_weight = cpu_class_weight(clsptr) * cpu_online; - - /* - * update weight for each cpu, minimun is 1 - */ - for_each_online_cpu(i) { - lrq = get_ckrm_lrq(clsptr,i); - if (! lrq->lrq_load) - /*give idle class a high share to boost interactiveness */ - lw = cpu_class_weight(clsptr); - else { - lw = lrq->lrq_load * class_weight; - do_div(lw,total_pressure); - if (!lw) - lw = 1; - else if (lw > CKRM_SHARE_MAX) - lw = CKRM_SHARE_MAX; - } - - lrq->local_weight = lw; - } -} - -/* - * assume called with class_list_lock read lock held - */ -void adjust_local_weight(void) -{ - static spinlock_t lock = SPIN_LOCK_UNLOCKED; - struct ckrm_cpu_class *clsptr; - int cpu_online; - - //do nothing if someone already holding the lock - if (! spin_trylock(&lock)) - return; - - cpu_online = cpus_weight(cpu_online_map); - - //class status: demand, share,total_ns prio, index - list_for_each_entry(clsptr,&active_cpu_classes,links) { - adjust_lrq_weight(clsptr,cpu_online); - } - - spin_unlock(&lock); -} - -/**********************************************/ -/* Main */ -/**********************************************/ /** *ckrm_cpu_monitor - adjust relative shares of the classes based on their progress - *@check_min: if check_min is set, the call can't be within 100ms of last call * * this function is called every CPU_MONITOR_INTERVAL * it computes the cpu demand of each class * and re-allocate the un-used shares to other classes */ -void ckrm_cpu_monitor(int check_min) +void ckrm_cpu_monitor(void) { - static spinlock_t lock = SPIN_LOCK_UNLOCKED; - static unsigned long long last_check = 0; - struct ckrm_core_class *root_core = get_default_cpu_class()->core; - unsigned long long now; -#define MIN_CPU_MONITOR_INTERVAL 100000000UL - + struct ckrm_core_class *root_core = default_cpu_class->core; if (!root_core) return; - //do nothing if someone already holding the lock - if (! 
spin_trylock(&lock)) - return; - - read_lock(&class_list_lock); - - now = sched_clock(); - - //consecutive check should be at least 100ms apart - if (check_min && ((now - last_check) < MIN_CPU_MONITOR_INTERVAL)) - goto outunlock; - - last_check = now; - - if (update_effectives(root_core) != 0) - goto outunlock; - - if (update_max_demand(root_core) != 0) - goto outunlock; - -#ifndef ALLOC_SURPLUS_SUPPORT -#warning "MEF taking out alloc_surplus" -#else - if (alloc_surplus(root_core) != 0) - goto outunlock; -#endif - - adjust_local_weight(); - - outunlock: - read_unlock(&class_list_lock); - spin_unlock(&lock); + update_effective_guarantee_limit(root_core); + update_cpu_demand(root_core); + alloc_surplus(root_core); } /*****************************************************/ @@ -951,19 +481,22 @@ static int thread_exit = 0; static int ckrm_cpu_monitord(void *nothing) { + wait_queue_head_t wait; + + init_waitqueue_head(&wait); + daemonize("ckrm_cpu_ctrld"); for (;;) { /*sleep for sometime before next try*/ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(CPU_MONITOR_INTERVAL); - ckrm_cpu_monitor(1); + interruptible_sleep_on_timeout(&wait, CPU_MONITOR_INTERVAL); + ckrm_cpu_monitor(); if (thread_exit) { break; } } cpu_monitor_pid = -1; thread_exit = 2; - printk(KERN_DEBUG "cpu_monitord exit\n"); + printk("cpu_monitord exit\n"); return 0; } @@ -971,18 +504,21 @@ void ckrm_start_monitor(void) { cpu_monitor_pid = kernel_thread(ckrm_cpu_monitord, 0, CLONE_KERNEL); if (cpu_monitor_pid < 0) { - printk(KERN_DEBUG "ckrm_cpu_monitord for failed\n"); + printk("ckrm_cpu_monitord for failed\n"); } } void ckrm_kill_monitor(void) { - printk(KERN_DEBUG "killing process %d\n", cpu_monitor_pid); + wait_queue_head_t wait; + int interval = HZ; + init_waitqueue_head(&wait); + + printk("killing process %d\n", cpu_monitor_pid); if (cpu_monitor_pid > 0) { thread_exit = 1; while (thread_exit != 2) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(CPU_MONITOR_INTERVAL); + interruptible_sleep_on_timeout(&wait, interval); } } } @@ -990,8 +526,6 @@ void ckrm_kill_monitor(void) int ckrm_cpu_monitor_init(void) { ckrm_start_monitor(); - /*hzheng: uncomment the following like for hard limit support */ - // ckrm_start_ckrm_idle(); return 0; } diff --git a/kernel/ckrm/ckrm_laq.c b/kernel/ckrm/ckrm_laq.c deleted file mode 100644 index b64205a06..000000000 --- a/kernel/ckrm/ckrm_laq.c +++ /dev/null @@ -1,495 +0,0 @@ -/* ckrm_socketaq.c - accept queue resource controller - * - * Copyright (C) Vivek Kashyap, IBM Corp. 2004 - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - */ - -/* Changes - * Initial version - */ - -/* Code Description: TBD - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define hnode_2_core(ptr) \ - ((ptr) ? 
container_of(ptr, struct ckrm_core_class, hnode) : NULL) - -#define CKRM_SAQ_MAX_DEPTH 3 // 0 => /rcfs - // 1 => socket_aq - // 2 => socket_aq/listen_class - // 3 => socket_aq/listen_class/accept_queues - // 4 => Not allowed - -typedef struct ckrm_laq_res { - spinlock_t reslock; - atomic_t refcnt; - struct ckrm_shares shares; - struct ckrm_core_class *core; - struct ckrm_core_class *pcore; - int my_depth; - int my_id; - unsigned int min_ratio; -} ckrm_laq_res_t; - -static int my_resid = -1; - -extern struct ckrm_core_class *rcfs_create_under_netroot(char *, int, int); -extern struct ckrm_core_class *rcfs_make_core(struct dentry *, - struct ckrm_core_class *); - -void laq_res_hold(struct ckrm_laq_res *res) -{ - atomic_inc(&res->refcnt); - return; -} - -void laq_res_put(struct ckrm_laq_res *res) -{ - if (atomic_dec_and_test(&res->refcnt)) - kfree(res); - return; -} - -/* Initialize rescls values - */ -static void laq_res_initcls(void *my_res) -{ - ckrm_laq_res_t *res = my_res; - - res->shares.my_guarantee = CKRM_SHARE_DONTCARE; - res->shares.my_limit = CKRM_SHARE_DONTCARE; - res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.max_limit = CKRM_SHARE_DFLT_MAX_LIMIT; - res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE; - res->shares.cur_max_limit = 0; -} - -static int atoi(char *s) -{ - int k = 0; - while (*s) - k = *s++ - '0' + (k * 10); - return k; -} - -static char *laq_get_name(struct ckrm_core_class *c) -{ - char *p = (char *)c->name; - - while (*p) - p++; - while (*p != '/' && p != c->name) - p--; - - return ++p; -} - -static void *laq_res_alloc(struct ckrm_core_class *core, - struct ckrm_core_class *parent) -{ - ckrm_laq_res_t *res, *pres; - int pdepth; - - if (parent) - pres = ckrm_get_res_class(parent, my_resid, ckrm_laq_res_t); - else - pres = NULL; - - if (core == core->classtype->default_class) - pdepth = 1; - else { - if (!parent) - return NULL; - pdepth = 1 + pres->my_depth; - } - - res = kmalloc(sizeof(ckrm_laq_res_t), GFP_ATOMIC); - if (res) { - memset(res, 0, sizeof(res)); - spin_lock_init(&res->reslock); - laq_res_hold(res); - res->my_depth = pdepth; - if (pdepth == 2) // listen class - res->my_id = 0; - else if (pdepth == 3) - res->my_id = atoi(laq_get_name(core)); - res->core = core; - res->pcore = parent; - - // rescls in place, now initialize contents other than - // hierarchy pointers - laq_res_initcls(res); // acts as initialising value - } - - return res; -} - -static void laq_res_free(void *my_res) -{ - ckrm_laq_res_t *res = (ckrm_laq_res_t *) my_res; - ckrm_laq_res_t *parent; - - if (!res) - return; - - if (res->my_depth != 3) { - kfree(res); - return; - } - - parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) // Should never happen - return; - - spin_lock(&parent->reslock); - spin_lock(&res->reslock); - - // return child's guarantee to parent node - // Limits have no meaning for accept queue control - child_guarantee_changed(&parent->shares, res->shares.my_guarantee, 0); - - spin_unlock(&res->reslock); - laq_res_put(res); - spin_unlock(&parent->reslock); - return; -} - -/************************************************************************** - * SHARES *** - **************************************************************************/ - -void laq_set_aq_value(struct ckrm_net_struct *ns, unsigned int *aq_ratio) -{ - int i; - struct tcp_opt *tp; - - tp = tcp_sk(ns->ns_sk); - for (i = 0; i < NUM_ACCEPT_QUEUES; i++) - tp->acceptq[i].aq_ratio = aq_ratio[i]; - return; -} -void 
laq_set_aq_values(ckrm_laq_res_t * parent, unsigned int *aq_ratio) -{ - - struct ckrm_net_struct *ns; - struct ckrm_core_class *core = parent->core; - - class_lock(core); - list_for_each_entry(ns, &core->objlist, ckrm_link) { - laq_set_aq_value(ns, aq_ratio); - } - class_unlock(core); - return; -} - -static void calculate_aq_ratios(ckrm_laq_res_t * res, unsigned int *aq_ratio) -{ - struct ckrm_hnode *chnode; - ckrm_laq_res_t *child; - unsigned int min; - int i; - - min = aq_ratio[0] = (unsigned int)res->shares.unused_guarantee; - - list_for_each_entry(chnode, &res->core->hnode.children, siblings) { - child = hnode_2_core(chnode)->res_class[my_resid]; - - aq_ratio[child->my_id] = - (unsigned int)child->shares.my_guarantee; - if (aq_ratio[child->my_id] == CKRM_SHARE_DONTCARE) - aq_ratio[child->my_id] = 0; - if (aq_ratio[child->my_id] && - ((unsigned int)aq_ratio[child->my_id] < min)) - min = (unsigned int)child->shares.my_guarantee; - } - - if (min == 0) { - min = 1; - // default takes all if nothing specified - aq_ratio[0] = 1; - } - res->min_ratio = min; - - for (i = 0; i < NUM_ACCEPT_QUEUES; i++) - aq_ratio[i] = aq_ratio[i] / min; -} - -static int laq_set_share_values(void *my_res, struct ckrm_shares *shares) -{ - ckrm_laq_res_t *res = my_res; - ckrm_laq_res_t *parent; - unsigned int aq_ratio[NUM_ACCEPT_QUEUES]; - int rc = 0; - - if (!res) - return -EINVAL; - - if (!res->pcore) { - // something is badly wrong - printk(KERN_ERR "socketaq internal inconsistency\n"); - return -EBADF; - } - - parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) // socketclass does not have a share interface - return -EINVAL; - - // Ensure that we ignore limit values - shares->my_limit = CKRM_SHARE_DONTCARE; - shares->max_limit = CKRM_SHARE_UNCHANGED; - - if (res->my_depth == 0) { - printk(KERN_ERR "socketaq bad entry\n"); - return -EBADF; - } else if (res->my_depth == 1) { - // can't be written to. This is an internal default. - return -EINVAL; - } else if (res->my_depth == 2) { - //nothin to inherit - if (!shares->total_guarantee) { - return -EINVAL; - } - parent = res; - shares->my_guarantee = CKRM_SHARE_DONTCARE; - } else if (res->my_depth == 3) { - // accept queue itself. - shares->total_guarantee = CKRM_SHARE_UNCHANGED; - } - - ckrm_lock_hier(parent->pcore); - spin_lock(&parent->reslock); - rc = set_shares(shares, &res->shares, - (parent == res) ? 
NULL : &parent->shares); - if (rc) { - spin_unlock(&res->reslock); - ckrm_unlock_hier(res->pcore); - return rc; - } - calculate_aq_ratios(parent, aq_ratio); - laq_set_aq_values(parent, aq_ratio); - spin_unlock(&parent->reslock); - ckrm_unlock_hier(parent->pcore); - - return rc; -} - -static int laq_get_share_values(void *my_res, struct ckrm_shares *shares) -{ - ckrm_laq_res_t *res = my_res; - - if (!res) - return -EINVAL; - *shares = res->shares; - return 0; -} - -/************************************************************************** - * STATS *** - **************************************************************************/ - -void -laq_print_aq_stats(struct seq_file *sfile, struct tcp_acceptq_info *taq, int i) -{ - seq_printf(sfile, "Class %d connections:\n\taccepted: %u\n\t" - "queued: %u\n\twait_time: %u\n", - i, taq->acceptq_count, taq->acceptq_qcount, - jiffies_to_msecs(taq->acceptq_wait_time)); - - if (i) - return; - - for (i = 1; i < NUM_ACCEPT_QUEUES; i++) { - taq[0].acceptq_wait_time += taq[i].acceptq_wait_time; - taq[0].acceptq_qcount += taq[i].acceptq_qcount; - taq[0].acceptq_count += taq[i].acceptq_count; - } - - seq_printf(sfile, "Totals :\n\taccepted: %u\n\t" - "queued: %u\n\twait_time: %u\n", - taq->acceptq_count, taq->acceptq_qcount, - jiffies_to_msecs(taq->acceptq_wait_time)); - - return; -} - -void -laq_get_aq_stats(ckrm_laq_res_t * pres, ckrm_laq_res_t * mres, - struct tcp_acceptq_info *taq) -{ - struct ckrm_net_struct *ns; - struct ckrm_core_class *core = pres->core; - struct tcp_opt *tp; - int a = mres->my_id; - int z; - - if (a == 0) - z = NUM_ACCEPT_QUEUES; - else - z = a + 1; - - // XXX Instead of holding a class_lock introduce a rw - // lock to be write locked by listen callbacks and read locked here. - // - VK - class_lock(pres->core); - list_for_each_entry(ns, &core->objlist, ckrm_link) { - tp = tcp_sk(ns->ns_sk); - for (; a < z; a++) { - taq->acceptq_wait_time += tp->acceptq[a].aq_wait_time; - taq->acceptq_qcount += tp->acceptq[a].aq_qcount; - taq->acceptq_count += tp->acceptq[a].aq_count; - taq++; - } - } - class_unlock(pres->core); -} - -static int laq_get_stats(void *my_res, struct seq_file *sfile) -{ - ckrm_laq_res_t *res = my_res; - ckrm_laq_res_t *parent; - struct tcp_acceptq_info taq[NUM_ACCEPT_QUEUES]; - int rc = 0; - - if (!res) - return -EINVAL; - - if (!res->pcore) { - // something is badly wrong - printk(KERN_ERR "socketaq internal inconsistency\n"); - return -EBADF; - } - - parent = ckrm_get_res_class(res->pcore, my_resid, ckrm_laq_res_t); - if (!parent) { // socketclass does not have a stat interface - printk(KERN_ERR "socketaq internal fs inconsistency\n"); - return -EINVAL; - } - - memset(taq, 0, sizeof(struct tcp_acceptq_info) * NUM_ACCEPT_QUEUES); - - switch (res->my_depth) { - - default: - case 0: - printk(KERN_ERR "socket class bad entry\n"); - rc = -EBADF; - break; - - case 1: // can't be read from. this is internal default. - // return -EINVAL - rc = -EINVAL; - break; - - case 2: // return the default and total - ckrm_lock_hier(res->core); // block any deletes - laq_get_aq_stats(res, res, &taq[0]); - laq_print_aq_stats(sfile, &taq[0], 0); - ckrm_unlock_hier(res->core); // block any deletes - break; - - case 3: - ckrm_lock_hier(parent->core); // block any deletes - laq_get_aq_stats(parent, res, &taq[res->my_id]); - laq_print_aq_stats(sfile, &taq[res->my_id], res->my_id); - ckrm_unlock_hier(parent->core); // block any deletes - break; - } - - return rc; -} - -/* - * The network connection is reclassified to this class. Update its shares. 
- * The socket lock is held. - */ -static void laq_change_resclass(void *n, void *old, void *r) -{ - struct ckrm_net_struct *ns = (struct ckrm_net_struct *)n; - struct ckrm_laq_res *res = (struct ckrm_laq_res *)r; - unsigned int aq_ratio[NUM_ACCEPT_QUEUES]; - - if (res->my_depth != 2) - return; - - // a change to my_depth == 3 ie. the accept classes cannot happen. - // there is no target file - if (res->my_depth == 2) { // it is one of the socket classes - ckrm_lock_hier(res->pcore); - // share rule: hold parent resource lock. then self. - // However, since my_depth == 1 is a generic class it is not - // needed here. Self lock is enough. - spin_lock(&res->reslock); - calculate_aq_ratios(res, aq_ratio); - class_lock(res->pcore); - laq_set_aq_value(ns, aq_ratio); - class_unlock(res->pcore); - spin_unlock(&res->reslock); - ckrm_unlock_hier(res->pcore); - } - - return; -} - -struct ckrm_res_ctlr laq_rcbs = { - .res_name = "laq", - .resid = -1, // dynamically assigned - .res_alloc = laq_res_alloc, - .res_free = laq_res_free, - .set_share_values = laq_set_share_values, - .get_share_values = laq_get_share_values, - .get_stats = laq_get_stats, - .change_resclass = laq_change_resclass, - //.res_initcls = laq_res_initcls, //HUBERTUS: unnecessary !! -}; - -int __init init_ckrm_laq_res(void) -{ - struct ckrm_classtype *clstype; - int resid; - - clstype = ckrm_find_classtype_by_name("socketclass"); - if (clstype == NULL) { - printk(KERN_INFO " Unknown ckrm classtype"); - return -ENOENT; - } - - if (my_resid == -1) { - resid = ckrm_register_res_ctlr(clstype, &laq_rcbs); - if (resid >= 0) - my_resid = resid; - printk(KERN_DEBUG "........init_ckrm_listen_aq_res -> %d\n", my_resid); - } - return 0; - -} - -void __exit exit_ckrm_laq_res(void) -{ - ckrm_unregister_res_ctlr(&laq_rcbs); - my_resid = -1; -} - -module_init(init_ckrm_laq_res) - module_exit(exit_ckrm_laq_res) - - MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_mem.c b/kernel/ckrm/ckrm_mem.c index c6c594a96..667ac9c67 100644 --- a/kernel/ckrm/ckrm_mem.c +++ b/kernel/ckrm/ckrm_mem.c @@ -52,7 +52,6 @@ EXPORT_SYMBOL(ckrm_tot_lru_pages); static ckrm_mem_res_t *ckrm_mem_root_class; atomic_t ckrm_mem_real_count = ATOMIC_INIT(0); EXPORT_SYMBOL(ckrm_mem_real_count); -static void ckrm_mem_evaluate_all_pages(void); /* Initialize rescls values * May be called on each rcfs unmount or as part of error recovery @@ -90,7 +89,7 @@ mem_res_initcls_one(void *my_res) res->pg_guar = CKRM_SHARE_DONTCARE; res->pg_limit = CKRM_SHARE_DONTCARE; - res->pg_unused = 0; + res->pg_unused = CKRM_SHARE_DONTCARE; } static void * @@ -180,25 +179,20 @@ mem_res_free(void *my_res) if (!res) return; - res->shares.my_guarantee = 0; - res->shares.my_limit = 0; - res->pg_guar = 0; - res->pg_limit = 0; - res->pg_unused = 0; - parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, ckrm_mem_res_t); + // return child's limit/guarantee to parent node if (parres) { child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0); child_maxlimit_changed_local(parres); } - ckrm_mem_evaluate_all_pages(); - res->core = NULL; - + res->shares.my_guarantee = 0; + res->shares.my_limit = 0; spin_lock(&ckrm_mem_lock); list_del(&res->mcls_list); spin_unlock(&ckrm_mem_lock); mem_class_put(res); + return; } @@ -361,14 +355,8 @@ mem_change_resclass(void *tsk, void *old, void *new) } } - spin_unlock(&mm->peertask_lock); ckrm_mem_evaluate_mm(mm); - /* - printk("chg_cls: task <%s:%d> mm %p oldmm %s newmm %s o %s n %s\n", - task->comm, task->pid, mm, prev_mmcls ? 
prev_mmcls->core->name: - "NULL", mm->memclass ? mm->memclass->core->name : "NULL", - o ? o->core->name: "NULL", n ? n->core->name: "NULL"); - */ + spin_unlock(&mm->peertask_lock); return; } @@ -497,7 +485,7 @@ set_usage_flags(ckrm_mem_res_t *res) guar = (res->pg_guar > 0) ? res->pg_guar : 0; range = res->pg_limit - guar; - if ((tot_usage > (guar + ((110 * range) / 100))) && + if ((tot_usage > (guar + ((120 * range) / 100))) && (res->pg_lent > (guar + ((25 * range) / 100)))) { set_flags_of_children(res, CLS_PARENT_OVER); } @@ -508,10 +496,6 @@ set_usage_flags(ckrm_mem_res_t *res) res->reclaim_flags |= CLS_OVER_100; } else if (cls_usage > (guar + ((3 * range) / 4))) { res->reclaim_flags |= CLS_OVER_75; - } else if (cls_usage > (guar + (range / 2))) { - res->reclaim_flags |= CLS_OVER_50; - } else if (cls_usage > (guar + (range / 4))) { - res->reclaim_flags |= CLS_OVER_25; } else if (cls_usage > guar) { res->reclaim_flags |= CLS_OVER_GUAR; } else { @@ -562,16 +546,15 @@ ckrm_get_reclaim_bits(unsigned int *flags, unsigned int *extract) { int i, j, mask = 0; - if (*flags == 0) { - *extract = 0; + if (*extract == 0 || *flags == 0) { return; } - if (*flags & CLS_SHRINK) { *extract = CLS_SHRINK; *flags = 0; return; } + i = fls(*flags); for (j = i-1; j > 0; j--) { @@ -583,16 +566,12 @@ ckrm_get_reclaim_bits(unsigned int *flags, unsigned int *extract) } void -ckrm_at_limit(ckrm_mem_res_t *cls) +ckrm_near_limit(ckrm_mem_res_t *cls) { -#ifndef AT_LIMIT_SUPPORT -#warning "ckrm_at_limit disabled due to problems with memory hog tests" -#else struct zone *zone; unsigned long now = jiffies; - if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) || - ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) { + if (!cls || ((cls->flags & MEM_NEAR_LIMIT) == MEM_NEAR_LIMIT)) { return; } if ((cls->last_shrink + (10 * HZ)) < now) { // 10 seconds since last ? 
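
The hunk above raises the parent-overuse trigger in set_usage_flags() from 110% to 120% of a class's guarantee-to-limit range, drops the 50% and 25% reclaim steps (CLS_OVER_50 / CLS_OVER_25), and renames the at-limit path to ckrm_near_limit() with the MEM_NEAR_LIMIT flag. A minimal, self-contained sketch of the threshold arithmetic as it reads after this change; the guarantee and limit figures below are invented for illustration and are not taken from the patch:

    /* Illustrative only: how the CLS_PARENT_OVER trigger moves when the
     * multiplier in set_usage_flags() goes from 110 to 120.  guar and
     * limit are assumed example values, not values from this patch.
     */
    #include <stdio.h>

    int main(void)
    {
        int guar  = 1000;               /* stands in for res->pg_guar  (pages) */
        int limit = 2000;               /* stands in for res->pg_limit (pages) */
        int range = limit - guar;       /* as computed in set_usage_flags()    */

        printf("old trigger: tot_usage > %d pages\n", guar + (110 * range) / 100);
        printf("new trigger: tot_usage > %d pages\n", guar + (120 * range) / 100);
        printf("lent-page condition (unchanged): pg_lent > %d pages\n",
               guar + (25 * range) / 100);
        return 0;
    }

With these assumed numbers the parent-over flag now fires 100 pages later (2200 instead of 2100); the remaining ladder of CLS_OVER_100 / CLS_OVER_75 / CLS_OVER_GUAR is otherwise unchanged apart from the removed 50% and 25% steps.
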
@@ -606,17 +585,14 @@ ckrm_at_limit(ckrm_mem_res_t *cls) spin_lock(&ckrm_mem_lock); list_add(&cls->shrink_list, &ckrm_shrink_list); spin_unlock(&ckrm_mem_lock); - cls->flags |= MEM_AT_LIMIT; + cls->flags |= MEM_NEAR_LIMIT; for_each_zone(zone) { wakeup_kswapd(zone); break; // only once is enough } -#endif // AT_LIMIT_SUPPORT } -static int unmapped = 0, changed = 0, unchanged = 0, maxnull = 0, -anovma = 0, fnovma = 0; -static void +static int ckrm_mem_evaluate_page_anon(struct page* page) { ckrm_mem_res_t* pgcls = page_class(page); @@ -624,12 +600,10 @@ ckrm_mem_evaluate_page_anon(struct page* page) struct anon_vma *anon_vma = (struct anon_vma *) page->mapping; struct vm_area_struct *vma; struct mm_struct* mm; - int v = 0; spin_lock(&anon_vma->lock); BUG_ON(list_empty(&anon_vma->head)); list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { - v++; mm = vma->vm_mm; if (!maxshareclass || ckrm_mem_share_compare(maxshareclass, mm->memclass) < 0) { @@ -637,20 +611,15 @@ ckrm_mem_evaluate_page_anon(struct page* page) } } spin_unlock(&anon_vma->lock); - if (!v) - anovma++; - if (!maxshareclass) - maxnull++; if (maxshareclass && (pgcls != maxshareclass)) { ckrm_change_page_class(page, maxshareclass); - changed++; - } else - unchanged++; - return; + return 1; + } + return 0; } -static void +static int ckrm_mem_evaluate_page_file(struct page* page) { ckrm_mem_res_t* pgcls = page_class(page); @@ -660,132 +629,69 @@ ckrm_mem_evaluate_page_file(struct page* page) pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); struct prio_tree_iter iter; struct mm_struct* mm; - int v = 0; if (!mapping) - return; + return 0; if (!spin_trylock(&mapping->i_mmap_lock)) - return; + return 0; while ((vma = vma_prio_tree_next(vma, &mapping->i_mmap, &iter, pgoff, pgoff)) != NULL) { - v++; mm = vma->vm_mm; if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,mm->memclass)<0) maxshareclass = mm->memclass; } spin_unlock(&mapping->i_mmap_lock); - if (!v) - fnovma++; - if (!maxshareclass) - maxnull++; - if (maxshareclass && pgcls != maxshareclass) { ckrm_change_page_class(page, maxshareclass); - changed++; - } else - unchanged++; - return; + return 1; + } + return 0; } -static void +static int ckrm_mem_evaluate_page(struct page* page) { + int changed = 0; + if (page->mapping) { if (PageAnon(page)) - ckrm_mem_evaluate_page_anon(page); + changed = ckrm_mem_evaluate_page_anon(page); else - ckrm_mem_evaluate_page_file(page); - } else - unmapped++; - return; -} - -static void -ckrm_mem_evaluate_all_pages() -{ - struct page *page; - struct zone *zone; - int active = 0, inactive = 0, cleared = 0; - int act_cnt, inact_cnt, idx; - ckrm_mem_res_t *res; - - spin_lock(&ckrm_mem_lock); - list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { - res->tmp_cnt = 0; + changed = ckrm_mem_evaluate_page_file(page); } - spin_unlock(&ckrm_mem_lock); - - for_each_zone(zone) { - spin_lock_irq(&zone->lru_lock); - list_for_each_entry(page, &zone->inactive_list, lru) { - ckrm_mem_evaluate_page(page); - active++; - page_class(page)->tmp_cnt++; - if (!test_bit(PG_ckrm_account, &page->flags)) - cleared++; - } - list_for_each_entry(page, &zone->active_list, lru) { - ckrm_mem_evaluate_page(page); - inactive++; - page_class(page)->tmp_cnt++; - if (!test_bit(PG_ckrm_account, &page->flags)) - cleared++; - } - spin_unlock_irq(&zone->lru_lock); - } - printk(KERN_DEBUG "all_pages: active %d inactive %d cleared %d\n", - active, inactive, cleared); - spin_lock(&ckrm_mem_lock); - list_for_each_entry(res, &ckrm_memclass_list, mcls_list) { - 
act_cnt = 0; inact_cnt = 0; idx = 0; - for_each_zone(zone) { - act_cnt += res->nr_active[idx]; - inact_cnt += res->nr_inactive[idx]; - idx++; - } - printk(KERN_DEBUG "all_pages: %s: tmp_cnt %d; act_cnt %d inact_cnt %d\n", - res->core->name, res->tmp_cnt, act_cnt, inact_cnt); - } - spin_unlock(&ckrm_mem_lock); - - // check all mm's in the system to see which memclass they are attached - // to. - return; + return changed; } -static /*inline*/ int +static inline int class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma, pmd_t* pmdir, unsigned long address, unsigned long end) { - pte_t *pte, *orig_pte; + pte_t* pte; unsigned long pmd_end; if (pmd_none(*pmdir)) return 0; BUG_ON(pmd_bad(*pmdir)); - orig_pte = pte = pte_offset_map(pmdir,address); + pte = pte_offset_map(pmdir,address); pmd_end = (address+PMD_SIZE)&PMD_MASK; if (end>pmd_end) end = pmd_end; do { if (pte_present(*pte)) { - BUG_ON(mm->memclass == NULL); - ckrm_change_page_class(pte_page(*pte), mm->memclass); - // ckrm_mem_evaluate_page(pte_page(*pte)); + ckrm_mem_evaluate_page(pte_page(*pte)); } address += PAGE_SIZE; pte++; } while(address && (addressmemclass != (void *)maxshareclass)) { + if (mm->memclass != (void *)maxshareclass) { + mem_class_get(maxshareclass); if (mm->memclass) mem_class_put(mm->memclass); mm->memclass = maxshareclass; - mem_class_get(maxshareclass); /* Go through all VMA to migrate pages */ down_read(&mm->mmap_sem); @@ -870,6 +776,26 @@ ckrm_mem_evaluate_mm(struct mm_struct* mm) return; } +void +ckrm_mem_evaluate_page_byadd(struct page* page, struct mm_struct* mm) +{ + ckrm_mem_res_t *pgcls = page_class(page); + ckrm_mem_res_t *chgcls = mm->memclass ? mm->memclass : GET_MEM_CLASS(current); + + if (!chgcls || pgcls == chgcls) + return; + + if (!page->mapcount) { + ckrm_change_page_class(page, chgcls); + return; + } + if (ckrm_mem_share_compare(pgcls, chgcls) < 0) { + ckrm_change_page_class(page, chgcls); + return; + } + return; +} + void ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task) { @@ -879,26 +805,10 @@ ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task) list_del_init(&task->mm_peers); } list_add_tail(&task->mm_peers, &mm->tasklist); - spin_unlock(&mm->peertask_lock); if (mm->memclass != GET_MEM_CLASS(task)) ckrm_mem_evaluate_mm(mm); + spin_unlock(&mm->peertask_lock); return; } -int -ckrm_memclass_valid(ckrm_mem_res_t *cls) -{ - ckrm_mem_res_t *tmp; - - spin_lock(&ckrm_mem_lock); - list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) { - if (tmp == cls) { - spin_unlock(&ckrm_mem_lock); - return 1; - } - } - spin_unlock(&ckrm_mem_lock); - return 0; -} - MODULE_LICENSE("GPL"); diff --git a/kernel/ckrm/ckrm_sockc.c b/kernel/ckrm/ckrm_sockc.c index 8ccadfa39..a8a3b4bd5 100644 --- a/kernel/ckrm/ckrm_sockc.c +++ b/kernel/ckrm/ckrm_sockc.c @@ -59,7 +59,7 @@ struct ckrm_sock_class { static struct ckrm_sock_class sockclass_dflt_class = { }; -#define SOCKET_CLASS_TYPE_NAME "socketclass" +#define SOCKET_CLASS_TYPE_NAME "socket_class" const char *dflt_sockclass_name = SOCKET_CLASS_TYPE_NAME; @@ -464,16 +464,6 @@ sock_forced_reclassify(struct ckrm_core_class *target, const char *options) if (!options) return -EINVAL; - if (target == NULL) { - unsigned long id = simple_strtol(options,NULL,0); - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (id != 0) - return -EINVAL; - printk(KERN_DEBUG "sock_class: reclassify all not net implemented\n"); - return 0; - } - while ((p = strsep((char **)&options, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; int token; @@ 
-553,7 +543,7 @@ static void sock_reclassify_class(struct ckrm_sock_class *cls) void __init ckrm_meta_init_sockclass(void) { - printk(KERN_DEBUG "...... Initializing ClassType<%s> ........\n", + printk("...... Initializing ClassType<%s> ........\n", CT_sockclass.name); // intialize the default class ckrm_init_core_class(&CT_sockclass, class_core(&sockclass_dflt_class), diff --git a/kernel/ckrm/ckrm_numtasks.c b/kernel/ckrm/ckrm_tasks.c similarity index 90% rename from kernel/ckrm/ckrm_numtasks.c rename to kernel/ckrm/ckrm_tasks.c index 61517aee0..ee539216e 100644 --- a/kernel/ckrm/ckrm_numtasks.c +++ b/kernel/ckrm/ckrm_tasks.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -191,11 +190,6 @@ static void numtasks_put_ref_local(void *arg) res = ckrm_get_res_class(core, resid, ckrm_numtasks_t); if (res == NULL) return; - if (unlikely(atomic_read(&res->cnt_cur_alloc) == 0)) { - printk(KERN_WARNING "numtasks_put_ref: Trying to decrement " - "counter below 0\n"); - return; - } atomic_dec(&res->cnt_cur_alloc); if (atomic_read(&res->cnt_borrowed) > 0) { atomic_dec(&res->cnt_borrowed); @@ -249,13 +243,10 @@ static void numtasks_res_free(void *my_res) parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t); - if (unlikely(atomic_read(&res->cnt_cur_alloc) < 0)) { - printk(KERN_WARNING "numtasks_res: counter below 0\n"); - } - if (unlikely(atomic_read(&res->cnt_cur_alloc) > 0 || - atomic_read(&res->cnt_borrowed) > 0)) { - printk(KERN_WARNING "numtasks_res_free: resource still " - "alloc'd %p\n", res); + if (unlikely(atomic_read(&res->cnt_cur_alloc) != 0 || + atomic_read(&res->cnt_borrowed))) { + printk(KERN_ERR + "numtasks_res_free: resource still alloc'd %p\n", res); if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) { for (i = 0; i < borrowed; i++) { numtasks_put_ref_local(parres->core); @@ -307,9 +298,9 @@ recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres) if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_guarantee = CKRM_SHARE_DONTCARE; } else if (par->total_guarantee) { - u64 temp = (u64) self->my_guarantee * parres->cnt_guarantee; - do_div(temp, par->total_guarantee); - res->cnt_guarantee = (int) temp; + res->cnt_guarantee = + (self->my_guarantee * parres->cnt_guarantee) + / par->total_guarantee; } else { res->cnt_guarantee = 0; } @@ -317,9 +308,8 @@ recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres) if (parres->cnt_limit == CKRM_SHARE_DONTCARE) { res->cnt_limit = CKRM_SHARE_DONTCARE; } else if (par->max_limit) { - u64 temp = (u64) self->my_limit * parres->cnt_limit; - do_div(temp, par->max_limit); - res->cnt_limit = (int) temp; + res->cnt_limit = (self->my_limit * parres->cnt_limit) + / par->max_limit; } else { res->cnt_limit = 0; } @@ -328,9 +318,9 @@ recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres) if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) { res->cnt_unused = CKRM_SHARE_DONTCARE; } else if (self->total_guarantee) { - u64 temp = (u64) self->unused_guarantee * res->cnt_guarantee; - do_div(temp, self->total_guarantee); - res->cnt_unused = (int) temp; + res->cnt_unused = (self->unused_guarantee * + res->cnt_guarantee) / + self->total_guarantee; } else { res->cnt_unused = 0; } @@ -376,9 +366,9 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new) if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) { parres->cnt_unused = CKRM_SHARE_DONTCARE; } else if (par->total_guarantee) { - u64 temp = (u64) par->unused_guarantee * parres->cnt_guarantee; - 
do_div(temp, par->total_guarantee); - parres->cnt_unused = (int) temp; + parres->cnt_unused = (par->unused_guarantee * + parres->cnt_guarantee) / + par->total_guarantee; } else { parres->cnt_unused = 0; } @@ -425,11 +415,10 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile) #ifdef NUMTASKS_DEBUG seq_printf(sfile, "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d " - "cnt_unused %d, unused_guarantee %d, cur_max_limit %d\n", + "unused_guarantee %d, cur_max_limit %d\n", atomic_read(&res->cnt_cur_alloc), atomic_read(&res->cnt_borrowed), res->cnt_guarantee, - res->cnt_limit, res->cnt_unused, - res->shares.unused_guarantee, + res->cnt_limit, res->shares.unused_guarantee, res->shares.cur_max_limit); #endif @@ -453,7 +442,7 @@ static int numtasks_set_config(void *my_res, const char *cfgstr) if (!res) return -EINVAL; - printk(KERN_DEBUG "numtasks config='%s'\n", cfgstr); + printk("numtasks config='%s'\n", cfgstr); return 0; } @@ -505,7 +494,7 @@ int __init init_ckrm_numtasks_res(void) if (resid == -1) { resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs); - printk(KERN_DEBUG "........init_ckrm_numtasks_res -> %d\n", resid); + printk("........init_ckrm_numtasks_res -> %d\n", resid); if (resid != -1) { ckrm_numtasks_register(numtasks_get_ref_local, numtasks_put_ref_local); diff --git a/kernel/ckrm/ckrm_numtasks_stub.c b/kernel/ckrm/ckrm_tasks_stub.c similarity index 100% rename from kernel/ckrm/ckrm_numtasks_stub.c rename to kernel/ckrm/ckrm_tasks_stub.c diff --git a/kernel/ckrm/ckrm_tc.c b/kernel/ckrm/ckrm_tc.c index af95644f2..316266494 100644 --- a/kernel/ckrm/ckrm_tc.c +++ b/kernel/ckrm/ckrm_tc.c @@ -318,7 +318,7 @@ static void cb_taskclass_fork(struct task_struct *tsk) ckrm_task_unlock(tsk->parent); } if (!list_empty(&tsk->taskclass_link)) - printk(KERN_WARNING "BUG in cb_fork.. tsk (%s:%d> already linked\n", + printk("BUG in cb_fork.. tsk (%s:%d> already linked\n", tsk->comm, tsk->pid); ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_FORK); @@ -397,7 +397,7 @@ DECLARE_MUTEX(async_serializer); // serialize all async functions * We use a hybrid by comparing ratio nr_threads/pidmax */ -static int ckrm_reclassify_all_tasks(void) +static void ckrm_reclassify_all_tasks(void) { extern int pid_max; @@ -407,11 +407,6 @@ static int ckrm_reclassify_all_tasks(void) int ratio; int use_bitmap; - /* Check permissions */ - if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE))) { - return -EPERM; - } - ratio = curpidmax / nr_threads; if (curpidmax <= PID_MAX_DEFAULT) { use_bitmap = 1; @@ -422,7 +417,6 @@ static int ckrm_reclassify_all_tasks(void) ce_protect(&CT_taskclass); retry: - if (use_bitmap == 0) { // go through it in one walk read_lock(&tasklist_lock); @@ -496,13 +490,40 @@ static int ckrm_reclassify_all_tasks(void) } else { read_unlock(&tasklist_lock); } - pos++; } } } ce_release(&CT_taskclass); - return 0; +} + +int ckrm_reclassify(int pid) +{ + struct task_struct *tsk; + int rc = 0; + + down(&async_serializer); // protect again race condition + if (pid < 0) { + // do we want to treat this as process group .. 
should YES ToDo + rc = -EINVAL; + } else if (pid == 0) { + // reclassify all tasks in the system + ckrm_reclassify_all_tasks(); + } else { + // reclassify particular pid + read_lock(&tasklist_lock); + if ((tsk = find_task_by_pid(pid)) != NULL) { + get_task_struct(tsk); + read_unlock(&tasklist_lock); + CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_RECLASSIFY, tsk); + put_task_struct(tsk); + } else { + read_unlock(&tasklist_lock); + rc = -EINVAL; + } + } + up(&async_serializer); + return rc; } /* @@ -525,7 +546,7 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls) atomic_read(&cls->core.hnode.parent->refcnt)); // If no CE registered for this classtype, following will be needed // repeatedly; - ce_regd = atomic_read(&class_core(cls)->classtype->ce_regd); + ce_regd = class_core(cls)->classtype->ce_regd; cnode = &(class_core(cls)->hnode); parcls = class_type(ckrm_task_class_t, cnode->parent); @@ -574,21 +595,20 @@ static void ckrm_reclassify_class_tasks(struct ckrm_task_class *cls) } /* - * Change the core class of the given task + * Change the core class of the given task. */ int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) { struct task_struct *tsk; - if (cls && !ckrm_validate_and_grab_core(class_core(cls))) + if (!ckrm_validate_and_grab_core(class_core(cls))) return -EINVAL; read_lock(&tasklist_lock); if ((tsk = find_task_by_pid(pid)) == NULL) { read_unlock(&tasklist_lock); - if (cls) - ckrm_core_drop(class_core(cls)); + ckrm_core_drop(class_core(cls)); return -EINVAL; } get_task_struct(tsk); @@ -597,21 +617,19 @@ int ckrm_forced_reclassify_pid(pid_t pid, struct ckrm_task_class *cls) /* Check permissions */ if ((!capable(CAP_SYS_NICE)) && (!capable(CAP_SYS_RESOURCE)) && (current->user != tsk->user)) { - if (cls) - ckrm_core_drop(class_core(cls)); + ckrm_core_drop(class_core(cls)); put_task_struct(tsk); return -EPERM; } - ce_protect(&CT_taskclass); - if (cls == NULL) - CE_CLASSIFY_TASK(CKRM_EVENT_RECLASSIFY,tsk); - else - ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL); + down(&async_serializer); // protect again race condition + ce_protect(&CT_taskclass); + ckrm_set_taskclass(tsk, cls, NULL, CKRM_EVENT_MANUAL); ce_release(&CT_taskclass); put_task_struct(tsk); + up(&async_serializer); return 0; } @@ -669,7 +687,7 @@ static int ckrm_free_task_class(struct ckrm_core_class *core) void __init ckrm_meta_init_taskclass(void) { - printk(KERN_DEBUG "...... Initializing ClassType<%s> ........\n", + printk("...... Initializing ClassType<%s> ........\n", CT_taskclass.name); // intialize the default class ckrm_init_core_class(&CT_taskclass, class_core(&taskclass_dflt_class), @@ -703,25 +721,16 @@ static int tc_forced_reclassify(struct ckrm_core_class *target, const char *obj) pid_t pid; int rc = -EINVAL; - pid = (pid_t) simple_strtol(obj, NULL, 0); - - down(&async_serializer); // protect again race condition with reclassify_class - if (pid < 0) { - // do we want to treat this as process group .. TBD - rc = -EINVAL; - } else if (pid == 0) { - rc = (target == NULL) ? 
ckrm_reclassify_all_tasks() : -EINVAL; - } else { - struct ckrm_task_class *cls = NULL; - if (target) - cls = class_type(ckrm_task_class_t,target); - rc = ckrm_forced_reclassify_pid(pid,cls); + pid = (pid_t) simple_strtoul(obj, NULL, 10); + if (pid > 0) { + rc = ckrm_forced_reclassify_pid(pid, + class_type(ckrm_task_class_t, + target)); } - up(&async_serializer); return rc; } -#if 0 +#if 1 /****************************************************************************** * Debugging Task Classes: Utility functions @@ -737,7 +746,7 @@ void check_tasklist_sanity(struct ckrm_task_class *cls) class_lock(core); if (list_empty(&core->objlist)) { class_lock(core); - printk(KERN_DEBUG "check_tasklist_sanity: class %s empty list\n", + printk("check_tasklist_sanity: class %s empty list\n", core->name); return; } @@ -746,14 +755,14 @@ void check_tasklist_sanity(struct ckrm_task_class *cls) container_of(lh1, struct task_struct, taskclass_link); if (count++ > 20000) { - printk(KERN_WARNING "list is CORRUPTED\n"); + printk("list is CORRUPTED\n"); break; } if (tsk->taskclass != cls) { const char *tclsname; tclsname = (tsk->taskclass) ? class_core(tsk->taskclass)->name:"NULL"; - printk(KERN_WARNING "sanity: task %s:%d has ckrm_core " + printk("sanity: task %s:%d has ckrm_core " "|%s| but in list |%s|\n", tsk->comm, tsk->pid, tclsname, core->name); } @@ -767,7 +776,7 @@ void ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) struct task_struct *proc, *thread; int count = 0; - printk(KERN_DEBUG "Analyze Error <%s> %d\n", + printk("Analyze Error <%s> %d\n", class_core(tskcls)->name, atomic_read(&(class_core(tskcls)->refcnt))); @@ -779,7 +788,7 @@ void ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) const char *tclsname; tclsname = (thread->taskclass) ? 
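/*
 * Summary of the reclassification entry points as they stand after the
 * hunks above:
 *
 *   ckrm_reclassify(pid), new helper, serialized by async_serializer:
 *     pid  < 0  -> -EINVAL (process-group handling left as a TODO)
 *     pid == 0  -> ckrm_reclassify_all_tasks()
 *     pid  > 0  -> CE_CLASSIFY_TASK_PROTECT(CKRM_EVENT_RECLASSIFY, tsk)
 *
 *   tc_forced_reclassify(target, obj), the rcfs write handler, now
 *     parses obj with simple_strtoul() and only accepts pid > 0,
 *     forwarding to ckrm_forced_reclassify_pid(pid, class of target);
 *     the old pid == 0 / NULL-target path is gone from this handler.
 *
 * ckrm_forced_reclassify_pid() now requires a non-NULL class (its
 * cls == NULL branches are removed) and takes async_serializer itself.
 * The CAP_SYS_NICE/CAP_SYS_RESOURCE check formerly done in
 * ckrm_reclassify_all_tasks() is dropped; only the per-pid path in
 * ckrm_forced_reclassify_pid() still checks capabilities.
 */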
class_core(thread->taskclass)->name :"NULL"; - printk(KERN_DEBUG "%d thread=<%s:%d> -> <%s> <%lx>\n", count, + printk("%d thread=<%s:%d> -> <%s> <%lx>\n", count, thread->comm, thread->pid, tclsname, thread->flags & PF_EXITING); } @@ -787,7 +796,7 @@ void ckrm_debug_free_task_class(struct ckrm_task_class *tskcls) class_unlock(class_core(tskcls)); read_unlock(&tasklist_lock); - printk(KERN_DEBUG "End Analyze Error <%s> %d\n", + printk("End Analyze Error <%s> %d\n", class_core(tskcls)->name, atomic_read(&(class_core(tskcls)->refcnt))); } diff --git a/kernel/ckrm/ckrmutils.c b/kernel/ckrm/ckrmutils.c index d54e7b563..c56a2ae1c 100644 --- a/kernel/ckrm/ckrmutils.c +++ b/kernel/ckrm/ckrmutils.c @@ -96,6 +96,7 @@ void child_maxlimit_changed(struct ckrm_shares *parent, int new_limit) return; } + /* * Caller is responsible for holding any lock to protect the data * structures passed to this function @@ -110,18 +111,26 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, // Check total_guarantee for correctness if (new->total_guarantee <= CKRM_SHARE_DONTCARE) { + printk(KERN_ERR "new->total_guarantee %d <= CKRM_SHARE_DONTCARE\n", + new->total_guarantee); goto set_share_err; } else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) { ; // do nothing } else if (cur_usage_guar > new->total_guarantee) { + printk(KERN_ERR "cur_usage_guar %d > new->total_guarantee %d\n", + cur_usage_guar,new->total_guarantee); goto set_share_err; } // Check max_limit for correctness if (new->max_limit <= CKRM_SHARE_DONTCARE) { + printk(KERN_ERR "new->max_limit %d <= CKRM_SHARE_DONTCARE\n", + new->max_limit); goto set_share_err; } else if (new->max_limit == CKRM_SHARE_UNCHANGED) { ; // do nothing } else if (cur->cur_max_limit > new->max_limit) { + printk(KERN_ERR "cur->cur_max_limit %d > new->max_limit %d\n", + cur->cur_max_limit, new->max_limit); goto set_share_err; } // Check my_guarantee for correctness @@ -130,6 +139,8 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, } else if (new->my_guarantee == CKRM_SHARE_DONTCARE) { ; // do nothing } else if (par && increase_by > par->unused_guarantee) { + printk(KERN_ERR "increase_by %d > par->unused_guarantee %d\n", + increase_by, par->unused_guarantee); goto set_share_err; } // Check my_limit for correctness @@ -139,6 +150,8 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, ; // do nothing } else if (par && new->my_limit > par->max_limit) { // I can't get more limit than my parent's limit + printk(KERN_ERR "new->my_limit %d > par->max_limit %d\n", + new->my_limit,par->max_limit); goto set_share_err; } @@ -152,6 +165,8 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, ; // do nothing earlier setting would've // taken care of it } else if (new->my_guarantee > cur->my_limit) { + printk(KERN_ERR "new->my_guarantee %d > cur->my_limit %d\n", + new->my_guarantee,par->max_limit); goto set_share_err; } } else { // new->my_limit has a valid value @@ -159,9 +174,13 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur, ; // do nothing } else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) { if (cur->my_guarantee > new->my_limit) { + printk(KERN_ERR "cur->my_guarantee %d > new->my_limit %d\n", + cur->my_guarantee,new->my_limit); goto set_share_err; } } else if (new->my_guarantee > new->my_limit) { + printk(KERN_ERR "new->my_guarantee %d > new->my_limit %d\n", + new->my_guarantee,new->my_limit); goto set_share_err; } } diff --git a/kernel/ckrm/rbce/bitvector.h b/kernel/ckrm/rbce/bitvector.h index 098cc2327..4f53f9847 100644 --- 
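/*
 * The KERN_ERR diagnostics added to set_shares() above report which
 * validation step rejected the new share values.  One apparent slip
 * worth flagging: in the "new->my_guarantee > cur->my_limit" branch the
 * format string names cur->my_limit, but the argument actually passed
 * is par->max_limit, so that message would print the wrong value.
 */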
a/kernel/ckrm/rbce/bitvector.h +++ b/kernel/ckrm/rbce/bitvector.h @@ -136,12 +136,12 @@ inline static void bitvector_print(int flag, bitvector_t * vec) return; } if (vec == NULL) { - printk(KERN_DEBUG "v<0>-NULL\n"); + printk("v<0>-NULL\n"); return; } - printk(KERN_DEBUG "v<%d>-", sz = vec->size); + printk("v<%d>-", sz = vec->size); for (i = 0; i < sz; i++) { - printk(KERN_DEBUG "%c", test_bit(i, vec->bits) ? '1' : '0'); + printk("%c", test_bit(i, vec->bits) ? '1' : '0'); } return; } diff --git a/kernel/ckrm/rbce/info.h b/kernel/ckrm/rbce/info.h index 7263b22e1..3bc13b519 100644 --- a/kernel/ckrm/rbce/info.h +++ b/kernel/ckrm/rbce/info.h @@ -1,6 +1,12 @@ static char *info = "1. Magic files\n" "\t|--rbce_info - read only file detailing how to setup and use RBCE.\n\n" + "\t|--rbce_reclassify - contains nothing. Writing a pid to it" + "reclassifies\n" + "\tthe given task according to the current set of rules.\n" + "\tWriting 0 to it reclassifies all tasks in the system according to the \n" + "\tsurrent set of rules. This is typically done by the user/sysadmin \n" + "\tafter changing/creating rules. \n\n" "\t|--rbce_state - determines whether RBCE is currently active" " or inactive.\n" "\tWriting 1 (0) activates (deactivates) the CE. Reading the file\n" diff --git a/kernel/ckrm/rbce/rbce_fs.c b/kernel/ckrm/rbce/rbce_fs.c index 187e7cdba..bb92fb94c 100644 --- a/kernel/ckrm/rbce/rbce_fs.c +++ b/kernel/ckrm/rbce/rbce_fs.c @@ -1,26 +1,6 @@ -/* RCFS API for Rule-based Classification Engine (RBCE) and - * Consolidated RBCE module code (combined) - * - * Copyright (C) Hubertus Franke, IBM Corp. 2003 - * (C) Chandra Seetharaman, IBM Corp. 2003 - * (C) Vivek Kashyap, IBM Corp. 2004 - * - * Module for loading of classification policies and providing - * a user API for Class-based Kernel Resource Management (CKRM) - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * +/* + * This file is released under the GPL. 
*/ - #include #include #include @@ -74,6 +54,12 @@ rbce_write(struct file *file, const char __user * buf, if (*ptr == '\n') { *ptr = '\0'; } +#if 0 + if (!strcmp(file->f_dentry->d_name.name, "rbce_reclassify")) { + pid = simple_strtol(line, NULL, 0); + rc = reclassify_pid(pid); + } else +#endif if (!strcmp(file->f_dentry->d_name.name, "rbce_tag")) { pid = simple_strtol(line, &ptr, 0); rc = set_tasktag(pid, ptr + 1); // expected syntax "pid tag" @@ -101,7 +87,8 @@ static int rbce_show(struct seq_file *seq, void *offset) char result[256]; memset(result, 0, 256); - if (!strcmp(file->f_dentry->d_name.name, "rbce_tag")) { + if (!strcmp(file->f_dentry->d_name.name, "rbce_reclassify") || + !strcmp(file->f_dentry->d_name.name, "rbce_tag")) { return -EPERM; } if (!strcmp(file->f_dentry->d_name.name, "rbce_state")) { @@ -130,7 +117,8 @@ static int rbce_close(struct inode *ino, struct file *file) { const char *name = file->f_dentry->d_name.name; - if (strcmp(name, "rbce_state") && + if (strcmp(name, "rbce_reclassify") && + strcmp(name, "rbce_state") && strcmp(name, "rbce_tag") && strcmp(name, "rbce_info")) { if (!rule_exists(name)) { @@ -304,9 +292,11 @@ rbce_create(struct inode *dir, struct dentry *dentry, struct dentry *pd = list_entry(dir->i_dentry.next, struct dentry, d_alias); - // Under /ce only "rbce_state", "rbce_tag" and "rbce_info" are allowed + // Under /ce only "rbce_reclassify", "rbce_state", "rbce_tag" and + // "rbce_info" are allowed if (!strcmp(pd->d_name.name, "ce")) { - if (strcmp(dentry->d_name.name, "rbce_state") && + if (strcmp(dentry->d_name.name, "rbce_reclassify") && + strcmp(dentry->d_name.name, "rbce_state") && strcmp(dentry->d_name.name, "rbce_tag") && strcmp(dentry->d_name.name, "rbce_info")) { return -EINVAL; @@ -329,7 +319,7 @@ rbce_symlink(struct inode *dir, struct dentry *dentry, const char *symname) /******************************* Magic files ********************/ -#define RBCE_NR_MAGF 5 +#define RBCE_NR_MAGF 6 struct rcfs_magf rbce_magf_files[RBCE_NR_MAGF] = { { .name = "ce", @@ -351,6 +341,11 @@ struct rcfs_magf rbce_magf_files[RBCE_NR_MAGF] = { .mode = RCFS_DEFAULT_FILE_MODE, .i_fop = &rbce_file_operations, }, + { + .name = "rbce_reclassify", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_fop = &rbce_file_operations, + }, { .name = "rules", .mode = (RCFS_DEFAULT_DIR_MODE | S_IWUSR), @@ -422,7 +417,7 @@ static struct inode_operations rbce_dir_inode_operations = { static void rbce_put_super(struct super_block *sb) { module_put(THIS_MODULE); - printk(KERN_DEBUG "rbce_put_super called\n"); + printk("rbce_put_super called\n"); } static struct super_operations rbce_ops = { diff --git a/kernel/ckrm/rbce/rbcemod.c b/kernel/ckrm/rbce/rbcemod.c index 555ba0a4e..fa8d2c470 100644 --- a/kernel/ckrm/rbce/rbcemod.c +++ b/kernel/ckrm/rbce/rbcemod.c @@ -1,5 +1,4 @@ -/* Rule-based Classification Engine (RBCE) and - * Consolidated RBCE module code (combined) +/* Rule-based Classification Engine (RBCE) module * * Copyright (C) Hubertus Franke, IBM Corp. 2003 * (C) Chandra Seetharaman, IBM Corp. 2003 @@ -15,10 +14,6 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
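/*
 * Plumbing for the new rbce_reclassify magic file, as wired up above:
 * info.h documents it (writing a pid reclassifies that task against the
 * current rules; writing 0 reclassifies every task, typically after
 * rule changes), rbce_magf_files[] grows from 5 to 6 entries so the
 * file is created under the ce directory, and rbce_show(), rbce_close()
 * and rbce_create() treat it like the other magic files.  The write
 * handler that would call reclassify_pid() is still compiled out with
 * "#if 0" in rbce_write(), so writes currently fall through to the
 * rbce_tag/rule handling.
 */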
- * */ /* Changes @@ -54,7 +49,7 @@ #include #include #include "bitvector.h" -#include +#include "rbce.h" #define DEBUG @@ -179,8 +174,6 @@ int termop_2_vecidx[RBCE_RULE_INVALID] = { #define POLICY_ACTION_REDO_ALL 0x02 // Recompute all rule flags #define POLICY_ACTION_PACK_TERMS 0x04 // Time to pack the terms -const int use_persistent_state = 1; - struct ckrm_eng_callback ckrm_ecbs; // Term vector state @@ -254,7 +247,7 @@ int rbcedebug = 0x00; #define DBG_RULE ( 0x20 ) #define DBG_POLICY ( 0x40 ) -#define DPRINTK(x, y...) if (rbcedebug & (x)) printk(KERN_DEBUG y) +#define DPRINTK(x, y...) if (rbcedebug & (x)) printk(y) // debugging selectively enabled through /proc/sys/debug/rbce static void print_context_vectors(void) @@ -265,9 +258,9 @@ static void print_context_vectors(void) return; } for (i = 0; i < NUM_TERM_MASK_VECTOR; i++) { - printk(KERN_DEBUG "%d: ", i); + printk("%d: ", i); bitvector_print(DBG_OPTIMIZATION, gl_mask_vecs[i]); - printk(KERN_DEBUG "\n"); + printk("\n"); } } #else @@ -506,7 +499,7 @@ rbce_class_deletecb(const char *classname, void *classobj, int classtype) } notify_class_action(cls, 0); cls->classobj = NULL; - list_for_each_entry(pos, &rules_list[classtype], link) { + list_for_each_entry(pos, &rules_list[cls->classtype], link) { rule = (struct rbce_rule *)pos; if (rule->target_class) { if (!strcmp @@ -517,6 +510,7 @@ rbce_class_deletecb(const char *classname, void *classobj, int classtype) } } } + put_class(cls); if ((cls = find_class_name(classname)) != NULL) { printk(KERN_ERR "rbce ERROR: class %s exists in rbce after " @@ -1343,49 +1337,65 @@ int rule_exists(const char *rname) static struct rbce_private_data *create_private_data(struct rbce_private_data *, int); -static inline -void reset_evaluation(struct rbce_private_data *pdata,int termflag) +int rbce_ckrm_reclassify(int pid) { - /* reset TAG ruleterm evaluation results to pick up - * on next classification event - */ - if (use_persistent_state && gl_mask_vecs[termflag]) { - bitvector_and_not( pdata->eval, pdata->eval, - gl_mask_vecs[termflag] ); - bitvector_and_not( pdata->true, pdata->true, - gl_mask_vecs[termflag] ); - } + printk("ckrm_reclassify_pid ignored\n"); + return -EINVAL; +} + +int reclassify_pid(int pid) +{ + struct task_struct *tsk; + + // FIXME: Need to treat -pid as process group + if (pid < 0) { + return -EINVAL; + } + + if (pid == 0) { + rbce_ckrm_reclassify(0); // just reclassify all tasks. 
+ } + // if pid is +ve take control of the task, start evaluating it + if ((tsk = find_task_by_pid(pid)) == NULL) { + return -EINVAL; + } + + if (unlikely(!RBCE_DATA(tsk))) { + RBCE_DATAP(tsk) = create_private_data(NULL, 0); + if (!RBCE_DATA(tsk)) { + return -ENOMEM; + } + } + RBCE_DATA(tsk)->evaluate = 1; + rbce_ckrm_reclassify(pid); + return 0; } - + int set_tasktag(int pid, char *tag) { char *tp; - int rc = 0; struct task_struct *tsk; struct rbce_private_data *pdata; - int len; if (!tag) { return -EINVAL; } - len = strlen(tag) + 1; - tp = kmalloc(len, GFP_ATOMIC); - if (!tp) { - return -ENOMEM; - } - strncpy(tp,tag,len); - read_lock(&tasklist_lock); if ((tsk = find_task_by_pid(pid)) == NULL) { - rc = -EINVAL; - goto out; + return -EINVAL; + } + + tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC); + + if (!tp) { + return -ENOMEM; } if (unlikely(!RBCE_DATA(tsk))) { RBCE_DATAP(tsk) = create_private_data(NULL, 0); if (!RBCE_DATA(tsk)) { - rc = -ENOMEM; - goto out; + kfree(tp); + return -ENOMEM; } } pdata = RBCE_DATA(tsk); @@ -1393,13 +1403,10 @@ int set_tasktag(int pid, char *tag) kfree(pdata->app_tag); } pdata->app_tag = tp; - reset_evaluation(pdata,RBCE_TERMFLAG_TAG); - - out: - read_unlock(&tasklist_lock); - if (rc != 0) - kfree(tp); - return rc; + strcpy(pdata->app_tag, tag); + rbce_ckrm_reclassify(pid); + + return 0; } /*====================== Classification Functions =======================*/ @@ -1816,7 +1823,7 @@ static inline int valid_pdata(struct rbce_private_data *pdata) } } spin_unlock(&pdata_lock); - printk(KERN_WARNING "INVALID/CORRUPT PDATA %p\n", pdata); + printk("INVALID/CORRUPT PDATA %p\n", pdata); return 0; } @@ -1829,7 +1836,7 @@ static inline void store_pdata(struct rbce_private_data *pdata) while (i < MAX_PDATA) { if (pdata_arr[pdata_next] == NULL) { - printk(KERN_DEBUG "storing %p at %d, count %d\n", pdata, + printk("storing %p at %d, count %d\n", pdata, pdata_next, pdata_count); pdata_arr[pdata_next++] = pdata; if (pdata_next == MAX_PDATA) { @@ -1844,7 +1851,7 @@ static inline void store_pdata(struct rbce_private_data *pdata) spin_unlock(&pdata_lock); } if (i == MAX_PDATA) { - printk(KERN_DEBUG "PDATA BUFFER FULL pdata_count %d pdata %p\n", + printk("PDATA BUFFER FULL pdata_count %d pdata %p\n", pdata_count, pdata); } } @@ -1856,7 +1863,7 @@ static inline void unstore_pdata(struct rbce_private_data *pdata) spin_lock(&pdata_lock); for (i = 0; i < MAX_PDATA; i++) { if (pdata_arr[i] == pdata) { - printk(KERN_DEBUG "unstoring %p at %d, count %d\n", pdata, + printk("unstoring %p at %d, count %d\n", pdata, i, pdata_count); pdata_arr[i] = NULL; pdata_count--; @@ -1866,7 +1873,7 @@ static inline void unstore_pdata(struct rbce_private_data *pdata) } spin_unlock(&pdata_lock); if (i == MAX_PDATA) { - printk(KERN_DEBUG "pdata %p not found in the stored array\n", + printk("pdata %p not found in the stored array\n", pdata); } } @@ -1881,6 +1888,8 @@ static inline void unstore_pdata(struct rbce_private_data *pdata) #endif // PDATA_DEBUG +const int use_persistent_state = 1; + /* * Allocate and initialize a rbce_private_data data structure. 
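/*
 * Note on the path added above: rbce_ckrm_reclassify() is only a stub
 * here (it prints "ckrm_reclassify_pid ignored" and returns -EINVAL),
 * so reclassify_pid() effectively just ensures the task has RBCE
 * private data and sets its evaluate flag; the pid == 0 branch falls
 * through into the per-pid lookup rather than returning.  The reworked
 * set_tasktag() also drops the tasklist_lock that the removed version
 * held around find_task_by_pid().
 */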
* @@ -1929,7 +1938,7 @@ static struct rbce_private_data *create_private_data(struct rbce_private_data // pdata->evaluate = src->evaluate; // if(src->app_tag) { // int len = strlen(src->app_tag)+1; - // printk(KERN_DEBUG "CREATE_PRIVATE: apptag %s len %d\n", + // printk("CREATE_PRIVATE: apptag %s len %d\n", // src->app_tag,len); // pdata->app_tag = kmalloc(len, GFP_ATOMIC); // if (pdata->app_tag) { @@ -2252,7 +2261,6 @@ void *rbce_tc_classify(enum ckrm_event event, ...) va_list args; void *cls = NULL; struct task_struct *tsk; - struct rbce_private_data *pdata; va_start(args, event); tsk = va_arg(args, struct task_struct *); @@ -2262,7 +2270,7 @@ void *rbce_tc_classify(enum ckrm_event event, ...) * [ CKRM_LATCHABLE_EVENTS .. CKRM_NONLATCHABLE_EVENTS ) */ - // printk(KERN_DEBUG "tc_classify %p:%d:%s '%s'\n",tsk,tsk->pid, + // printk("tc_classify %p:%d:%s '%s'\n",tsk,tsk->pid, // tsk->comm,event_names[event]); switch (event) { @@ -2307,14 +2315,11 @@ void *rbce_tc_classify(enum ckrm_event event, ...) break; case CKRM_EVENT_RECLASSIFY: - if ((pdata = (RBCE_DATA(tsk)))) { - pdata->evaluate = 1; - } cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype); break; } - // printk(KERN_DEBUG "tc_classify %p:%d:%s '%s' ==> %p\n",tsk,tsk->pid, + // printk("tc_classify %p:%d:%s '%s' ==> %p\n",tsk,tsk->pid, // tsk->comm,event_names[event],cls); return cls; @@ -2323,7 +2328,7 @@ void *rbce_tc_classify(enum ckrm_event event, ...) #ifndef RBCE_EXTENSION static void rbce_tc_notify(int event, void *core, struct task_struct *tsk) { - printk(KERN_DEBUG "tc_manual %p:%d:%s '%s'\n", tsk, tsk->pid, tsk->comm, + printk("tc_manual %p:%d:%s '%s'\n", tsk, tsk->pid, tsk->comm, event_names[event]); if (event != CKRM_EVENT_MANUAL) return; @@ -2402,40 +2407,38 @@ struct ce_regtable_struct ce_regtable[] = { {NULL} }; -static void unregister_classtype_engines(void) - { +static int register_classtype_engines(void) +{ int rc; struct ce_regtable_struct *ceptr = ce_regtable; while (ceptr->name) { - if (*ceptr->clsvar >= 0) { - printk(KERN_DEBUG "ce unregister with <%s>\n",ceptr->name); - while ((rc = ckrm_unregister_engine(ceptr->name)) == -EAGAIN) - ; - printk(KERN_DEBUG "ce unregister with <%s> rc=%d\n",ceptr->name,rc); - *ceptr->clsvar = -1; - } + rc = ckrm_register_engine(ceptr->name, ceptr->cbs); + printk("ce register with <%s> typeId=%d\n", ceptr->name, rc); + if ((rc < 0) && (rc != -ENOENT)) + return (rc); + if (rc != -ENOENT) + *ceptr->clsvar = rc; ceptr++; } - } + return 0; +} -static int register_classtype_engines(void) +static void unregister_classtype_engines(void) { int rc; struct ce_regtable_struct *ceptr = ce_regtable; while (ceptr->name) { - rc = ckrm_register_engine(ceptr->name, ceptr->cbs); - printk(KERN_DEBUG "ce register with <%s> typeId=%d\n",ceptr->name,rc); - if ((rc < 0) && (rc != -ENOENT)) { - unregister_classtype_engines(); - return (rc); + if (*ceptr->clsvar >= 0) { + printk("ce unregister with <%s>\n", ceptr->name); + rc = ckrm_unregister_engine(ceptr->name); + printk("ce unregister with <%s> rc=%d\n", ceptr->name, + rc); + *ceptr->clsvar = -1; } - if (rc != -ENOENT) - *ceptr->clsvar = rc; ceptr++; } - return 0; } // =========== /proc/sysctl/debug/rbce debug stuff ============= @@ -2506,7 +2509,7 @@ int init_rbce(void) { int rc, i, line; - printk(KERN_DEBUG "<1>\nInstalling \'%s\' module\n", modname); + printk("<1>\nInstalling \'%s\' module\n", modname); for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { INIT_LIST_HEAD(&rules_list[i]); @@ -2555,7 +2558,7 @@ int init_rbce(void) exit_rbce_ext(); out: - 
printk(KERN_DEBUG "<1>%s: error installing rc=%d line=%d\n", __FUNCTION__, rc, + printk("<1>%s: error installing rc=%d line=%d\n", __FUNCTION__, rc, line); return rc; } @@ -2564,19 +2567,19 @@ void exit_rbce(void) { int i; - printk(KERN_DEBUG "<1>Removing \'%s\' module\n", modname); + printk("<1>Removing \'%s\' module\n", modname); stop_debug(); exit_rbce_ext(); // Print warnings if lists are not empty, which is a bug if (!list_empty(&class_list)) { - printk(KERN_DEBUG "exit_rbce: Class list is not empty\n"); + printk("exit_rbce: Class list is not empty\n"); } for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { if (!list_empty(&rules_list[i])) { - printk(KERN_DEBUG "exit_rbce: Rules list for classtype %d" + printk("exit_rbce: Rules list for classtype %d" " is not empty\n", i); } } @@ -2594,6 +2597,7 @@ EXPORT_SYMBOL(rule_exists); EXPORT_SYMBOL(change_rule); EXPORT_SYMBOL(delete_rule); EXPORT_SYMBOL(rename_rule); +EXPORT_SYMBOL(reclassify_pid); EXPORT_SYMBOL(set_tasktag); module_init(init_rbce); diff --git a/kernel/ckrm/rbce/rbcemod_ext.c b/kernel/ckrm/rbce/rbcemod_ext.c index 3cae550f7..b7886ebf4 100644 --- a/kernel/ckrm/rbce/rbcemod_ext.c +++ b/kernel/ckrm/rbce/rbcemod_ext.c @@ -3,7 +3,7 @@ * Copyright (C) Hubertus Franke, IBM Corp. 2003 * * Extension to be included into RBCE to collect delay and sample information - * Requires user daemon e.g. crbcedmn to activate. + * requires user daemon to activate. * * Latest version, more details at http://ckrm.sf.net * @@ -12,13 +12,8 @@ * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * */ - /******************************************************************************* * * User-Kernel Communication Channel (UKCC) @@ -62,10 +57,10 @@ static int ukcc_fileop_notify(int rchan_id, { static int readers = 0; if (fileop == RELAY_FILE_OPEN) { - // printk(KERN_DEBUG "got fileop_notify RELAY_FILE_OPEN for file %p\n", + // printk("got fileop_notify RELAY_FILE_OPEN for file %p\n", // filp); if (readers) { - printk(KERN_DEBUG "only one client allowed, backoff .... \n"); + printk("only one client allowed, backoff .... \n"); return -EPERM; } if (!try_module_get(THIS_MODULE)) @@ -74,7 +69,7 @@ static int ukcc_fileop_notify(int rchan_id, client_attached(); } else if (fileop == RELAY_FILE_CLOSE) { - // printk(KERN_DEBUG "got fileop_notify RELAY_FILE_CLOSE for file %p\n", + // printk("got fileop_notify RELAY_FILE_CLOSE for file %p\n", // filp); client_detached(); readers--; @@ -109,10 +104,10 @@ static int create_ukcc_channel(void) channel_flags, &ukcc_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0); if (ukcc_channel < 0) - printk(KERN_DEBUG "crbce: ukcc creation failed, errcode: %d\n", + printk("crbce: ukcc creation failed, errcode: %d\n", ukcc_channel); else - printk(KERN_DEBUG "crbce: ukcc created (%u KB)\n", + printk("crbce: ukcc created (%u KB)\n", UKCC_TOTAL_BUFFER_SIZE >> 10); return ukcc_channel; } @@ -144,9 +139,9 @@ static inline void close_ukcc_channel(void) (r),(l),-1,NULL) > 0); \ chan_state = chan_isok ? 
UKCC_OK : UKCC_STANDBY; \ if (chan_wasok && !chan_isok) { \ - printk(KERN_DEBUG "Channel stalled\n"); \ + printk("Channel stalled\n"); \ } else if (!chan_wasok && chan_isok) { \ - printk(KERN_DEBUG "Channel continues\n"); \ + printk("Channel continues\n"); \ } \ } while (0) @@ -288,7 +283,7 @@ send_task_record(struct task_struct *tsk, int event, return 0; pdata = RBCE_DATA(tsk); if (pdata == NULL) { - // printk(KERN_DEBUG "send [%d]<%s>: no pdata\n",tsk->pid,tsk->comm); + // printk("send [%d]<%s>: no pdata\n",tsk->pid,tsk->comm); return 0; } if (send_forced || (delta_mode == 0) @@ -384,7 +379,7 @@ static void send_task_data(void) rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0); rec_send(&limrec); - // printk(KERN_DEBUG "send_task_data mode=%d t#=%d s#=%d\n", + // printk("send_task_data mode=%d t#=%d s#=%d\n", // delta_mode,taskcnt,sendcnt); } @@ -503,7 +498,7 @@ static void sample_task_data(unsigned long unused) } while_each_thread(proc, thread); read_unlock(&tasklist_lock); -// printk(KERN_DEBUG "sample_timer: run=%d wait=%d\n",run,wait); +// printk("sample_timer: run=%d wait=%d\n",run,wait); start_sample_timer(); } @@ -513,7 +508,7 @@ static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len) struct crbce_cmd_done cmdret; int rc = 0; -// printk(KERN_DEBUG "ukcc_cmd_deliver: %d %d len=%d:%d\n",cmdrec->type, +// printk("ukcc_cmd_deliver: %d %d len=%d:%d\n",cmdrec->type, // cmdrec->cmd,cmdrec->len,len); cmdrec->len = len; // add this to reflection so the user doesn't @@ -578,20 +573,20 @@ static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len) cmdret.hdr.cmd = cmdrec->cmd; cmdret.rc = rc; rec_send(&cmdret); -// printk(KERN_DEBUG "ukcc_cmd_deliver ACK: %d %d rc=%d %d\n",cmdret.hdr.type, +// printk("ukcc_cmd_deliver ACK: %d %d rc=%d %d\n",cmdret.hdr.type, // cmdret.hdr.cmd,rc,sizeof(cmdret)); } static void client_attached(void) { - printk(KERN_DEBUG "client [%d]<%s> attached to UKCC\n", current->pid, + printk("client [%d]<%s> attached to UKCC\n", current->pid, current->comm); relay_reset(ukcc_channel); } static void client_detached(void) { - printk(KERN_DEBUG "client [%d]<%s> detached to UKCC\n", current->pid, + printk("client [%d]<%s> detached to UKCC\n", current->pid, current->comm); chan_state = UKCC_STANDBY; stop_sample_timer(); diff --git a/kernel/ckrm/rbce/token.c b/kernel/ckrm/rbce/token.c index 32446fb2b..0ace80a50 100644 --- a/kernel/ckrm/rbce/token.c +++ b/kernel/ckrm/rbce/token.c @@ -1,24 +1,3 @@ -/* Tokens for Rule-based Classification Engine (RBCE) and - * Consolidated RBCE module code (combined) - * - * Copyright (C) Hubertus Franke, IBM Corp. 2003 - * (C) Chandra Seetharaman, IBM Corp. 2003 - * (C) Vivek Kashyap, IBM Corp. 2004 - * - * Latest version, more details at http://ckrm.sf.net - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - * - */ - #include #include @@ -197,7 +176,7 @@ rules_parse(char *rule_defn, struct rbce_rule_term **rterms, int *term_mask) nterms = 0; while (*rp++) { - if (*rp == '>' || *rp == '<' || *rp == '=' || *rp == '!') { + if (*rp == '>' || *rp == '<' || *rp == '=') { nterms++; } } @@ -293,7 +272,7 @@ rules_parse(char *rule_defn, struct rbce_rule_term **rterms, int *term_mask) *term_mask = 0; } /* else { for (i = 0; i < nterms; i++) { - printk(KERN_DEBUG "token: i %d; op %d, operator %d, str %ld\n", + printk("token: i %d; op %d, operator %d, str %ld\n", i, terms[i].op, terms[i].operator, terms[i].u.id); } } */ diff --git a/kernel/ckrm_classqueue.c b/kernel/ckrm_classqueue.c index 0400844a3..1929aaf4e 100644 --- a/kernel/ckrm_classqueue.c +++ b/kernel/ckrm_classqueue.c @@ -133,42 +133,11 @@ void classqueue_update_prio(struct classqueue_struct *cq, //add to new positon, round robin for classes with same priority list_add_tail(&(node->list), &cq->array.queue[index]); - __set_bit(index, cq->array.bitmap); + __set_bit(index, cq->array.bitmap); + node->index = index; } -/** - *classqueue_get_min_prio: return the priority of the last node in queue - * - * this function can be called without runqueue lock held - */ -static inline int classqueue_get_min_prio(struct classqueue_struct *cq) -{ - cq_node_t *result = NULL; - int pos; - - /* - * search over the bitmap to get the first class in the queue - */ - pos = find_next_bit(cq->array.bitmap, CLASSQUEUE_SIZE, cq->base_offset); - //do circular search from the beginning - if (pos >= CLASSQUEUE_SIZE) - pos = find_first_bit(cq->array.bitmap, CLASSQUEUE_SIZE); - - if (pos < CLASSQUEUE_SIZE) { - result = list_entry(cq->array.queue[pos].next, cq_node_t, list); - if (list_empty(&cq->array.queue[pos])) - result = NULL; - } - if (result) - return result->prio; - else - return 0; -} - -/** - * this function must be called with runqueue lock held - */ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) { cq_node_t *result = NULL; @@ -178,9 +147,9 @@ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) * search over the bitmap to get the first class in the queue */ pos = find_next_bit(cq->array.bitmap, CLASSQUEUE_SIZE, cq->base_offset); - //do circular search from the beginning - if (pos >= CLASSQUEUE_SIZE) + if (pos >= CLASSQUEUE_SIZE) { //do circular search from the beginning pos = find_first_bit(cq->array.bitmap, CLASSQUEUE_SIZE); + } if (pos < CLASSQUEUE_SIZE) { BUG_ON(list_empty(&cq->array.queue[pos])); @@ -193,17 +162,15 @@ cq_node_t *classqueue_get_head(struct classqueue_struct *cq) * Moving the end of queue forward * the new_base here is logical, we need to translate to the abosule position */ -void classqueue_update_base(struct classqueue_struct *cq) +void classqueue_update_base(struct classqueue_struct *cq, int new_base) { - int new_base; - - if (! 
cq_nr_member(cq)) { + if (!cq_nr_member(cq)) { cq->base_offset = -1; //not defined return; } - new_base = classqueue_get_min_prio(cq); - + // assert(new_base >= cq->base); + if (new_base > cq->base) { cq->base_offset = get_index(cq, &new_base); cq->base = new_base; diff --git a/kernel/ckrm_sched.c b/kernel/ckrm_sched.c index 5142b2eaa..ba716d4c5 100644 --- a/kernel/ckrm_sched.c +++ b/kernel/ckrm_sched.c @@ -15,202 +15,57 @@ #include #include -rwlock_t class_list_lock = RW_LOCK_UNLOCKED; -LIST_HEAD(active_cpu_classes); // list of active cpu classes; anchor - -struct ckrm_cpu_class default_cpu_class_obj; - -struct ckrm_cpu_class * get_default_cpu_class(void) { - return (&default_cpu_class_obj); -} - /*******************************************************/ /* CVT Management */ /*******************************************************/ +#define CVT_WINDOW_SIZE (CLASSQUEUE_SIZE << CLASS_BONUS_RATE) +static CVT_t max_CVT = CVT_WINDOW_SIZE; -static inline void check_inactive_class(ckrm_lrq_t * lrq,CVT_t cur_cvt) +/* + * Also ensure that the classes global cvt is upgraded to the + * minimum CVT in the system, as a class might not have run for a while + */ +static void update_global_cvt(struct ckrm_cpu_class *cpu_class, int cpu) { + struct ckrm_local_runqueue *class_queue = + get_ckrm_local_runqueue(cpu_class, cpu); CVT_t min_cvt; - CVT_t bonus; - - //just a safty measure - if (unlikely(! cur_cvt)) - return; + CVT_t local_cvt_old = class_queue->local_cvt; -#ifndef INTERACTIVE_BONUS_SUPPORT -#warning "ACB taking out interactive bonus calculation" - bonus = 0; -#else - /* - * Always leaving a small bonus for inactive classes - * allows them to compete for cycles immediately when the become - * active. This should improve interactive behavior - */ - bonus = INTERACTIVE_BONUS(lrq); + spin_lock(&cvt_lock); + if (class_queue->uncounted_cvt) { + cpu_class->global_cvt += class_queue->uncounted_cvt; + class_queue->uncounted_cvt = 0; + } + min_cvt = max_CVT - CVT_WINDOW_SIZE; + if (cpu_class->global_cvt < min_cvt) + cpu_class->global_cvt = min_cvt; + else if (cpu_class->global_cvt > max_CVT) + max_CVT = cpu_class->global_cvt; + +/* update local cvt from global cvt*/ +#if 0 + class_queue->local_cvt = cpu_class->global_cvt; #endif + spin_unlock(&cvt_lock); - //cvt can't be negative - if (cur_cvt > bonus) - min_cvt = cur_cvt - bonus; - else - min_cvt = 0; - - if (lrq->local_cvt < min_cvt) { - CVT_t lost_cvt; - - lost_cvt = scale_cvt(min_cvt - lrq->local_cvt,lrq); - lrq->local_cvt = min_cvt; - - /* add what the class lost to its savings*/ - lrq->savings += lost_cvt; - if (lrq->savings > MAX_SAVINGS) - lrq->savings = MAX_SAVINGS; - } else if (lrq->savings) { - /* - *if a class saving and falling behind - * then start to use it saving in a leaking bucket way - */ - CVT_t savings_used; - - savings_used = scale_cvt((lrq->local_cvt - min_cvt),lrq); - if (savings_used > lrq->savings) - savings_used = lrq->savings; - - if (savings_used > SAVINGS_LEAK_SPEED) - savings_used = SAVINGS_LEAK_SPEED; - - BUG_ON(lrq->savings < savings_used); - lrq->savings -= savings_used; - unscale_cvt(savings_used,lrq); - BUG_ON(lrq->local_cvt < savings_used); -#ifndef CVT_SAVINGS_SUPPORT -#warning "ACB taking out cvt saving" -#else - lrq->local_cvt -= savings_used; -#endif - } + if (class_queue->local_cvt != local_cvt_old) + update_class_priority(class_queue); } /* - * return the max_cvt of all the classes - */ -static inline CVT_t get_max_cvt(int this_cpu) -{ - struct ckrm_cpu_class *clsptr; - ckrm_lrq_t * lrq; - CVT_t max_cvt; - - max_cvt 
= 0; - - /*update class time, at the same time get max_cvt */ - list_for_each_entry(clsptr, &active_cpu_classes, links) { - lrq = get_ckrm_lrq(clsptr, this_cpu); - if (lrq->local_cvt > max_cvt) - max_cvt = lrq->local_cvt; - } - - return max_cvt; -} - -/** - * update_class_cputime - updates cvt of inactive classes - * -- an inactive class shouldn't starve others when it comes back - * -- the cpu time it lost when it's inactive should be accumulated - * -- its accumulated saving should be compensated (in a leaky bucket fashion) - * * class_list_lock must have been acquired */ -void update_class_cputime(int this_cpu) +void update_global_cvts(int this_cpu) { struct ckrm_cpu_class *clsptr; - ckrm_lrq_t * lrq; - CVT_t cur_cvt; - - /* - * a class's local_cvt must not be significantly smaller than min_cvt - * of active classes otherwise, it will starve other classes when it - * is reactivated. - * - * Hence we keep all local_cvt's within a range of the min_cvt off - * all active classes (approximated by the local_cvt of the currently - * running class) and account for how many cycles where thus taken - * from an inactive class building a savings (not to exceed a few seconds) - * for a class to gradually make up upon reactivation, without - * starvation of other classes. - * - */ - cur_cvt = get_local_cur_cvt(this_cpu); + struct ckrm_local_runqueue *class_queue; - /* - * cur_cvt == 0 means the system is now idle - * in this case, we use max_cvt as cur_cvt - * max_cvt roughly represents the cvt of the class - * that has just finished running - * - * fairness wouldn't be a problem since we account for whatever lost in savings - * if the system is not busy, the system responsiveness is not a problem. - * still fine if the sytem is busy, but happened to be idle at this certain point - * since bias toward interactive classes (class priority) is a more important way to improve system responsiveness - */ - if (unlikely(! 
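/*
 * After these hunks the per-class global CVT is kept inside a sliding
 * window below the system-wide maximum:
 *
 *   CVT_WINDOW_SIZE = CLASSQUEUE_SIZE << CLASS_BONUS_RATE
 *   global_cvt(class) is raised to at least max_CVT - CVT_WINDOW_SIZE,
 *   and max_CVT is raised whenever a class's global_cvt exceeds it.
 *
 * Per the comment in the added code, this keeps a class that has been
 * idle for a long time from coming back with an arbitrarily small CVT
 * and starving the classes that kept running.  The savings and
 * interactive-bonus accounting visible in the removed lines is dropped
 * by this patch.
 */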
cur_cvt)) { - cur_cvt = get_max_cvt(this_cpu); - //return; - } - - /* - * - check the local cvt of all the classes - * - update total_ns received by the class - * - do a usage sampling for the whole class - */ + /*for each class*/ list_for_each_entry(clsptr, &active_cpu_classes, links) { - lrq = get_ckrm_lrq(clsptr, this_cpu); - - spin_lock(&clsptr->stat.stat_lock); - clsptr->stat.total_ns += lrq->uncounted_ns; - ckrm_sample_usage(clsptr); - spin_unlock(&clsptr->stat.stat_lock); - lrq->uncounted_ns = 0; - - check_inactive_class(lrq,cur_cvt); + update_global_cvt(clsptr, this_cpu); + class_queue = get_ckrm_local_runqueue(clsptr, this_cpu); + clsptr->stat.total_ns += class_queue->uncounted_ns; + class_queue->uncounted_ns = 0; } } - -/*******************************************************/ -/* PID load balancing stuff */ -/*******************************************************/ -#define PID_SAMPLE_T 32 -#define PID_KP 20 -#define PID_KI 60 -#define PID_KD 20 - -/** - * sample pid load periodically - */ -void ckrm_load_sample(ckrm_load_t* pid,int cpu) -{ - long load; - long err; - - if (jiffies % PID_SAMPLE_T) - return; - - adjust_local_weight(); - - load = ckrm_cpu_load(cpu); - err = load - pid->load_p; - pid->load_d = err; - pid->load_p = load; - pid->load_i *= 9; - pid->load_i += load; - pid->load_i /= 10; -} - -long pid_get_pressure(ckrm_load_t* ckrm_load, int local_group) -{ - long pressure; - pressure = ckrm_load->load_p * PID_KP; - pressure += ckrm_load->load_i * PID_KI; - pressure += ckrm_load->load_d * PID_KD; - pressure /= 100; - return pressure; -} diff --git a/kernel/exit.c b/kernel/exit.c index 60075cbb3..5bc8fff46 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -524,12 +523,6 @@ static inline void __exit_mm(struct task_struct * tsk) task_lock(tsk); tsk->mm = NULL; up_read(&mm->mmap_sem); -#ifdef CONFIG_CKRM_RES_MEM - spin_lock(&mm->peertask_lock); - list_del_init(&tsk->mm_peers); - ckrm_mem_evaluate_mm(mm); - spin_unlock(&mm->peertask_lock); -#endif enter_lazy_tlb(mm, current); task_unlock(tsk); mmput(mm); @@ -866,6 +859,9 @@ asmlinkage NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + numtasks_put_ref(tsk->taskclass); +#endif exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/exit.c.orig b/kernel/exit.c.orig new file mode 100644 index 000000000..f53583e2b --- /dev/null +++ b/kernel/exit.c.orig @@ -0,0 +1,1192 @@ +/* + * linux/kernel/exit.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +extern void sem_exit (void); +extern struct task_struct *child_reaper; + +int getrusage(struct task_struct *, int, struct rusage __user *); + +static void __unhash_process(struct task_struct *p) +{ + nr_threads--; + detach_pid(p, PIDTYPE_PID); + detach_pid(p, PIDTYPE_TGID); + if (thread_group_leader(p)) { + detach_pid(p, PIDTYPE_PGID); + detach_pid(p, PIDTYPE_SID); + if (p->pid) + __get_cpu_var(process_counts)--; + } + + REMOVE_LINKS(p); +} + +void release_task(struct task_struct * p) +{ + int zap_leader; + task_t *leader; + struct dentry *proc_dentry; + +repeat: + BUG_ON(p->state < TASK_ZOMBIE); + + atomic_dec(&p->user->processes); + 
spin_lock(&p->proc_lock); + proc_dentry = proc_pid_unhash(p); + write_lock_irq(&tasklist_lock); + if (unlikely(p->ptrace)) + __ptrace_unlink(p); + BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); + __exit_signal(p); + __exit_sighand(p); + __unhash_process(p); + + /* + * If we are the last non-leader member of the thread + * group, and the leader is zombie, then notify the + * group leader's parent process. (if it wants notification.) + */ + zap_leader = 0; + leader = p->group_leader; + if (leader != p && thread_group_empty(leader) && leader->state == TASK_ZOMBIE) { + BUG_ON(leader->exit_signal == -1); + do_notify_parent(leader, leader->exit_signal); + /* + * If we were the last child thread and the leader has + * exited already, and the leader's parent ignores SIGCHLD, + * then we are the one who should release the leader. + * + * do_notify_parent() will have marked it self-reaping in + * that case. + */ + zap_leader = (leader->exit_signal == -1); + } + + p->parent->cutime += p->utime + p->cutime; + p->parent->cstime += p->stime + p->cstime; + p->parent->cmin_flt += p->min_flt + p->cmin_flt; + p->parent->cmaj_flt += p->maj_flt + p->cmaj_flt; + p->parent->cnvcsw += p->nvcsw + p->cnvcsw; + p->parent->cnivcsw += p->nivcsw + p->cnivcsw; + sched_exit(p); + write_unlock_irq(&tasklist_lock); + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); + release_thread(p); + put_task_struct(p); + + p = leader; + if (unlikely(zap_leader)) + goto repeat; +} + +/* we are using it only for SMP init */ + +void unhash_process(struct task_struct *p) +{ + struct dentry *proc_dentry; + + spin_lock(&p->proc_lock); + proc_dentry = proc_pid_unhash(p); + write_lock_irq(&tasklist_lock); + __unhash_process(p); + write_unlock_irq(&tasklist_lock); + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); +} + +/* + * This checks not only the pgrp, but falls back on the pid if no + * satisfactory pgrp is found. I dunno - gdb doesn't work correctly + * without this... + */ +int session_of_pgrp(int pgrp) +{ + struct task_struct *p; + struct list_head *l; + struct pid *pid; + int sid = -1; + + read_lock(&tasklist_lock); + for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) + if (p->signal->session > 0) { + sid = p->signal->session; + goto out; + } + p = find_task_by_pid(pgrp); + if (p) + sid = p->signal->session; +out: + read_unlock(&tasklist_lock); + + return sid; +} + +/* + * Determine if a process group is "orphaned", according to the POSIX + * definition in 2.2.2.52. Orphaned process groups are not to be affected + * by terminal-generated stop signals. Newly orphaned process groups are + * to receive a SIGHUP and a SIGCONT. + * + * "I ask you, have you ever known what it is to be an orphan?" + */ +static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) +{ + struct task_struct *p; + struct list_head *l; + struct pid *pid; + int ret = 1; + + for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { + if (p == ignored_task + || p->state >= TASK_ZOMBIE + || p->real_parent->pid == 1) + continue; + if (process_group(p->real_parent) != pgrp + && p->real_parent->signal->session == p->signal->session) { + ret = 0; + break; + } + } + return ret; /* (sighing) "Often!" 
*/ +} + +int is_orphaned_pgrp(int pgrp) +{ + int retval; + + read_lock(&tasklist_lock); + retval = will_become_orphaned_pgrp(pgrp, NULL); + read_unlock(&tasklist_lock); + + return retval; +} + +static inline int has_stopped_jobs(int pgrp) +{ + int retval = 0; + struct task_struct *p; + struct list_head *l; + struct pid *pid; + + for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) { + if (p->state != TASK_STOPPED) + continue; + + /* If p is stopped by a debugger on a signal that won't + stop it, then don't count p as stopped. This isn't + perfect but it's a good approximation. */ + if (unlikely (p->ptrace) + && p->exit_code != SIGSTOP + && p->exit_code != SIGTSTP + && p->exit_code != SIGTTOU + && p->exit_code != SIGTTIN) + continue; + + retval = 1; + break; + } + return retval; +} + +/** + * reparent_to_init() - Reparent the calling kernel thread to the init task. + * + * If a kernel thread is launched as a result of a system call, or if + * it ever exits, it should generally reparent itself to init so that + * it is correctly cleaned up on exit. + * + * The various task state such as scheduling policy and priority may have + * been inherited from a user process, so we reset them to sane values here. + * + * NOTE that reparent_to_init() gives the caller full capabilities. + */ +void reparent_to_init(void) +{ + write_lock_irq(&tasklist_lock); + + ptrace_unlink(current); + /* Reparent to init */ + REMOVE_LINKS(current); + current->parent = child_reaper; + current->real_parent = child_reaper; + SET_LINKS(current); + + /* Set the exit signal to SIGCHLD so we signal init on exit */ + current->exit_signal = SIGCHLD; + + if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0)) + set_user_nice(current, 0); + /* cpus_allowed? */ + /* rt_priority? */ + /* signals? */ + security_task_reparent_to_init(current); + memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim))); + atomic_inc(&(INIT_USER->__count)); + switch_uid(INIT_USER); + + write_unlock_irq(&tasklist_lock); +} + +void __set_special_pids(pid_t session, pid_t pgrp) +{ + struct task_struct *curr = current; + + if (curr->signal->session != session) { + detach_pid(curr, PIDTYPE_SID); + curr->signal->session = session; + attach_pid(curr, PIDTYPE_SID, session); + } + if (process_group(curr) != pgrp) { + detach_pid(curr, PIDTYPE_PGID); + curr->signal->pgrp = pgrp; + attach_pid(curr, PIDTYPE_PGID, pgrp); + } +} + +void set_special_pids(pid_t session, pid_t pgrp) +{ + write_lock_irq(&tasklist_lock); + __set_special_pids(session, pgrp); + write_unlock_irq(&tasklist_lock); +} + +/* + * Let kernel threads use this to say that they + * allow a certain signal (since daemonize() will + * have disabled all of them by default). + */ +int allow_signal(int sig) +{ + if (sig < 1 || sig > _NSIG) + return -EINVAL; + + spin_lock_irq(¤t->sighand->siglock); + sigdelset(¤t->blocked, sig); + if (!current->mm) { + /* Kernel threads handle their own signals. 
+ Let the signal code know it'll be handled, so + that they don't get converted to SIGKILL or + just silently dropped */ + current->sighand->action[(sig)-1].sa.sa_handler = (void *)2; + } + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + return 0; +} + +EXPORT_SYMBOL(allow_signal); + +int disallow_signal(int sig) +{ + if (sig < 1 || sig > _NSIG) + return -EINVAL; + + spin_lock_irq(¤t->sighand->siglock); + sigaddset(¤t->blocked, sig); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + return 0; +} + +EXPORT_SYMBOL(disallow_signal); + +/* + * Put all the gunge required to become a kernel thread without + * attached user resources in one place where it belongs. + */ + +void daemonize(const char *name, ...) +{ + va_list args; + struct fs_struct *fs; + sigset_t blocked; + + va_start(args, name); + vsnprintf(current->comm, sizeof(current->comm), name, args); + va_end(args); + + /* + * If we were started as result of loading a module, close all of the + * user space pages. We don't need them, and if we didn't close them + * they would be locked into memory. + */ + exit_mm(current); + + set_special_pids(1, 1); + current->signal->tty = NULL; + + /* Block and flush all signals */ + sigfillset(&blocked); + sigprocmask(SIG_BLOCK, &blocked, NULL); + flush_signals(current); + + /* Become as one with the init task */ + + exit_fs(current); /* current->fs->count--; */ + fs = init_task.fs; + current->fs = fs; + atomic_inc(&fs->count); + exit_files(current); + current->files = init_task.files; + atomic_inc(¤t->files->count); + + reparent_to_init(); +} + +EXPORT_SYMBOL(daemonize); + +static inline void close_files(struct files_struct * files) +{ + int i, j; + + j = 0; + for (;;) { + unsigned long set; + i = j * __NFDBITS; + if (i >= files->max_fdset || i >= files->max_fds) + break; + set = files->open_fds->fds_bits[j++]; + while (set) { + if (set & 1) { + struct file * file = xchg(&files->fd[i], NULL); + if (file) + filp_close(file, files); + } + i++; + set >>= 1; + } + } +} + +struct files_struct *get_files_struct(struct task_struct *task) +{ + struct files_struct *files; + + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + + return files; +} + +void fastcall put_files_struct(struct files_struct *files) +{ + if (atomic_dec_and_test(&files->count)) { + close_files(files); + /* + * Free the fd and fdset arrays if we expanded them. 
+ */ + if (files->fd != &files->fd_array[0]) + free_fd_array(files->fd, files->max_fds); + if (files->max_fdset > __FD_SETSIZE) { + free_fdset(files->open_fds, files->max_fdset); + free_fdset(files->close_on_exec, files->max_fdset); + } + kmem_cache_free(files_cachep, files); + } +} + +EXPORT_SYMBOL(put_files_struct); + +static inline void __exit_files(struct task_struct *tsk) +{ + struct files_struct * files = tsk->files; + + if (files) { + task_lock(tsk); + tsk->files = NULL; + task_unlock(tsk); + put_files_struct(files); + } +} + +void exit_files(struct task_struct *tsk) +{ + __exit_files(tsk); +} + +static inline void __put_fs_struct(struct fs_struct *fs) +{ + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { + dput(fs->root); + mntput(fs->rootmnt); + dput(fs->pwd); + mntput(fs->pwdmnt); + if (fs->altroot) { + dput(fs->altroot); + mntput(fs->altrootmnt); + } + kmem_cache_free(fs_cachep, fs); + } +} + +void put_fs_struct(struct fs_struct *fs) +{ + __put_fs_struct(fs); +} + +static inline void __exit_fs(struct task_struct *tsk) +{ + struct fs_struct * fs = tsk->fs; + + if (fs) { + task_lock(tsk); + tsk->fs = NULL; + task_unlock(tsk); + __put_fs_struct(fs); + } +} + +void exit_fs(struct task_struct *tsk) +{ + __exit_fs(tsk); +} + +EXPORT_SYMBOL_GPL(exit_fs); + +/* + * Turn us into a lazy TLB process if we + * aren't already.. + */ +static inline void __exit_mm(struct task_struct * tsk) +{ + struct mm_struct *mm = tsk->mm; + + mm_release(tsk, mm); + if (!mm) + return; + /* + * Serialize with any possible pending coredump. + * We must hold mmap_sem around checking core_waiters + * and clearing tsk->mm. The core-inducing thread + * will increment core_waiters for each thread in the + * group with ->mm != NULL. + */ + down_read(&mm->mmap_sem); + if (mm->core_waiters) { + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); + if (!--mm->core_waiters) + complete(mm->core_startup_done); + up_write(&mm->mmap_sem); + + wait_for_completion(&mm->core_done); + down_read(&mm->mmap_sem); + } + atomic_inc(&mm->mm_count); + if (mm != tsk->active_mm) BUG(); + /* more a memory barrier than a real lock */ + task_lock(tsk); + tsk->mm = NULL; + up_read(&mm->mmap_sem); + enter_lazy_tlb(mm, current); + task_unlock(tsk); + mmput(mm); +} + +void exit_mm(struct task_struct *tsk) +{ + __exit_mm(tsk); +} + +EXPORT_SYMBOL(exit_mm); + +static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) +{ + /* + * Make sure we're not reparenting to ourselves and that + * the parent is not a zombie. + */ + if (p == reaper || reaper->state >= TASK_ZOMBIE) + p->real_parent = child_reaper; + else + p->real_parent = reaper; + if (p->parent == p->real_parent) + BUG(); +} + +static inline void reparent_thread(task_t *p, task_t *father, int traced) +{ + /* We don't want people slaying init. */ + if (p->exit_signal != -1) + p->exit_signal = SIGCHLD; + p->self_exec_id++; + + if (p->pdeath_signal) + /* We already hold the tasklist_lock here. */ + group_send_sig_info(p->pdeath_signal, (void *) 0, p); + + /* Move the child from its dying parent to the new one. */ + if (unlikely(traced)) { + /* Preserve ptrace links if someone else is tracing this child. */ + list_del_init(&p->ptrace_list); + if (p->parent != p->real_parent) + list_add(&p->ptrace_list, &p->real_parent->ptrace_children); + } else { + /* If this child is being traced, then we're the one tracing it + * anyway, so let go of it. 
+ */ + p->ptrace = 0; + list_del_init(&p->sibling); + p->parent = p->real_parent; + list_add_tail(&p->sibling, &p->parent->children); + + /* If we'd notified the old parent about this child's death, + * also notify the new parent. + */ + if (p->state == TASK_ZOMBIE && p->exit_signal != -1 && + thread_group_empty(p)) + do_notify_parent(p, p->exit_signal); + } + + /* + * process group orphan check + * Case ii: Our child is in a different pgrp + * than we are, and it was the only connection + * outside, so the child pgrp is now orphaned. + */ + if ((process_group(p) != process_group(father)) && + (p->signal->session == father->signal->session)) { + int pgrp = process_group(p); + + if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { + __kill_pg_info(SIGHUP, (void *)1, pgrp); + __kill_pg_info(SIGCONT, (void *)1, pgrp); + } + } +} + +/* + * When we die, we re-parent all our children. + * Try to give them to another thread in our thread + * group, and if no such member exists, give it to + * the global child reaper process (ie "init") + */ +static inline void forget_original_parent(struct task_struct * father) +{ + struct task_struct *p, *reaper = father; + struct list_head *_p, *_n; + + reaper = father->group_leader; + if (reaper == father) + reaper = child_reaper; + + /* + * There are only two places where our children can be: + * + * - in our child list + * - in our ptraced child list + * + * Search them and reparent children. + */ + list_for_each_safe(_p, _n, &father->children) { + p = list_entry(_p,struct task_struct,sibling); + if (father == p->real_parent) { + choose_new_parent(p, reaper, child_reaper); + reparent_thread(p, father, 0); + } else { + ptrace_unlink (p); + if (p->state == TASK_ZOMBIE && p->exit_signal != -1 && + thread_group_empty(p)) + do_notify_parent(p, p->exit_signal); + } + } + list_for_each_safe(_p, _n, &father->ptrace_children) { + p = list_entry(_p,struct task_struct,ptrace_list); + choose_new_parent(p, reaper, child_reaper); + reparent_thread(p, father, 1); + } +} + +/* + * Send signals to all our closest relatives so that they know + * to properly mourn us.. + */ +static void exit_notify(struct task_struct *tsk) +{ + int state; + struct task_struct *t; + + ckrm_cb_exit(tsk); + + if (signal_pending(tsk) && !tsk->signal->group_exit + && !thread_group_empty(tsk)) { + /* + * This occurs when there was a race between our exit + * syscall and a group signal choosing us as the one to + * wake up. It could be that we are the only thread + * alerted to check for pending signals, but another thread + * should be woken now to take the signal since we will not. + * Now we'll wake all the threads in the group just to make + * sure someone gets all the pending signals. + */ + read_lock(&tasklist_lock); + spin_lock_irq(&tsk->sighand->siglock); + for (t = next_thread(tsk); t != tsk; t = next_thread(t)) + if (!signal_pending(t) && !(t->flags & PF_EXITING)) { + recalc_sigpending_tsk(t); + if (signal_pending(t)) + signal_wake_up(t, 0); + } + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + } + + write_lock_irq(&tasklist_lock); + + /* + * This does two things: + * + * A. Make init inherit all the child processes + * B. Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. 
(POSIX 3.2.2.2) + */ + + forget_original_parent(tsk); + BUG_ON(!list_empty(&tsk->children)); + + /* + * Check to see if any process groups have become orphaned + * as a result of our exiting, and if they have any stopped + * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) + * + * Case i: Our father is in a different pgrp than we are + * and we were the only connection outside, so our pgrp + * is about to become orphaned. + */ + + t = tsk->real_parent; + + if ((process_group(t) != process_group(tsk)) && + (t->signal->session == tsk->signal->session) && + will_become_orphaned_pgrp(process_group(tsk), tsk) && + has_stopped_jobs(process_group(tsk))) { + __kill_pg_info(SIGHUP, (void *)1, process_group(tsk)); + __kill_pg_info(SIGCONT, (void *)1, process_group(tsk)); + } + + /* Let father know we died + * + * Thread signals are configurable, but you aren't going to use + * that to send signals to arbitary processes. + * That stops right now. + * + * If the parent exec id doesn't match the exec id we saved + * when we started then we know the parent has changed security + * domain. + * + * If our self_exec id doesn't match our parent_exec_id then + * we have changed execution domain as these two values started + * the same after a fork. + * + */ + + if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 && + ( tsk->parent_exec_id != t->self_exec_id || + tsk->self_exec_id != tsk->parent_exec_id) + && !capable(CAP_KILL)) + tsk->exit_signal = SIGCHLD; + + + /* If something other than our normal parent is ptracing us, then + * send it a SIGCHLD instead of honoring exit_signal. exit_signal + * only has special meaning to our real parent. + */ + if (tsk->exit_signal != -1 && thread_group_empty(tsk)) { + int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; + do_notify_parent(tsk, signal); + } else if (tsk->ptrace) { + do_notify_parent(tsk, SIGCHLD); + } + + state = TASK_ZOMBIE; + if (tsk->exit_signal == -1 && tsk->ptrace == 0) + state = TASK_DEAD; + tsk->state = state; + tsk->flags |= PF_DEAD; + + /* + * Clear these here so that update_process_times() won't try to deliver + * itimer, profile or rlimit signals to this task while it is in late exit. + */ + tsk->it_virt_value = 0; + tsk->it_prof_value = 0; + tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY; + + /* + * In the preemption case it must be impossible for the task + * to get runnable again, so use "_raw_" unlock to keep + * preempt_count elevated until we schedule(). + * + * To avoid deadlock on SMP, interrupts must be unmasked. If we + * don't, subsequently called functions (e.g, wait_task_inactive() + * via release_task()) will spin, with interrupt flags + * unwittingly blocked, until the other task sleeps. That task + * may itself be waiting for smp_call_function() to answer and + * complete, and with interrupts blocked that will never happen. 
+ */ + _raw_write_unlock(&tasklist_lock); + local_irq_enable(); + + /* If the process is dead, release it - nobody will wait for it */ + if (state == TASK_DEAD) + release_task(tsk); + +} + +asmlinkage NORET_TYPE void do_exit(long code) +{ + struct task_struct *tsk = current; + + if (unlikely(in_interrupt())) + panic("Aiee, killing interrupt handler!"); + if (unlikely(!tsk->pid)) + panic("Attempted to kill the idle task!"); + if (unlikely(tsk->pid == 1)) + panic("Attempted to kill init!"); + if (tsk->io_context) + exit_io_context(); + tsk->flags |= PF_EXITING; + del_timer_sync(&tsk->real_timer); + + if (unlikely(in_atomic())) + printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", + current->comm, current->pid, + preempt_count()); + + profile_exit_task(tsk); + + if (unlikely(current->ptrace & PT_TRACE_EXIT)) { + current->ptrace_message = code; + ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); + } + + acct_process(code); + __exit_mm(tsk); + + exit_sem(tsk); + __exit_files(tsk); + __exit_fs(tsk); + exit_namespace(tsk); + exit_thread(); +#ifdef CONFIG_NUMA + mpol_free(tsk->mempolicy); +#endif + + if (tsk->signal->leader) + disassociate_ctty(1); + + module_put(tsk->thread_info->exec_domain->module); + if (tsk->binfmt) + module_put(tsk->binfmt->module); + + tsk->exit_code = code; +#ifdef CONFIG_CKRM_TYPE_TASKCLASS + numtasks_put_ref(tsk->taskclass); +#endif + exit_notify(tsk); + schedule(); + BUG(); + /* Avoid "noreturn function does return". */ + for (;;) ; +} + +NORET_TYPE void complete_and_exit(struct completion *comp, long code) +{ + if (comp) + complete(comp); + + do_exit(code); +} + +EXPORT_SYMBOL(complete_and_exit); + +asmlinkage long sys_exit(int error_code) +{ + do_exit((error_code&0xff)<<8); +} + +task_t fastcall *next_thread(task_t *p) +{ + struct pid_link *link = p->pids + PIDTYPE_TGID; + struct list_head *tmp, *head = &link->pidptr->task_list; + +#ifdef CONFIG_SMP + if (!p->sighand) + BUG(); + if (!spin_is_locked(&p->sighand->siglock) && + !rwlock_is_locked(&tasklist_lock)) + BUG(); +#endif + tmp = link->pid_chain.next; + if (tmp == head) + tmp = head->next; + + return pid_task(tmp, PIDTYPE_TGID); +} + +EXPORT_SYMBOL(next_thread); + +/* + * Take down every thread in the group. This is called by fatal signals + * as well as by sys_exit_group (below). + */ +NORET_TYPE void +do_group_exit(int exit_code) +{ + BUG_ON(exit_code & 0x80); /* core dumps don't get here */ + + if (current->signal->group_exit) + exit_code = current->signal->group_exit_code; + else if (!thread_group_empty(current)) { + struct signal_struct *const sig = current->signal; + struct sighand_struct *const sighand = current->sighand; + read_lock(&tasklist_lock); + spin_lock_irq(&sighand->siglock); + if (sig->group_exit) + /* Another thread got here before we took the lock. */ + exit_code = sig->group_exit_code; + else { + sig->group_exit = 1; + sig->group_exit_code = exit_code; + zap_other_threads(current); + } + spin_unlock_irq(&sighand->siglock); + read_unlock(&tasklist_lock); + } + + do_exit(exit_code); + /* NOTREACHED */ +} + +/* + * this kills every thread in the thread group. Note that any externally + * wait4()-ing process will get the correct exit code - even if this + * thread is not the thread group leader. 
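Illustrative aside, not part of the patch: sys_exit() above passes (error_code & 0xff) << 8 to do_exit(), and wait_task_zombie() hands that value straight back to the waiter, which is why the standard W* macros decode it. A minimal user-space sketch of that round trip (the demo program is an assumption for illustration only, nothing in it comes from the kernel tree):

/* Shows the status encoding that do_exit()/wait_task_zombie() pass back
 * to wait4(): sys_exit() stores (error_code & 0xff) << 8, which is exactly
 * what WEXITSTATUS() extracts.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid < 0)
		return 1;
	if (pid == 0)
		exit(42);			/* becomes (42 & 0xff) << 8 in the kernel */

	if (waitpid(pid, &status, 0) < 0) {
		perror("waitpid");
		return 1;
	}
	if (WIFEXITED(status))			/* low byte is 0, so this was a normal exit */
		printf("child exited with %d (raw status 0x%x)\n",
		       WEXITSTATUS(status), status);
	return 0;
}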
+ */ +asmlinkage void sys_exit_group(int error_code) +{ + do_group_exit((error_code & 0xff) << 8); +} + +static int eligible_child(pid_t pid, int options, task_t *p) +{ + if (pid > 0) { + if (p->pid != pid) + return 0; + } else if (!pid) { + if (process_group(p) != process_group(current)) + return 0; + } else if (pid != -1) { + if (process_group(p) != -pid) + return 0; + } + + /* + * Do not consider detached threads that are + * not ptraced: + */ + if (p->exit_signal == -1 && !p->ptrace) + return 0; + + /* Wait for all children (clone and not) if __WALL is set; + * otherwise, wait for clone children *only* if __WCLONE is + * set; otherwise, wait for non-clone children *only*. (Note: + * A "clone" child here is one that reports to its parent + * using a signal other than SIGCHLD.) */ + if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0)) + && !(options & __WALL)) + return 0; + /* + * Do not consider thread group leaders that are + * in a non-empty thread group: + */ + if (current->tgid != p->tgid && delay_group_leader(p)) + return 2; + + if (security_task_wait(p)) + return 0; + + return 1; +} + +/* + * Handle sys_wait4 work for one task in state TASK_ZOMBIE. We hold + * read_lock(&tasklist_lock) on entry. If we return zero, we still hold + * the lock and this task is uninteresting. If we return nonzero, we have + * released the lock and the system call should return. + */ +static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct rusage __user *ru) +{ + unsigned long state; + int retval; + + /* + * Try to move the task's state to DEAD + * only one thread is allowed to do this: + */ + state = xchg(&p->state, TASK_DEAD); + if (state != TASK_ZOMBIE) { + BUG_ON(state != TASK_DEAD); + return 0; + } + if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) + /* + * This can only happen in a race with a ptraced thread + * dying on another processor. + */ + return 0; + + /* + * Now we are sure this task is interesting, and no other + * thread can reap it because we set its state to TASK_DEAD. + */ + read_unlock(&tasklist_lock); + + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) { + if (p->signal->group_exit) + retval = put_user(p->signal->group_exit_code, stat_addr); + else + retval = put_user(p->exit_code, stat_addr); + } + if (retval) { + p->state = TASK_ZOMBIE; + return retval; + } + retval = p->pid; + if (p->real_parent != p->parent) { + write_lock_irq(&tasklist_lock); + /* Double-check with lock held. */ + if (p->real_parent != p->parent) { + __ptrace_unlink(p); + p->state = TASK_ZOMBIE; + /* If this is a detached thread, this is where it goes away. */ + if (p->exit_signal == -1) { + /* release_task takes the lock itself. */ + write_unlock_irq(&tasklist_lock); + release_task (p); + } + else { + do_notify_parent(p, p->exit_signal); + write_unlock_irq(&tasklist_lock); + } + p = NULL; + } + else + write_unlock_irq(&tasklist_lock); + } + if (p != NULL) + release_task(p); + BUG_ON(!retval); + return retval; +} + +/* + * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold + * read_lock(&tasklist_lock) on entry. If we return zero, we still hold + * the lock and this task is uninteresting. If we return nonzero, we have + * released the lock and the system call should return. 
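Illustrative aside, not part of the patch: eligible_child() above implements the classic waitpid() pid conventions (pid > 0 means that exact child, pid == 0 the caller's process group, pid == -1 any child, pid < -1 the process group -pid). A small user-space sketch of the pid < -1 case; the demo program and its setpgid() dance are assumptions for illustration only:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();
	int status;

	if (child < 0)
		return 1;
	if (child == 0) {
		setpgid(0, 0);			/* child moves into its own process group */
		_exit(0);
	}
	setpgid(child, child);			/* both sides set it to dodge the startup race */
	if (waitpid(-child, &status, 0) == child)	/* pid < -1: wait for pgrp 'child' */
		printf("reaped pid %d from process group %d\n", child, child);
	return 0;
}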
+ */ +static int wait_task_stopped(task_t *p, int delayed_group_leader, + unsigned int __user *stat_addr, + struct rusage __user *ru) +{ + int retval, exit_code; + + if (!p->exit_code) + return 0; + if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && + p->signal && p->signal->group_stop_count > 0) + /* + * A group stop is in progress and this is the group leader. + * We won't report until all threads have stopped. + */ + return 0; + + /* + * Now we are pretty sure this task is interesting. + * Make sure it doesn't get reaped out from under us while we + * give up the lock and then examine it below. We don't want to + * keep holding onto the tasklist_lock while we call getrusage and + * possibly take page faults for user memory. + */ + get_task_struct(p); + read_unlock(&tasklist_lock); + write_lock_irq(&tasklist_lock); + + /* + * This uses xchg to be atomic with the thread resuming and setting + * it. It must also be done with the write lock held to prevent a + * race with the TASK_ZOMBIE case. + */ + exit_code = xchg(&p->exit_code, 0); + if (unlikely(p->state > TASK_STOPPED)) { + /* + * The task resumed and then died. Let the next iteration + * catch it in TASK_ZOMBIE. Note that exit_code might + * already be zero here if it resumed and did _exit(0). + * The task itself is dead and won't touch exit_code again; + * other processors in this function are locked out. + */ + p->exit_code = exit_code; + exit_code = 0; + } + if (unlikely(exit_code == 0)) { + /* + * Another thread in this function got to it first, or it + * resumed, or it resumed and then died. + */ + write_unlock_irq(&tasklist_lock); + put_task_struct(p); + read_lock(&tasklist_lock); + return 0; + } + + /* move to end of parent's list to avoid starvation */ + remove_parent(p); + add_parent(p, p->parent); + + write_unlock_irq(&tasklist_lock); + + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + if (!retval && stat_addr) + retval = put_user((exit_code << 8) | 0x7f, stat_addr); + if (!retval) + retval = p->pid; + put_task_struct(p); + + BUG_ON(!retval); + return retval; +} + +asmlinkage long sys_wait4(pid_t pid,unsigned int __user *stat_addr, int options, struct rusage __user *ru) +{ + DECLARE_WAITQUEUE(wait, current); + struct task_struct *tsk; + int flag, retval; + + if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL)) + return -EINVAL; + + add_wait_queue(¤t->wait_chldexit,&wait); +repeat: + flag = 0; + current->state = TASK_INTERRUPTIBLE; + read_lock(&tasklist_lock); + tsk = current; + do { + struct task_struct *p; + struct list_head *_p; + int ret; + + list_for_each(_p,&tsk->children) { + p = list_entry(_p,struct task_struct,sibling); + + ret = eligible_child(pid, options, p); + if (!ret) + continue; + flag = 1; + + switch (p->state) { + case TASK_STOPPED: + if (!(options & WUNTRACED) && + !(p->ptrace & PT_PTRACED)) + continue; + retval = wait_task_stopped(p, ret == 2, + stat_addr, ru); + if (retval != 0) /* He released the lock. */ + goto end_wait4; + break; + case TASK_ZOMBIE: + /* + * Eligible but we cannot release it yet: + */ + if (ret == 2) + continue; + retval = wait_task_zombie(p, stat_addr, ru); + if (retval != 0) /* He released the lock. 
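Illustrative aside, not part of the patch: wait_task_stopped() above reports a stopped child as (exit_code << 8) | 0x7f, and sys_wait4() only considers TASK_STOPPED children when WUNTRACED is set (or the child is ptraced). A user-space sketch of what that looks like to the waiter (demo program only, not kernel code):

#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid < 0)
		return 1;
	if (pid == 0) {
		raise(SIGSTOP);			/* child stops itself */
		_exit(7);
	}
	waitpid(pid, &status, WUNTRACED);	/* kernel reports (SIGSTOP << 8) | 0x7f */
	if (WIFSTOPPED(status))
		printf("child stopped by signal %d (raw status 0x%x)\n",
		       WSTOPSIG(status), status);
	kill(pid, SIGCONT);
	waitpid(pid, &status, 0);		/* now reap the real exit */
	if (WIFEXITED(status))
		printf("child exited with %d\n", WEXITSTATUS(status));
	return 0;
}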
*/ + goto end_wait4; + break; + } + } + if (!flag) { + list_for_each (_p,&tsk->ptrace_children) { + p = list_entry(_p,struct task_struct,ptrace_list); + if (!eligible_child(pid, options, p)) + continue; + flag = 1; + break; + } + } + if (options & __WNOTHREAD) + break; + tsk = next_thread(tsk); + if (tsk->signal != current->signal) + BUG(); + } while (tsk != current); + read_unlock(&tasklist_lock); + if (flag) { + retval = 0; + if (options & WNOHANG) + goto end_wait4; + retval = -ERESTARTSYS; + if (signal_pending(current)) + goto end_wait4; + schedule(); + goto repeat; + } + retval = -ECHILD; +end_wait4: + current->state = TASK_RUNNING; + remove_wait_queue(¤t->wait_chldexit,&wait); + return retval; +} + +#ifdef __ARCH_WANT_SYS_WAITPID + +/* + * sys_waitpid() remains for compatibility. waitpid() should be + * implemented by calling sys_wait4() from libc.a. + */ +asmlinkage long sys_waitpid(pid_t pid, unsigned __user *stat_addr, int options) +{ + return sys_wait4(pid, stat_addr, options, NULL); +} + +#endif diff --git a/kernel/fork.c b/kernel/fork.c index 195394433..df85a9daa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -272,9 +271,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) ckrm_cb_newtask(tsk); /* One for us, one for whoever does the "release_task()" (usually parent) */ atomic_set(&tsk->usage,2); -#ifdef CONFIG_CKRM_RES_MEM - INIT_LIST_HEAD(&tsk->mm_peers); -#endif return tsk; } @@ -427,10 +423,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm) mm->ioctx_list = NULL; mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); mm->free_area_cache = TASK_UNMAPPED_BASE; -#ifdef CONFIG_CKRM_RES_MEM - INIT_LIST_HEAD(&mm->tasklist); - mm->peertask_lock = SPIN_LOCK_UNLOCKED; -#endif if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; @@ -452,10 +444,6 @@ struct mm_struct * mm_alloc(void) if (mm) { memset(mm, 0, sizeof(*mm)); mm = mm_init(mm); -#ifdef CONFIG_CKRM_RES_MEM - mm->memclass = GET_MEM_CLASS(current); - mem_class_get(mm->memclass); -#endif } return mm; } @@ -471,13 +459,6 @@ void fastcall __mmdrop(struct mm_struct *mm) mm_free_pgd(mm); destroy_context(mm); clr_vx_info(&mm->mm_vx_info); -#ifdef CONFIG_CKRM_RES_MEM - /* class can be null and mm's tasklist can be empty here */ - if (mm->memclass) { - mem_class_put(mm->memclass); - mm->memclass = NULL; - } -#endif free_mm(mm); } @@ -607,7 +588,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) good_mm: tsk->mm = mm; tsk->active_mm = mm; - ckrm_init_mm_to_task(mm, tsk); return 0; free_pt: @@ -1148,7 +1128,6 @@ struct task_struct *copy_process(unsigned long clone_flags, } else link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); - p->ioprio = current->ioprio; nr_threads++; /* p is copy of current */ vxi = p->vx_info; diff --git a/kernel/itimer.c b/kernel/itimer.c index 5bf6c881c..6918cb746 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -68,9 +68,7 @@ void it_real_fn(unsigned long __data) struct task_struct * p = (struct task_struct *) __data; unsigned long interval; - if (send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p)) - printk("*warning*: failed to send SIGALRM to %u\n", p->pid); - + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); interval = p->it_real_incr; if (interval) { if (interval > (unsigned long) LONG_MAX) diff --git a/kernel/panic.c b/kernel/panic.c index 37f3e82de..290bf0d1e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,8 +23,8 @@ 
#include #endif -int panic_timeout = 900; -int panic_on_oops = 1; +int panic_timeout; +int panic_on_oops; int tainted; void (*dump_function_ptr)(const char *, const struct pt_regs *) = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 20b09215e..b4512b77b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -163,21 +163,6 @@ EXPORT_SYMBOL(dump_oncpu); #define LOW_CREDIT(p) \ ((p)->interactive_credit < -CREDIT_LIMIT) -#ifdef CONFIG_CKRM_CPU_SCHEDULE -/* - * if belong to different class, compare class priority - * otherwise compare task priority - */ -#define TASK_PREEMPTS_CURR(p, rq) \ - ( ((p)->cpu_class != (rq)->curr->cpu_class) \ - && ((rq)->curr != (rq)->idle) && ((p) != (rq)->idle )) \ - ? class_preempts_curr((p),(rq)->curr) \ - : ((p)->prio < (rq)->curr->prio) -#else -#define TASK_PREEMPTS_CURR(p, rq) \ - ((p)->prio < (rq)->curr->prio) -#endif - /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] * to time slice values. @@ -193,71 +178,14 @@ EXPORT_SYMBOL(dump_oncpu); ((MAX_TIMESLICE - MIN_TIMESLICE) * \ (MAX_PRIO-1 - (p)->static_prio) / (MAX_USER_PRIO-1))) -unsigned int task_timeslice(task_t *p) +static unsigned int task_timeslice(task_t *p) { return BASE_TIMESLICE(p); } #define task_hot(p, now, sd) ((now) - (p)->timestamp < (sd)->cache_hot_time) -/* - * These are the runqueue data structures: - */ - -typedef struct runqueue runqueue_t; -#include -#include - -/* - * This is the main, per-CPU runqueue data structure. - * - * Locking rule: those places that want to lock multiple runqueues - * (such as the load balancing or the thread migration code), lock - * acquire operations must be ordered by ascending &runqueue. - */ -struct runqueue { - spinlock_t lock; - - /* - * nr_running and cpu_load should be in the same cacheline because - * remote CPUs use both these fields when doing load calculation. - */ - unsigned long nr_running; -#if defined(CONFIG_SMP) - unsigned long cpu_load; -#endif - unsigned long long nr_switches, nr_preempt; - unsigned long expired_timestamp, nr_uninterruptible; - unsigned long long timestamp_last_tick; - task_t *curr, *idle; - struct mm_struct *prev_mm; -#ifdef CONFIG_CKRM_CPU_SCHEDULE - struct classqueue_struct classqueue; - ckrm_load_t ckrm_load; -#else - prio_array_t *active, *expired, arrays[2]; -#endif - int best_expired_prio; - atomic_t nr_iowait; - -#ifdef CONFIG_SMP - struct sched_domain *sd; - - /* For active balancing */ - int active_balance; - int push_cpu; - - task_t *migration_thread; - struct list_head migration_queue; -#endif - -#ifdef CONFIG_VSERVER_HARDCPU - struct list_head hold_queue; - int idle_tokens; -#endif -}; - -static DEFINE_PER_CPU(struct runqueue, runqueues); +DEFINE_PER_CPU(struct runqueue, runqueues); #define for_each_domain(cpu, domain) \ for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) @@ -276,111 +204,121 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); # define task_running(rq, p) ((rq)->curr == (p)) #endif +#ifdef CONFIG_CKRM_CPU_SCHEDULE +#include +spinlock_t cvt_lock = SPIN_LOCK_UNLOCKED; +rwlock_t class_list_lock = RW_LOCK_UNLOCKED; +LIST_HEAD(active_cpu_classes); // list of active cpu classes; anchor +struct ckrm_cpu_class default_cpu_class_obj; + /* - * task_rq_lock - lock the runqueue a given task resides on and disable - * interrupts. Note the ordering: we can safely lookup the task_rq without - * explicitly disabling preemption. 
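Illustrative aside, not part of the patch: task_timeslice() above is just BASE_TIMESLICE(), a linear map from static priority to slice length. The sketch below evaluates it with the usual 2.6.8 constants, which are assumptions here since this hunk does not show them (HZ=1000, MIN_TIMESLICE=5 ticks, MAX_TIMESLICE=200 ticks, MAX_PRIO=140, MAX_USER_PRIO=40, static_prio = 120 + nice):

#include <stdio.h>

#define MIN_TIMESLICE	5			/* ticks; HZ=1000 assumed */
#define MAX_TIMESLICE	200
#define MAX_PRIO	140
#define MAX_USER_PRIO	40

static int base_timeslice(int static_prio)
{
	return MIN_TIMESLICE + (MAX_TIMESLICE - MIN_TIMESLICE) *
		(MAX_PRIO - 1 - static_prio) / (MAX_USER_PRIO - 1);
}

int main(void)
{
	int nice_levels[] = { -20, -10, 0, 10, 19 };
	unsigned i;

	for (i = 0; i < sizeof(nice_levels) / sizeof(nice_levels[0]); i++) {
		int nice = nice_levels[i];
		printf("nice %3d -> static_prio %3d -> %3d ms\n",
		       nice, 120 + nice, base_timeslice(120 + nice));
	}
	/* prints 200, 150, 100, 50 and 5 ms respectively */
	return 0;
}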
+ * the minimum CVT allowed is the base_cvt + * otherwise, it will starve others */ -static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +CVT_t get_min_cvt(int cpu) { - struct runqueue *rq; - -repeat_lock_task: - local_irq_save(*flags); - rq = task_rq(p); - spin_lock(&rq->lock); - if (unlikely(rq != task_rq(p))) { - spin_unlock_irqrestore(&rq->lock, *flags); - goto repeat_lock_task; - } - return rq; -} + cq_node_t *node; + struct ckrm_local_runqueue * lrq; + CVT_t min_cvt; -static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) -{ - spin_unlock_irqrestore(&rq->lock, *flags); + node = classqueue_get_head(bpt_queue(cpu)); + lrq = (node) ? class_list_entry(node) : NULL; + + if (lrq) + min_cvt = lrq->local_cvt; + else + min_cvt = 0; + + return min_cvt; } /* - * rq_lock - lock a given runqueue and disable interrupts. + * update the classueue base for all the runqueues + * TODO: we can only update half of the min_base to solve the movebackward issue */ -static runqueue_t *this_rq_lock(void) -{ - runqueue_t *rq; +static inline void check_update_class_base(int this_cpu) { + unsigned long min_base = 0xFFFFFFFF; + cq_node_t *node; + int i; - local_irq_disable(); - rq = this_rq(); - spin_lock(&rq->lock); + if (! cpu_online(this_cpu)) return; - return rq; + /* + * find the min_base across all the processors + */ + for_each_online_cpu(i) { + /* + * I should change it to directly use bpt->base + */ + node = classqueue_get_head(bpt_queue(i)); + if (node && node->prio < min_base) { + min_base = node->prio; + } + } + if (min_base != 0xFFFFFFFF) + classqueue_update_base(bpt_queue(this_cpu),min_base); } -static inline void rq_unlock(runqueue_t *rq) +static inline void ckrm_rebalance_tick(int j,int this_cpu) { - spin_unlock_irq(&rq->lock); +#ifdef CONFIG_CKRM_CPU_SCHEDULE + read_lock(&class_list_lock); + if (!(j % CVT_UPDATE_TICK)) + update_global_cvts(this_cpu); + +#define CKRM_BASE_UPDATE_RATE 400 + if (! (jiffies % CKRM_BASE_UPDATE_RATE)) + check_update_class_base(this_cpu); + + read_unlock(&class_list_lock); +#endif } -#ifdef CONFIG_CKRM_CPU_SCHEDULE -static inline ckrm_lrq_t *rq_get_next_class(struct runqueue *rq) +static inline struct ckrm_local_runqueue *rq_get_next_class(struct runqueue *rq) { cq_node_t *node = classqueue_get_head(&rq->classqueue); return ((node) ? class_list_entry(node) : NULL); } -/* - * return the cvt of the current running class - * if no current running class, return 0 - * assume cpu is valid (cpu_online(cpu) == 1) - */ -CVT_t get_local_cur_cvt(int cpu) -{ - ckrm_lrq_t * lrq = rq_get_next_class(cpu_rq(cpu)); - - if (lrq) - return lrq->local_cvt; - else - return 0; -} - static inline struct task_struct * rq_get_next_task(struct runqueue* rq) { prio_array_t *array; struct task_struct *next; - ckrm_lrq_t *queue; - int idx; + struct ckrm_local_runqueue *queue; int cpu = smp_processor_id(); - - // it is guaranteed be the ( rq->nr_running > 0 ) check in - // schedule that a task will be found. 
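Illustrative aside, not part of the patch: with CONFIG_CKRM_CPU_SCHEDULE the next task is picked in two levels, first the class at the head of the CVT-ordered classqueue (rq_get_next_class()), then the top-priority task inside that class's local runqueue (rq_get_next_task()). The standalone sketch below uses made-up toy types (toy_class, pick()) purely to show the shape of that selection:

#include <stdio.h>
#include <limits.h>

#define NCLASS 3
#define NPRIO  8

struct toy_class {
	unsigned long cvt;		/* plays the role of the class's local_cvt */
	int nr_running[NPRIO];		/* tasks queued per priority level */
};

static int pick(struct toy_class cls[NCLASS], int *prio_out)
{
	int best = -1, i, p;
	unsigned long best_cvt = ULONG_MAX;

	for (i = 0; i < NCLASS; i++) {		/* level 1: runnable class with smallest CVT */
		int has_work = 0;
		for (p = 0; p < NPRIO; p++)
			has_work |= cls[i].nr_running[p];
		if (has_work && cls[i].cvt < best_cvt) {
			best_cvt = cls[i].cvt;
			best = i;
		}
	}
	if (best < 0)
		return -1;			/* nothing runnable anywhere: idle */
	for (p = 0; p < NPRIO; p++)		/* level 2: top priority inside that class */
		if (cls[best].nr_running[p])
			break;
	*prio_out = p;
	return best;
}

int main(void)
{
	struct toy_class cls[NCLASS] = {
		{ .cvt = 500, .nr_running = { [3] = 1 } },
		{ .cvt = 200, .nr_running = { [5] = 2 } },
		{ .cvt = 900, .nr_running = { 0 } },
	};
	int prio = -1;
	int c = pick(cls, &prio);

	printf("next task comes from class %d at priority %d\n", c, prio);
	return 0;
}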
- + + next = rq->idle; retry_next_class: - queue = rq_get_next_class(rq); - // BUG_ON( !queue ); - - array = queue->active; - if (unlikely(!array->nr_active)) { - queue->active = queue->expired; - queue->expired = array; - queue->expired_timestamp = 0; + if ((queue = rq_get_next_class(rq))) { + array = queue->active; + //check switch active/expired queue + if (unlikely(!queue->active->nr_active)) { + queue->active = queue->expired; + queue->expired = array; + queue->expired_timestamp = 0; + + if (queue->active->nr_active) + set_top_priority(queue, + find_first_bit(queue->active->bitmap, MAX_PRIO)); + else { + classqueue_dequeue(queue->classqueue, + &queue->classqueue_linkobj); + cpu_demand_event(get_rq_local_stat(queue,cpu),CPU_DEMAND_DEQUEUE,0); + } - if (queue->active->nr_active) - set_top_priority(queue, - find_first_bit(queue->active->bitmap, MAX_PRIO)); - else { - classqueue_dequeue(queue->classqueue, - &queue->classqueue_linkobj); - cpu_demand_event(get_rq_local_stat(queue,cpu),CPU_DEMAND_DEQUEUE,0); + goto retry_next_class; } - goto retry_next_class; + BUG_ON(!queue->active->nr_active); + next = task_list_entry(array->queue[queue->top_priority].next); } - // BUG_ON(!array->nr_active); - - idx = queue->top_priority; - // BUG_ON (idx == MAX_PRIO); - next = task_list_entry(array->queue[idx].next); return next; } -#else /*! CONFIG_CKRM_CPU_SCHEDULE*/ + +static inline void rq_load_inc(runqueue_t *rq, struct task_struct *p) { rq->ckrm_cpu_load += cpu_class_weight(p->cpu_class); } +static inline void rq_load_dec(runqueue_t *rq, struct task_struct *p) { rq->ckrm_cpu_load -= cpu_class_weight(p->cpu_class); } + +#else /*CONFIG_CKRM_CPU_SCHEDULE*/ + static inline struct task_struct * rq_get_next_task(struct runqueue* rq) { prio_array_t *array; @@ -407,15 +345,61 @@ static inline struct task_struct * rq_get_next_task(struct runqueue* rq) static inline void class_enqueue_task(struct task_struct* p, prio_array_t *array) { } static inline void class_dequeue_task(struct task_struct* p, prio_array_t *array) { } static inline void init_cpu_classes(void) { } -#define rq_ckrm_load(rq) NULL -static inline void ckrm_sched_tick(int j,int this_cpu,void* name) {} +static inline void rq_load_inc(runqueue_t *rq, struct task_struct *p) { } +static inline void rq_load_dec(runqueue_t *rq, struct task_struct *p) { } #endif /* CONFIG_CKRM_CPU_SCHEDULE */ + +/* + * task_rq_lock - lock the runqueue a given task resides on and disable + * interrupts. Note the ordering: we can safely lookup the task_rq without + * explicitly disabling preemption. + */ +runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +{ + struct runqueue *rq; + +repeat_lock_task: + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock_irqrestore(&rq->lock, *flags); + goto repeat_lock_task; + } + return rq; +} + +void task_rq_unlock(runqueue_t *rq, unsigned long *flags) +{ + spin_unlock_irqrestore(&rq->lock, *flags); +} + +/* + * rq_lock - lock a given runqueue and disable interrupts. + */ +static runqueue_t *this_rq_lock(void) +{ + runqueue_t *rq; + + local_irq_disable(); + rq = this_rq(); + spin_lock(&rq->lock); + + return rq; +} + +static inline void rq_unlock(runqueue_t *rq) +{ + spin_unlock_irq(&rq->lock); +} + /* * Adding/removing a task to/from a priority array: */ -static void dequeue_task(struct task_struct *p, prio_array_t *array) +void dequeue_task(struct task_struct *p, prio_array_t *array) { + BUG_ON(! 
array); array->nr_active--; list_del(&p->run_list); if (list_empty(array->queue + p->prio)) @@ -423,7 +407,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array) class_dequeue_task(p,array); } -static void enqueue_task(struct task_struct *p, prio_array_t *array) +void enqueue_task(struct task_struct *p, prio_array_t *array) { list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -487,6 +471,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) { enqueue_task(p, rq_active(p,rq)); rq->nr_running++; + rq_load_inc(rq,p); } /* @@ -496,6 +481,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { enqueue_task_head(p, rq_active(p,rq)); rq->nr_running++; + rq_load_inc(rq,p); } static void recalc_task_prio(task_t *p, unsigned long long now) @@ -627,6 +613,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) static void deactivate_task(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; + rq_load_dec(rq,p); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); @@ -1000,10 +987,6 @@ void fastcall sched_fork(task_t *p) INIT_LIST_HEAD(&p->run_list); p->array = NULL; spin_lock_init(&p->switch_lock); -#ifdef CONFIG_CKRM_CPU_SCHEDULE - cpu_demand_event(&p->demand_stat,CPU_DEMAND_INIT,0); -#endif - #ifdef CONFIG_PREEMPT /* * During context-switch we hold precisely one spinlock, which @@ -1079,7 +1062,7 @@ void fastcall wake_up_forked_process(task_t * p) p->array = current->array; p->array->nr_active++; rq->nr_running++; - class_enqueue_task(p,p->array); + rq_load_inc(rq,p); } task_rq_unlock(rq, &flags); } @@ -1412,7 +1395,7 @@ lock_again: p->array = current->array; p->array->nr_active++; rq->nr_running++; - class_enqueue_task(p,p->array); + rq_load_inc(rq,p); } } else { /* Not the local CPU - must adjust timestamp */ @@ -1517,9 +1500,13 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, { dequeue_task(p, src_array); src_rq->nr_running--; + rq_load_dec(src_rq,p); + set_task_cpu(p, this_cpu); this_rq->nr_running++; + rq_load_inc(this_rq,p); enqueue_task(p, this_array); + p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + this_rq->timestamp_last_tick; /* @@ -1559,61 +1546,133 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, } #ifdef CONFIG_CKRM_CPU_SCHEDULE -static inline int ckrm_preferred_task(task_t *tmp,long min, long max, - int phase, enum idle_type idle) + +struct ckrm_cpu_class *find_unbalanced_class(int busiest_cpu, int this_cpu, unsigned long *cls_imbalance) { - long pressure = task_load(tmp); - - if (pressure > max) - return 0; + struct ckrm_cpu_class *most_unbalanced_class = NULL; + struct ckrm_cpu_class *clsptr; + int max_unbalance = 0; - if ((idle == NOT_IDLE) && ! 
phase && (pressure <= min)) - return 0; - return 1; + list_for_each_entry(clsptr,&active_cpu_classes,links) { + struct ckrm_local_runqueue *this_lrq = get_ckrm_local_runqueue(clsptr,this_cpu); + struct ckrm_local_runqueue *busiest_lrq = get_ckrm_local_runqueue(clsptr,busiest_cpu); + int unbalance_degree; + + unbalance_degree = (local_queue_nr_running(busiest_lrq) - local_queue_nr_running(this_lrq)) * cpu_class_weight(clsptr); + if (unbalance_degree >= *cls_imbalance) + continue; // already looked at this class + + if (unbalance_degree > max_unbalance) { + max_unbalance = unbalance_degree; + most_unbalanced_class = clsptr; + } + } + *cls_imbalance = max_unbalance; + return most_unbalanced_class; } + /* - * move tasks for a specic local class - * return number of tasks pulled + * find_busiest_queue - find the busiest runqueue among the cpus in cpumask. */ -static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, - runqueue_t *this_rq, - runqueue_t *busiest, - struct sched_domain *sd, - int this_cpu, - enum idle_type idle, - long* pressure_imbalance) +static int find_busiest_cpu(runqueue_t *this_rq, int this_cpu, int idle, + int *imbalance) { - prio_array_t *array, *dst_array; + int cpu_load, load, max_load, i, busiest_cpu; + runqueue_t *busiest, *rq_src; + + + /*Hubertus ... the concept of nr_running is replace with cpu_load */ + cpu_load = this_rq->ckrm_cpu_load; + + busiest = NULL; + busiest_cpu = -1; + + max_load = -1; + for_each_online_cpu(i) { + rq_src = cpu_rq(i); + load = rq_src->ckrm_cpu_load; + + if ((load > max_load) && (rq_src != this_rq)) { + busiest = rq_src; + busiest_cpu = i; + max_load = load; + } + } + + if (likely(!busiest)) + goto out; + + *imbalance = max_load - cpu_load; + + /* It needs an at least ~25% imbalance to trigger balancing. */ + if (!idle && ((*imbalance)*4 < max_load)) { + busiest = NULL; + goto out; + } + + double_lock_balance(this_rq, busiest); + /* + * Make sure nothing changed since we checked the + * runqueue length. + */ + if (busiest->ckrm_cpu_load <= cpu_load) { + spin_unlock(&busiest->lock); + busiest = NULL; + } +out: + return (busiest ? 
busiest_cpu : -1); +} + +static int load_balance(int this_cpu, runqueue_t *this_rq, + struct sched_domain *sd, enum idle_type idle) +{ + int imbalance, idx; + int busiest_cpu; + runqueue_t *busiest; + prio_array_t *array; struct list_head *head, *curr; task_t *tmp; - int idx; - int pulled = 0; - int phase = -1; - long pressure_min, pressure_max; - /*hzheng: magic : 90% balance is enough*/ - long balance_min = *pressure_imbalance / 10; -/* - * we don't want to migrate tasks that will reverse the balance - * or the tasks that make too small difference - */ -#define CKRM_BALANCE_MAX_RATIO 100 -#define CKRM_BALANCE_MIN_RATIO 1 - start: - phase ++; + struct ckrm_local_runqueue * busiest_local_queue; + struct ckrm_cpu_class *clsptr; + int weight; + unsigned long cls_imbalance; // so we can retry other classes + + // need to update global CVT based on local accumulated CVTs + read_lock(&class_list_lock); + busiest_cpu = find_busiest_cpu(this_rq, this_cpu, idle, &imbalance); + if (busiest_cpu == -1) + goto out; + + busiest = cpu_rq(busiest_cpu); + + /* + * We only want to steal a number of tasks equal to 1/2 the imbalance, + * otherwise we'll just shift the imbalance to the new queue: + */ + imbalance /= 2; + + /* now find class on that runqueue with largest inbalance */ + cls_imbalance = 0xFFFFFFFF; + + retry_other_class: + clsptr = find_unbalanced_class(busiest_cpu, this_cpu, &cls_imbalance); + if (!clsptr) + goto out_unlock; + + busiest_local_queue = get_ckrm_local_runqueue(clsptr,busiest_cpu); + weight = cpu_class_weight(clsptr); + /* * We first consider expired tasks. Those will likely not be * executed in the near future, and they are most likely to * be cache-cold, thus switching CPUs has the least effect * on them. */ - if (src_lrq->expired->nr_active) { - array = src_lrq->expired; - dst_array = dst_lrq->expired; - } else { - array = src_lrq->active; - dst_array = dst_lrq->active; - } + if (busiest_local_queue->expired->nr_active) + array = busiest_local_queue->expired; + else + array = busiest_local_queue->active; new_array: /* Start searching at priority 0: */ @@ -1624,15 +1683,11 @@ static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, else idx = find_next_bit(array->bitmap, MAX_PRIO, idx); if (idx >= MAX_PRIO) { - if (array == src_lrq->expired && src_lrq->active->nr_active) { - array = src_lrq->active; - dst_array = dst_lrq->active; + if (array == busiest_local_queue->expired && busiest_local_queue->active->nr_active) { + array = busiest_local_queue->active; goto new_array; } - if ((! phase) && (! 
pulled) && (idle != IDLE)) - goto start; //try again - else - goto out; //finished search for this lrq + goto retry_other_class; } head = array->queue + idx; @@ -1642,365 +1697,42 @@ static inline int ckrm_cls_move_tasks(ckrm_lrq_t* src_lrq,ckrm_lrq_t*dst_lrq, curr = curr->prev; - if (!can_migrate_task(tmp, busiest, this_cpu, sd, idle)) { + if (!can_migrate_task(tmp, busiest, this_cpu, sd,idle)) { if (curr != head) goto skip_queue; idx++; goto skip_bitmap; } - - pressure_min = *pressure_imbalance * CKRM_BALANCE_MIN_RATIO/100; - pressure_max = *pressure_imbalance * CKRM_BALANCE_MAX_RATIO/100; + pull_task(busiest, array, tmp, this_rq, rq_active(tmp,this_rq),this_cpu); /* - * skip the tasks that will reverse the balance too much + * tmp BUG FIX: hzheng + * load balancing can make the busiest local queue empty + * thus it should be removed from bpt */ - if (ckrm_preferred_task(tmp,pressure_min,pressure_max,phase,idle)) { - *pressure_imbalance -= task_load(tmp); - pull_task(busiest, array, tmp, - this_rq, dst_array, this_cpu); - pulled++; - - if (*pressure_imbalance <= balance_min) - goto out; + if (! local_queue_nr_running(busiest_local_queue)) { + classqueue_dequeue(busiest_local_queue->classqueue,&busiest_local_queue->classqueue_linkobj); + cpu_demand_event(get_rq_local_stat(busiest_local_queue,busiest_cpu),CPU_DEMAND_DEQUEUE,0); } - - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; - out: - return pulled; -} - -static inline long ckrm_rq_imbalance(runqueue_t *this_rq,runqueue_t *dst_rq) -{ - long imbalance; - /* - * make sure after balance, imbalance' > - imbalance/2 - * we don't want the imbalance be reversed too much - */ - imbalance = pid_get_pressure(rq_ckrm_load(dst_rq),0) - - pid_get_pressure(rq_ckrm_load(this_rq),1); - imbalance /= 2; - return imbalance; -} -/* - * try to balance the two runqueues - * - * Called with both runqueues locked. - * if move_tasks is called, it will try to move at least one task over - */ -static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, - unsigned long max_nr_move, struct sched_domain *sd, - enum idle_type idle) -{ - struct ckrm_cpu_class *clsptr,*vip_cls = NULL; - ckrm_lrq_t* src_lrq,*dst_lrq; - long pressure_imbalance, pressure_imbalance_old; - int src_cpu = task_cpu(busiest->curr); - struct list_head *list; - int pulled = 0; - long imbalance; - - imbalance = ckrm_rq_imbalance(this_rq,busiest); - - if ((idle == NOT_IDLE && imbalance <= 0) || busiest->nr_running <= 1) - goto out; - - //try to find the vip class - list_for_each_entry(clsptr,&active_cpu_classes,links) { - src_lrq = get_ckrm_lrq(clsptr,src_cpu); - - if (! lrq_nr_running(src_lrq)) - continue; - - if (! vip_cls || cpu_class_weight(vip_cls) < cpu_class_weight(clsptr) ) - { - vip_cls = clsptr; - } + imbalance -= weight; + if (!idle && (imbalance>0)) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; } - - /* - * do search from the most significant class - * hopefully, less tasks will be migrated this way - */ - clsptr = vip_cls; - - move_class: - if (! clsptr) - goto out; - - - src_lrq = get_ckrm_lrq(clsptr,src_cpu); - if (! lrq_nr_running(src_lrq)) - goto other_class; - - dst_lrq = get_ckrm_lrq(clsptr,this_cpu); - - //how much pressure for this class should be transferred - pressure_imbalance = src_lrq->lrq_load * imbalance/src_lrq->local_weight; - if (pulled && ! 
pressure_imbalance) - goto other_class; - - pressure_imbalance_old = pressure_imbalance; - - //move tasks - pulled += - ckrm_cls_move_tasks(src_lrq,dst_lrq, - this_rq, - busiest, - sd,this_cpu,idle, - &pressure_imbalance); - - /* - * hzheng: 2 is another magic number - * stop balancing if the imbalance is less than 25% of the orig - */ - if (pressure_imbalance <= (pressure_imbalance_old >> 2)) - goto out; - - //update imbalance - imbalance *= pressure_imbalance / pressure_imbalance_old; - other_class: - //who is next? - list = clsptr->links.next; - if (list == &active_cpu_classes) - list = list->next; - clsptr = list_entry(list, typeof(*clsptr), links); - if (clsptr != vip_cls) - goto move_class; + out_unlock: + spin_unlock(&busiest->lock); out: - return pulled; -} - -/** - * ckrm_check_balance - is load balancing necessary? - * return 0 if load balancing is not necessary - * otherwise return the average load of the system - * also, update nr_group - * - * heuristics: - * no load balancing if it's load is over average - * no load balancing if it's load is far more than the min - * task: - * read the status of all the runqueues - */ -static unsigned long ckrm_check_balance(struct sched_domain *sd, int this_cpu, - enum idle_type idle, int* nr_group) -{ - struct sched_group *group = sd->groups; - unsigned long min_load, max_load, avg_load; - unsigned long total_load, this_load, total_pwr; - - max_load = this_load = total_load = total_pwr = 0; - min_load = 0xFFFFFFFF; - *nr_group = 0; - - do { - cpumask_t tmp; - unsigned long load; - int local_group; - int i, nr_cpus = 0; - - /* Tally up the load of all CPUs in the group */ - cpus_and(tmp, group->cpumask, cpu_online_map); - if (unlikely(cpus_empty(tmp))) - goto nextgroup; - - avg_load = 0; - local_group = cpu_isset(this_cpu, group->cpumask); - - for_each_cpu_mask(i, tmp) { - load = pid_get_pressure(rq_ckrm_load(cpu_rq(i)),local_group); - nr_cpus++; - avg_load += load; - } - - if (!nr_cpus) - goto nextgroup; - - total_load += avg_load; - total_pwr += group->cpu_power; - - /* Adjust by relative CPU power of the group */ - avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; - - if (local_group) { - this_load = avg_load; - goto nextgroup; - } else if (avg_load > max_load) { - max_load = avg_load; - } - if (avg_load < min_load) { - min_load = avg_load; - } -nextgroup: - group = group->next; - *nr_group = *nr_group + 1; - } while (group != sd->groups); - - if (!max_load || this_load >= max_load) - goto out_balanced; - - avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; - - /* hzheng: debugging: 105 is a magic number - * 100*max_load <= sd->imbalance_pct*this_load) - * should use imbalance_pct instead - */ - if (this_load > avg_load - || 100*max_load < 105*this_load - || 100*min_load < 70*this_load - ) - goto out_balanced; - - return avg_load; - out_balanced: - return 0; -} - -/** - * any group that has above average load is considered busy - * find the busiest queue from any of busy group - */ -static runqueue_t * -ckrm_find_busy_queue(struct sched_domain *sd, int this_cpu, - unsigned long avg_load, enum idle_type idle, - int nr_group) -{ - struct sched_group *group; - runqueue_t * busiest=NULL; - unsigned long rand; - - group = sd->groups; - rand = get_ckrm_rand(nr_group); - nr_group = 0; - - do { - unsigned long load,total_load,max_load; - cpumask_t tmp; - int i; - runqueue_t * grp_busiest; - - cpus_and(tmp, group->cpumask, cpu_online_map); - if (unlikely(cpus_empty(tmp))) - goto find_nextgroup; - - total_load = 0; - max_load = 0; - 
grp_busiest = NULL; - for_each_cpu_mask(i, tmp) { - load = pid_get_pressure(rq_ckrm_load(cpu_rq(i)),0); - total_load += load; - if (load > max_load) { - max_load = load; - grp_busiest = cpu_rq(i); - } - } - - total_load = (total_load * SCHED_LOAD_SCALE) / group->cpu_power; - if (total_load > avg_load) { - busiest = grp_busiest; - if (nr_group >= rand) - break; - } - find_nextgroup: - group = group->next; - nr_group ++; - } while (group != sd->groups); - - return busiest; -} - -/** - * load_balance - pressure based load balancing algorithm used by ckrm - */ -static int ckrm_load_balance(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd, enum idle_type idle) -{ - runqueue_t *busiest; - unsigned long avg_load; - int nr_moved,nr_group; - - avg_load = ckrm_check_balance(sd, this_cpu, idle, &nr_group); - if (! avg_load) - goto out_balanced; - - busiest = ckrm_find_busy_queue(sd,this_cpu,avg_load,idle,nr_group); - if (! busiest) - goto out_balanced; - /* - * This should be "impossible", but since load - * balancing is inherently racy and statistical, - * it could happen in theory. - */ - if (unlikely(busiest == this_rq)) { - WARN_ON(1); - goto out_balanced; - } - - nr_moved = 0; - if (busiest->nr_running > 1) { - /* - * Attempt to move tasks. If find_busiest_group has found - * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is - * correctly treated as an imbalance. - */ - double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - 0,sd, idle); - spin_unlock(&busiest->lock); - if (nr_moved) { - adjust_local_weight(); - } - } - - if (!nr_moved) - sd->nr_balance_failed ++; - else - sd->nr_balance_failed = 0; - - /* We were unbalanced, so reset the balancing interval */ - sd->balance_interval = sd->min_interval; - - return nr_moved; - -out_balanced: - /* tune up the balancing interval */ - if (sd->balance_interval < sd->max_interval) - sd->balance_interval *= 2; - + read_unlock(&class_list_lock); return 0; } -/* - * this_rq->lock is already held - */ -static inline int load_balance_newidle(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd) -{ - int ret; - read_lock(&class_list_lock); - ret = ckrm_load_balance(this_cpu,this_rq,sd,NEWLY_IDLE); - read_unlock(&class_list_lock); - return ret; -} -static inline int load_balance(int this_cpu, runqueue_t *this_rq, - struct sched_domain *sd, enum idle_type idle) +static inline void idle_balance(int this_cpu, runqueue_t *this_rq) { - int ret; - - spin_lock(&this_rq->lock); - read_lock(&class_list_lock); - ret= ckrm_load_balance(this_cpu,this_rq,sd,NEWLY_IDLE); - read_unlock(&class_list_lock); - spin_unlock(&this_rq->lock); - return ret; } -#else /*! CONFIG_CKRM_CPU_SCHEDULE */ +#else /* CONFIG_CKRM_CPU_SCHEDULE */ /* * move_tasks tries to move up to max_nr_move tasks from busiest to this_rq, * as part of a balancing operation within "domain". Returns the number of @@ -2365,8 +2097,6 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, out: return nr_moved; } -#endif /* CONFIG_CKRM_CPU_SCHEDULE*/ - /* * idle_balance is called by schedule() if this_cpu is about to become @@ -2452,6 +2182,7 @@ next_group: group = group->next; } while (group != sd->groups); } +#endif /* CONFIG_CKRM_CPU_SCHEDULE*/ /* * rebalance_tick will get called every timer tick, on every CPU. 
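Illustrative aside, not part of the patch: the CKRM find_busiest_cpu()/load_balance() pair above compares ckrm_cpu_load values, requires roughly a 25% gap before acting (the imbalance * 4 < max_load test bails out), and then moves only half of the gap so the imbalance is not simply shifted onto the other queue. A worked example of that arithmetic with made-up load numbers:

#include <stdio.h>

int main(void)
{
	int this_load = 60, max_load = 100, idle = 0;
	int imbalance = max_load - this_load;		/* 40 */

	if (!idle && imbalance * 4 < max_load) {	/* 160 < 100 ? no, so balance */
		printf("below the ~25%% threshold, no balancing\n");
		return 0;
	}
	imbalance /= 2;					/* steal only half the gap */
	printf("move about %d units of load from the busiest cpu\n", imbalance);
	return 0;
}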
@@ -2472,6 +2203,8 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, unsigned long j = jiffies + CPU_OFFSET(this_cpu); struct sched_domain *sd; + ckrm_rebalance_tick(j,this_cpu); + /* Update our load */ old_load = this_rq->cpu_load; this_load = this_rq->nr_running * SCHED_LOAD_SCALE; @@ -2510,7 +2243,9 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq, */ static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) { + ckrm_rebalance_tick(jiffies,cpu); } + static inline void idle_balance(int cpu, runqueue_t *rq) { } @@ -2531,7 +2266,8 @@ static inline int wake_priority_sleeper(runqueue_t *rq) return 0; } -DEFINE_PER_CPU(struct kernel_stat, kstat); +DEFINE_PER_CPU(struct kernel_stat, kstat) = { { 0 } }; + EXPORT_PER_CPU_SYMBOL(kstat); /* @@ -2555,7 +2291,7 @@ EXPORT_PER_CPU_SYMBOL(kstat); #define EXPIRED_STARVING(rq) \ (STARVATION_LIMIT && ((rq)->expired_timestamp && \ (jiffies - (rq)->expired_timestamp >= \ - STARVATION_LIMIT * (lrq_nr_running(rq)) + 1))) + STARVATION_LIMIT * (local_queue_nr_running(rq)) + 1))) #endif /* @@ -2587,10 +2323,8 @@ void scheduler_tick(int user_ticks, int sys_ticks) } if (p == rq->idle) { -#ifdef CONFIG_VSERVER_HARDCPU if (!--rq->idle_tokens && !list_empty(&rq->hold_queue)) set_need_resched(); -#endif if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait += sys_ticks; @@ -2598,7 +2332,6 @@ void scheduler_tick(int user_ticks, int sys_ticks) cpustat->idle += sys_ticks; if (wake_priority_sleeper(rq)) goto out; - ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq)); rebalance_tick(cpu, rq, IDLE); return; } @@ -2637,10 +2370,11 @@ void scheduler_tick(int user_ticks, int sys_ticks) } goto out_unlock; } +#warning MEF PLANETLAB: "if (vx_need_resched(p)) was if (!--p->time_slice) */" if (vx_need_resched(p)) { #ifdef CONFIG_CKRM_CPU_SCHEDULE /* Hubertus ... we can abstract this out */ - ckrm_lrq_t* rq = get_task_lrq(p); + struct ckrm_local_runqueue* rq = get_task_class_queue(p); #endif dequeue_task(p, rq->active); set_tsk_need_resched(p); @@ -2687,7 +2421,6 @@ void scheduler_tick(int user_ticks, int sys_ticks) out_unlock: spin_unlock(&rq->lock); out: - ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq)); rebalance_tick(cpu, rq, NOT_IDLE); } @@ -2837,19 +2570,6 @@ need_resched: spin_lock_irq(&rq->lock); -#ifdef CONFIG_CKRM_CPU_SCHEDULE - if (prev != rq->idle) { - unsigned long long run = now - prev->timestamp; - ckrm_lrq_t * lrq = get_task_lrq(prev); - - lrq->lrq_load -= task_load(prev); - cpu_demand_event(&prev->demand_stat,CPU_DEMAND_DESCHEDULE,run); - lrq->lrq_load += task_load(prev); - - cpu_demand_event(get_task_lrq_stat(prev),CPU_DEMAND_DESCHEDULE,run); - update_local_cvt(prev, run); - } -#endif /* * if entering off of a kernel preemption go straight * to picking the next task. 
@@ -2898,17 +2618,17 @@ pick_next: #endif if (unlikely(!rq->nr_running)) { idle_balance(cpu, rq); - if (!rq->nr_running) { - next = rq->idle; -#ifdef CONFIG_CKRM_CPU_SCHEDULE - rq->expired_timestamp = 0; -#endif - wake_sleeping_dependent(cpu, rq); - goto switch_tasks; - } + if (!rq->nr_running) { + next = rq->idle; + rq->expired_timestamp = 0; + wake_sleeping_dependent(cpu, rq); + goto switch_tasks; + } } next = rq_get_next_task(rq); + if (next == rq->idle) + goto switch_tasks; if (dependent_sleeper(cpu, rq, next)) { next = rq->idle; @@ -2950,6 +2670,14 @@ switch_tasks: rq->nr_preempt++; RCU_qsctr(task_cpu(prev))++; +#ifdef CONFIG_CKRM_CPU_SCHEDULE + if (prev != rq->idle) { + unsigned long long run = now - prev->timestamp; + cpu_demand_event(get_task_local_stat(prev),CPU_DEMAND_DESCHEDULE,run); + update_local_cvt(prev, run); + } +#endif + prev->sleep_avg -= run_time; if ((long)prev->sleep_avg <= 0) { prev->sleep_avg = 0; @@ -2992,6 +2720,7 @@ switch_tasks: } EXPORT_SYMBOL(schedule); + #ifdef CONFIG_PREEMPT /* * this is is the entry point to schedule() from in-kernel preemption @@ -4092,6 +3821,7 @@ static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto out; + set_task_cpu(p, dest_cpu); if (p->array) { /* * Sync timestamp with rq_dest's before activating. @@ -4102,12 +3832,10 @@ static void __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) p->timestamp = p->timestamp - rq_src->timestamp_last_tick + rq_dest->timestamp_last_tick; deactivate_task(p, rq_src); - set_task_cpu(p, dest_cpu); activate_task(p, rq_dest, 0); if (TASK_PREEMPTS_CURR(p, rq_dest)) resched_task(rq_dest->curr); - } else - set_task_cpu(p, dest_cpu); + } out: double_rq_unlock(rq_src, rq_dest); @@ -4142,7 +3870,9 @@ static int migration_thread(void * data) } if (rq->active_balance) { +#ifndef CONFIG_CKRM_CPU_SCHEDULE active_load_balance(rq, cpu); +#endif rq->active_balance = 0; } @@ -4617,6 +4347,9 @@ void __init sched_init(void) { runqueue_t *rq; int i; +#ifndef CONFIG_CKRM_CPU_SCHEDULE + int j, k; +#endif #ifdef CONFIG_SMP /* Set up an initial dummy domain for early boot */ @@ -4635,50 +4368,46 @@ void __init sched_init(void) sched_group_init.next = &sched_group_init; sched_group_init.cpu_power = SCHED_LOAD_SCALE; #endif + init_cpu_classes(); for (i = 0; i < NR_CPUS; i++) { #ifndef CONFIG_CKRM_CPU_SCHEDULE - int j, k; prio_array_t *array; - +#endif rq = cpu_rq(i); spin_lock_init(&rq->lock); - for (j = 0; j < 2; j++) { - array = rq->arrays + j; - for (k = 0; k < MAX_PRIO; k++) { - INIT_LIST_HEAD(array->queue + k); - __clear_bit(k, array->bitmap); - } - // delimiter for bitsearch - __set_bit(MAX_PRIO, array->bitmap); - } - +#ifndef CONFIG_CKRM_CPU_SCHEDULE rq->active = rq->arrays; rq->expired = rq->arrays + 1; #else - rq = cpu_rq(i); - spin_lock_init(&rq->lock); + rq->ckrm_cpu_load = 0; #endif - rq->best_expired_prio = MAX_PRIO; #ifdef CONFIG_SMP rq->sd = &sched_domain_init; rq->cpu_load = 0; -#ifdef CONFIG_CKRM_CPU_SCHEDULE - ckrm_load_init(rq_ckrm_load(rq)); -#endif rq->active_balance = 0; rq->push_cpu = 0; rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); #endif -#ifdef CONFIG_VSERVER_HARDCPU INIT_LIST_HEAD(&rq->hold_queue); -#endif atomic_set(&rq->nr_iowait, 0); + +#ifndef CONFIG_CKRM_CPU_SCHEDULE + for (j = 0; j < 2; j++) { + array = rq->arrays + j; + for (k = 0; k < MAX_PRIO; k++) { + INIT_LIST_HEAD(array->queue + k); + __clear_bit(k, array->bitmap); + } + // delimiter for bitsearch + __set_bit(MAX_PRIO, array->bitmap); + 
} +#endif } /* @@ -4690,8 +4419,7 @@ void __init sched_init(void) rq->idle = current; set_task_cpu(current, smp_processor_id()); #ifdef CONFIG_CKRM_CPU_SCHEDULE - cpu_demand_event(&(current)->demand_stat,CPU_DEMAND_INIT,0); - current->cpu_class = get_default_cpu_class(); + current->cpu_class = default_cpu_class; current->array = NULL; #endif wake_up_forked_process(current); @@ -4785,30 +4513,10 @@ EXPORT_SYMBOL(task_running_sys); #ifdef CONFIG_CKRM_CPU_SCHEDULE /** * return the classqueue object of a certain processor + * Note: not supposed to be used in performance sensitive functions */ struct classqueue_struct * get_cpu_classqueue(int cpu) { return (& (cpu_rq(cpu)->classqueue) ); } - -/** - * _ckrm_cpu_change_class - change the class of a task - */ -void _ckrm_cpu_change_class(task_t *tsk, struct ckrm_cpu_class *newcls) -{ - prio_array_t *array; - struct runqueue *rq; - unsigned long flags; - - rq = task_rq_lock(tsk,&flags); - array = tsk->array; - if (array) { - dequeue_task(tsk,array); - tsk->cpu_class = newcls; - enqueue_task(tsk,rq_active(tsk,rq)); - } else - tsk->cpu_class = newcls; - - task_rq_unlock(rq,&flags); -} #endif diff --git a/kernel/signal.c b/kernel/signal.c index e4282d2de..b3574b096 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -603,28 +603,17 @@ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { int error = -EINVAL; - int user; - if (sig < 0 || sig > _NSIG) return error; - - user = (!info || - (info != SEND_SIG_PRIV && - info != SEND_SIG_FORCED && - SI_FROMUSER(info))); - error = -EPERM; - if (user && (sig != SIGCONT || - current->signal->session != t->signal->session) + if ((!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))) + && ((sig != SIGCONT) || + (current->signal->session != t->signal->session)) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) return error; - - error = -ESRCH; - if (user && !vx_check(vx_task_xid(t), VX_ADMIN|VX_IDENT)) - return error; - return security_task_kill(t, info, sig); } @@ -1066,6 +1055,9 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) unsigned long flags; int ret; + if (!vx_check(vx_task_xid(p), VX_ADMIN|VX_WATCH|VX_IDENT)) + return -ESRCH; + ret = check_kill_permission(sig, info, p); if (!ret && sig && p->sighand) { spin_lock_irqsave(&p->sighand->siglock, flags); diff --git a/kernel/vserver/inode.c b/kernel/vserver/inode.c index 3e8120bd3..dda881895 100644 --- a/kernel/vserver/inode.c +++ b/kernel/vserver/inode.c @@ -170,37 +170,6 @@ int vc_set_iattr(uint32_t id, void __user *data) return ret; } -int vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg) -{ - void __user *data = (void __user *)arg; - struct vcmd_ctx_iattr_v1 vc_data; - int ret; - - /* - * I don't think we need any dget/dput pairs in here as long as - * this function is always called from sys_ioctl i.e., de is - * a field of a struct file that is guaranteed not to be freed. 
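Illustrative aside, not part of the patch: the check_kill_permission() hunk above chains XORs of uids; a ^ b is non-zero exactly when a != b, so the whole chain reads "deny unless the sender's euid/uid matches the target's suid/uid somewhere, or the sender has CAP_KILL". A tiny host-side sketch (uid_pairs_all_differ() is a made-up helper name):

#include <stdio.h>

static int uid_pairs_all_differ(unsigned e, unsigned u, unsigned ts, unsigned tu)
{
	/* mirrors (euid ^ suid) && (euid ^ uid) && (uid ^ suid) && (uid ^ uid) */
	return (e ^ ts) && (e ^ tu) && (u ^ ts) && (u ^ tu);
}

int main(void)
{
	/* sender's euid equals the target's uid: one pair matches, so allowed */
	printf("deny? %d\n", uid_pairs_all_differ(1000, 1000, 0, 1000));	/* 0 */
	/* completely unrelated users: all four pairs differ, so denied */
	printf("deny? %d\n", uid_pairs_all_differ(1000, 1000, 0, 0));		/* 1 */
	return 0;
}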
- */ - if (cmd == FIOC_SETIATTR) { - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - if (copy_from_user (&vc_data, data, sizeof(vc_data))) - return -EFAULT; - ret = __vc_set_iattr(de, - &vc_data.xid, &vc_data.flags, &vc_data.mask); - } - else { - if (!vx_check(0, VX_ADMIN)) - return -ENOSYS; - ret = __vc_get_iattr(de->d_inode, - &vc_data.xid, &vc_data.flags, &vc_data.mask); - } - - if (!ret && copy_to_user (data, &vc_data, sizeof(vc_data))) - ret = -EFAULT; - return ret; -} - #ifdef CONFIG_VSERVER_LEGACY #include diff --git a/kernel/vserver/sysctl.c b/kernel/vserver/sysctl.c index e1f2cacc7..298c62f18 100644 --- a/kernel/vserver/sysctl.c +++ b/kernel/vserver/sysctl.c @@ -200,6 +200,4 @@ static ctl_table vserver_table[] = { EXPORT_SYMBOL_GPL(vx_debug_dlim); -EXPORT_SYMBOL_GPL(vx_debug_nid); -EXPORT_SYMBOL_GPL(vx_debug_xid); diff --git a/mm/Makefile b/mm/Makefile index 60fbbce51..b7866b1a5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -7,13 +7,11 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ shmem.o vmalloc.o -obj-y := bootmem.o filemap.o mempool.o fadvise.o \ +obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o prio_tree.o \ readahead.o slab.o swap.o truncate.o vmscan.o \ $(mmu-y) -obj-$(CONFIG_OOM_KILL) += oom_kill.o -obj-$(CONFIG_OOM_PANIC) += oom_panic.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_X86_4G) += usercopy.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o diff --git a/mm/memory.c b/mm/memory.c index 0dfb74060..6c44ecca0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1650,9 +1650,8 @@ retry: */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { - if (!PageReserved(new_page)) - //++mm->rss; - vx_rsspages_inc(mm); + if (!PageReserved(new_page)) + ++mm->rss; flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff --git a/mm/oom_panic.c b/mm/oom_panic.c deleted file mode 100644 index b782934ac..000000000 --- a/mm/oom_panic.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Just panic() instead of the default behavior of selecting processes - * for death. - * - * Based on - * Modular OOM handlers for 2.6.4 (C) 2003,2004 Tvrtko A. Ursulin - * and - * linux/mm/oom_kill.c (C) 1998,2000 Rik van Riel. - * - * Mark Huang - * - * $Id$ - */ - -#include -#include -#include - -/** - * out_of_memory - is the system out of memory? - */ -void out_of_memory(int gfp_mask) -{ - /* - * oom_lock protects out_of_memory()'s static variables. - * It's a global lock; this is not performance-critical. - */ - static spinlock_t oom_lock = SPIN_LOCK_UNLOCKED; - static unsigned long count; - - spin_lock(&oom_lock); - - /* - * If we have gotten only a few failures, - * we're not really oom. - */ - if (++count < 10) - goto out_unlock; - - /* - * Ok, really out of memory. Panic. 
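Illustrative aside, not part of the patch: the removed mm/oom_panic.c simply tolerated a handful of allocation failures and then panicked instead of selecting processes to kill. A host-side sketch of that policy (out_of_memory_demo() is a made-up stand-in for the removed function):

#include <stdio.h>
#include <stdlib.h>

static void out_of_memory_demo(void)
{
	static unsigned long count;

	if (++count < 10)		/* a few failures are not "really" OOM */
		return;
	fprintf(stderr, "oom: giving up after %lu failures\n", count);
	abort();			/* stands in for panic("Out Of Memory") */
}

int main(void)
{
	int i;

	for (i = 0; i < 12; i++)
		out_of_memory_demo();	/* aborts on the 10th call */
	return 0;
}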
- */ - - printk("oom-killer: gfp_mask=0x%x\n", gfp_mask); - show_free_areas(); - - panic("Out Of Memory"); - -out_unlock: - spin_unlock(&oom_lock); -} diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 675b061b7..152299c39 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -33,7 +33,6 @@ #include #include #include -#include #include @@ -277,7 +276,6 @@ free_pages_bulk(struct zone *zone, int count, /* have to delete it as __free_pages_bulk list manipulates */ list_del(&page->lru); __free_pages_bulk(page, base, zone, area, order); - ckrm_clear_page_class(page); ret++; } spin_unlock_irqrestore(&zone->lock, flags); @@ -624,10 +622,6 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, might_sleep_if(wait); - if (!ckrm_class_limit_ok((GET_MEM_CLASS(current)))) { - return NULL; - } - zones = zonelist->zones; /* the list of zones suitable for gfp_mask */ if (zones[0] == NULL) /* no zones in the zonelist */ return NULL; @@ -757,7 +751,6 @@ nopage: return NULL; got_pg: kernel_map_pages(page, 1 << order, 1); - ckrm_set_pages_class(page, 1 << order, GET_MEM_CLASS(current)); return page; } diff --git a/mm/vmscan.c b/mm/vmscan.c index e01d5c98d..95e02701a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -37,13 +37,6 @@ #include #include -#include - -#ifndef AT_LIMIT_SUPPORT -#warning "ckrm_at_limit disabled due to problems with memory hog tests -- seting ckrm_shrink_list_empty to true" -#undef ckrm_shrink_list_empty -#define ckrm_shrink_list_empty() (1) -#endif /* possible outcome of pageout() */ typedef enum { @@ -78,9 +71,6 @@ struct scan_control { /* This context's GFP mask */ unsigned int gfp_mask; - /* Flag used by CKRM */ - unsigned int ckrm_flags; - int may_writepage; }; @@ -559,23 +549,19 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc) { LIST_HEAD(page_list); struct pagevec pvec; - int max_scan = sc->nr_to_scan, nr_pass; - unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; + int max_scan = sc->nr_to_scan; pagevec_init(&pvec, 1); lru_add_drain(); spin_lock_irq(&zone->lru_lock); -redo: - ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); - nr_pass = zone->nr_inactive; while (max_scan > 0) { struct page *page; int nr_taken = 0; int nr_scan = 0; int nr_freed; - while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX && + while (nr_scan++ < SWAP_CLUSTER_MAX && !list_empty(&zone->inactive_list)) { page = lru_to_page(&zone->inactive_list); @@ -593,25 +579,15 @@ redo: SetPageLRU(page); list_add(&page->lru, &zone->inactive_list); continue; - } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { - __put_page(page); - SetPageLRU(page); -#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE - list_add_tail(&page->lru, &zone->inactive_list); -#else - list_add(&page->lru, &zone->inactive_list); -#endif - continue; } list_add(&page->lru, &page_list); - ckrm_mem_dec_inactive(page); nr_taken++; } zone->nr_inactive -= nr_taken; zone->pages_scanned += nr_taken; spin_unlock_irq(&zone->lru_lock); - if ((bit_flag == 0) && (nr_taken == 0)) + if (nr_taken == 0) goto done; max_scan -= nr_scan; @@ -644,9 +620,6 @@ redo: spin_lock_irq(&zone->lru_lock); } } - if (ckrm_flags && (nr_pass <= 0)) { - goto redo; - } } spin_unlock_irq(&zone->lru_lock); done: @@ -686,17 +659,11 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc) long mapped_ratio; long distress; long swap_tendency; - unsigned int ckrm_flags = sc->ckrm_flags, bit_flag; - int nr_pass; lru_add_drain(); pgmoved = 0; spin_lock_irq(&zone->lru_lock); -redo: - ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag); - nr_pass = zone->nr_active; - while 
(pgscanned < nr_pages && !list_empty(&zone->active_list) && - nr_pass) { + while (pgscanned < nr_pages && !list_empty(&zone->active_list)) { page = lru_to_page(&zone->active_list); prefetchw_prev_lru_page(page, &zone->active_list, flags); if (!TestClearPageLRU(page)) @@ -712,24 +679,11 @@ redo: __put_page(page); SetPageLRU(page); list_add(&page->lru, &zone->active_list); - pgscanned++; - } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) { - __put_page(page); - SetPageLRU(page); -#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE - list_add_tail(&page->lru, &zone->active_list); -#else - list_add(&page->lru, &zone->active_list); -#endif } else { list_add(&page->lru, &l_hold); - ckrm_mem_dec_active(page); pgmoved++; - pgscanned++; - } - if (!--nr_pass && ckrm_flags) { - goto redo; } + pgscanned++; } zone->nr_active -= pgmoved; spin_unlock_irq(&zone->lru_lock); @@ -804,7 +758,6 @@ redo: if (!TestClearPageActive(page)) BUG(); list_move(&page->lru, &zone->inactive_list); - ckrm_mem_inc_inactive(page); pgmoved++; if (!pagevec_add(&pvec, page)) { zone->nr_inactive += pgmoved; @@ -833,7 +786,6 @@ redo: BUG(); BUG_ON(!PageActive(page)); list_move(&page->lru, &zone->active_list); - ckrm_mem_inc_active(page); pgmoved++; if (!pagevec_add(&pvec, page)) { zone->nr_active += pgmoved; @@ -881,7 +833,6 @@ shrink_zone(struct zone *zone, struct scan_control *sc) sc->nr_to_reclaim = SWAP_CLUSTER_MAX; while (nr_active || nr_inactive) { - sc->ckrm_flags = ckrm_setup_reclamation(); if (nr_active) { sc->nr_to_scan = min(nr_active, (unsigned long)SWAP_CLUSTER_MAX); @@ -897,118 +848,9 @@ shrink_zone(struct zone *zone, struct scan_control *sc) if (sc->nr_to_reclaim <= 0) break; } - ckrm_teardown_reclamation(); } } -#if defined(CONFIG_CKRM_RES_MEM) && defined(AT_LIMIT_SUPPORT) -// This function needs to be given more thought. -// Shrink the class to be at 90% of its limit -static void -ckrm_shrink_class(ckrm_mem_res_t *cls) -{ - struct scan_control sc; - struct zone *zone; - int zindex = 0, active_credit = 0, inactive_credit = 0; - - if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically - // if it is already set somebody is working on it. so... 
leave - return; - } - sc.nr_mapped = read_page_state(nr_mapped); - sc.nr_scanned = 0; - sc.ckrm_flags = ckrm_get_reclaim_flags(cls); - sc.nr_reclaimed = 0; - sc.priority = 0; // always very high priority - - for_each_zone(zone) { - int zone_total, zone_limit, active_limit, inactive_limit; - int active_over, inactive_over; - unsigned long nr_active, nr_inactive; - u64 temp; - - zone->temp_priority = zone->prev_priority; - zone->prev_priority = sc.priority; - - zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages; - - temp = (u64) cls->pg_limit * zone_total; - do_div(temp, ckrm_tot_lru_pages); - zone_limit = (int) temp; - active_limit = (6 * zone_limit) / 10; // 2/3rd in active list - inactive_limit = (3 * zone_limit) / 10; // 1/3rd in inactive list - - active_over = cls->nr_active[zindex] - active_limit + active_credit; - inactive_over = active_over + - (cls->nr_inactive[zindex] - inactive_limit) + inactive_credit; - - if (active_over > 0) { - zone->nr_scan_active += active_over + 1; - nr_active = zone->nr_scan_active; - active_credit = 0; - } else { - active_credit += active_over; - nr_active = 0; - } - - if (inactive_over > 0) { - zone->nr_scan_inactive += inactive_over; - nr_inactive = zone->nr_scan_inactive; - inactive_credit = 0; - } else { - inactive_credit += inactive_over; - nr_inactive = 0; - } - while (nr_active || nr_inactive) { - if (nr_active) { - sc.nr_to_scan = min(nr_active, - (unsigned long)SWAP_CLUSTER_MAX); - nr_active -= sc.nr_to_scan; - refill_inactive_zone(zone, &sc); - } - - if (nr_inactive) { - sc.nr_to_scan = min(nr_inactive, - (unsigned long)SWAP_CLUSTER_MAX); - nr_inactive -= sc.nr_to_scan; - shrink_cache(zone, &sc); - if (sc.nr_to_reclaim <= 0) - break; - } - } - zone->prev_priority = zone->temp_priority; - zindex++; - } - ckrm_clear_shrink(cls); -} - -static void -ckrm_shrink_classes(void) -{ - ckrm_mem_res_t *cls; - - spin_lock(&ckrm_mem_lock); - while (!ckrm_shrink_list_empty()) { - cls = list_entry(ckrm_shrink_list.next, ckrm_mem_res_t, - shrink_list); - spin_unlock(&ckrm_mem_lock); - ckrm_shrink_class(cls); - spin_lock(&ckrm_mem_lock); - list_del(&cls->shrink_list); - cls->flags &= ~MEM_AT_LIMIT; - } - spin_unlock(&ckrm_mem_lock); -} - -#else - -#if defined(CONFIG_CKRM_RES_MEM) && !defined(AT_LIMIT_SUPPORT) -#warning "disabling ckrm_at_limit -- setting ckrm_shrink_classes to noop " -#endif - -#define ckrm_shrink_classes() do { } while(0) -#endif - /* * This is the direct reclaim path, for page-allocating processes. 
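
The ckrm_shrink_class() routine removed just above splits a class's page limit across zones in proportion to each zone's share of all LRU pages, targets 6/10 of that per-zone quota for the active list and 3/10 for the inactive list, and carries any unused headroom forward to the next zone as a credit. The following is a minimal userspace sketch of that arithmetic only; the zone sizes, class limit and per-class page counts are made-up illustrative values, not taken from the patch.

/* Userspace sketch of the per-zone quota arithmetic in the removed
 * ckrm_shrink_class() (mm/vmscan.c).  All numbers below are made up. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	long zone_total[3]      = {  4096,  65536, 131072 }; /* active+inactive+free per zone */
	long cls_nr_active[3]   = {  1024,  30000,  20000 }; /* pages the class holds (active) */
	long cls_nr_inactive[3] = {  1024,  20000,  10000 }; /* pages the class holds (inactive) */
	long tot_lru_pages      = 4096 + 65536 + 131072;
	long cls_pg_limit       = 50000;                      /* stands in for cls->pg_limit */

	long active_credit = 0, inactive_credit = 0;

	for (int z = 0; z < 3; z++) {
		/* class limit scaled by this zone's share of all LRU pages */
		long zone_limit = (long)((int64_t)cls_pg_limit * zone_total[z]
					 / tot_lru_pages);
		long active_limit   = (6 * zone_limit) / 10;
		long inactive_limit = (3 * zone_limit) / 10;

		long active_over   = cls_nr_active[z] - active_limit + active_credit;
		long inactive_over = active_over +
				(cls_nr_inactive[z] - inactive_limit) + inactive_credit;

		long scan_active = 0, scan_inactive = 0;
		if (active_over > 0) {
			scan_active = active_over + 1;  /* pages to push off the active list */
			active_credit = 0;
		} else {
			active_credit += active_over;   /* under quota: carry credit forward */
		}
		if (inactive_over > 0) {
			scan_inactive = inactive_over;  /* pages to reclaim from the inactive list */
			inactive_credit = 0;
		} else {
			inactive_credit += inactive_over;
		}

		printf("zone %d: limit=%ld scan_active=%ld scan_inactive=%ld\n",
		       z, zone_limit, scan_active, scan_inactive);
	}
	return 0;
}
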
We only * try to reclaim pages from zones which will satisfy the caller's allocation @@ -1315,9 +1157,6 @@ static int kswapd(void *p) finish_wait(&pgdat->kswapd_wait, &wait); try_to_clip_inodes(); - if (!ckrm_shrink_list_empty()) - ckrm_shrink_classes(); - else balance_pgdat(pgdat, 0); } return 0; @@ -1328,7 +1167,7 @@ static int kswapd(void *p) */ void wakeup_kswapd(struct zone *zone) { - if ((zone->free_pages > zone->pages_low) && ckrm_shrink_list_empty()) + if (zone->free_pages > zone->pages_low) return; if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait)) return; diff --git a/net/core/sock.c b/net/core/sock.c index 266397922..d5b2d9105 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -331,18 +331,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, clear_bit(SOCK_PASS_CRED, &sock->flags); break; - case SO_SETXID: - if (current->xid) { - ret = -EPERM; - break; - } - if (val < 0 || val > MAX_S_CONTEXT) { - ret = -EINVAL; - break; - } - sk->sk_xid = val; - break; - case SO_TIMESTAMP: sk->sk_rcvtstamp = valbool; if (valbool) diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 05fbb43cc..00a89f4f8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -670,10 +670,8 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, conntrack->ct_general.destroy = destroy_conntrack; conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; - conntrack->xid[IP_CT_DIR_ORIGINAL] = -1; conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; - conntrack->xid[IP_CT_DIR_REPLY] = -1; for (i=0; i < IP_CT_NUMBER; i++) conntrack->infos[i].master = &conntrack->ct_general; diff --git a/net/ipv4/netfilter/ip_conntrack_pptp.c b/net/ipv4/netfilter/ip_conntrack_pptp.c deleted file mode 100644 index 29ab1a495..000000000 --- a/net/ipv4/netfilter/ip_conntrack_pptp.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * ip_conntrack_pptp.c - Version 2.0 - * - * Connection tracking support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. - * It is a specification defined by Microsoft and some vendors - * working with Microsoft. PPTP is built on top of a modified - * version of the Internet Generic Routing Encapsulation Protocol. - * GRE is defined in RFC 1701 and RFC 1702. Documentation of - * PPTP can be found in RFC 2637 - * - * (C) 2000-2003 by Harald Welte - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - * Limitations: - * - We blindly assume that control connections are always - * established in PNS->PAC direction. 
This is a violation - * of RFFC2673 - * - * TODO: - finish support for multiple calls within one session - * (needs expect reservations in newnat) - * - testing of incoming PPTP calls - * - * Changes: - * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from - * pptp_timeout_related() to destroy expectations in case - * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen - * (Philip Craig ) - * - Add Version information at module loadtime - * 2002-02-10 - Version 1.6 - * - move to C99 style initializers - * - remove second expectation if first arrives - * 2004-10-22 - Version 2.0 - * - merge Mandrake's 2.6.x port with recent 2.6.x API changes - * - fix lots of linear skb assumptions from Mandrake's port - * - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define IP_CT_PPTP_VERSION "2.0" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); - -DECLARE_LOCK(ip_pptp_lock); - -#if 0 -#include "ip_conntrack_pptp_priv.h" -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - -#define SECS *HZ -#define MINS * 60 SECS -#define HOURS * 60 MINS -#define DAYS * 24 HOURS - -#define PPTP_GRE_TIMEOUT (10 MINS) -#define PPTP_GRE_STREAM_TIMEOUT (5 DAYS) - -static int pptp_expectfn(struct ip_conntrack *ct) -{ - struct ip_conntrack *master; - struct ip_conntrack_expect *exp; - - DEBUGP("increasing timeouts\n"); - /* increase timeout of GRE data channel conntrack entry */ - ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; - ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; - - master = master_ct(ct); - if (!master) { - DEBUGP(" no master!!!\n"); - return 0; - } - - exp = ct->master; - if (!exp) { - DEBUGP("no expectation!!\n"); - return 0; - } - - DEBUGP("completing tuples with ct info\n"); - /* we can do this, since we're unconfirmed */ - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == - htonl(master->help.ct_pptp_info.pac_call_id)) { - /* assume PNS->PAC */ - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = - htonl(master->help.ct_pptp_info.pns_call_id); - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = - htonl(master->help.ct_pptp_info.pns_call_id); - } else { - /* assume PAC->PNS */ - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = - htonl(master->help.ct_pptp_info.pac_call_id); - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = - htonl(master->help.ct_pptp_info.pac_call_id); - } - - /* delete other expectation */ - if (exp->expected_list.next != &exp->expected_list) { - struct ip_conntrack_expect *other_exp; - struct list_head *cur_item, *next; - - for (cur_item = master->sibling_list.next; - cur_item != &master->sibling_list; cur_item = next) { - next = cur_item->next; - other_exp = list_entry(cur_item, - struct ip_conntrack_expect, - expected_list); - /* remove only if occurred at same sequence number */ - if (other_exp != exp && other_exp->seq == exp->seq) { - DEBUGP("unexpecting other direction\n"); - ip_ct_gre_keymap_destroy(other_exp); - ip_conntrack_unexpect_related(other_exp); - } - } - } - - return 0; -} - -/* timeout GRE data connections */ -static int pptp_timeout_related(struct ip_conntrack *ct) -{ - struct list_head *cur_item, *next; - struct ip_conntrack_expect *exp; - - /* FIXME: do we have to lock something ? 
*/ - for (cur_item = ct->sibling_list.next; - cur_item != &ct->sibling_list; cur_item = next) { - next = cur_item->next; - exp = list_entry(cur_item, struct ip_conntrack_expect, - expected_list); - - ip_ct_gre_keymap_destroy(exp); - if (!exp->sibling) { - ip_conntrack_unexpect_related(exp); - continue; - } - - DEBUGP("setting timeout of conntrack %p to 0\n", - exp->sibling); - exp->sibling->proto.gre.timeout = 0; - exp->sibling->proto.gre.stream_timeout = 0; - /* refresh_acct will not modify counters if skb == NULL */ - ip_ct_refresh_acct(exp->sibling, 0, NULL, 0); - } - - return 0; -} - -/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ -static inline int -exp_gre(struct ip_conntrack *master, - u_int32_t seq, - u_int16_t callid, - u_int16_t peer_callid) -{ - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = htonl(ntohs(peer_callid)) } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = htonl(ntohs(callid)) } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = htonl(ntohs(callid)) } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = htonl(ntohs(peer_callid)) } }, - .protonum = IPPROTO_GRE - }, - } - }, *exp_tuple; - - for (exp_tuple = exp_tuples; exp_tuple < &exp_tuples[2]; exp_tuple++) { - struct ip_conntrack_expect *exp; - - exp = ip_conntrack_expect_alloc(); - if (exp == NULL) - return 1; - - memcpy(&exp->tuple, exp_tuple, sizeof(exp->tuple)); - - exp->mask.src.ip = 0xffffffff; - exp->mask.src.u.all = 0; - exp->mask.dst.u.all = 0; - exp->mask.dst.u.gre.key = 0xffffffff; - exp->mask.dst.ip = 0xffffffff; - exp->mask.dst.protonum = 0xffff; - - exp->seq = seq; - exp->expectfn = pptp_expectfn; - - exp->help.exp_pptp_info.pac_call_id = ntohs(callid); - exp->help.exp_pptp_info.pns_call_id = ntohs(peer_callid); - - DEBUGP("calling expect_related "); - DUMP_TUPLE_RAW(&exp->tuple); - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(exp, &exp->tuple, 0) != 0) { - kfree(exp); - return 1; - } - - invert_tuplepr(&inv_tuple, &exp->tuple); - if (ip_ct_gre_keymap_add(exp, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(exp); - kfree(exp); - return 1; - } - - if (ip_conntrack_expect_related(exp, master) != 0) { - ip_ct_gre_keymap_destroy(exp); - kfree(exp); - DEBUGP("cannot expect_related()\n"); - return 1; - } - } - - return 0; -} - -static inline int -pptp_inbound_pkt(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int ctlhoff, - size_t datalen, - struct ip_conntrack *ct) -{ - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; - u_int32_t seq; - - ctlh = skb_header_pointer(skb, ctlhoff, sizeof(_ctlh), &_ctlh); - if (unlikely(!ctlh)) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - - reqlen = datalen - sizeof(struct pptp_pkt_hdr) - sizeof(_ctlh); - pptpReq = skb_header_pointer(skb, ctlhoff+sizeof(struct pptp_pkt_hdr), - reqlen, &_pptpReq); - if (unlikely(!pptpReq)) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - - msg = ntohs(ctlh->messageType); - DEBUGP("inbound control message %s\n", 
strMName[msg]); - - switch (msg) { - case PPTP_START_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.srep)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) { - DEBUGP("%s without START_SESS_REQUEST\n", - strMName[msg]); - break; - } - if (pptpReq->srep.resultCode == PPTP_START_OK) - info->sstate = PPTP_SESSION_CONFIRMED; - else - info->sstate = PPTP_SESSION_ERROR; - break; - - case PPTP_STOP_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.strep)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) { - DEBUGP("%s without STOP_SESS_REQUEST\n", - strMName[msg]); - break; - } - if (pptpReq->strep.resultCode == PPTP_STOP_OK) - info->sstate = PPTP_SESSION_NONE; - else - info->sstate = PPTP_SESSION_ERROR; - break; - - case PPTP_OUT_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.ocack)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", strMName[msg]); - break; - } - if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) { - DEBUGP("%s without OUTCALL_REQ\n", strMName[msg]); - break; - } - if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { - info->cstate = PPTP_CALL_NONE; - break; - } - - cid = &pptpReq->ocack.callID; - pcid = &pptpReq->ocack.peersCallID; - - info->pac_call_id = ntohs(*cid); - - if (htons(info->pns_call_id) != *pcid) { - DEBUGP("%s for unknown callid %u\n", - strMName[msg], ntohs(*pcid)); - break; - } - - DEBUGP("%s, CID=%X, PCID=%X\n", strMName[msg], - ntohs(*cid), ntohs(*pcid)); - - info->cstate = PPTP_CALL_OUT_CONF; - - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - break; - - case PPTP_IN_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", strMName[msg]); - break; - } - pcid = &pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid)); - info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); - break; - - case PPTP_IN_CALL_CONNECT: - if (reqlen < sizeof(_pptpReq.iccon)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", strMName[msg]); - break; - } - if (info->sstate != PPTP_CALL_IN_REP - && info->sstate != PPTP_CALL_IN_CONF) { - DEBUGP("%s but never sent IN_CALL_REPLY\n", - strMName[msg]); - break; - } - - pcid = &pptpReq->iccon.peersCallID; - cid = &info->pac_call_id; - - if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", - strMName[msg], ntohs(*cid)); - break; - } - - DEBUGP("%s, PCID=%X\n", strMName[msg], ntohs(*pcid)); - info->cstate = PPTP_CALL_IN_CONF; - - /* we expect a GRE connection from PAC to PNS */ - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during 
exp_gre\n"); - - break; - - case PPTP_CALL_DISCONNECT_NOTIFY: - if (reqlen < sizeof(_pptpReq.disc)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* server confirms disconnect */ - cid = &pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid)); - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_timeout_related(ct); - break; - - case PPTP_WAN_ERROR_NOTIFY: - break; - - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* I don't have to explain these ;) */ - break; - default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) - ? strMName[msg]:strMName[0], msg); - break; - } - - return NF_ACCEPT; - -} - -static inline int -pptp_outbound_pkt(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int ctlhoff, - size_t datalen, - struct ip_conntrack *ct) -{ - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg, *cid, *pcid; - - ctlh = skb_header_pointer(skb, ctlhoff, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - - reqlen = datalen - sizeof(struct pptp_pkt_hdr) - sizeof(_ctlh); - pptpReq = skb_header_pointer(skb, ctlhoff+sizeof(_ctlh), reqlen, - &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; - - msg = ntohs(ctlh->messageType); - DEBUGP("outbound control message %s\n", strMName[msg]); - - switch (msg) { - case PPTP_START_SESSION_REQUEST: - /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) { - DEBUGP("%s but we already have one", - strMName[msg]); - } - info->sstate = PPTP_SESSION_REQUESTED; - break; - case PPTP_STOP_SESSION_REQUEST: - /* client requests end of control session */ - info->sstate = PPTP_SESSION_STOPREQ; - break; - - case PPTP_OUT_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.ocreq)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", - strMName[msg]); - break; - } - info->cstate = PPTP_CALL_OUT_REQ; - /* track PNS call id */ - cid = &pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); - break; - case PPTP_IN_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", strMName[msg]); - break; - } - - /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ - && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", - strMName[msg]); - break; - } - if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { - info->cstate = PPTP_CALL_NONE; - break; - } - pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", - strMName[msg], ntohs(*pcid)); - break; - } - DEBUGP("%s, CID=%X\n", strMName[msg], ntohs(*pcid)); - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); - break; - - case PPTP_CALL_CLEAR_REQUEST: - /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("CLEAR_CALL but no session\n"); - break; - } - /* FUTURE: iterate over all calls and check if - * call ID is valid. 
We don't do this without newnat, - * because we only know about last call */ - info->cstate = PPTP_CALL_CLEAR_REQ; - break; - case PPTP_SET_LINK_INFO: - break; - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* I don't have to explain these ;) */ - break; - default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? - strMName[msg]:strMName[0], msg); - /* unknown: no need to create GRE masq table entry */ - break; - } - - return NF_ACCEPT; -} - - -/* track caller id inside control connection, call expect_related */ -static int -conntrack_pptp_help(struct sk_buff *skb, - struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) - -{ - struct pptp_pkt_hdr _pptph, *pptph; - - struct tcphdr _tcph, *tcph; - u_int32_t tcplen = skb->len - skb->nh.iph->ihl * 4; - u_int32_t datalen; - void *datalimit; - int dir = CTINFO2DIR(ctinfo); - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - unsigned int nexthdr_off; - - int oldsstate, oldcstate; - int ret; - - /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { - DEBUGP("ctinfo = %u, skipping\n", ctinfo); - return NF_ACCEPT; - } - - nexthdr_off = skb->nh.iph->ihl*4; - tcph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_tcph), - &_tcph); - if (!tcph) - return NF_ACCEPT; - - /* not a complete TCP header? */ - if (tcplen < sizeof(struct tcphdr) || tcplen < tcph->doff * 4) { - DEBUGP("tcplen = %u\n", tcplen); - return NF_ACCEPT; - } - - - datalen = tcplen - tcph->doff * 4; - - /* checksum invalid? */ - if (tcp_v4_check(tcph, tcplen, skb->nh.iph->saddr, skb->nh.iph->daddr, - csum_partial((char *) tcph, tcplen, 0))) { - printk(KERN_NOTICE __FILE__ ": bad csum\n"); - /* W2K PPTP server sends TCP packets with wrong checksum :(( */ - //return NF_ACCEPT; - } - - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_timeout_related(ct); - } - - nexthdr_off += tcph->doff*4; - pptph = skb_header_pointer(skb, skb->nh.iph->ihl*4 + tcph->doff*4, - sizeof(_pptph), &_pptph); - if (!pptph) { - DEBUGP("no full PPTP header, can't track\n"); - return NF_ACCEPT; - } - - datalimit = (void *) pptph + datalen; - - /* if it's not a control message we can't do anything with it */ - if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || - ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { - DEBUGP("not a control packet\n"); - return NF_ACCEPT; - } - - oldsstate = info->sstate; - oldcstate = info->cstate; - - LOCK_BH(&ip_pptp_lock); - - nexthdr_off += sizeof(_pptph); - /* FIXME: We just blindly assume that the control connection is always - * established from PNS->PAC. 
However, RFC makes no guarantee */ - if (dir == IP_CT_DIR_ORIGINAL) - /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(skb, tcph, nexthdr_off, datalen, ct); - else - /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(skb, tcph, nexthdr_off, datalen, ct); - DEBUGP("sstate: %d->%d, cstate: %d->%d\n", - oldsstate, info->sstate, oldcstate, info->cstate); - UNLOCK_BH(&ip_pptp_lock); - - return ret; -} - -/* control protocol helper */ -static struct ip_conntrack_helper pptp = { - .list = { NULL, NULL }, - .name = "pptp", - .flags = IP_CT_HELPER_F_REUSE_EXPECT, - .me = THIS_MODULE, - .max_expected = 2, - .timeout = 0, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = IPPROTO_TCP - } - }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xffff } } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = 0xffff - } - }, - .help = conntrack_pptp_help -}; - -/* ip_conntrack_pptp initialization */ -static int __init init(void) -{ - int retcode; - - DEBUGP(__FILE__ ": registering helper\n"); - if ((retcode = ip_conntrack_helper_register(&pptp))) { - printk(KERN_ERR "Unable to register conntrack application " - "helper for pptp: %d\n", retcode); - return -EIO; - } - - printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); - return 0; -} - -static void __exit fini(void) -{ - ip_conntrack_helper_unregister(&pptp); - printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); -} - -module_init(init); -module_exit(fini); - -EXPORT_SYMBOL(ip_pptp_lock); diff --git a/net/ipv4/netfilter/ip_conntrack_pptp_priv.h b/net/ipv4/netfilter/ip_conntrack_pptp_priv.h deleted file mode 100644 index 6b52564e8..000000000 --- a/net/ipv4/netfilter/ip_conntrack_pptp_priv.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _IP_CT_PPTP_PRIV_H -#define _IP_CT_PPTP_PRIV_H - -/* PptpControlMessageType names */ -static const char *strMName[] = { - "UNKNOWN_MESSAGE", - "START_SESSION_REQUEST", - "START_SESSION_REPLY", - "STOP_SESSION_REQUEST", - "STOP_SESSION_REPLY", - "ECHO_REQUEST", - "ECHO_REPLY", - "OUT_CALL_REQUEST", - "OUT_CALL_REPLY", - "IN_CALL_REQUEST", - "IN_CALL_REPLY", - "IN_CALL_CONNECT", - "CALL_CLEAR_REQUEST", - "CALL_DISCONNECT_NOTIFY", - "WAN_ERROR_NOTIFY", - "SET_LINK_INFO" -}; - -#endif diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c deleted file mode 100644 index 013f759cc..000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * ip_conntrack_proto_gre.c - Version 2.0 - * - * Connection tracking protocol helper module for GRE. - * - * GRE is a generic encapsulation protocol, which is generally not very - * suited for NAT, as it has no protocol-specific part as port numbers. - * - * It has an optional key field, which may help us distinguishing two - * connections between the same two hosts. - * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 - * - * PPTP is built on top of a modified version of GRE, and has a mandatory - * field called "CallID", which serves us for the same purpose as the key - * field in plain GRE. 
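
Throughout the PPTP conntrack helper deleted above, a 16-bit call ID from the control channel is stored in the 32-bit GRE key slot of a tuple as htonl(ntohs(callid)) and recovered the same way when expectations are matched. A small userspace sketch of that round trip, using the ordinary byte-order helpers; the sample value is arbitrary.

/* Sketch of the call-ID <-> GRE-key conversion used by the removed
 * PPTP helper: a 16-bit call ID becomes a 32-bit network-order GRE
 * key via htonl(ntohs(id)).  Userspace only; 0x1234 is arbitrary. */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t wire_callid = htons(0x1234);             /* call ID as it appears on the wire */
	uint32_t gre_key = htonl(ntohs(wire_callid));     /* value stored in the tuple's gre.key */
	uint16_t host_callid = (uint16_t)ntohl(gre_key);  /* recovered again for matching */

	printf("wire 0x%04x -> key 0x%08x -> host 0x%04x\n",
	       ntohs(wire_callid), ntohl(gre_key), host_callid);
	return 0;
}
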
- * - * Documentation about PPTP can be found in RFC 2637 - * - * (C) 2000-2004 by Harald Welte - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -DECLARE_RWLOCK(ip_ct_gre_lock); -#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_ct_gre_lock) -#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_ct_gre_lock) - -#include -#include -#include -#include - -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); - -/* shamelessly stolen from ip_conntrack_proto_udp.c */ -#define GRE_TIMEOUT (30*HZ) -#define GRE_STREAM_TIMEOUT (180*HZ) - -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ - NIPQUAD((x)->src.ip), ntohl((x)->src.u.gre.key), \ - NIPQUAD((x)->dst.ip), ntohl((x)->dst.u.gre.key)) -#else -#define DEBUGP(x, args...) -#define DUMP_TUPLE_GRE(x) -#endif - -/* GRE KEYMAP HANDLING FUNCTIONS */ -static LIST_HEAD(gre_keymap_list); - -static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, - const struct ip_conntrack_tuple *t) -{ - return ((km->tuple.src.ip == t->src.ip) && - (km->tuple.dst.ip == t->dst.ip) && - (km->tuple.dst.protonum == t->dst.protonum) && - (km->tuple.dst.u.all == t->dst.u.all)); -} - -/* look up the source key for a given tuple */ -static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) -{ - struct ip_ct_gre_keymap *km; - u_int32_t key; - - READ_LOCK(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (!km) { - READ_UNLOCK(&ip_ct_gre_lock); - return 0; - } - - key = km->tuple.src.u.gre.key; - READ_UNLOCK(&ip_ct_gre_lock); - - return key; -} - -/* add a single keymap entry, associate with specified expect */ -int ip_ct_gre_keymap_add(struct ip_conntrack_expect *exp, - struct ip_conntrack_tuple *t, int reply) -{ - struct ip_ct_gre_keymap *km; - - km = kmalloc(sizeof(*km), GFP_ATOMIC); - if (!km) - return -1; - - /* initializing list head should be sufficient */ - memset(km, 0, sizeof(*km)); - - memcpy(&km->tuple, t, sizeof(*t)); - - if (!reply) - exp->proto.gre.keymap_orig = km; - else - exp->proto.gre.keymap_reply = km; - - DEBUGP("adding new entry %p: ", km); - DUMP_TUPLE_GRE(&km->tuple); - - WRITE_LOCK(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); - WRITE_UNLOCK(&ip_ct_gre_lock); - - return 0; -} - -/* change the tuple of a keymap entry (used by nat helper) */ -void ip_ct_gre_keymap_change(struct ip_ct_gre_keymap *km, - struct ip_conntrack_tuple *t) -{ - if (!km) - { - printk(KERN_WARNING - "NULL GRE conntrack keymap change requested\n"); - return; - } - - DEBUGP("changing entry %p to: ", km); - DUMP_TUPLE_GRE(t); - - WRITE_LOCK(&ip_ct_gre_lock); - memcpy(&km->tuple, t, sizeof(km->tuple)); - WRITE_UNLOCK(&ip_ct_gre_lock); -} - -/* destroy the keymap entries associated with specified expect */ -void ip_ct_gre_keymap_destroy(struct ip_conntrack_expect *exp) -{ - DEBUGP("entering for exp %p\n", exp); - WRITE_LOCK(&ip_ct_gre_lock); - if (exp->proto.gre.keymap_orig) { - DEBUGP("removing %p from list\n", exp->proto.gre.keymap_orig); - list_del(&exp->proto.gre.keymap_orig->list); - kfree(exp->proto.gre.keymap_orig); - exp->proto.gre.keymap_orig = NULL; - } - if (exp->proto.gre.keymap_reply) { - DEBUGP("removing %p from list\n", 
exp->proto.gre.keymap_reply); - list_del(&exp->proto.gre.keymap_reply->list); - kfree(exp->proto.gre.keymap_reply); - exp->proto.gre.keymap_reply = NULL; - } - WRITE_UNLOCK(&ip_ct_gre_lock); -} - - -/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ - -/* invert gre part of tuple */ -static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - tuple->dst.u.gre.key = orig->src.u.gre.key; - tuple->src.u.gre.key = orig->dst.u.gre.key; - - return 1; -} - -/* gre hdr info to tuple */ -static int gre_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - struct gre_hdr _grehdr, *grehdr; - struct gre_hdr_pptp _pgrehdr, *pgrehdr; - u_int32_t srckey; - - grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - /* PPTP header is variable length, only need up to the call_id field */ - pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); - - if (!grehdr || !pgrehdr) - return 0; - - switch (grehdr->version) { - case GRE_VERSION_1701: - if (!grehdr->key) { - DEBUGP("Can't track GRE without key\n"); - return 0; - } - tuple->dst.u.gre.key = *(gre_key(grehdr)); - break; - - case GRE_VERSION_PPTP: - if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { - DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); - return 0; - } - tuple->dst.u.gre.key = htonl(ntohs(pgrehdr->call_id)); - break; - - default: - printk(KERN_WARNING "unknown GRE version %hu\n", - grehdr->version); - return 0; - } - - srckey = gre_keymap_lookup(tuple); - - tuple->src.u.gre.key = srckey; -#if 0 - DEBUGP("found src key %x for tuple ", ntohl(srckey)); - DUMP_TUPLE_GRE(tuple); -#endif - - return 1; -} - -/* print gre part of tuple */ -static unsigned int gre_print_tuple(char *buffer, - const struct ip_conntrack_tuple *tuple) -{ - return sprintf(buffer, "srckey=0x%x dstkey=0x%x ", - ntohl(tuple->src.u.gre.key), - ntohl(tuple->dst.u.gre.key)); -} - -/* print private data for conntrack */ -static unsigned int gre_print_conntrack(char *buffer, - const struct ip_conntrack *ct) -{ - return sprintf(buffer, "timeout=%u, stream_timeout=%u ", - (ct->proto.gre.timeout / HZ), - (ct->proto.gre.stream_timeout / HZ)); -} - -/* Returns verdict for packet, and may modify conntrack */ -static int gre_packet(struct ip_conntrack *ct, - const struct sk_buff *skb, - enum ip_conntrack_info conntrackinfo) -{ - /* If we've seen traffic both ways, this is a GRE connection. - * Extend timeout. */ - if (ct->status & IPS_SEEN_REPLY) { - ip_ct_refresh_acct(ct, conntrackinfo, skb, - ct->proto.gre.stream_timeout); - /* Also, more likely to be important, and not a probe. */ - set_bit(IPS_ASSURED_BIT, &ct->status); - } else - ip_ct_refresh_acct(ct, conntrackinfo, skb, - ct->proto.gre.timeout); - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int gre_new(struct ip_conntrack *ct, - const struct sk_buff *skb) -{ - DEBUGP(": "); - DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - - /* initialize to sane value. Ideally a conntrack helper - * (e.g. 
in case of pptp) is increasing them */ - ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; - ct->proto.gre.timeout = GRE_TIMEOUT; - - return 1; -} - -/* Called when a conntrack entry has already been removed from the hashes - * and is about to be deleted from memory */ -static void gre_destroy(struct ip_conntrack *ct) -{ - struct ip_conntrack_expect *master = ct->master; - - DEBUGP(" entering\n"); - - if (!master) { - DEBUGP("no master exp for ct %p\n", ct); - return; - } - - ip_ct_gre_keymap_destroy(master); -} - -/* protocol helper struct */ -static struct ip_conntrack_protocol gre = { - .proto = IPPROTO_GRE, - .name = "gre", - .pkt_to_tuple = gre_pkt_to_tuple, - .invert_tuple = gre_invert_tuple, - .print_tuple = gre_print_tuple, - .print_conntrack = gre_print_conntrack, - .packet = gre_packet, - .new = gre_new, - .destroy = gre_destroy, - .exp_matches_pkt = NULL, - .me = THIS_MODULE -}; - -/* ip_conntrack_proto_gre initialization */ -static int __init init(void) -{ - int retcode; - - if ((retcode = ip_conntrack_protocol_register(&gre))) { - printk(KERN_ERR "Unable to register conntrack protocol " - "helper for gre: %d\n", retcode); - return -EIO; - } - - return 0; -} - -static void __exit fini(void) -{ - struct list_head *pos, *n; - - /* delete all keymap entries */ - WRITE_LOCK(&ip_ct_gre_lock); - list_for_each_safe(pos, n, &gre_keymap_list) { - DEBUGP("deleting keymap %p at module unload time\n", pos); - list_del(pos); - kfree(pos); - } - WRITE_UNLOCK(&ip_ct_gre_lock); - - ip_conntrack_protocol_unregister(&gre); -} - -EXPORT_SYMBOL(ip_ct_gre_keymap_add); -EXPORT_SYMBOL(ip_ct_gre_keymap_change); -EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index fd688f4fe..0c935eddf 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -102,17 +102,17 @@ print_conntrack(char *buffer, struct ip_conntrack *conntrack) len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, proto); - len += sprintf(buffer + len, "xid=%d ", conntrack->xid[IP_CT_DIR_ORIGINAL]); if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) len += sprintf(buffer + len, "[UNREPLIED] "); len += print_tuple(buffer + len, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, proto); - len += sprintf(buffer + len, "xid=%d ", conntrack->xid[IP_CT_DIR_REPLY]); if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) len += sprintf(buffer + len, "[ASSURED] "); len += sprintf(buffer + len, "use=%u ", atomic_read(&conntrack->ct_general.use)); + len += sprintf(buffer + len, "sxid=%d dxid=%d ", + conntrack->xid[IP_CT_DIR_ORIGINAL], conntrack->xid[IP_CT_DIR_REPLY]); len += sprintf(buffer + len, "\n"); return len; diff --git a/net/ipv4/netfilter/ip_nat_pptp.c b/net/ipv4/netfilter/ip_nat_pptp.c deleted file mode 100644 index 2bbb815e9..000000000 --- a/net/ipv4/netfilter/ip_nat_pptp.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * ip_nat_pptp.c - Version 2.0 - * - * NAT support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. - * It is a specification defined by Microsoft and some vendors - * working with Microsoft. PPTP is built on top of a modified - * version of the Internet Generic Routing Encapsulation Protocol. - * GRE is defined in RFC 1701 and RFC 1702. 
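
The GRE conntrack protocol helper deleted above keeps a global keymap list; gre_keymap_lookup() returns the stored source key of the first entry whose source IP, destination IP, protocol number and destination key all match the tuple being resolved, or 0 when nothing matches. A minimal userspace sketch of that matching, with a plain array standing in for the kernel list and an illustrative struct layout that is not the kernel's.

/* Userspace sketch of the removed GRE keymap lookup: return the stored
 * source key for a tuple by matching src/dst IP, protocol and dst key.
 * The struct below is an illustration, not the kernel tuple layout. */
#include <stdio.h>
#include <stdint.h>

struct tuple {
	uint32_t src_ip, dst_ip;
	uint8_t  protonum;
	uint32_t dst_key;   /* dst.u.gre.key in the kernel tuple */
	uint32_t src_key;   /* src.u.gre.key in the kernel tuple */
};

static uint32_t keymap_lookup(const struct tuple *map, int n,
			      const struct tuple *t)
{
	for (int i = 0; i < n; i++) {
		if (map[i].src_ip == t->src_ip &&
		    map[i].dst_ip == t->dst_ip &&
		    map[i].protonum == t->protonum &&
		    map[i].dst_key == t->dst_key)
			return map[i].src_key;  /* found: reuse the stored source key */
	}
	return 0;  /* no mapping known for this tuple */
}

int main(void)
{
	struct tuple map[] = {
		{ .src_ip = 0x0a000001, .dst_ip = 0x0a000002,
		  .protonum = 47, .dst_key = 0x1234, .src_key = 0x5678 },
	};
	struct tuple probe = { .src_ip = 0x0a000001, .dst_ip = 0x0a000002,
			       .protonum = 47, .dst_key = 0x1234 };

	printf("src key = 0x%x\n", keymap_lookup(map, 1, &probe));
	return 0;
}
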
Documentation of - * PPTP can be found in RFC 2637 - * - * (C) 2000-2004 by Harald Welte - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - * TODO: - Support for multiple calls within one session - * (needs netfilter newnat code) - * - NAT to a unique tuple, not to TCP source port - * (needs netfilter tuple reservation) - * - * Changes: - * 2002-02-10 - Version 1.3 - * - Use ip_nat_mangle_tcp_packet() because of cloned skb's - * in local connections (Philip Craig ) - * - add checks for magicCookie and pptp version - * - make argument list of pptp_{out,in}bound_packet() shorter - * - move to C99 style initializers - * - print version number at module loadtime - * 2003-09-22 - Version 1.5 - * - use SNATed tcp sourceport as callid, since we get called before - * TCP header is mangled (Philip Craig ) - * 2004-10-22 - Version 2.0 - * - kernel 2.6.x version - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define IP_NAT_PPTP_VERSION "2.0" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); - - -#if 0 -#include "ip_conntrack_pptp_priv.h" -#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ - ": " format, ## args) -#else -#define DEBUGP(format, args...) -#endif - -static unsigned int -pptp_nat_expected(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - struct ip_conntrack *master = master_ct(ct); - struct ip_nat_multi_range mr; - struct ip_ct_pptp_master *ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info; - u_int32_t newip, newcid; - int ret; - - IP_NF_ASSERT(info); - IP_NF_ASSERT(master); - IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); - - DEBUGP("we have a connection!\n"); - - LOCK_BH(&ip_pptp_lock); - ct_pptp_info = &master->help.ct_pptp_info; - nat_pptp_info = &master->nat.help.nat_pptp_info; - - /* need to alter GRE tuple because conntrack expectfn() used 'wrong' - * (unmanipulated) values */ - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { - DEBUGP("completing tuples with NAT info \n"); - /* we can do this, since we're unconfirmed */ - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == - htonl(ct_pptp_info->pac_call_id)) { - /* assume PNS->PAC */ - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = - htonl(nat_pptp_info->pns_call_id); - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = - htonl(nat_pptp_info->pns_call_id); - newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - newcid = htonl(nat_pptp_info->pac_call_id); - } else { - /* assume PAC->PNS */ - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.gre.key = - htonl(nat_pptp_info->pac_call_id); - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.gre.key = - htonl(nat_pptp_info->pac_call_id); - newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - newcid = htonl(nat_pptp_info->pns_call_id); - } - } else { - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.gre.key == - htonl(ct_pptp_info->pac_call_id)) { - /* assume PNS->PAC */ - newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - newcid = htonl(ct_pptp_info->pns_call_id); - } - else { - /* assume PAC->PNS */ - newip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - newcid = htonl(ct_pptp_info->pac_call_id); - } - } - - mr.rangesize = 1; - mr.range[0].flags = IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED; - mr.range[0].min_ip = mr.range[0].max_ip = newip; - 
mr.range[0].min = mr.range[0].max = - ((union ip_conntrack_manip_proto ) { newcid }); - DEBUGP("change ip to %u.%u.%u.%u\n", - NIPQUAD(newip)); - DEBUGP("change key to 0x%x\n", ntohl(newcid)); - ret = ip_nat_setup_info(ct, &mr, hooknum); - - UNLOCK_BH(&ip_pptp_lock); - - return ret; - -} - -/* outbound packets == from PNS to PAC */ -static inline unsigned int -pptp_outbound_pkt(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *exp) - -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr *tcph = (void *) iph + iph->ihl*4; - struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) - ((void *)tcph + tcph->doff*4); - - struct PptpControlHeader *ctlh; - union pptp_ctrl_union *pptpReq; - struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - - u_int16_t msg, *cid = NULL, new_callid; - - /* FIXME: size checks !!! */ - ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); - pptpReq = (void *) ((void *) ctlh + sizeof(*ctlh)); - - new_callid = htons(ct_pptp_info->pns_call_id); - - switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid = &pptpReq->ocreq.callID; - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid = &pptpReq->icreq.callID; - break; - case PPTP_CALL_CLEAR_REQUEST: - cid = &pptpReq->clrreq.callID; - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? 
strMName[msg]:strMName[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; - } - - IP_NF_ASSERT(cid); - - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(*cid), ntohs(new_callid)); - - /* mangle packet */ - ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)cid - (void *)pptph, - sizeof(new_callid), (char *)&new_callid, - sizeof(new_callid)); - - return NF_ACCEPT; -} - -/* inbound packets == from PAC to PNS */ -static inline unsigned int -pptp_inbound_pkt(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *oldexp) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr *tcph = (void *) iph + iph->ihl*4; - struct pptp_pkt_hdr *pptph = (struct pptp_pkt_hdr *) - ((void *)tcph + tcph->doff*4); - - struct PptpControlHeader *ctlh; - union pptp_ctrl_union *pptpReq; - struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - - u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; - u_int32_t old_dst_ip; - - struct ip_conntrack_tuple t, inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; - - /* FIXME: size checks !!! */ - ctlh = (struct PptpControlHeader *) ((void *) pptph + sizeof(*pptph)); - pptpReq = (void *) ((void *) ctlh + sizeof(*ctlh)); - - new_pcid = htons(nat_pptp_info->pns_call_id); - - switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REPLY: - pcid = &pptpReq->ocack.peersCallID; - cid = &pptpReq->ocack.callID; - if (!oldexp) { - DEBUGP("outcall but no expectation\n"); - break; - } - old_dst_ip = oldexp->tuple.dst.ip; - t = oldexp->tuple; - invert_tuplepr(&inv_t, &t); - - /* save original PAC call ID in nat_info */ - nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - if (t.src.ip == orig_t->src.ip && t.dst.ip == orig_t->dst.ip) { - /* expectation for PNS->PAC direction */ - t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); - t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); - } else { - /* expectation for PAC->PNS direction */ - t.src.u.gre.key = htonl(nat_pptp_info->pac_call_id); - t.dst.u.gre.key = htonl(ct_pptp_info->pns_call_id); - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htonl(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htonl(ct_pptp_info->pac_call_id); - } - - if (!ip_conntrack_change_expect(oldexp, &t)) { - DEBUGP("successfully changed expect\n"); - } else { - DEBUGP("can't change expect\n"); - } - ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_orig, &t); - ip_ct_gre_keymap_change(oldexp->proto.gre.keymap_reply, &inv_t); - break; - case PPTP_IN_CALL_CONNECT: - pcid = &pptpReq->iccon.peersCallID; - if (!oldexp) - break; - old_dst_ip = oldexp->tuple.dst.ip; - t = oldexp->tuple; - - /* alter expectation, no need for callID */ - if (t.dst.ip == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip) { - /* expectation for PNS->PAC direction */ - 
t.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - } else { - /* expectation for PAC->PNS direction */ - t.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - } - - if (!ip_conntrack_change_expect(oldexp, &t)) { - DEBUGP("successfully changed expect\n"); - } else { - DEBUGP("can't change expect\n"); - } - break; - case PPTP_IN_CALL_REQUEST: - /* only need to nat in case PAC is behind NAT box */ - break; - case PPTP_WAN_ERROR_NOTIFY: - pcid = &pptpReq->wanerr.peersCallID; - break; - case PPTP_CALL_DISCONNECT_NOTIFY: - pcid = &pptpReq->disc.callID; - break; - - default: - DEBUGP("unknown inbound packet %s\n", - (msg <= PPTP_MSG_MAX)? strMName[msg]:strMName[0]); - /* fall through */ - - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; - } - - /* mangle packet */ - IP_NF_ASSERT(pcid); - DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", - ntohs(*pcid), ntohs(new_pcid)); - ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, (void *)pcid - (void *)pptph, - sizeof(new_pcid), (char *)&new_pcid, - sizeof(new_pcid)); - - if (new_cid) { - IP_NF_ASSERT(cid); - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(*cid), ntohs(new_cid)); - ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - (void *)cid - (void *)pptph, - sizeof(new_cid), (char *)&new_cid, - sizeof(new_cid)); - } - - /* great, at least we don't need to resize packets */ - return NF_ACCEPT; -} - - -static unsigned int tcp_help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr *tcph = (void *) iph + iph->ihl*4; - unsigned int datalen = (*pskb)->len - iph->ihl*4 - tcph->doff*4; - struct pptp_pkt_hdr *pptph; - - int dir; - - DEBUGP("entering\n"); - - /* Only mangle things once: DST for original direction - and SRC for reply direction. */ - dir = CTINFO2DIR(ctinfo); - if (!((HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC - && dir == IP_CT_DIR_ORIGINAL) - || (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST - && dir == IP_CT_DIR_REPLY))) { - DEBUGP("Not touching dir %s at hook %s\n", - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", - hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" - : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" - : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" - : hooknum == NF_IP_LOCAL_IN ? 
"INPUT" : "???"); - return NF_ACCEPT; - } - - /* if packet is too small, just skip it */ - if (datalen < sizeof(struct pptp_pkt_hdr)+ - sizeof(struct PptpControlHeader)) { - DEBUGP("pptp packet too short\n"); - return NF_ACCEPT; - } - - pptph = (struct pptp_pkt_hdr *) ((void *)tcph + tcph->doff*4); - - /* if it's not a control message, we can't handle it */ - if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || - ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { - DEBUGP("not a pptp control packet\n"); - return NF_ACCEPT; - } - - LOCK_BH(&ip_pptp_lock); - - if (dir == IP_CT_DIR_ORIGINAL) { - /* reuqests sent by client to server (PNS->PAC) */ - pptp_outbound_pkt(pskb, ct, ctinfo, exp); - } else { - /* response from the server to the client (PAC->PNS) */ - pptp_inbound_pkt(pskb, ct, ctinfo, exp); - } - - UNLOCK_BH(&ip_pptp_lock); - - return NF_ACCEPT; -} - -/* nat helper struct for control connection */ -static struct ip_nat_helper pptp_tcp_helper = { - .list = { NULL, NULL }, - .name = "pptp", - .flags = IP_NAT_HELPER_F_ALWAYS, - .me = THIS_MODULE, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } - } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = IPPROTO_TCP - } - }, - - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = 0xFFFF } } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = 0xFFFF - } - }, - .help = tcp_help, - .expect = pptp_nat_expected -}; - - -static int __init init(void) -{ - DEBUGP("%s: registering NAT helper\n", __FILE__); - if (ip_nat_helper_register(&pptp_tcp_helper)) { - printk(KERN_ERR "Unable to register NAT application helper " - "for pptp\n"); - return -EIO; - } - - printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); - return 0; -} - -static void __exit fini(void) -{ - DEBUGP("cleanup_module\n" ); - ip_nat_helper_unregister(&pptp_tcp_helper); - printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c deleted file mode 100644 index 5691a102a..000000000 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * ip_nat_proto_gre.c - Version 2.0 - * - * NAT protocol helper module for GRE. - * - * GRE is a generic encapsulation protocol, which is generally not very - * suited for NAT, as it has no protocol-specific part as port numbers. - * - * It has an optional key field, which may help us distinguishing two - * connections between the same two hosts. - * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 - * - * PPTP is built on top of a modified version of GRE, and has a mandatory - * field called "CallID", which serves us for the same purpose as the key - * field in plain GRE. - * - * Documentation about PPTP can be found in RFC 2637 - * - * (C) 2000-2004 by Harald Welte - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - */ - -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); - -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ - ": " format, ## args) -#else -#define DEBUGP(x, args...) 
-#endif - -/* is key in given range between min and max */ -static int -gre_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - u_int32_t key; - - if (maniptype == IP_NAT_MANIP_SRC) - key = tuple->src.u.gre.key; - else - key = tuple->dst.u.gre.key; - - return ntohl(key) >= ntohl(min->gre.key) - && ntohl(key) <= ntohl(max->gre.key); -} - -/* generate unique tuple ... */ -static int -gre_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - u_int32_t min, i, range_size; - u_int32_t key = 0, *keyptr; - - if (maniptype == IP_NAT_MANIP_SRC) - keyptr = &tuple->src.u.gre.key; - else - keyptr = &tuple->dst.u.gre.key; - - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - DEBUGP("%p: NATing GRE PPTP\n", conntrack); - min = 1; - range_size = 0xffff; - } else { - min = ntohl(range->min.gre.key); - range_size = ntohl(range->max.gre.key) - min + 1; - } - - DEBUGP("min = %u, range_size = %u\n", min, range_size); - - for (i = 0; i < range_size; i++, key++) { - *keyptr = htonl(min + key % range_size); - if (!ip_nat_used_tuple(tuple, conntrack)) - return 1; - } - - DEBUGP("%p: no NAT mapping\n", conntrack); - - return 0; -} - -/* manipulate a GRE packet according to maniptype */ -static int -gre_manip_pkt(struct sk_buff **pskb, - unsigned int hdroff, - const struct ip_conntrack_manip *manip, - enum ip_nat_manip_type maniptype) -{ - struct gre_hdr *greh; - struct gre_hdr_pptp *pgreh; - - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*pgreh))) - return 0; - - greh = (void *)(*pskb)->data + hdroff; - pgreh = (struct gre_hdr_pptp *) greh; - - /* we only have destination manip of a packet, since 'source key' - * is not present in the packet itself */ - if (maniptype == IP_NAT_MANIP_DST) { - /* key manipulation is always dest */ - switch (greh->version) { - case 0: - if (!greh->key) { - DEBUGP("can't nat GRE w/o key\n"); - break; - } - if (greh->csum) { - /* FIXME: Never tested this code... 
*/ - *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), - manip->u.gre.key, - *(gre_csum(greh))); - } - *(gre_key(greh)) = manip->u.gre.key; - break; - case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", - ntohl(manip->u.gre.key)); - pgreh->call_id = htons(ntohl(manip->u.gre.key)); - break; - default: - DEBUGP("can't nat unknown GRE version\n"); - return 0; - break; - } - } - return 1; -} - -/* print out a nat tuple */ -static unsigned int -gre_print(char *buffer, - const struct ip_conntrack_tuple *match, - const struct ip_conntrack_tuple *mask) -{ - unsigned int len = 0; - - if (mask->src.u.gre.key) - len += sprintf(buffer + len, "srckey=0x%x ", - ntohl(match->src.u.gre.key)); - - if (mask->dst.u.gre.key) - len += sprintf(buffer + len, "dstkey=0x%x ", - ntohl(match->src.u.gre.key)); - - return len; -} - -/* print a range of keys */ -static unsigned int -gre_print_range(char *buffer, const struct ip_nat_range *range) -{ - if (range->min.gre.key != 0 - || range->max.gre.key != 0xFFFF) { - if (range->min.gre.key == range->max.gre.key) - return sprintf(buffer, "key 0x%x ", - ntohl(range->min.gre.key)); - else - return sprintf(buffer, "keys 0x%u-0x%u ", - ntohl(range->min.gre.key), - ntohl(range->max.gre.key)); - } else - return 0; -} - -/* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", - .protonum = IPPROTO_GRE, - .manip_pkt = gre_manip_pkt, - .in_range = gre_in_range, - .unique_tuple = gre_unique_tuple, - .print = gre_print, - .print_range = gre_print_range -}; - -static int __init init(void) -{ - if (ip_nat_protocol_register(&gre)) - return -EIO; - - return 0; -} - -static void __exit fini(void) -{ - ip_nat_protocol_unregister(&gre); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 70945b48a..7bbe1cb55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1822,7 +1822,7 @@ process: * packet. 
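
The gre_unique_tuple() routine in the NAT protocol helper deleted above picks a free key by walking the allowed range (falling back to 1..0xffff when no range was specified) and probing each candidate, wrapping modulo the range size, until one is unused. A standalone sketch of that search loop; the key_in_use() predicate below is only a stand-in for the kernel's ip_nat_used_tuple().

/* Sketch of the key search in the removed gre_unique_tuple(): walk the
 * candidate range until a key is found that is not already in use. */
#include <stdio.h>
#include <stdint.h>

static int key_in_use(uint32_t key)
{
	return key < 5;  /* pretend the first few keys are already taken */
}

static int pick_unique_key(uint32_t min, uint32_t range_size, uint32_t *out)
{
	uint32_t key = 0;

	for (uint32_t i = 0; i < range_size; i++, key++) {
		uint32_t candidate = min + key % range_size;
		if (!key_in_use(candidate)) {
			*out = candidate;  /* free mapping found */
			return 1;
		}
	}
	return 0;  /* range exhausted, no NAT mapping possible */
}

int main(void)
{
	uint32_t key;

	/* no explicit range given: the helper falls back to 1..0xffff */
	if (pick_unique_key(1, 0xffff, &key))
		printf("picked key 0x%x\n", key);
	return 0;
}
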
*/ if (inet_stream_ops.bind != inet_bind && - (int) sk->sk_xid > 0 && sk->sk_xid != skb->xid) + (int) sk->sk_xid >= 0 && sk->sk_xid != skb->xid) goto discard_it; if (sk->sk_state == TCP_TIME_WAIT) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 844a087b0..5edc92cf8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -451,7 +451,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packe sk = pt->af_packet_priv; po = pkt_sk(sk); - if ((int) sk->sk_xid > 0 && sk->sk_xid != skb->xid) + if (sk->sk_xid && sk->sk_xid != skb->xid) goto drop; skb->dev = dev; diff --git a/scripts/kernel-2.6-planetlab.spec b/scripts/kernel-2.6-planetlab.spec index 4e2be569b..81aec8077 100644 --- a/scripts/kernel-2.6-planetlab.spec +++ b/scripts/kernel-2.6-planetlab.spec @@ -22,7 +22,7 @@ Summary: The Linux kernel (the core of the Linux operating system) %define kversion 2.6.%{sublevel} %define rpmversion 2.6.%{sublevel} %define rhbsys %([ -r /etc/beehive-root ] && echo || echo .`whoami`) -%define release 1.521.2.6.planetlab%{?date:.%{date}} +%define release 1.planetlab%{?date:.%{date}} %define signmodules 0 %define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE} @@ -62,11 +62,6 @@ Summary: The Linux kernel (the core of the Linux operating system) # %define kernel_prereq fileutils, module-init-tools, initscripts >= 5.83, mkinitrd >= 3.5.5 -Vendor: PlanetLab -Packager: PlanetLab Central -Distribution: PlanetLab 3.0 -URL: http://cvs.planet-lab.org/cvs/linux-2.6 - Name: kernel Group: System Environment/Kernel License: GPLv2 @@ -178,19 +173,6 @@ Group: System Environment/Kernel %description uml This package includes a user mode version of the Linux kernel. -%package vserver -Summary: A placeholder RPM that provides kernel and kernel-drm - -Group: System Environment/Kernel -Provides: kernel = %{version} -Provides: kernel-drm = 4.3.0 - -%description vserver -VServers do not require and cannot use kernels, but some RPMs have -implicit or explicit dependencies on the "kernel" package -(e.g. tcpdump). This package installs no files but provides the -necessary dependencies to make rpm and yum happy. 
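
The two hunks above change the per-socket xid guard: the TCP receive path now runs the mismatch check whenever (int)sk->sk_xid is non-negative rather than strictly positive, and packet_rcv() now uses a plain non-zero test instead of the signed comparison. A tiny userspace sketch that simply evaluates the old and new predicates side by side for a few sample xid values (the values are illustrative).

/* Sketch comparing the old and new xid guards changed above in the TCP
 * receive path and in packet_rcv().  Sample xid values are arbitrary. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t samples[] = { 0, 1, 100, 0xffffffffu };
	uint32_t skb_xid = 7;  /* xid carried by the incoming skb */

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		uint32_t sk_xid = samples[i];

		int tcp_old = (int)sk_xid > 0  && sk_xid != skb_xid;
		int tcp_new = (int)sk_xid >= 0 && sk_xid != skb_xid;
		int pkt_old = (int)sk_xid > 0  && sk_xid != skb_xid;
		int pkt_new = sk_xid           && sk_xid != skb_xid;

		printf("sk_xid=%10u  tcp: old=%d new=%d  packet: old=%d new=%d\n",
		       sk_xid, tcp_old, tcp_new, pkt_old, pkt_new);
	}
	return 0;
}

For an xid whose int conversion is negative, both old guards and the new TCP guard skip the check; only the new packet_rcv() guard treats it like any other non-zero xid. For xid 0, only the new TCP guard performs the comparison at all.
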
- %prep %setup -n linux-%{kversion} @@ -258,7 +240,7 @@ BuildKernel() { grep "__crc_$i\$" System.map >> $RPM_BUILD_ROOT/boot/System.map-$KernelVer ||: done rm -f exported -# install -m 644 init/kerntypes.o $RPM_BUILD_ROOT/boot/Kerntypes-$KernelVer + install -m 644 init/kerntypes.o $RPM_BUILD_ROOT/boot/Kerntypes-$KernelVer install -m 644 .config $RPM_BUILD_ROOT/boot/config-$KernelVer rm -f System.map cp arch/*/boot/bzImage $RPM_BUILD_ROOT/%{image_install_path}/vmlinuz-$KernelVer @@ -429,7 +411,7 @@ fi # make some useful links pushd /boot > /dev/null ; { ln -sf System.map-%{KVERREL} System.map -# ln -sf Kerntypes-%{KVERREL} Kerntypes + ln -sf Kerntypes-%{KVERREL} Kerntypes ln -sf config-%{KVERREL} config ln -sf initrd-%{KVERREL}.img initrd-boot ln -sf vmlinuz-%{KVERREL} kernel-boot @@ -468,7 +450,7 @@ fi %files %defattr(-,root,root) /%{image_install_path}/vmlinuz-%{KVERREL} -#/boot/Kerntypes-%{KVERREL} +/boot/Kerntypes-%{KVERREL} /boot/System.map-%{KVERREL} /boot/config-%{KVERREL} %dir /lib/modules/%{KVERREL} @@ -481,7 +463,7 @@ fi %files smp %defattr(-,root,root) /%{image_install_path}/vmlinuz-%{KVERREL}smp -#/boot/Kerntypes-%{KVERREL}smp +/boot/Kerntypes-%{KVERREL}smp /boot/System.map-%{KVERREL}smp /boot/config-%{KVERREL}smp %dir /lib/modules/%{KVERREL}smp @@ -511,11 +493,6 @@ fi /usr/share/doc/kernel-doc-%{kversion}/Documentation/* %endif - -%files vserver -%defattr(-,root,root) -# no files - %changelog * Thu Sep 16 2004 Mark Huang - merge to Fedora Core 2 2.6.8-1.521 -- 2.47.0
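
One closing aside on the ip_nat_pptp.c helper removed earlier in this patch: for an outgoing OUT_CALL_REQUEST it reuses the SNATed TCP source port as the new call ID and rewrites the 16-bit field in place, handing the mangling routine the field's offset from the start of the PPTP header ((void *)cid - (void *)pptph). A self-contained userspace sketch of that offset-and-rewrite step; the header structs below are simplified stand-ins, not the real PPTP wire layout.

/* Sketch of the in-place 16-bit rewrite done by the removed NAT helper:
 * compute the offset of the call-ID field from the PPTP header and
 * overwrite it with a new network-order value.  Struct layouts here are
 * simplified stand-ins, not the real PPTP headers. */
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

struct fake_pptp_hdr { uint16_t len, type; uint32_t cookie; };
struct fake_ctl_hdr  { uint16_t message_type, reserved; };
struct fake_ocreq    { uint16_t call_id, call_serial; };

int main(void)
{
	uint32_t buf[16] = { 0 };                 /* word-aligned packet buffer */
	unsigned char *pkt = (unsigned char *)buf;

	struct fake_pptp_hdr *pptph = (struct fake_pptp_hdr *)pkt;
	struct fake_ocreq *req = (struct fake_ocreq *)
		(pkt + sizeof(*pptph) + sizeof(struct fake_ctl_hdr));

	req->call_id = htons(1);                  /* client's original call ID */

	/* offset of the field being rewritten, relative to the PPTP header,
	 * mirroring the "(void *)cid - (void *)pptph" offset in the helper */
	size_t off = (unsigned char *)&req->call_id - (unsigned char *)pptph;

	uint16_t new_callid = htons(40000);       /* e.g. the SNATed TCP source port */
	memcpy(pkt + off, &new_callid, sizeof(new_callid));

	printf("rewrote call id at offset %zu to %u\n", off, ntohs(new_callid));
	return 0;
}
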