This commit was manufactured by cvs2svn to create tag before-2_6_10-vs1_9_3_17-merge
author Planet-Lab Support <support@planet-lab.org>
Mon, 8 Aug 2005 21:12:17 +0000 (21:12 +0000)
committer Planet-Lab Support <support@planet-lab.org>
Mon, 8 Aug 2005 21:12:17 +0000 (21:12 +0000)
'before-2_6_10-vs1_9_3_17-merge'.

343 files changed:
.cvsignore
Documentation/ckrm/mem_rc.design
Documentation/ckrm/mem_rc.usage
Documentation/ckrm/numtasks [deleted file]
Documentation/devices.txt
Documentation/kdump.txt [deleted file]
MAINTAINERS
Makefile
arch/h8300/kernel/ints.c
arch/h8300/platform/h8s/ints.c
arch/i386/Kconfig
arch/i386/boot/compressed/head.S
arch/i386/boot/compressed/misc.c
arch/i386/kernel/Makefile
arch/i386/kernel/apic.c
arch/i386/kernel/crash_dump.c [deleted file]
arch/i386/kernel/entry.S
arch/i386/kernel/i386_ksyms.c
arch/i386/kernel/i8259.c
arch/i386/kernel/machine_kexec.c [deleted file]
arch/i386/kernel/reboot.c
arch/i386/kernel/relocate_kernel.S [deleted file]
arch/i386/kernel/setup.c
arch/i386/kernel/smp.c
arch/i386/kernel/sys_i386.c
arch/i386/kernel/traps.c
arch/i386/kernel/vmlinux.lds.S
arch/i386/mm/discontig.c
arch/i386/mm/highmem.c
arch/i386/mm/init.c
arch/i386/mm/mmap.c
arch/ia64/kernel/perfmon.c
arch/ia64/mm/fault.c
arch/mips/kernel/syscall.c
arch/ppc/Kconfig
arch/ppc/kernel/Makefile
arch/ppc/kernel/machine_kexec.c [deleted file]
arch/ppc/kernel/relocate_kernel.S [deleted file]
arch/ppc64/mm/hugetlbpage.c
arch/sh64/mm/hugetlbpage.c
arch/um/Kconfig
arch/um/Makefile
arch/um/Makefile-i386
arch/um/defconfig
arch/um/drivers/chan_kern.c
arch/um/drivers/cow.h
arch/um/drivers/cow_kern.c [new file with mode: 0644]
arch/um/drivers/cow_user.c
arch/um/drivers/hostaudio_kern.c
arch/um/drivers/hostaudio_user.c
arch/um/drivers/net_user.c
arch/um/drivers/ubd_kern.c
arch/um/dyn.lds.S
arch/um/include/.cvsignore [deleted file]
arch/um/include/aio.h [moved from arch/um/kernel/skas/include/mmu.h with 50% similarity]
arch/um/include/filehandle.h [new file with mode: 0644]
arch/um/include/init.h
arch/um/include/irq_kern.h
arch/um/include/mem_kern.h
arch/um/include/os.h
arch/um/include/skas_ptregs.h [new file with mode: 0644]
arch/um/include/sysdep-i386/.cvsignore [deleted file]
arch/um/kernel/.cvsignore [deleted file]
arch/um/kernel/filehandle.c [new file with mode: 0644]
arch/um/kernel/helper.c
arch/um/kernel/physmem.c
arch/um/kernel/process.c
arch/um/kernel/process_kern.c
arch/um/kernel/ptrace.c
arch/um/kernel/skas/exec_user.c [deleted file]
arch/um/kernel/skas/include/mode.h [deleted file]
arch/um/kernel/skas/include/mode_kern.h [deleted file]
arch/um/kernel/skas/include/uaccess.h [deleted file]
arch/um/kernel/skas/uaccess.c
arch/um/kernel/skas/util/.cvsignore [deleted file]
arch/um/kernel/syscall_kern.c
arch/um/kernel/tt/include/mmu.h [deleted file]
arch/um/kernel/tt/include/mode.h [deleted file]
arch/um/kernel/tt/include/mode_kern.h [deleted file]
arch/um/kernel/tt/include/uaccess.h [deleted file]
arch/um/kernel/tt/uaccess.c
arch/um/kernel/user_syms.c [deleted file]
arch/um/kernel/vmlinux.lds.S [new file with mode: 0644]
arch/um/main.c
arch/um/os-Linux/aio.c [new file with mode: 0644]
arch/um/os-Linux/file.c
arch/um/os-Linux/user_syms.c
arch/um/sys-i386/bitops.c [new file with mode: 0644]
arch/um/sys-i386/extable.c [deleted file]
arch/um/sys-i386/semaphore.c [new file with mode: 0644]
arch/um/sys-i386/util/.cvsignore [deleted file]
arch/um/sys-i386/util/Makefile
arch/um/uml.lds.S
arch/um/util/.cvsignore [deleted file]
arch/x86_64/Kconfig
arch/x86_64/kernel/Makefile
arch/x86_64/kernel/apic.c
arch/x86_64/kernel/e820.c
arch/x86_64/kernel/i8259.c
arch/x86_64/kernel/io_apic.c
arch/x86_64/kernel/machine_kexec.c [deleted file]
arch/x86_64/kernel/reboot.c
arch/x86_64/kernel/relocate_kernel.S [deleted file]
configs/kernel-2.6.10-i586-smp.config
configs/kernel-2.6.10-i586.config
configs/kernel-2.6.10-i686-planetlab.config
configs/kernel-2.6.10-i686-smp.config
configs/kernel-2.6.10-i686-uml-planetlab.config [deleted file]
configs/kernel-2.6.10-i686.config
drivers/char/mem.c
drivers/char/mxser.c
drivers/char/n_tty.c
drivers/char/tty_io.c
drivers/dump/dump_fmt.c
drivers/dump/dump_i386.c
drivers/dump/dump_memdev.c
drivers/dump/dump_overlay.c
drivers/dump/dump_setup.c
drivers/net/tulip/tulip_core.c
drivers/scsi/scsi_devinfo.c
drivers/usb/core/hub.c
fs/Makefile
fs/aio.c
fs/attr.c
fs/binfmt_elf.c
fs/devpts/inode.c
fs/exec.c
fs/ext2/acl.c
fs/ext2/balloc.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext2/namei.c
fs/ext3/acl.c
fs/ext3/balloc.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/ioctl.c
fs/ext3/namei.c
fs/file_table.c
fs/hostfs/externfs.c [new file with mode: 0644]
fs/hostfs/host_file.c [new file with mode: 0644]
fs/hostfs/host_fs.c [new file with mode: 0644]
fs/hostfs/humfs.c [new file with mode: 0644]
fs/hostfs/meta_fs.c [new file with mode: 0644]
fs/hostfs/metadata.h [new file with mode: 0644]
fs/inode.c
fs/jfs/jfs_imap.c
fs/namei.c
fs/namespace.c
fs/nfs/dir.c
fs/nfs/inode.c
fs/open.c
fs/posix_acl.c
fs/proc/Makefile
fs/proc/array.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/kcore.c
fs/proc/proc_misc.c
fs/proc/task_mmu.c
fs/proc/vmcore.c [deleted file]
fs/rcfs/dir.c
fs/rcfs/inode.c
fs/rcfs/magic.c
fs/rcfs/rootdir.c
fs/rcfs/socket_fs.c
fs/rcfs/super.c
fs/rcfs/tc_magic.c
fs/reiserfs/file.c
fs/reiserfs/namei.c
fs/super.c
fs/sysfs/mount.c
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/xfs_dinode.h
fs/xfs/xfs_fs.h
include/asm-generic/vmlinux.lds.h
include/asm-i386/apicdef.h
include/asm-i386/crash_dump.h [deleted file]
include/asm-i386/highmem.h
include/asm-i386/kexec.h [deleted file]
include/asm-i386/mach-default/irq_vectors.h
include/asm-i386/param.h
include/asm-i386/smp.h
include/asm-parisc/unistd.h
include/asm-ppc/kexec.h [deleted file]
include/asm-ppc/machdep.h
include/asm-sparc64/tlb.h
include/asm-um/cpumask.h [deleted file]
include/asm-um/diskdump.h [deleted file]
include/asm-um/init.h [deleted file]
include/asm-um/irq.h
include/asm-um/module.h [new file with mode: 0644]
include/asm-um/pgalloc.h
include/asm-um/processor-generic.h
include/asm-um/smplock.h [deleted file]
include/asm-um/spinlock.h [deleted file]
include/asm-x86_64/kexec.h [deleted file]
include/asm-x86_64/mmu_context.h
include/asm-x86_64/unistd.h
include/linux/bootmem.h
include/linux/ckrm.h [moved from include/linux/ckrm_events.h with 70% similarity]
include/linux/ckrm_ce.h
include/linux/ckrm_mem.h
include/linux/ckrm_mem_inline.h
include/linux/ckrm_rc.h
include/linux/ckrm_tc.h
include/linux/ckrm_tsk.h
include/linux/crash_dump.h [deleted file]
include/linux/crbce.h
include/linux/devpts_fs.h
include/linux/dump.h
include/linux/ext2_fs.h
include/linux/ext3_fs.h
include/linux/fs.h
include/linux/gfp.h
include/linux/ghash.h [new file with mode: 0644]
include/linux/highmem.h
include/linux/kexec.h [deleted file]
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mmzone.h
include/linux/mount.h
include/linux/page-flags.h
include/linux/proc_mm.h [new file with mode: 0644]
include/linux/rcfs.h
include/linux/reboot.h
include/linux/reiserfs_fs_sb.h
include/linux/sched.h
include/linux/sysctl.h
include/linux/sysfs.h
include/linux/taskdelays.h
include/linux/vs_base.h
include/linux/vs_context.h
include/linux/vs_cvirt.h
include/linux/vs_dlimit.h
include/linux/vs_limit.h
include/linux/vs_memory.h
include/linux/vs_network.h
include/linux/vs_sched.h [deleted file]
include/linux/vs_socket.h
include/linux/vserver.h [new file with mode: 0644]
include/linux/vserver/context.h
include/linux/vserver/context_cmd.h [deleted file]
include/linux/vserver/cvirt.h
include/linux/vserver/cvirt_cmd.h [deleted file]
include/linux/vserver/cvirt_def.h [deleted file]
include/linux/vserver/debug.h
include/linux/vserver/debug_cmd.h [deleted file]
include/linux/vserver/dlimit.h
include/linux/vserver/inode.h
include/linux/vserver/legacy.h
include/linux/vserver/limit.h
include/linux/vserver/limit_cmd.h [deleted file]
include/linux/vserver/limit_def.h [deleted file]
include/linux/vserver/namespace.h
include/linux/vserver/network.h
include/linux/vserver/network_cmd.h [deleted file]
include/linux/vserver/sched.h
include/linux/vserver/sched_cmd.h [deleted file]
include/linux/vserver/sched_def.h [deleted file]
include/linux/vserver/switch.h
include/linux/vserver/xid.h
include/net/route.h
init/Kconfig
init/main.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
kernel/Makefile
kernel/capability.c
kernel/ckrm/Makefile
kernel/ckrm/ckrm.c
kernel/ckrm/ckrm_cpu_class.c
kernel/ckrm/ckrm_cpu_monitor.c
kernel/ckrm/ckrm_events.c [deleted file]
kernel/ckrm/ckrm_mem.c
kernel/ckrm/ckrm_memcore.c [deleted file]
kernel/ckrm/ckrm_memctlr.c [deleted file]
kernel/ckrm/ckrm_null_class.c [deleted file]
kernel/ckrm/ckrm_numtasks.c
kernel/ckrm/ckrm_numtasks_stub.c
kernel/ckrm/ckrm_tc.c
kernel/ckrm/ckrmutils.c
kernel/ckrm/rbce/rbce_fs.c
kernel/ckrm/rbce/rbcemod.c
kernel/ckrm/rbce/rbcemod_ext.c
kernel/crash.c [deleted file]
kernel/exit.c
kernel/fork.c
kernel/kexec.c [deleted file]
kernel/panic.c
kernel/posix-timers.c
kernel/printk.c
kernel/sched.c
kernel/signal.c
kernel/sys.c
kernel/sys_ni.c
kernel/vserver/Kconfig
kernel/vserver/Makefile
kernel/vserver/context.c
kernel/vserver/cvirt.c
kernel/vserver/cvirt_init.h [deleted file]
kernel/vserver/cvirt_proc.h [deleted file]
kernel/vserver/dlimit.c
kernel/vserver/helper.c
kernel/vserver/init.c
kernel/vserver/inode.c
kernel/vserver/legacy.c
kernel/vserver/limit.c
kernel/vserver/limit_init.h [deleted file]
kernel/vserver/limit_proc.h [deleted file]
kernel/vserver/namespace.c
kernel/vserver/network.c
kernel/vserver/proc.c
kernel/vserver/sched.c
kernel/vserver/sched_init.h [deleted file]
kernel/vserver/sched_proc.h [deleted file]
kernel/vserver/signal.c
kernel/vserver/switch.c
kernel/vserver/sysctl.c
mm/Makefile
mm/bootmem.c
mm/fremap.c
mm/memory.c
mm/mlock.c
mm/mmap.c
mm/mremap.c
mm/nommu.c
mm/oom_kill.c
mm/page_alloc.c
mm/swap.c
mm/swapfile.c
mm/vmscan.c
net/core/dev.c
net/core/rtnetlink.c
net/ipv4/af_inet.c
net/ipv4/tcp.c
net/socket.c
net/sunrpc/auth.c
net/sunrpc/auth_unix.c
scripts/kernel-2.6-planetlab.spec
security/security.c

index c150355..5e7d074 100644 (file)
@@ -11,4 +11,3 @@
 Module.symvers
 System.map
 vmlinux
-linux
index 1c020ff..bc565c6 100644 (file)
@@ -12,13 +12,9 @@ These are the events in a page's lifecycle:
 
 When the memory subsystem runs low on LRU pages, pages are reclaimed by
     - moving pages from active list to inactive list (refill_inactive_zone())
-    - freeing pages from the inactive list (shrink_zone)
+       - freeing pages from the inactive list (shrink_zone)
 depending on the recent usage of the page(approximately).
 
-In the process of the life cycle a page can move from the lru list to swap
-and back. For this document's purpose, we treat it same as freeing and
-allocating the page, respectfully.
-
 1. Introduction
 ---------------
 Memory resource controller controls the number of lru physical pages
@@ -44,26 +40,26 @@ memory allocation logic.
 Note that the numbers that are specified in the shares file don't
 directly correspond to the number of pages. But, the user can make
 it so by making the total_guarantee and max_limit of the default class
-(/rcfs/taskclass) to be the total number of pages(given in stats file)
+(/rcfs/taskclass) to be the total number of pages(given in config file)
 available in the system.
 
   for example: 
    # cd /rcfs/taskclass
-   # grep System stats
-   System: tot_pages=257512,active=5897,inactive=2931,free=243991
+   # cat config
+   res=mem;tot_pages=239778,active=60473,inactive=135285,free=44555
    # cat shares
    res=mem,guarantee=-2,limit=-2,total_guarantee=100,max_limit=100
 
-  "tot_pages=257512" above mean there are 257512 lru pages in
+  "tot_pages=239778" above means there are 239778 lru pages in
   the system.
   
   By making total_guarantee and max_limit to be same as this number at 
   this level (/rcfs/taskclass), one can make guarantee and limit in all 
   classes refer to the number of pages.
 
-  # echo 'res=mem,total_guarantee=257512,max_limit=257512' > shares
+  # echo 'res=mem,total_guarantee=239778,max_limit=239778' > shares
   # cat shares
-  res=mem,guarantee=-2,limit=-2,total_guarantee=257512,max_limit=257512
+  res=mem,guarantee=-2,limit=-2,total_guarantee=239778,max_limit=239778
 
 
 The number of pages a class can use can be anywhere between its guarantee and
@@ -72,100 +68,60 @@ to choose a victim page to swap out. While the number of pages a class can
 have allocated may be anywhere between its guarantee and limit, victim
 pages will be chosen from classes that are above their guarantee.
 
-Victim class will be chosen by the number pages a class is using over its
-guarantee. i.e a class that is using 10000 pages over its guarantee will be
-chosen against a class that is using 1000 pages over its guarantee.
-Pages belonging to classes that are below their guarantee will not be
-chosen as a victim.
-
-2. Configuaration parameters
----------------------------
-
-Memory controller provides the following configuration parameters. Usage of
-these parameters will be made clear in the following section.
-
-fail_over: When pages are being allocated, if the class is over fail_over % of
-    its limit, then fail the memory allocation. Default is 110.
-    ex: If limit of a class is 30000 and fail_over is 110, then memory
-    allocations would start failing once the class is using more than 33000
-    pages.
-
-shrink_at: When a class is using shrink_at % of its limit, then start
-    shrinking the class, i.e start freeing the page to make more free pages
-    available for this class. Default is 90.
-    ex: If limit of a class is 30000 and shrink_at is 90, then pages from this
-    class will start to get freed when the class's usage is above 27000
+Pages will be freed from classes that are close to their "limit" before
+freeing pages from the classes that are close to their guarantee. Pages
+belonging to classes that are below their guarantee will not be chosen as
+a victim.
 
-shrink_to: When a class reached shrink_at % of its limit, ckrm will try to
-    shrink the class's usage to shrink_to %. Defalut is 80.
-    ex: If limit of a class is 30000 with shrink_at being 90 and shrink_to
-    being 80, then ckrm will try to free pages from the class when its
-    usage reaches 27000 and will try to bring it down to 24000.
-
-num_shrinks: Number of shrink attempts ckrm will do within shrink_interval
-    seconds. After this many attempts in a period, ckrm will not attempt a
-    shrink even if the class's usage goes over shrink_at %. Default is 10.
-
-shrink_interval: Number of seconds in a shrink period. Default is 10.
-
-3. Design
+2. Core Design
 --------------------------
 
 CKRM memory resource controller taps at appropriate low level memory 
 management functions to associate a page with a class and to charge
 a class that brings the page to the LRU list.
 
-CKRM maintains lru lists per-class instead of keeping it system-wide, so
-that reducing a class's usage doesn't involve going through the system-wide
-lru lists.
-
-3.1 Changes in page allocation function(__alloc_pages())
+2.1 Changes in page allocation function(__alloc_pages())
 --------------------------------------------------------
-- If the class that the current task belong to is over 'fail_over' % of its
-  'limit', allocation of page(s) fail. Otherwise, the page allocation will
-  proceed as before.
+- If the class that the current task belong to is over 110% of its 'limit',
+  allocation of page(s) fail.
+- After successful allocation of a page, the page is attached to the class
+  to which the current task belongs.
 - Note that the class is _not_ charged for the page(s) here.
 
-3.2 Changes in page free(free_pages_bulk())
+2.2 Changes in page free(free_pages_bulk())
 -------------------------------------------
-- If the page still belong to a class, the class will be credited for this
-  page.
+- page is freed from the class it belongs to.
 
-3.3 Adding/Deleting page to active/inactive list
+2.3 Adding/Deleting page to active/inactive list
 -------------------------------------------------
 When a page is added to the active or inactive list, the class that the
-task belongs to is charged for the page usage.
+page belongs to is charged for the page usage.
 
 When a page is deleted from the active or inactive list, the class that the
 page belongs to is credited back.
 
-If a class uses 'shrink_at' % of its limit, attempt is made to shrink
-the class's usage to 'shrink_to' % of its limit, in order to help the class
-stay within its limit.
+If a class uses up to its limit, an attempt is made to shrink the class's usage
+to 90% of its limit, in order to help the class stay within its limit.
 But, if the class is aggressive, and keep getting over the class's limit
-often(more than such 'num_shrinks' events in 'shrink_interval' seconds),
-then the memory resource controller gives up on the class and doesn't try
-to shrink the class, which will eventually lead the class to reach
-fail_over % and then the page allocations will start failing.
+often(more than 10 shrink events in 10 seconds), then the memory resource
+controller gives up on the class and doesn't try to shrink the class, which
+will eventually lead the class to reach 110% of its limit and then the
+page allocations will start failing.
 
-3.4 Changes in the page reclaimation path (refill_inactive_zone and shrink_zone)
+2.4 Changes in the page reclamation path (refill_inactive_zone and shrink_zone)
 -------------------------------------------------------------------------------
 Pages will be moved from active to inactive list(refill_inactive_zone) and
-pages from inactive list by choosing victim classes. Victim classes are
-chosen depending on their usage over their guarantee.
-
-Classes with DONT_CARE guarantee are assumed an implicit guarantee which is
-based on the number of children(with DONT_CARE guarantee) its parent has
-(including the default class) and the unused pages its parent still has.
-ex1: If a default root class /rcfs/taskclass has 3 children c1, c2 and c3
-and has 200000 pages, and all the classes have DONT_CARE guarantees, then
-all the classes (c1, c2, c3 and the default class of /rcfs/taskclass) will 
-get 50000 (200000 / 4) pages each.
-ex2: If, in the above example c1 is set with a guarantee of 80000 pages,
-then the other classes (c2, c3 and the default class of /rcfs/taskclass)
-will get 40000 ((200000 - 80000) / 3) pages each.
-
-3.5 Handling of Shared pages
+pages from inactive list will be freed in the following order:
+(range is calculated by subtracting 'guarantee' from 'limit')
+  - Classes that are over 110% of their range
+  - Classes that are over 100% of their range
+  - Classes that are over 75%  of their range
+  - Classes that are over 50%  of their range
+  - Classes that are over 25%  of their range
+  - Classes whose parent is over 110% of its range
+  - Classes that are over their guarantee
+
+2.5 Handling of Shared pages
 ----------------------------
 Even if a mm is shared by tasks, the pages that belong to the mm will be
 charged against the individual tasks that bring the page into LRU. 
index 3d2f2f0..faddbf8 100644 (file)
@@ -16,21 +16,20 @@ For brevity, unless otherwise specified all the following commands are
 executed in the default class (/rcfs/taskclass).
 
 Initially, the systemwide default class gets 100% of the LRU pages, and the
-stats file at the /rcfs/taskclass level displays the total number of
-physical pages.
+config file displays the total number of physical pages.
 
    # cd /rcfs/taskclass
-   # grep System stats
-   System: tot_pages=239778,active=60473,inactive=135285,free=44555
+   # cat config
+   res=mem;tot_pages=239778,active=60473,inactive=135285,free=44555
    # cat shares
    res=mem,guarantee=-2,limit=-2,total_guarantee=100,max_limit=100
 
    tot_pages - total number of pages
    active    - number of pages in the active list ( sum of all zones)
-   inactive  - number of pages in the inactive list ( sum of all zones)
-   free      - number of free pages (sum of all zones)
+   inactive  - number of pages in the inactive list ( sum of all zones )
+   free      - number of free pages (sum of all zones)
 
-   By making total_guarantee and max_limit to be same as tot_pages, one can 
+   By making total_guarantee and max_limit to be same as tot_pages, one can
    make the numbers in shares file be same as the number of pages for a
    class.
 
@@ -38,51 +37,13 @@ physical pages.
    # cat shares
    res=mem,guarantee=-2,limit=-2,total_guarantee=239778,max_limit=239778
 
-Changing configuration parameters:
-----------------------------------
-For description of the paramters read the file mem_rc.design in this same directory.
-
-Following is the default values for the configuration parameters:
-
-   localhost:~ # cd /rcfs/taskclass
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=110,shrink_at=90,shrink_to=80,num_shrinks=10,shrink_interval=10
-
-Here is how to change a specific configuration parameter. Note that more than one 
-configuration parameter can be changed in a single echo command though for simplicity
-we show one per echo.
-
-ex: Changing fail_over: 
-   localhost:/rcfs/taskclass # echo "res=mem,fail_over=120" > config
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=120,shrink_at=90,shrink_to=80,num_shrinks=10,shrink_interval=10
-
-ex: Changing shrink_at: 
-   localhost:/rcfs/taskclass # echo "res=mem,shrink_at=85" > config
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=120,shrink_at=85,shrink_to=80,num_shrinks=10,shrink_interval=10
-
-ex: Changing shrink_to: 
-   localhost:/rcfs/taskclass # echo "res=mem,shrink_to=75" > config
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=120,shrink_at=85,shrink_to=75,num_shrinks=10,shrink_interval=10
-
-ex: Changing num_shrinks: 
-   localhost:/rcfs/taskclass # echo "res=mem,num_shrinks=20" > config
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=120,shrink_at=85,shrink_to=75,num_shrinks=20,shrink_interval=10
-
-ex: Changing shrink_interval: 
-   localhost:/rcfs/taskclass # echo "res=mem,shrink_interval=15" > config
-   localhost:/rcfs/taskclass # cat config
-   res=mem,fail_over=120,shrink_at=85,shrink_to=75,num_shrinks=20,shrink_interval=15
 
 Class creation 
 --------------
 
    # mkdir c1
 
-Its initial share is DONT_CARE. The parent's share values will be unchanged.
+Its initial share is don't care. The parent's share values will be unchanged.
 
 Setting a new class share
 -------------------------
@@ -101,7 +62,6 @@ Monitoring
 stats file shows statistics of the page usage of a class
    # cat stats
    ----------- Memory Resource stats start -----------
-   System: tot_pages=239778,active=60473,inactive=135285,free=44555
    Number of pages used(including pages lent to children): 196654
    Number of pages guaranteed: 239778
    Maximum limit of pages: 239778
diff --git a/Documentation/ckrm/numtasks b/Documentation/ckrm/numtasks
deleted file mode 100644 (file)
index 94b4b09..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-Introduction
--------------
-
-Numtasks is a resource controller under the CKRM framework that allows the 
-user/sysadmin to manage the number of tasks a class can create. It also allows
-one to limit the fork rate across the system.
-
-As with any other resource under the CKRM framework, numtasks also assigns
-all the resources to the detault class(/rcfs/taskclass). Since , the number
-of tasks in a system is not limited, this resource controller provides a
-way to set the total number of tasks available in the system through the config
-file. By default this value is 128k(131072). In other words, if not changed,
-the total number of tasks allowed in a system is 131072.
-
-The config variable that affect this is sys_total_tasks.
-
-This resource controller also allows the sysadmin to limit the number of forks
-that are allowed in the system within the specified number of seconds. This
-can be acheived by changing the attributes forkrate and forkrate_interval in 
-the config file. Through this feature one can protect the system from being
-attacked by fork bomb type applications.
-
-Installation
--------------
-
-1. Configure "Number of Tasks Resource Manager" under CKRM (see
-      Documentation/ckrm/installation). This can be configured as a module
-      also. But, when inserted as a module it cannot be removed.
-
-2. Reboot the system with the new kernel. Insert the module, if compiled
-      as a module.
-
-3. Verify that the memory controller is present by reading the file
-   /rcfs/taskclass/config (should show a line with res=numtasks)
-
-Usage
------
-
-For brevity, unless otherwise specified all the following commands are
-executed in the default class (/rcfs/taskclass).
-
-As explained above the config file shows sys_total_tasks and forkrate
-info.
-
-   # cd /rcfs/taskclass
-   # cat config
-   res=numtasks,sys_total_tasks=131072,forkrate=1000000,forkrate_interval=3600
-
-By default, the sys_total_tasks is set to 131072(128k), and forkrate is set
-to 1 million and forkrate_interval is set to 3600 seconds. Which means the
-total number of tasks in a system is limited to 131072 and the forks are 
-limited to 1 million per hour.
-
-sysadmin can change these values by just writing the attribute/value pair
-to the config file.
-
-   # echo res=numtasks,forkrate=100,forkrate_interval=10 > config
-   # cat config
-   res=numtasks,sys_total_tasks=1000,forkrate=100,forkrate_interval=10
-
-   # echo res=numtasks,forkrate=100,forkrate_interval=10 > config
-   # cat config
-   res=numtasks,sys_total_tasks=1000,forkrate=100,forkrate_interval=10
-
-By making total_guarantee and max_limit to be same as sys_total_tasks, 
-sysadmin can make the numbers in shares file be same as the number of tasks
-for a class.
-
-   # echo res=numtasks,total_guarantee=131072,max_limit=131072 > shares
-   # cat shares
-   res=numtasks,guarantee=-2,limit=-2,total_guarantee=131072,max_limit=131072
-
-
-Class creation 
---------------
-
-   # mkdir c1
-
-Its initial share is don't care. The parent's share values will be unchanged.
-
-Setting a new class share
--------------------------
-
-'guarantee' specifies the number of tasks this class is entitled to get
-'limit' is the maximum number of tasks this class can get.
-
-Following command will set the guarantee of class c1 to be 25000 and the limit 
-to be 50000
-
-   # echo 'res=numtasks,guarantee=25000,limit=50000' > c1/shares
-   # cat c1/shares     
-   res=numtasks,guarantee=25000,limit=50000,total_guarantee=100,max_limit=100
-
-Limiting forks in a time period
--------------------------------
-By default, this resource controller allows forking of 1 million tasks in
-an hour.
-
-Folowing command would change it to allow only 100 forks per 10 seconds
-
-   # echo res=numtasks,forkrate=100,forkrate_interval=10 > config
-   # cat config
-   res=numtasks,sys_total_tasks=1000,forkrate=100,forkrate_interval=10
-
-Note that the same set of values is used across the system. In other words,
-each individual class will be allowed 'forkrate' forks in 'forkrate_interval'
-seconds.
-
-Monitoring
-----------
-
-stats file shows statistics of the number of tasks usage of a class
-[root@localhost taskclass]# cat stats
-Number of tasks resource:
-Total Over limit failures: 0
-Total Over guarantee sucesses: 0
-Total Over guarantee failures: 0
-Maximum Over limit failures: 0
-Maximum Over guarantee sucesses: 0
-Maximum Over guarantee failures: 0
-cur_alloc 38; borrowed 0; cnt_guar 131072; cnt_limit 131072 cnt_unused 131072, unused_guarantee 100, cur_max_limit 0
-
index 60ce4ae..f115145 100644 (file)
@@ -100,7 +100,6 @@ Your cooperation is appreciated.
                  9 = /dev/urandom      Faster, less secure random number gen.
                 10 = /dev/aio          Asyncronous I/O notification interface
                 11 = /dev/kmsg         Writes to this come out as printk's
-                12 = /dev/oldmem               Access to kexec-ed crash dump
   1 block      RAM disk
                  0 = /dev/ram0         First RAM disk
                  1 = /dev/ram1         Second RAM disk
diff --git a/Documentation/kdump.txt b/Documentation/kdump.txt
deleted file mode 100644 (file)
index 8fc3d68..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-Documentation for kdump - the kexec based crash dumping solution
-================================================================
-
-DESIGN
-======
-
-We use kexec to reboot to a second kernel whenever a dump needs to be taken.
-This second kernel is booted with with very little memory (configurable
-at compile time). The first kernel reserves the section of memory that the
-second kernel uses. This ensures that on-going DMA from the first kernel
-does not corrupt the second kernel. The first 640k of physical memory is
-needed irrespective of where the kernel loads at. Hence, this region is
-backed up before reboot.
-
-In the second kernel, "old memory" can be accessed in two ways. The
-first one is through a device interface. We can create a /dev/oldmem or
-whatever and write out the memory in raw format. The second interface is
-through /proc/vmcore. This exports the dump as an ELF format file which
-can be written out using any file copy command (cp, scp, etc). Further, gdb
-can be used to perform some minimal debugging on the dump file. Both these
-methods ensure that there is correct ordering of the dump pages (corresponding
-to the first 640k that has been relocated).
-
-SETUP
-=====
-
-1) Obtain the appropriate -mm tree patch and apply it on to the vanilla
-   kernel tree.
-
-2) Two kernels need to be built in order to get this feature working.
-
-   For the first kernel, choose the default values for the following options.
-
-   a) Physical address where the kernel is loaded
-   b) kexec system call
-   c) kernel crash dumps
-
-   All the options are under "Processor type and features"
-
-   For the second kernel, change (a) to 16MB. If you want to choose another
-   value here, ensure "location from where the crash dumping kernel will boot
-   (MB)" under (c) reflects the same value.
-
-   Also ensure you have CONFIG_HIGHMEM on.
-
-3) Boot into the first kernel. You are now ready to try out kexec based crash
-   dumps.
-
-4) Load the second kernel to be booted using
-
-   kexec -p <second-kernel> --args-linux --append="root=<root-dev> dump
-   init 1 memmap=exactmap memmap=640k@0 memmap=32M@16M"
-
-   Note that <second-kernel> has to be a vmlinux image. bzImage will not
-   work, as of now.
-
-5) Enable kexec based dumping by
-
-   echo 1 > /proc/kexec-dump
-
-   If this is not set, the system will not do a kexec reboot in the event
-   of a panic.
-
-6) System reboots into the second kernel when a panic occurs.
-   You could write a module to call panic, for testing purposes.
-
-7) Write out the dump file using
-
-   cp /proc/vmcore <dump-file>
-
-You can also access the dump as a device for a linear/raw view. To do this,
-you will need the kd-oldmem-<version>.patch built into the kernel. To create
-the device, type
-
-  mknod /dev/oldmem c 1 12
-
-Use "dd" with suitable options for count, bs and skip to access specific
-portions of the dump.
-
-ANALYSIS
-========
-
-You can run gdb on the dump file copied out of /proc/vmcore. Use vmlinux built
-with -g and run
-
-  gdb vmlinux <dump-file>
-
-Stack trace for the task on processor 0, register display, memory display
-work fine.
-
-TODO
-====
-
-1) Provide a kernel-pages only view for the dump. This could possibly turn up
-   as /proc/vmcore-kern.
-2) Provide register contents of all processors (similar to what multi-threaded
-   core dumps does).
-3) Modify "crash" to make it recognize this dump.
-4) Make the i386 kernel boot from any location so we can run the second kernel
-   from the reserved location instead of the current approach.
-
-CONTACT
-=======
-
-Hariprasad Nellitheertha - hari at in dot ibm dot com
index 5f81698..66275d4 100644 (file)
@@ -1259,17 +1259,6 @@ M:       rml@novell.com
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 
-KEXEC
-P:     Eric Biederman
-P:     Randy Dunlap
-M:     ebiederm@xmission.com
-M:     rddunlap@osdl.org
-W:     http://www.xmission.com/~ebiederm/files/kexec/
-W:     http://developer.osdl.org/rddunlap/kexec/
-L:     linux-kernel@vger.kernel.org
-L:     fastboot@osdl.org
-S:     Maintained
-
 LANMEDIA WAN CARD DRIVER
 P:     Andrew Stanley-Jones
 M:     asj@lanmedia.com
index effcf26..5133b8b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 10
-EXTRAVERSION = -1.14_FC2.1.planetlab
+EXTRAVERSION = -1.12_FC2.1.planetlab
 NAME=AC 1
 
 # *DOCUMENTATION*
index 0b9ddba..edb3c41 100644 (file)
@@ -114,7 +114,7 @@ void __init init_IRQ(void)
                }
        }
        interrupt_redirect_table = ramvec;
-#ifdef CRASH_DUMP_VECTOR
+#ifdef DUMP_VECTOR
        ramvec_p = ramvec;
        for (i = 0; i < NR_IRQS; i++) {
                if ((i % 8) == 0)
index 6b27e5a..5441cdd 100644 (file)
@@ -134,7 +134,7 @@ void __init init_IRQ(void)
        ramvec[TRAP0_VEC] = VECTOR(system_call);
        ramvec[TRAP3_VEC] = break_vec;
        interrupt_redirect_table = ramvec;
-#ifdef CRASH_DUMP_VECTOR
+#ifdef DUMP_VECTOR
        ramvec_p = ramvec;
        for (i = 0; i < NR_IRQS; i++) {
                if ((i % 8) == 0)
index 1ed5b38..6880e96 100644 (file)
@@ -330,6 +330,14 @@ config MVIAC3_2
 
 endchoice
 
+config X86_HZ
+       int "Clock Tick Rate"
+       default 1000 if !(M386 || M486 || M586 || M586TSC || M586MMX)   
+       default 100 if (M386 || M486 || M586 || M586TSC || M586MMX)     
+       help
+         Select the kernel clock tick rate in interrupts per second.
+         Slower processors should choose 100; everything else 1000.
+
 config X86_GENERIC
        bool "Generic x86 support"
        help
@@ -553,14 +561,6 @@ config X86_IO_APIC
        depends on !SMP && X86_UP_IOAPIC
        default y
 
-config KERNEL_HZ
-       int "Timer Frequency (100-20000)"
-       range 100 20000
-       default "1000"
-       help
-         This allows you to specify the frequency at which the
-         kernel timer interrupt will occur.
-
 config X86_TSC
        bool
        depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
@@ -922,53 +922,6 @@ config REGPARM
        generate incorrect output with certain kernel constructs when
        -mregparm=3 is used.
 
-config KERN_PHYS_OFFSET
-       int "Physical address where the kernel is loaded (1-112)MB"
-       range 1 112
-       default "1"
-       help
-         This gives the physical address where the kernel is loaded.
-         Primarily used in the case of kexec on panic where the
-         recovery kernel needs to run at a different address than
-         the panic-ed kernel.
-
-config KEXEC
-       bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
-       help
-         kexec is a system call that implements the ability to shutdown your
-         current kernel, and to start another kernel.  It is like a reboot
-         but it is indepedent of the system firmware.   And like a reboot
-         you can start any kernel with it, not just Linux.
-
-         The name comes from the similiarity to the exec system call.
-
-         It is an ongoing process to be certain the hardware in a machine
-         is properly shutdown, so do not be surprised if this code does not
-         initially work for you.  It may help to enable device hotplugging
-         support.  As of this writing the exact hardware interface is
-         strongly in flux, so no good recommendation can be made.
-
-config CRASH_DUMP
-       bool "kernel crash dumps (EXPERIMENTAL)"
-       depends on KEXEC
-       help
-         Generate crash dump using kexec.
-
-config BACKUP_BASE
-       int "location from where the crash dumping kernel will boot (MB)"
-       depends on CRASH_DUMP
-       default 16
-       help
-       This is the location where the second kernel will boot from.
-
-config BACKUP_SIZE
-       int "Size of memory used by the crash dumping kernel (MB)"
-       depends on CRASH_DUMP
-       range 16 64
-       default 32
-       help
-       The size of the second kernel's memory.
 endmenu
 
 
index 4f41af3..c5e80b6 100644 (file)
@@ -74,7 +74,7 @@ startup_32:
        popl %esi       # discard address
        popl %esi       # real mode pointer
        xorl %ebx,%ebx
-       ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET
+       ljmp $(__BOOT_CS), $0x100000
 
 /*
  * We come here, if we were loaded high.
@@ -99,7 +99,7 @@ startup_32:
        popl %ecx       # lcount
        popl %edx       # high_buffer_start
        popl %eax       # hcount
-       movl $KERN_PHYS_OFFSET,%edi
+       movl $0x100000,%edi
        cli             # make sure we don't get interrupted
        ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
 
@@ -124,5 +124,5 @@ move_routine_start:
        movsl
        movl %ebx,%esi  # Restore setup pointer
        xorl %ebx,%ebx
-       ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET
+       ljmp $(__BOOT_CS), $0x100000
 move_routine_end:
index 9805b37..8745683 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/tty.h>
 #include <video/edid.h>
 #include <asm/io.h>
-#include <asm/segment.h>
 
 /*
  * gzip declarations
@@ -310,7 +309,7 @@ static void setup_normal_output_buffer(void)
 #else
        if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
-       output_data = (char *)KERN_PHYS_OFFSET; /* Points to 1M */
+       output_data = (char *)0x100000; /* Points to 1M */
        free_mem_end_ptr = (long)real_mode;
 }
 
@@ -335,8 +334,8 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
        low_buffer_size = low_buffer_end - LOW_BUFFER_START;
        high_loaded = 1;
        free_mem_end_ptr = (long)high_buffer_start;
-       if ( (KERN_PHYS_OFFSET + low_buffer_size) > ((ulg)high_buffer_start)) {
-               high_buffer_start = (uch *)(KERN_PHYS_OFFSET + low_buffer_size);
+       if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
+               high_buffer_start = (uch *)(0x100000 + low_buffer_size);
                mv->hcount = 0; /* say: we need not to move high_buffer */
        }
        else mv->hcount = -1;
index 501c21a..8ec7eac 100644 (file)
@@ -23,8 +23,6 @@ obj-$(CONFIG_X86_TRAMPOLINE)  += trampoline.o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
-obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o
-obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 obj-$(CONFIG_X86_NUMAQ)                += numaq.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)  += summit.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
index de08552..9d5590c 100644 (file)
@@ -219,36 +219,6 @@ void disconnect_bsp_APIC(void)
                outb(0x70, 0x22);
                outb(0x00, 0x23);
        }
-       else {
-               /* Go back to Virtual Wire compatibility mode */
-               unsigned long value;
-
-               /* For the spurious interrupt use vector F, and enable it */
-               value = apic_read(APIC_SPIV);
-               value &= ~APIC_VECTOR_MASK;
-               value |= APIC_SPIV_APIC_ENABLED;
-               value |= 0xf;
-               apic_write_around(APIC_SPIV, value);
-
-               /* For LVT0 make it edge triggered, active high, external and enabled */
-               value = apic_read(APIC_LVT0);
-               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT);
-               apic_write_around(APIC_LVT0, value);
-
-               /* For LVT1 make it edge triggered, active high, nmi and enabled */
-               value = apic_read(APIC_LVT1);
-               value &= ~(
-                       APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-               apic_write_around(APIC_LVT1, value);
-       }
 }
 
 void disable_local_APIC(void)
diff --git a/arch/i386/kernel/crash_dump.c b/arch/i386/kernel/crash_dump.c
deleted file mode 100644 (file)
index 1c9bdd2..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Architecture specific (i386) functions for kexec based crash dumps.
- *
- * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *
- * Copyright (C) IBM Corporation, 2004. All rights reserved.
- *
- */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/irq.h>
-
-#include <asm/crash_dump.h>
-#include <asm/processor.h>
-#include <asm/hardirq.h>
-#include <asm/nmi.h>
-#include <asm/hw_irq.h>
-
-struct pt_regs crash_smp_regs[NR_CPUS];
-long crash_smp_current_task[NR_CPUS];
-
-#ifdef CONFIG_SMP
-static atomic_t waiting_for_dump_ipi;
-static int crash_dump_expect_ipi[NR_CPUS];
-extern void crash_dump_send_ipi(void);
-extern void stop_this_cpu(void *);
-
-static int crash_dump_nmi_callback(struct pt_regs *regs, int cpu)
-{
-       if (!crash_dump_expect_ipi[cpu])
-               return 0;
-
-       crash_dump_expect_ipi[cpu] = 0;
-       crash_dump_save_this_cpu(regs, cpu);
-       atomic_dec(&waiting_for_dump_ipi);
-
-       stop_this_cpu(NULL);
-
-       return 1;
-}
-
-void __crash_dump_stop_cpus(void)
-{
-       int i, cpu, other_cpus;
-
-       preempt_disable();
-       cpu = smp_processor_id();
-       other_cpus = num_online_cpus()-1;
-
-       if (other_cpus > 0) {
-               atomic_set(&waiting_for_dump_ipi, other_cpus);
-
-               for (i = 0; i < NR_CPUS; i++)
-                       crash_dump_expect_ipi[i] = (i != cpu && cpu_online(i));
-
-               set_nmi_callback(crash_dump_nmi_callback);
-               /* Ensure the new callback function is set before sending
-                * out the IPI
-                */
-               wmb();
-
-               crash_dump_send_ipi();
-               while (atomic_read(&waiting_for_dump_ipi) > 0)
-                       cpu_relax();
-
-               unset_nmi_callback();
-       } else {
-               local_irq_disable();
-               disable_local_APIC();
-               local_irq_enable();
-       }
-       preempt_enable();
-}
-#else
-void __crash_dump_stop_cpus(void) {}
-#endif
-
-void crash_get_current_regs(struct pt_regs *regs)
-{
-       __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
-       __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
-       __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
-       __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
-       __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
-       __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
-       __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
-       __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
-       __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
-       __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
-       __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
-       __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
-       __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
-
-       regs->eip = (unsigned long)current_text_addr();
-}
-
-void crash_dump_save_this_cpu(struct pt_regs *regs, int cpu)
-{
-       crash_smp_current_task[cpu] = (long)current;
-       crash_smp_regs[cpu] = *regs;
-}
-
index 006a19a..02a2e7c 100644 (file)
@@ -871,7 +871,7 @@ ENTRY(sys_call_table)
        .long sys_mq_timedreceive       /* 280 */
        .long sys_mq_notify
        .long sys_mq_getsetattr
-       .long sys_kexec_load
+       .long sys_ni_syscall            /* reserved for kexec */
        .long sys_waitid
        .long sys_ni_syscall            /* 285 */ /* available */
        .long sys_add_key
index 9967fb3..685b7a2 100644 (file)
@@ -200,7 +200,7 @@ EXPORT_SYMBOL(ist_info);
 
 EXPORT_SYMBOL(csum_partial);
 
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_CRASH_DUMP_MODULE
 #ifdef CONFIG_SMP
 extern irq_desc_t irq_desc[NR_IRQS];
 extern unsigned long irq_affinity[NR_IRQS];
@@ -210,8 +210,8 @@ EXPORT_SYMBOL(irq_affinity);
 EXPORT_SYMBOL(stop_this_cpu);
 EXPORT_SYMBOL(dump_send_ipi);
 #endif
-extern int page_is_ram(unsigned long);
-EXPORT_SYMBOL(page_is_ram);
+extern int pfn_is_ram(unsigned long);
+EXPORT_SYMBOL(pfn_is_ram);
 #ifdef ARCH_HAS_NMI_WATCHDOG
 EXPORT_SYMBOL(touch_nmi_watchdog);
 #endif
index 12c1fb9..686a95b 100644 (file)
@@ -269,22 +269,10 @@ static int i8259A_suspend(struct sys_device *dev, u32 state)
        return 0;
 }
 
-static int i8259A_shutdown(struct sys_device *dev)
-{
-       /* Put the i8259A into a quiescent state that
-        * the kernel initialization code can get it
-        * out of.
-        */
-       outb(0xff, 0x21);       /* mask all of 8259A-1 */
-       outb(0xff, 0xA1);       /* mask all of 8259A-1 */
-       return 0;
-}
-
 static struct sysdev_class i8259_sysdev_class = {
        set_kset_name("i8259"),
        .suspend = i8259A_suspend,
        .resume = i8259A_resume,
-       .shutdown = i8259A_shutdown,
 };
 
 static struct sys_device device_i8259A = {
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
deleted file mode 100644 (file)
index ff59e77..0000000
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/cpufeature.h>
-#include <asm/crash_dump.h>
-
-static inline unsigned long read_cr3(void)
-{
-       unsigned long cr3;
-       asm volatile("movl %%cr3,%0": "=r"(cr3));
-       return cr3;
-}
-
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L2_ATTR (_PAGE_PRESENT)
-
-#define LEVEL0_SIZE (1UL << 12UL)
-
-#ifndef CONFIG_X86_PAE
-#define LEVEL1_SIZE (1UL << 22UL)
-static u32 pgtable_level1[1024] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-       unsigned long level1_index, level2_index;
-       u32 *pgtable_level2;
-
-       /* Find the current page table */
-       pgtable_level2 = __va(read_cr3());
-
-       /* Find the indexes of the physical address to identity map */
-       level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-       level2_index = address / LEVEL1_SIZE;
-
-       /* Identity map the page table entry */
-       pgtable_level1[level1_index] = address | L0_ATTR;
-       pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-
-       /* Flush the tlb so the new mapping takes effect.
-        * Global tlb entries are not flushed but that is not an issue.
-        */
-       load_cr3(pgtable_level2);
-}
-
-#else
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-static u64 pgtable_level1[512] PAGE_ALIGNED;
-static u64 pgtable_level2[512] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-       unsigned long level1_index, level2_index, level3_index;
-       u64 *pgtable_level3;
-
-       /* Find the current page table */
-       pgtable_level3 = __va(read_cr3());
-
-       /* Find the indexes of the physical address to identity map */
-       level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-       level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-       level3_index = address / LEVEL2_SIZE;
-
-       /* Identity map the page table entry */
-       pgtable_level1[level1_index] = address | L0_ATTR;
-       pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-       set_64bit(&pgtable_level3[level3_index], __pa(pgtable_level2) | L2_ATTR);
-
-       /* Flush the tlb so the new mapping takes effect.
-        * Global tlb entries are not flushed but that is not an issue.
-        */
-       load_cr3(pgtable_level3);
-}
-#endif
-
-
-static void set_idt(void *newidt, __u16 limit)
-{
-       unsigned char curidt[6];
-
-       /* ia32 supports unaliged loads & stores */
-       (*(__u16 *)(curidt)) = limit;
-       (*(__u32 *)(curidt +2)) = (unsigned long)(newidt);
-
-       __asm__ __volatile__ (
-               "lidt %0\n"
-               : "=m" (curidt)
-               );
-};
-
-
-static void set_gdt(void *newgdt, __u16 limit)
-{
-       unsigned char curgdt[6];
-
-       /* ia32 supports unaligned loads & stores */
-       (*(__u16 *)(curgdt)) = limit;
-       (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt);
-
-       __asm__ __volatile__ (
-               "lgdt %0\n"
-               : "=m" (curgdt)
-               );
-};
-
-static void load_segments(void)
-{
-#define __STR(X) #X
-#define STR(X) __STR(X)
-
-       __asm__ __volatile__ (
-               "\tljmp $"STR(__KERNEL_CS)",$1f\n"
-               "\t1:\n"
-               "\tmovl $"STR(__KERNEL_DS)",%eax\n"
-               "\tmovl %eax,%ds\n"
-               "\tmovl %eax,%es\n"
-               "\tmovl %eax,%fs\n"
-               "\tmovl %eax,%gs\n"
-               "\tmovl %eax,%ss\n"
-               );
-#undef STR
-#undef __STR
-}
-
-typedef asmlinkage void (*relocate_new_kernel_t)(
-       unsigned long indirection_page, unsigned long reboot_code_buffer,
-       unsigned long start_address, unsigned int has_pae);
-
-const extern unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-const extern unsigned int relocate_new_kernel_size;
-
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.  Currently nothing.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-       return 0;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-/*
- * We are going to do a memory preserving reboot. So, we copy over the
- * first 640k of memory into a backup location. Though the second kernel
- * boots from a different location, it still requires the first 640k.
- * Hence this backup.
- */
-void __crash_relocate_mem(unsigned long backup_addr, unsigned long backup_size)
-{
-       unsigned long pfn, pfn_max;
-       void *src_addr, *dest_addr;
-       struct page *page;
-
-       pfn_max = backup_size >> PAGE_SHIFT;
-       for (pfn = 0; pfn < pfn_max; pfn++) {
-               src_addr = phys_to_virt(pfn << PAGE_SHIFT);
-               dest_addr = backup_addr + src_addr;
-               if (!pfn_valid(pfn))
-                       continue;
-               page = pfn_to_page(pfn);
-               if (PageReserved(page))
-                       copy_page(dest_addr, src_addr);
-       }
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
-       unsigned long indirection_page;
-       unsigned long reboot_code_buffer;
-       relocate_new_kernel_t rnk;
-
-       /* Interrupts aren't acceptable while we reboot */
-       local_irq_disable();
-
-       /* Compute some offsets */
-       reboot_code_buffer = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-       indirection_page = image->head & PAGE_MASK;
-
-       /* Set up an identity mapping for the reboot_code_buffer */
-       identity_map_page(reboot_code_buffer);
-
-       /* copy it out */
-       memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
-
-       /* The segment registers are funny things, they are
-        * automatically loaded from a table, in memory wherever you
-        * set them to a specific selector, but this table is never
-        * accessed again you set the segment to a different selector.
-        *
-        * The more common model is are caches where the behide
-        * the scenes work is done, but is also dropped at arbitrary
-        * times.
-        *
-        * I take advantage of this here by force loading the
-        * segments, before I zap the gdt with an invalid value.
-        */
-       load_segments();
-       /* The gdt & idt are now invalid.
-        * If you want to load them you must set up your own idt & gdt.
-        */
-       set_gdt(phys_to_virt(0),0);
-       set_idt(phys_to_virt(0),0);
-
-       /* now call it */
-       rnk = (relocate_new_kernel_t) reboot_code_buffer;
-       (*rnk)(indirection_page, reboot_code_buffer, image->start, cpu_has_pae);
-}
index c4d62d9..dd06362 100644 (file)
@@ -23,6 +23,7 @@ static int reboot_mode;
 int reboot_thru_bios;
 
 #ifdef CONFIG_SMP
+int reboot_smp = 0;
 static int reboot_cpu = -1;
 /* shamelessly grabbed from lib/vsprintf.c for readability */
 #define is_digit(c)    ((c) >= '0' && (c) <= '9')
@@ -45,6 +46,7 @@ static int __init reboot_setup(char *str)
                        break;
 #ifdef CONFIG_SMP
                case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
+                       reboot_smp = 1;
                        if (is_digit(*(str+1))) {
                                reboot_cpu = (int) (*(str+1) - '0');
                                if (is_digit(*(str+2))) 
@@ -83,9 +85,33 @@ static int __init set_bios_reboot(struct dmi_system_id *d)
        return 0;
 }
 
+/*
+ * Some machines require the "reboot=s"  commandline option, this quirk makes that automatic.
+ */
+static int __init set_smp_reboot(struct dmi_system_id *d)
+{
+#ifdef CONFIG_SMP
+       if (!reboot_smp) {
+               reboot_smp = 1;
+               printk(KERN_INFO "%s series board detected. Selecting SMP-method for reboots.\n", d->ident);
+       }
+#endif
+       return 0;
+}
+
+/*
+ * Some machines require the "reboot=b,s"  commandline option, this quirk makes that automatic.
+ */
+static int __init set_smp_bios_reboot(struct dmi_system_id *d)
+{
+       set_smp_reboot(d);
+       set_bios_reboot(d);
+       return 0;
+}
+
 static struct dmi_system_id __initdata reboot_dmi_table[] = {
        {       /* Handle problems with rebooting on Dell 1300's */
-               .callback = set_bios_reboot,
+               .callback = set_smp_bios_reboot,
                .ident = "Dell PowerEdge 1300",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
@@ -269,32 +295,41 @@ void machine_real_restart(unsigned char *code, int length)
                                : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100)));
 }
 
-void machine_shutdown(void)
+void machine_restart(char * __unused)
 {
 #ifdef CONFIG_SMP
-       int reboot_cpu_id;
-
-       /* The boot cpu is always logical cpu 0 */
-       reboot_cpu_id = 0;
-
-       /* See if there has been given a command line override */
-       if ((reboot_cpu_id != -1) && (reboot_cpu < NR_CPUS) &&
-               cpu_isset(reboot_cpu, cpu_online_map)) {
-               reboot_cpu_id = reboot_cpu;
+       int cpuid;
+       
+       cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+       if (reboot_smp) {
+
+               /* check to see if reboot_cpu is valid 
+                  if its not, default to the BSP */
+               if ((reboot_cpu == -1) ||  
+                     (reboot_cpu > (NR_CPUS -1))  || 
+                     !physid_isset(cpuid, phys_cpu_present_map))
+                       reboot_cpu = boot_cpu_physical_apicid;
+
+               reboot_smp = 0;  /* use this as a flag to only go through this once*/
+               /* re-run this function on the other CPUs
+                  it will fall though this section since we have 
+                  cleared reboot_smp, and do the reboot if it is the
+                  correct CPU, otherwise it halts. */
+               if (reboot_cpu != cpuid)
+                       smp_call_function((void *)machine_restart , NULL, 1, 0);
        }
 
-       /* Make certain the cpu I'm rebooting on is online */
-       if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
-               reboot_cpu_id = smp_processor_id();
+       /* if reboot_cpu is still -1, then we want a tradional reboot, 
+          and if we are not running on the reboot_cpu,, halt */
+       if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+               for (;;)
+               __asm__ __volatile__ ("hlt");
        }
-
-       /* Make certain I only run on the appropriate processor */
-       set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
-       /* O.K. Now that I'm on the appropriate processor, stop
-        * all of the others, and disable their local APICs.
+       /*
+        * Stop all CPUs and turn off local APICs and the IO-APIC, so
+        * other OSs see a clean IRQ state.
         */
-
        smp_send_stop();
 #endif /* CONFIG_SMP */
 
@@ -303,11 +338,6 @@ void machine_shutdown(void)
 #ifdef CONFIG_X86_IO_APIC
        disable_IO_APIC();
 #endif
-}
-
-void machine_restart(char * __unused)
-{
-       machine_shutdown();
 
        if (!reboot_thru_bios) {
                if (efi_enabled) {
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
deleted file mode 100644 (file)
index 54be4c2..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * relocate_kernel.S - put the kernel image in place to boot
- * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/linkage.h>
-
-       /*
-        * Must be relocatable PIC code callable as a C function, that once
-        * it starts can not use the previous processes stack.
-        */
-       .globl relocate_new_kernel
-relocate_new_kernel:
-       /* read the arguments and say goodbye to the stack */
-       movl  4(%esp), %ebx /* indirection_page */
-       movl  8(%esp), %ebp /* reboot_code_buffer */
-       movl  12(%esp), %edx /* start address */
-       movl  16(%esp), %ecx /* cpu_has_pae */
-
-       /* zero out flags, and disable interrupts */
-       pushl $0
-       popfl
-
-       /* set a new stack at the bottom of our page... */
-       lea   4096(%ebp), %esp
-
-       /* store the parameters back on the stack */
-       pushl   %edx /* store the start address */
-
-       /* Set cr0 to a known state:
-        * 31 0 == Paging disabled
-        * 18 0 == Alignment check disabled
-        * 16 0 == Write protect disabled
-        * 3  0 == No task switch
-        * 2  0 == Don't do FP software emulation.
-        * 0  1 == Proctected mode enabled
-        */
-       movl    %cr0, %eax
-       andl    $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
-       orl     $(1<<0), %eax
-       movl    %eax, %cr0
-
-       /* clear cr4 if applicable */
-       testl   %ecx, %ecx
-       jz      1f
-       /* Set cr4 to a known state:
-        * Setting everything to zero seems safe.
-        */
-       movl    %cr4, %eax
-       andl    $0, %eax
-       movl    %eax, %cr4
-
-       jmp 1f
-1:
-
-       /* Flush the TLB (needed?) */
-       xorl    %eax, %eax
-       movl    %eax, %cr3
-
-       /* Do the copies */
-       cld
-0:     /* top, read another word for the indirection page */
-       movl    %ebx, %ecx
-       movl    (%ebx), %ecx
-       addl    $4, %ebx
-       testl   $0x1,   %ecx  /* is it a destination page */
-       jz      1f
-       movl    %ecx,   %edi
-       andl    $0xfffff000, %edi
-       jmp     0b
-1:
-       testl   $0x2,   %ecx  /* is it an indirection page */
-       jz      1f
-       movl    %ecx,   %ebx
-       andl    $0xfffff000, %ebx
-       jmp     0b
-1:
-       testl   $0x4,   %ecx /* is it the done indicator */
-       jz      1f
-       jmp     2f
-1:
-       testl   $0x8,   %ecx /* is it the source indicator */
-       jz      0b           /* Ignore it otherwise */
-       movl    %ecx,   %esi /* For every source page do a copy */
-       andl    $0xfffff000, %esi
-
-       movl    $1024, %ecx
-       rep ; movsl
-       jmp     0b
-
-2:
-
-       /* To be certain of avoiding problems with self-modifying code
-        * I need to execute a serializing instruction here.
-        * So I flush the TLB, it's handy, and not processor dependent.
-        */
-       xorl    %eax, %eax
-       movl    %eax, %cr3
-
-       /* set all of the registers to known values */
-       /* leave %esp alone */
-
-       xorl    %eax, %eax
-       xorl    %ebx, %ebx
-       xorl    %ecx, %ecx
-       xorl    %edx, %edx
-       xorl    %esi, %esi
-       xorl    %edi, %edi
-       xorl    %ebp, %ebp
-       ret
-relocate_new_kernel_end:
-
-       .globl relocate_new_kernel_size
-relocate_new_kernel_size:
-       .long relocate_new_kernel_end - relocate_new_kernel
index 7ccdf02..6910009 100644 (file)
@@ -48,7 +48,6 @@
 #include <asm/io_apic.h>
 #include <asm/ist.h>
 #include <asm/io.h>
-#include <asm/crash_dump.h>
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
@@ -58,7 +57,6 @@
 unsigned long init_pg_tables_end __initdata = ~0UL;
 
 int disable_pse __initdata = 0;
-unsigned int dump_enabled;
 
 /*
  * Machine setup..
@@ -712,11 +710,6 @@ static void __init parse_cmdline_early (char ** cmdline_p)
                        if (to != command_line)
                                to--;
                        if (!memcmp(from+7, "exactmap", 8)) {
-                               /* If we are doing a crash dump, we
-                                * still need to know the real mem
-                                * size.
-                                */
-                               set_saved_max_pfn();
                                from += 8+7;
                                e820.nr_map = 0;
                                userdef = 1;
@@ -823,9 +816,6 @@ static void __init parse_cmdline_early (char ** cmdline_p)
                 */
                if (c == ' ' && !memcmp(from, "highmem=", 8))
                        highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
-
-               if (!memcmp(from, "dump", 4))
-                       dump_enabled = 1;
        
                if (c == ' ' && !memcmp(from, "crashdump=", 10))
                        crashdump_addr = memparse(from+10, &from); 
@@ -1125,9 +1115,6 @@ static unsigned long __init setup_memory(void)
                }
        }
 #endif
-
-       crash_reserve_bootmem();
-
        return max_low_pfn;
 }
 #else
index 131ed47..c2888ac 100644 (file)
@@ -23,7 +23,6 @@
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
-#include <asm/desc.h>
 #include <mach_apic.h>
 
 /*
@@ -139,15 +138,12 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
         */
        apic_wait_icr_idle();
 
-       if (vector == CRASH_DUMP_VECTOR)
-               cfg = (cfg&~APIC_VECTOR_MASK)|APIC_DM_NMI;
-
        /*
         * No need to touch the target chip field
         */
        cfg = __prepare_ICR(shortcut, vector);
 
-       if (vector == CRASH_DUMP_VECTOR) {
+       if (vector == DUMP_VECTOR) {
                /*
                 * Setup DUMP IPI to be delivered as an NMI
                 */
@@ -232,7 +228,7 @@ inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
                         */
                        cfg = __prepare_ICR(0, vector);
                
-                       if (vector == CRASH_DUMP_VECTOR) {
+                       if (vector == DUMP_VECTOR) {
                                /*
                                 * Setup DUMP IPI to be delivered as an NMI
                                 */
@@ -331,8 +327,6 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
        unsigned long cpu;
 
        cpu = get_cpu();
-       if (current->active_mm)
-               load_user_cs_desc(cpu, current->active_mm);
 
        if (!cpu_isset(cpu, flush_cpumask))
                goto out;
@@ -489,7 +483,7 @@ void flush_tlb_all(void)
 
 void dump_send_ipi(void)
 {
-       send_IPI_allbutself(CRASH_DUMP_VECTOR);
+       send_IPI_allbutself(DUMP_VECTOR);
 }
 
 /*
@@ -502,11 +496,6 @@ void smp_send_reschedule(int cpu)
        send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
-void crash_dump_send_ipi(void)
-{
-       send_IPI_allbutself(CRASH_DUMP_VECTOR);
-}
-
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
  * static memory requirements. It also looks cleaner.
index e68b2ba..551e33c 100644 (file)
@@ -58,7 +58,7 @@ static inline long do_mmap2(
        }
 
        down_write(&current->mm->mmap_sem);
-       error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+       error = do_mmap_pgoff(current->mm, file, addr, len, prot, flags, pgoff);
        up_write(&current->mm->mmap_sem);
 
        if (file)
index adeaef6..2c4351d 100644 (file)
@@ -53,7 +53,6 @@
 
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/vserver/debug.h>
 
 #include "mach_traps.h"
 
@@ -307,7 +306,6 @@ void die(const char * str, struct pt_regs * regs, long err)
        };
        static int die_counter;
 
-       vxh_throw_oops();
        if (die.lock_owner != smp_processor_id()) {
                console_verbose();
                spin_lock_irq(&die.lock);
@@ -343,7 +341,6 @@ void die(const char * str, struct pt_regs * regs, long err)
        bust_spinlocks(0);
        die.lock_owner = -1;
        spin_unlock_irq(&die.lock);
-       vxh_dump_history();
        if (in_interrupt())
                panic("Fatal exception in interrupt");
 
index cfb1b18..e0512cc 100644 (file)
@@ -2,24 +2,20 @@
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
  */
 
-#define LOAD_OFFSET __PAGE_OFFSET
-
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
-#include <asm/segment.h>
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
+ENTRY(startup_32)
 jiffies = jiffies_64;
 SECTIONS
 {
-  . = LOAD_OFFSET + KERN_PHYS_OFFSET;
-  phys_startup_32 = startup_32 - LOAD_OFFSET;
+  . = __PAGE_OFFSET + 0x100000;
   /* read-only */
   _text = .;                   /* Text and read-only data */
-  .text : AT(ADDR(.text) - LOAD_OFFSET) {
+  .text : {
        *(.text)
        SCHED_TEXT
        LOCK_TEXT
@@ -31,51 +27,49 @@ SECTIONS
 
   . = ALIGN(16);               /* Exception table */
   __start___ex_table = .;
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
+  __ex_table : { *(__ex_table) }
   __stop___ex_table = .;
 
   RODATA
 
   /* writeable */
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {                      /* Data */
+  .data : {                    /* Data */
        *(.data)
        CONSTRUCTORS
        }
 
   . = ALIGN(4096);
   __nosave_begin = .;
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
+  .data_nosave : { *(.data.nosave) }
   . = ALIGN(4096);
   __nosave_end = .;
 
   . = ALIGN(4096);
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { *(.data.idt) }
+  .data.page_aligned : { *(.data.idt) }
 
   . = ALIGN(32);
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-       *(.data.cacheline_aligned)
-  }
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
   _edata = .;                  /* End of data section */
 
   . = ALIGN(THREAD_SIZE);      /* init_task */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { *(.data.init_task) }
+  .data.init_task : { *(.data.init_task) }
 
   /* will be freed after init */
   . = ALIGN(4096);             /* Init code and data */
   __init_begin = .;
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+  .init.text : {              /* Init code, bounded by _sinittext/_einittext */
        _sinittext = .;
        *(.init.text)
        _einittext = .;
   }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
+  .init.data : { *(.init.data) }
   . = ALIGN(16);
   __setup_start = .;
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
+  .init.setup : { *(.init.setup) }
   __setup_end = .;
   __initcall_start = .;
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+  .initcall.init : {
        *(.initcall1.init) 
        *(.initcall2.init) 
        *(.initcall3.init) 
@@ -86,40 +80,33 @@ SECTIONS
   }
   __initcall_end = .;
   __con_initcall_start = .;
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-       *(.con_initcall.init)
-  }
+  .con_initcall.init : { *(.con_initcall.init) }
   __con_initcall_end = .;
   SECURITY_INIT
   . = ALIGN(4);
   __alt_instructions = .;
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-       *(.altinstructions)
-  }
-  __alt_instructions_end = .;
- .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-       *(.altinstr_replacement)
- }
+  .altinstructions : { *(.altinstructions) } 
+  __alt_instructions_end = .; 
+ .altinstr_replacement : { *(.altinstr_replacement) } 
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
+  .exit.text : { *(.exit.text) }
+  .exit.data : { *(.exit.data) }
   . = ALIGN(4096);
   __initramfs_start = .;
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
+  .init.ramfs : { *(.init.ramfs) }
   __initramfs_end = .;
   . = ALIGN(32);
   __per_cpu_start = .;
-  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
+  .data.percpu  : { *(.data.percpu) }
   __per_cpu_end = .;
   . = ALIGN(4096);
   __init_end = .;
   /* freed after init ends here */
        
   __bss_start = .;             /* BSS */
-  .bss.page_aligned  : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
-       *(.bss.page_aligned) }
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+  .bss : {
+       *(.bss.page_aligned)
        *(.bss)
   }
   . = ALIGN(4);
index 5bd9e6c..33b81da 100644 (file)
@@ -32,7 +32,6 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/mmzone.h>
-#include <asm/crash_dump.h>
 #include <bios_ebda.h>
 
 struct pglist_data *node_data[MAX_NUMNODES];
@@ -364,9 +363,6 @@ unsigned long __init setup_memory(void)
                }
        }
 #endif
-
-       crash_reserve_bootmem();
-
        return system_max_low_pfn;
 }
 
index c554747..5817532 100644 (file)
@@ -74,24 +74,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
        preempt_check_resched();
 }
 
-/* This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-char *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
-       enum fixed_addresses idx;
-       unsigned long vaddr;
-
-       inc_preempt_count();
-
-       idx = type + KM_TYPE_NR*smp_processor_id();
-       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
-       __flush_tlb_one(vaddr);
-
-       return (char *)vaddr;
-}
-
 struct page *kmap_atomic_to_page(void *ptr)
 {
        unsigned long idx, vaddr = (unsigned long)ptr;
@@ -104,3 +86,4 @@ struct page *kmap_atomic_to_page(void *ptr)
        pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
        return pte_page(*pte);
 }
+
index 7211494..7a67e3b 100644 (file)
@@ -568,10 +568,7 @@ void __init paging_init(void)
        set_nx();
        if (nx_enabled)
                printk("NX (Execute Disable) protection: active\n");
-       else
 #endif
-       if (exec_shield)
-               printk("Using x86 segment limits to approximate NX protection\n");
 
        pagetable_init();
 
index 09a693d..8ccbdab 100644 (file)
@@ -26,7 +26,6 @@
 
 #include <linux/personality.h>
 #include <linux/mm.h>
-#include <linux/random.h>
 
 /*
  * Top of mmap area (just below the process stack).
 static inline unsigned long mmap_base(struct mm_struct *mm)
 {
        unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
-       unsigned long random_factor = 0;
-
-       if (current->flags & PF_RELOCEXEC)
-               random_factor = get_random_int() % (1024*1024);
 
        if (gap < MIN_GAP)
                gap = MIN_GAP;
        else if (gap > MAX_GAP)
                gap = MAX_GAP;
 
-       return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+       return TASK_SIZE - (gap & PAGE_MASK);
 }
 
 /*
@@ -62,9 +57,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
         * Fall back to the standard layout if the personality
         * bit is set, or if the expected stack growth is unlimited:
         */
-       if ((exec_shield != 2) && (sysctl_legacy_va_layout ||
+       if (sysctl_legacy_va_layout ||
                        (current->personality & ADDR_COMPAT_LAYOUT) ||
-                       current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)){
+                       current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
                mm->mmap_base = TASK_UNMAPPED_BASE;
                mm->get_unmapped_area = arch_get_unmapped_area;
                mm->unmap_area = arch_unmap_area;
index 82d0682..e3e7077 100644 (file)
@@ -41,8 +41,6 @@
 #include <linux/vs_memory.h>
 #include <linux/vs_cvirt.h>
 #include <linux/bitops.h>
-#include <linux/vs_memory.h>
-#include <linux/vs_cvirt.h>
 
 #include <asm/errno.h>
 #include <asm/intrinsics.h>
index 8dce894..25da1d4 100644 (file)
@@ -44,9 +44,10 @@ expand_backing_store (struct vm_area_struct *vma, unsigned long address)
        vma->vm_end += PAGE_SIZE;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED)
+       if (vma->vm_flags & VM_LOCKED) {
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
+       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        return 0;
 }
index 5d8ec65..84e2ee6 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/shm.h>
 #include <linux/vs_cvirt.h>
 #include <linux/compiler.h>
-#include <linux/vs_cvirt.h>
 
 #include <asm/branch.h>
 #include <asm/cachectl.h>
index b460f00..56afd54 100644 (file)
@@ -189,26 +189,6 @@ config MATH_EMULATION
          here.  Saying Y here will not hurt performance (on any machine) but
          will increase the size of the kernel.
 
-config KEXEC
-       bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
-       help
-         kexec is a system call that implements the ability to shutdown your
-         current kernel, and to start another kernel.  It is like a reboot
-         but it is indepedent of the system firmware.   And like a reboot
-         you can start any kernel with it, not just Linux.
-
-         The name comes from the similiarity to the exec system call.
-
-         It is an ongoing process to be certain the hardware in a machine
-         is properly shutdown, so do not be surprised if this code does not
-         initially work for you.  It may help to enable device hotplugging
-         support.  As of this writing the exact hardware interface is
-         strongly in flux, so no good recommendation can be made.
-
-         In the GameCube implementation, kexec allows you to load and
-         run DOL files, including kernel and homebrew DOLs.
-
 source "drivers/cpufreq/Kconfig"
 
 config CPU_FREQ_PMAC
index 24845db..7d0abff 100644 (file)
@@ -24,7 +24,6 @@ obj-$(CONFIG_KGDB)            += ppc-stub.o
 obj-$(CONFIG_SMP)              += smp.o smp-tbsync.o
 obj-$(CONFIG_TAU)              += temp.o
 obj-$(CONFIG_ALTIVEC)          += vecemu.o vector.o
-obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o
 
 ifndef CONFIG_MATH_EMULATION
 obj-$(CONFIG_8xx)              += softemu8xx.o
diff --git a/arch/ppc/kernel/machine_kexec.c b/arch/ppc/kernel/machine_kexec.c
deleted file mode 100644 (file)
index caac3d4..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- *
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/reboot.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/cacheflush.h>
-#include <asm/machdep.h>
-
-typedef void (*relocate_new_kernel_t)(
-       unsigned long indirection_page, unsigned long reboot_code_buffer,
-       unsigned long start_address);
-
-const extern unsigned char relocate_new_kernel[];
-const extern unsigned int relocate_new_kernel_size;
-
-void machine_shutdown(void)
-{
-       if (ppc_md.machine_shutdown) {
-               ppc_md.machine_shutdown();
-       }
-}
-
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-       if (ppc_md.machine_kexec_prepare) {
-               return ppc_md.machine_kexec_prepare(image);
-       }
-       /*
-        * Fail if platform doesn't provide its own machine_kexec_prepare
-        * implementation.
-        */
-       return -ENOSYS;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-       if (ppc_md.machine_kexec_cleanup) {
-               ppc_md.machine_kexec_cleanup(image);
-       }
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
-       if (ppc_md.machine_kexec) {
-               ppc_md.machine_kexec(image);
-       } else {
-               /*
-                * Fall back to normal restart if platform doesn't provide
-                * its own kexec function, and user insist to kexec...
-                */
-               machine_restart(NULL);
-       }
-}
-
-
-/*
- * This is a generic machine_kexec function suitable at least for
- * non-OpenFirmware embedded platforms.
- * It merely copies the image relocation code to the control page and
- * jumps to it.
- * A platform specific function may just call this one.
- */
-void machine_kexec_simple(struct kimage *image)
-{
-       unsigned long indirection_page;
-       unsigned long reboot_code_buffer, reboot_code_buffer_phys;
-       relocate_new_kernel_t rnk;
-
-       /* Interrupts aren't acceptable while we reboot */
-       local_irq_disable();
-
-       indirection_page = image->head & PAGE_MASK;
-
-       /* we need both effective and real address here */
-       reboot_code_buffer =
-               (unsigned long)page_address(image->control_code_page);
-       reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
-
-       /* copy our kernel relocation code to the control code page */
-       memcpy((void *)reboot_code_buffer,
-               relocate_new_kernel, relocate_new_kernel_size);
-
-       flush_icache_range(reboot_code_buffer,
-               reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE);
-       printk(KERN_INFO "Bye!\n");
-
-       /* now call it */
-       rnk = (relocate_new_kernel_t) reboot_code_buffer;
-       (*rnk)(indirection_page, reboot_code_buffer_phys, image->start);
-}
-
diff --git a/arch/ppc/kernel/relocate_kernel.S b/arch/ppc/kernel/relocate_kernel.S
deleted file mode 100644 (file)
index e170b13..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * relocate_kernel.S - put the kernel image in place to boot
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- *
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <asm/reg.h>
-#include <asm/ppc_asm.h>
-#include <asm/processor.h>
-
-#include <asm/kexec.h>
-
-#define PAGE_SIZE      4096 /* must be same value as in <asm/page.h> */
-
-/* returns  r3 = relocated address of sym */
-/* modifies r0 */
-#define RELOC_SYM(sym) \
-       mflr    r3; \
-       bl      1f; \
-1:     mflr    r0; \
-       mtlr    r3; \
-       lis     r3, 1b@ha; \
-       ori     r3, r3, 1b@l; \
-       subf    r0, r3, r0; \
-       lis     r3, sym@ha; \
-       ori     r3, r3, sym@l; \
-       add     r3, r3, r0
-
-       /*
-        * Must be relocatable PIC code callable as a C function.
-        */
-       .globl relocate_new_kernel
-relocate_new_kernel:
-       /* r3 = indirection_page   */
-       /* r4 = reboot_code_buffer */
-       /* r5 = start_address      */
-
-       li      r0, 0
-
-       /*
-        * Set Machine Status Register to a known status,
-        * switch the MMU off and jump to 1: in a single step.
-        */
-
-       mr      r8, r0
-       ori     r8, r8, MSR_RI|MSR_ME
-       mtspr   SRR1, r8
-       addi    r8, r4, 1f - relocate_new_kernel
-       mtspr   SRR0, r8
-       sync
-       rfi
-
-1:
-       /* from this point address translation is turned off */
-       /* and interrupts are disabled */
-
-       /* set a new stack at the bottom of our page... */
-       /* (not really needed now) */
-       addi    r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */
-       stw     r0, 0(r1)
-
-       /* Do the copies */
-       li      r6, 0 /* checksum */
-       subi    r3, r3, 4
-
-0:     /* top, read another word for the indirection page */
-       lwzu    r0, 4(r3)
-
-       /* is it a destination page? (r8) */
-       rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
-       beq     1f
-
-       rlwinm  r8, r0, 0, 0, 19 /* clear kexec flags, page align */
-       b       0b
-
-1:     /* is it an indirection page? (r3) */
-       rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
-       beq     1f
-
-       rlwinm  r3, r0, 0, 0, 19 /* clear kexec flags, page align */
-       subi    r3, r3, 4
-       b       0b
-
-1:     /* are we done? */
-       rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
-       beq     1f
-       b       2f
-
-1:     /* is it a source page? (r9) */
-       rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
-       beq     0b
-
-       rlwinm  r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
-       li      r7, PAGE_SIZE / 4
-       mtctr   r7
-       subi    r9, r9, 4
-       subi    r8, r8, 4
-9:
-       lwzu    r0, 4(r9)  /* do the copy */
-       xor     r6, r6, r0
-       stwu    r0, 4(r8)
-       dcbst   0, r8
-       sync
-       icbi    0, r8
-       bdnz    9b
-
-       addi    r9, r9, 4
-       addi    r8, r8, 4
-       b       0b
-
-2:
-
-       /* To be certain of avoiding problems with self-modifying code
-        * execute a serializing instruction here.
-        */
-       isync
-       sync
-
-       /* jump to the entry point, usually the setup routine */
-       mtlr    r5
-       blrl
-
-1:     b       1b
-
-relocate_new_kernel_end:
-
-       .globl relocate_new_kernel_size
-relocate_new_kernel_size:
-       .long relocate_new_kernel_end - relocate_new_kernel
-
index e9ecc72..0c96be5 100644 (file)
@@ -154,7 +154,7 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        pte_t entry;
 
        // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
+       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);    /* installing a huge PTE grows RSS */
        if (write_access) {
                entry =
                    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
@@ -422,8 +422,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
 
                put_page(page);
        }
-       // mm->rss -= (end - start) >> PAGE_SHIFT;
-       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
+       mm->rss -= (end - start) >> PAGE_SHIFT;
        flush_tlb_pending();
 }
 
index edbbc43..50b2573 100644 (file)
@@ -62,8 +62,8 @@ static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long i;
        pte_t entry;
 
-       // mm->rss += (HPAGE_SIZE / PAGE_SIZE);
-       vx_rsspages_add(mm, HPAGE_SIZE / PAGE_SIZE);
+       mm->rss += (HPAGE_SIZE / PAGE_SIZE);
+
        if (write_access)
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
                                                       vma->vm_page_prot)));
@@ -115,8 +115,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        pte_val(entry) += PAGE_SIZE;
                        dst_pte++;
                }
-               // dst->rss += (HPAGE_SIZE / PAGE_SIZE);
-               vx_rsspages_add(dst, HPAGE_SIZE / PAGE_SIZE);
+               dst->rss += (HPAGE_SIZE / PAGE_SIZE);
                addr += HPAGE_SIZE;
        }
        return 0;
@@ -207,8 +206,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma,
                        pte++;
                }
        }
-       // mm->rss -= (end - start) >> PAGE_SHIFT;
-       vx_rsspages_sub(mm, (end - start) >> PAGE_SHIFT);
+       mm->rss -= (end - start) >> PAGE_SHIFT;
        flush_tlb_range(vma, start, end);
 }
 
index 48618a9..6b10e6c 100644 (file)
@@ -83,6 +83,9 @@ config NET
 
 source "fs/Kconfig.binfmt"
 
+config EXTERNFS
+       tristate "Support for host-based filesystems"
+
 config HOSTFS
        tristate "Host filesystem"
        help
@@ -104,11 +107,15 @@ config HOSTFS
         If you'd like to be able to work with files stored on the host,
         say Y or M here; otherwise say N.
 
+config HUMFS
+       tristate 'Usable host filesystem'
+       depends on EXTERNFS
+
 config HPPFS
        tristate "HoneyPot ProcFS (EXPERIMENTAL)"
        depends on BROKEN
        help
-       hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
+       hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc 
        entries to be overridden, removed, or fabricated from the host.
        Its purpose is to allow a UML to appear to be a physical machine
        by removing or changing anything in /proc which gives away the
@@ -211,6 +218,9 @@ config HIGHMEM
        bool "Highmem support"
        depends on BROKEN
 
+config PROC_MM
+       bool "/proc/mm support"
+
 config KERNEL_STACK_ORDER
        int "Kernel stack size order"
        default 2
index b8371a9..69fbd65 100644 (file)
@@ -202,4 +202,6 @@ $(ARCH_DIR)/util: scripts_basic $(SYS_DIR)/sc.h FORCE
 $(ARCH_DIR)/kernel/skas/util: scripts_basic FORCE
        $(Q)$(MAKE) $(build)=$@
 
-export SUBARCH USER_CFLAGS OS
+define archhelp
+    echo  '* linux     - Binary kernel image (./linux)'
+endef
index 7c7d008..d87c9e7 100644 (file)
@@ -10,6 +10,12 @@ ifeq ($(CONFIG_MODE_SKAS),y)
   endif
 endif
 
+ifeq ($(CONFIG_MODE_SKAS),y)
+  ifneq ($(CONFIG_MODE_TT),y)
+     START = 0x8048000
+  endif
+endif
+
 CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH)
 ARCH_USER_CFLAGS :=
 
@@ -26,8 +32,12 @@ SYS_UTIL_DIR := $(ARCH_DIR)/sys-i386/util
 
 SYS_HEADERS := $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
 
+sys_prepare: $(SYS_DIR)/sc.h
+
 prepare: $(SYS_HEADERS)
 
+filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc
+
 $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc
        $(call filechk,gen_header)
 
index 0524a22..fe6ab55 100644 (file)
@@ -17,8 +17,11 @@ CONFIG_MODE_TT=y
 CONFIG_MODE_SKAS=y
 CONFIG_NET=y
 CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
+CONFIG_BINFMT_MISC=y
+CONFIG_EXTERNFS=y
 CONFIG_HOSTFS=y
+CONFIG_HUMFS=y
+CONFIG_HPPFS=y
 CONFIG_MCONSOLE=y
 # CONFIG_HOST_2G_2G is not set
 # CONFIG_SMP is not set
@@ -211,6 +214,7 @@ CONFIG_TUN=m
 # Ethernet (10 or 100Mbit)
 #
 # CONFIG_NET_ETHERNET is not set
+# CONFIG_NE2000 is not set
 
 #
 # Ethernet (1000 Mbit)
@@ -317,6 +321,11 @@ CONFIG_RAMFS=y
 # CONFIG_BEFS_FS is not set
 # CONFIG_BFS_FS is not set
 # CONFIG_EFS_FS is not set
+CONFIG_JFFS_FS=y
+CONFIG_JFFS_FS_VERBOSE=0
+# CONFIG_JFFS_PROC_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
 # CONFIG_CRAMFS is not set
 # CONFIG_VXFS_FS is not set
 # CONFIG_HPFS_FS is not set
@@ -386,6 +395,18 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # CONFIG_NLS_KOI8_U is not set
 # CONFIG_NLS_UTF8 is not set
 
+#
+# Linux VServer
+#
+CONFIG_VSERVER_LEGACY=y
+CONFIG_PROC_SECURE=y
+# CONFIG_VSERVER_HARDCPU is not set
+# CONFIG_INOXID_NONE is not set
+# CONFIG_INOXID_GID16 is not set
+CONFIG_INOXID_GID24=y
+# CONFIG_INOXID_GID32 is not set
+# CONFIG_INOXID_MAGIC is not set
+
 #
 # Security options
 #
index 7a8d750..c3e3b94 100644 (file)
@@ -138,6 +138,67 @@ void generic_free(void *data)
        kfree(data);
 }
 
+/* Close the host file descriptor fd; the second argument is ignored. */
+void generic_close(int fd, void *unused)
+{
+       os_close_file(fd);
+}
+
+/*
+ * Read one character from fd into *c_out.
+ *
+ * Returns 0 when os_read_file() reports -EAGAIN, -EIO when it returns 0
+ * (presumably end of file - confirm), and otherwise the os_read_file()
+ * result unchanged.
+ */
+int generic_read(int fd, char *c_out, void *unused)
+{
+       int n;
+
+       n = os_read_file(fd, c_out, sizeof(*c_out));
+
+       if(n == -EAGAIN)
+               return(0);
+       else if(n == 0)
+               return(-EIO);
+       return(n);
+}
+
+/*
+ * XXX Trivial wrapper around os_write_file: hands fd, buf and n to it and
+ * returns its result.
+ */
+int generic_write(int fd, const char *buf, int n, void *unused)
+{
+       return(os_write_file(fd, buf, n));
+}
+
+/*
+ * Fetch the window size of fd with os_window_size() and store it in
+ * *rows_out and *cols_out.
+ *
+ * Returns the negative os_window_size() result on failure (outputs left
+ * untouched), 1 if either dimension differs from the value the caller
+ * passed in, and 0 if the size is unchanged.
+ */
+int generic_window_size(int fd, void *unused, unsigned short *rows_out,
+                       unsigned short *cols_out)
+{
+       int rows, cols;
+       int ret;
+
+       ret = os_window_size(fd, &rows, &cols);
+       if(ret < 0)
+               return(ret);
+
+       ret = ((*rows_out != rows) || (*cols_out != cols));
+
+       *rows_out = rows;
+       *cols_out = cols;
+
+       return(ret);
+}
+
 static void tty_receive_char(struct tty_struct *tty, char ch)
 {
        if(tty == NULL) return;
index 1951731..d875d04 100644 (file)
 #error "__BYTE_ORDER not defined"
 #endif
 
-extern int init_cow_file(int fd, char *cow_file, char *backing_file,
-                        int sectorsize, int alignment, int *bitmap_offset_out,
+extern int init_cow_file(int fd, char *cow_file, char *backing_file, 
+                        int sectorsize, int alignment, int *bitmap_offset_out, 
                         unsigned long *bitmap_len_out, int *data_offset_out);
 
 extern int file_reader(__u64 offset, char *buf, int len, void *arg);
-extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
-                          void *arg, __u32 *version_out,
-                          char **backing_file_out, time_t *mtime_out,
-                          __u64 *size_out, int *sectorsize_out,
+extern int read_cow_header(int (*reader)(__u64, char *, int, void *), 
+                          void *arg, __u32 *version_out, 
+                          char **backing_file_out, time_t *mtime_out, 
+                          __u64 *size_out, int *sectorsize_out, 
                           __u32 *align_out, int *bitmap_offset_out);
 
-extern int write_cow_header(char *cow_file, int fd, char *backing_file,
+extern int write_cow_header(char *cow_file, int fd, char *backing_file, 
                            int sectorsize, int alignment, long long *size);
 
 extern void cow_sizes(int version, __u64 size, int sectorsize, int align,
-                     int bitmap_offset, unsigned long *bitmap_len_out,
+                     int bitmap_offset, unsigned long *bitmap_len_out, 
                      int *data_offset_out);
 
 #endif
diff --git a/arch/um/drivers/cow_kern.c b/arch/um/drivers/cow_kern.c
new file mode 100644 (file)
index 0000000..ad843f3
--- /dev/null
@@ -0,0 +1,630 @@
+#define COW_MAJOR 60
+#define MAJOR_NR COW_MAJOR
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/stat.h>
+#include <linux/vmalloc.h>
+#include <linux/blkdev.h>
+#include <linux/blk.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/devfs_fs.h>
+#include <asm/uaccess.h>
+#include "2_5compat.h"
+#include "cow.h"
+#include "ubd_user.h"
+
+#define COW_SHIFT 4
+
+/* Per-unit state of the COW block driver; one entry of cow_dev[] each. */
+struct cow {
+       /* open count: bumped in cow_open(), dropped in cow_release() */
+       int count;
+       /* COW device name; cow_setup() points it into the boot option */
+       char *cow_path;
+       /* name_to_kdev_t(cow_path), filled in by the first cow_open() */
+       dev_t cow_dev;
+       /* bdget(cow_dev); opened read/write by cow_open() */
+       struct block_device *cow_bdev;
+       /* backing device name; set by cow_setup() like cow_path */
+       char *backing_path;
+       /* name_to_kdev_t(backing_path) */
+       dev_t backing_dev;
+       /* bdget(backing_dev); opened read-only by cow_open() */
+       struct block_device *backing_bdev;
+       /* sector size reported by read_cow_header() */
+       int sectorsize;
+       /* in-memory bitmap, vmalloc'ed and read from the COW device in
+        * cow_open(); accessed with ubd_test_bit()/ubd_set_bit() */
+       unsigned long *bitmap;
+       /* size passed to vmalloc() for the bitmap; computed by cow_sizes() */
+       unsigned long bitmap_len;
+       /* start of the bitmap on the COW device, from read_cow_header() */
+       int bitmap_offset;
+       /* start of the data area on the COW device, from cow_sizes() */
+       int data_offset;
+       /* handle returned by devfs_register() in cow_add() */
+       devfs_handle_t devfs;
+       /* initialised locked in cow_add(); cow_thread() ups it once it is
+        * running and again on exit, cow_open() downs it after starting
+        * the thread */
+       struct semaphore sem;
+       /* upped by cow_make_request() for each queued buffer and downed by
+        * cow_thread() before it takes one off the list */
+       struct semaphore io_sem;
+       /* incremented by cow_thread() at start-up; the thread leaves its
+        * loop when this reads as, or is decremented to, zero */
+       atomic_t working;
+       /* protects the bh/bhtail list below */
+       spinlock_t io_lock;
+       /* head of the pending buffer list, linked through b_reqnext */
+       struct buffer_head *bh;
+       /* tail of the pending buffer list; NULL when the list is empty */
+       struct buffer_head *bhtail;
+       /* only ever set to NULL (in cow_make_request()) by the code in
+        * this file */
+       void *end_io;
+};
+
+#define DEFAULT_COW { \
+       .count                  = 0, \
+       .cow_path               = NULL, \
+       .cow_dev                = 0, \
+       .backing_path           = NULL, \
+       .backing_dev            = 0, \
+        .bitmap                        = NULL, \
+       .bitmap_len             = 0, \
+       .bitmap_offset          = 0, \
+        .data_offset           = 0, \
+       .devfs                  = NULL, \
+       .working                = ATOMIC_INIT(0), \
+       .io_lock                = SPIN_LOCK_UNLOCKED, \
+}
+
+#define MAX_DEV (8)
+#define MAX_MINOR (MAX_DEV << COW_SHIFT)
+
+struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW };
+
+/* Not modified by this driver */
+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE };
+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 };
+
+/* Protected by cow_lock */
+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 };
+
+static struct hd_struct        cow_part[MAX_MINOR] =
+       { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } };
+
+/* Protected by io_request_lock */
+static request_queue_t *cow_queue;
+
+static int cow_open(struct inode *inode, struct file *filp);
+static int cow_release(struct inode * inode, struct file * file);
+static int cow_ioctl(struct inode * inode, struct file * file,
+                    unsigned int cmd, unsigned long arg);
+static int cow_revalidate(kdev_t rdev);
+
+static struct block_device_operations cow_blops = {
+       .open           = cow_open,
+       .release        = cow_release,
+       .ioctl          = cow_ioctl,
+       .revalidate     = cow_revalidate,
+};
+
+/* Initialized in an initcall, and unchanged thereafter */
+devfs_handle_t cow_dir_handle;
+
+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \
+{ \
+       .major          = maj, \
+       .major_name     = name, \
+       .minor_shift    = shift, \
+       .max_p          = 1 << shift, \
+       .part           = parts, \
+       .sizes          = bsizes, \
+       .nr_real        = max, \
+       .real_devices   = NULL, \
+       .next           = NULL, \
+       .fops           = blops, \
+       .de_arr         = NULL, \
+       .flags          = 0 \
+}
+
+static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED;
+
+static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part,
+                                                COW_SHIFT, sizes, MAX_DEV, 
+                                                &cow_blops);
+
+/*
+ * Register the devfs node for COW unit n and initialise its semaphores.
+ * Units that were never given a COW path are left untouched and reported
+ * as -ENODEV; configured units return 0.  The handle returned by
+ * devfs_register() is stored without being checked.
+ */
+static int cow_add(int n)
+{
+       char node[sizeof("nnnnnn\0")];
+       struct cow *cow = &cow_dev[n];
+
+       if(cow->cow_path == NULL)
+               return(-ENODEV);
+
+       sprintf(node, "%d", n);
+       cow->devfs = devfs_register(cow_dir_handle, node, DEVFS_FL_REMOVABLE,
+                                   MAJOR_NR, n << COW_SHIFT,
+                                   S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP |
+                                   S_IWGRP, &cow_blops, NULL);
+
+       /* sem starts out held, io_sem starts out free */
+       init_MUTEX_LOCKED(&cow->sem);
+       init_MUTEX(&cow->io_sem);
+
+       return(0);
+}
+
+/*
+ * Append a buffer_head to the tail of the device's pending list.  The
+ * list is updated under io_lock with the interrupt state saved and
+ * restored around it.
+ */
+static void cow_add_bh(struct cow *cow, struct buffer_head *bh)
+{
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&cow->io_lock, irq_flags);
+       if(cow->bhtail == NULL)
+               cow->bh = bh;           /* empty list: bh is the head too */
+       else
+               cow->bhtail->b_reqnext = bh;
+       cow->bhtail = bh;
+       spin_unlock_irqrestore(&cow->io_lock, irq_flags);
+}
+
+/*
+ * Detach and return the first pending buffer_head, or NULL when the
+ * list is empty.  The returned buffer has its b_reqnext link cleared.
+ */
+static struct buffer_head *cow_get_bh(struct cow *cow)
+{
+       struct buffer_head *first;
+
+       spin_lock_irq(&cow->io_lock);
+       first = cow->bh;
+       if(first == NULL)
+               goto unlock;
+
+       /* removing the last element also empties the tail pointer */
+       if(cow->bhtail == first)
+               cow->bhtail = NULL;
+       cow->bh = first->b_reqnext;
+       first->b_reqnext = NULL;
+ unlock:
+       spin_unlock_irq(&cow->io_lock);
+
+       return(first);
+}
+
+/*
+ * Submit the ncow_bh bitmap buffers in cow_bh for writing, wait for each
+ * of them in turn and release it, then submit the data buffer bh and
+ * release it as well.  The data write itself is not waited for here.
+ * The cow argument is currently unused.
+ */
+static void cow_handle_bh(struct cow *cow, struct buffer_head *bh,
+                         struct buffer_head **cow_bh, int ncow_bh)
+{
+       int done = 0;
+
+       if(ncow_bh > 0)
+               ll_rw_block(WRITE, ncow_bh, cow_bh);
+
+       while(done < ncow_bh){
+               struct buffer_head *bitmap_bh = cow_bh[done++];
+
+               wait_on_buffer(bitmap_bh);
+               brelse(bitmap_bh);
+       }
+
+       ll_rw_block(WRITE, 1, &bh);
+       brelse(bh);
+}
+
+/*
+ * Get a buffer on the COW device and fill it from the in-memory bitmap.
+ *
+ * `sector` is converted in place to
+ *     (bitmap_offset + sector / 8) / sectorsize
+ * and that value is used as the block number handed to getblk().  Then
+ * sectorsize bytes are copied from dev->bitmap into the buffer, which is
+ * returned to the caller (cow_thread() passes it on to cow_handle_bh(),
+ * which writes and releases it).
+ *
+ * NOTE(review): the memcpy() source is indexed with the already converted
+ * block number divided by the number of bits in a bitmap word, on an
+ * unsigned long pointer - confirm this really selects the bitmap bytes
+ * that belong in that block.
+ * NOTE(review): the buffer is not marked dirty or uptodate here - confirm
+ * that ll_rw_block(WRITE, ...) will actually write it out.
+ */
+static struct buffer_head *cow_new_bh(struct cow *dev, int sector)
+{
+       struct buffer_head *bh;
+
+       sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize;
+       bh = getblk(dev->cow_dev, sector, dev->sectorsize);
+       memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])),
+              dev->sectorsize);
+       return(bh);
+}
+
+/* Copied from loop.c, needed to avoid deadlocking in make_request. */
+
+/*
+ * Per-device kernel thread, started from cow_open() with the struct cow
+ * as its argument.  It takes buffers queued by cow_make_request() off the
+ * pending list, marks the sectors they cover in the in-memory bitmap,
+ * writes out the affected bitmap blocks and then the data buffer itself
+ * (see cow_handle_bh()).  dev->sem is upped once when the thread is up
+ * and running and once more when it exits.
+ *
+ * NOTE(review): in the code visible in this file only this thread ever
+ * increments dev->working (once, at start-up), so the
+ * atomic_dec_and_test() at the bottom of the loop reaches zero after the
+ * first buffer - confirm whether cow_make_request() is meant to
+ * increment it per queued buffer, as loop.c does with lo_pending.
+ * NOTE(review): cow_make_request() has already rewritten b_dev and
+ * b_blocknr before queueing the buffer; `start` below is computed from
+ * that rewritten b_blocknr and the same offset is added a second time -
+ * confirm this is intended.
+ */
+static int cow_thread(void *data)
+{
+       struct cow *dev = data;
+       struct buffer_head *bh;
+
+       daemonize();
+       exit_files(current);
+
+       /* name the thread after the unit index within cow_dev[] */
+       sprintf(current->comm, "cow%d", dev - cow_dev);
+
+       /* block every signal and throw away any that are pending */
+       spin_lock_irq(&current->sigmask_lock);
+       sigfillset(&current->blocked);
+       flush_signals(current);
+       spin_unlock_irq(&current->sigmask_lock);
+
+       atomic_inc(&dev->working);
+
+       current->policy = SCHED_OTHER;
+       current->nice = -20;
+
+       current->flags |= PF_NOIO;
+
+       /*
+        * up sem, we are running
+        */
+       up(&dev->sem);
+
+       for(;;){
+               int start, len, nbh, i, update_bitmap = 0;
+               struct buffer_head *cow_bh[2];
+
+               /* the return value is ignored; all signals are blocked */
+               down_interruptible(&dev->io_sem);
+               /*
+                * could be upped because of tear-down, not because of
+                * pending work
+                */
+               if(!atomic_read(&dev->working))
+                       break;
+
+               bh = cow_get_bh(dev);
+               if(bh == NULL){
+                       printk(KERN_ERR "cow: missing bh\n");
+                       continue;
+               }
+
+               /* first sector covered by the buffer and sector count */
+               start = bh->b_blocknr * bh->b_size / dev->sectorsize;
+               len = bh->b_size / dev->sectorsize;
+               /* set the bit of every sector that is not yet marked */
+               for(i = 0; i < len ; i++){
+                       if(ubd_test_bit(start + i, 
+                                       (unsigned char *) dev->bitmap))
+                               continue;
+
+                       update_bitmap = 1;
+                       ubd_set_bit(start + i, (unsigned char *) dev->bitmap);
+               }
+
+               /*
+                * If any bit changed, build one bitmap buffer for `start`
+                * and a second one for `start + len` when the two values
+                * differ after division by sectorsize.
+                */
+               cow_bh[0] = NULL;
+               cow_bh[1] = NULL;
+               nbh = 0;
+               if(update_bitmap){
+                       cow_bh[0] = cow_new_bh(dev, start);
+                       nbh++;
+                       if(start / dev->sectorsize != 
+                          (start + len) / dev->sectorsize){
+                               cow_bh[1] = cow_new_bh(dev, start + len);
+                               nbh++;
+                       }
+               }
+               
+               /* point the buffer at the data area of the COW device */
+               bh->b_dev = dev->cow_dev;
+               bh->b_blocknr += dev->data_offset / dev->sectorsize;
+
+               cow_handle_bh(dev, bh, cow_bh, nbh);
+
+               /*
+                * io_sem is upped both for pending work and for tear-down;
+                * the thread stops once dev->working drops to zero
+                */
+               if(atomic_dec_and_test(&dev->working))
+                       break;
+       }
+
+       up(&dev->sem);
+       return(0);
+}
+
+/*
+ * make_request hook for the cow queue.  The unit is taken from the minor
+ * number of bh->b_rdev.
+ *
+ *  - if the bit for bh->b_rsector is set in the bitmap, the buffer is
+ *    redirected to the data area of the COW device and 1 is returned;
+ *  - otherwise a WRITE is queued for cow_thread() (b_dev and b_blocknr
+ *    are rewritten first, io_sem is upped) and 0 is returned;
+ *  - any other request is redirected to the backing device and 1 is
+ *    returned.
+ *
+ * Only the bit of the first sector of the buffer is examined.  The q
+ * argument is not used.
+ */
+static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+       struct cow *cow = &cow_dev[MINOR(bh->b_rdev) >> COW_SHIFT];
+
+       cow->end_io = NULL;
+
+       if(ubd_test_bit(bh->b_rsector, (unsigned char *) cow->bitmap)){
+               bh->b_rdev = cow->cow_dev;
+               bh->b_rsector += cow->data_offset / cow->sectorsize;
+               return(1);
+       }
+
+       if(rw != WRITE){
+               bh->b_rdev = cow->backing_dev;
+               return(1);
+       }
+
+       bh->b_dev = cow->cow_dev;
+       bh->b_blocknr += cow->data_offset / cow->sectorsize;
+
+       /* hand the write over to the per-device thread */
+       cow_add_bh(cow, bh);
+       up(&cow->io_sem);
+       return(0);
+}
+
+/*
+ * Driver initialisation, run as an initcall: create the "cow" devfs
+ * directory, register the block major, hook up the per-major size
+ * tables and the request queue, add the gendisk and finally call
+ * cow_add() for every unit (its return value is ignored).
+ * Returns -1 if the major cannot be registered and 0 otherwise.
+ */
+int cow_init(void)
+{
+       int unit = 0;
+
+       cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL);
+       if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops) != 0) {
+               printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR);
+               return -1;
+       }
+
+       /* per-major tables */
+       read_ahead[MAJOR_NR] = 8;               /* 8 sector (4kB) read-ahead */
+       blksize_size[MAJOR_NR] = blk_sizes;
+       blk_size[MAJOR_NR] = sizes;
+       INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes);
+
+       /* requests are remapped by cow_make_request(), not queued */
+       cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR);
+       blk_init_queue(cow_queue, NULL);
+       INIT_ELV(cow_queue, &cow_queue->elevator);
+       blk_queue_make_request(cow_queue, cow_make_request);
+
+       add_gendisk(&cow_gendisk);
+
+       while(unit < MAX_DEV)
+               cow_add(unit++);
+
+       return(0);
+}
+
+__initcall(cow_init);
+
+/*
+ * Read `count` bytes starting at byte offset `start` from the block
+ * device whose dev_t `arg` points to, one hardware sector at a time via
+ * bread().  The signature matches the reader callback expected by
+ * read_cow_header().
+ *
+ * Returns count on success and -EIO if a block cannot be read.  Panics
+ * if the device reports a hardware sector size of zero.
+ */
+static int reader(__u64 start, char *buf, int count, void *arg)
+{
+       dev_t dev = *((dev_t *) arg);
+       struct buffer_head *bh;
+       __u64 block;
+       int cur, offset, left, n, blocksize = get_hardsect_size(dev);
+
+       if(blocksize == 0)
+               panic("Zero blocksize");
+
+       block = start / blocksize;
+       offset = start % blocksize;
+       left = count;
+       cur = 0;
+       while(left > 0){
+               /*
+                * Copy what this block holds past `offset`, but no more
+                * than is still wanted.  (Subtracting offset from
+                * min(left, blocksize) came up short, or even negative,
+                * for an unaligned read ending inside the first block.)
+                */
+               n = blocksize - offset;
+               if(n > left)
+                       n = left;
+
+               /*
+                * `block` is counted in units of blocksize, so the buffer
+                * has to be requested with that size as well, otherwise
+                * bread() would address a different part of the device.
+                */
+               bh = bread(dev, block, blocksize);
+               if(bh == NULL)
+                       return(-EIO);
+
+               memcpy(&buf[cur], bh->b_data + offset, n);
+               brelse(bh);
+
+               block++;
+               left -= n;
+               cur += n;
+               offset = 0;     /* only the first block can start mid-way */
+       }
+
+       return(count);
+}
+
+/*
+ * open() for a cow unit.  The first opener (count == 0) resolves the COW
+ * and backing device names, gets and opens both block devices, reads the
+ * COW header, loads the bitmap into vmalloc'ed memory, asks the backing
+ * device for its size (stored in sizes[]) and starts cow_thread(), then
+ * waits on dev->sem until the thread is running.  Every successful open
+ * increments dev->count.  Returns 0 or a negative errno.
+ *
+ * NOTE(review): cow_lock is a spinlock and is held across blkdev_get(),
+ * vmalloc(), bread() (through reader()) and down() - these look like
+ * calls that may sleep; confirm the locking.
+ * NOTE(review): the error exits only drop the lock; block devices that
+ * were already obtained or opened are not released again - confirm.
+ * NOTE(review): nothing here checks that cow_path/backing_path were set
+ * for this unit before they are passed to name_to_kdev_t() - confirm
+ * that unconfigured units cannot be opened.
+ */
+static int cow_open(struct inode *inode, struct file *filp)
+{
+       int (*dev_ioctl)(struct inode *, struct file *, unsigned int, 
+                        unsigned long);
+       mm_segment_t fs;
+       struct cow *dev;
+       __u64 size;
+       __u32 version, align;
+       time_t mtime;
+       char *backing_file;
+       int n, offset, err = 0;
+
+       n = DEVICE_NR(inode->i_rdev);
+       if(n >= MAX_DEV)
+               return(-ENODEV);
+       dev = &cow_dev[n];
+       /* index of this unit's first minor in the per-minor tables */
+       offset = n << COW_SHIFT;
+
+       spin_lock(&cow_lock);
+
+       if(dev->count == 0){
+               /* both names are looked up before bailing out, so that a
+                * failure of each one is reported */
+               dev->cow_dev = name_to_kdev_t(dev->cow_path);
+               if(dev->cow_dev == 0){
+                       printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
+                              "failed\n", dev->cow_path);
+                       err = -ENODEV;
+               }
+
+               dev->backing_dev = name_to_kdev_t(dev->backing_path);
+               if(dev->backing_dev == 0){
+                       printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
+                              "failed\n", dev->backing_path);
+                       err = -ENODEV;
+               }
+
+               if(err) 
+                       goto out;
+
+               dev->cow_bdev = bdget(dev->cow_dev);
+               if(dev->cow_bdev == NULL){
+                       printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", 
+                              dev->cow_path);
+                       err = -ENOMEM;
+               }
+               dev->backing_bdev = bdget(dev->backing_dev);
+               if(dev->backing_bdev == NULL){
+                       printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", 
+                              dev->backing_path);
+                       err = -ENOMEM;
+               }
+
+               if(err) 
+                       goto out;
+
+               /* the COW device is opened read/write ... */
+               err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, 
+                                BDEV_RAW);
+               if(err){
+                       printk("cow_open - blkdev_get of COW device failed, "
+                              "error = %d\n", err);
+                       goto out;
+               }
+               
+               /* ... the backing device read-only */
+               err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW);
+               if(err){
+                       printk("cow_open - blkdev_get of backing device "
+                              "failed, error = %d\n", err);
+                       goto out;
+               }
+               
+               /* NOTE(review): backing_file, mtime and the backing file
+                * name stored in the header are not used after this call,
+                * and backing_file is not freed here - confirm */
+               err = read_cow_header(reader, &dev->cow_dev, &version, 
+                                     &backing_file, &mtime, &size,
+                                     &dev->sectorsize, &align, 
+                                     &dev->bitmap_offset);
+               if(err){
+                       printk(KERN_ERR "cow_open - read_cow_header failed, "
+                              "err = %d\n", err);
+                       goto out;
+               }
+
+               cow_sizes(version, size, dev->sectorsize, align, 
+                         dev->bitmap_offset, &dev->bitmap_len, 
+                         &dev->data_offset);
+               dev->bitmap = (void *) vmalloc(dev->bitmap_len);
+               if(dev->bitmap == NULL){
+                       err = -ENOMEM;
+                       printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
+                       goto out;
+               }
+               flush_tlb_kernel_vm();
+               
+               /* reader() returns the byte count on success, so only a
+                * negative value is an error; err is overwritten by the
+                * ioctl below */
+               err = reader(dev->bitmap_offset, (char *) dev->bitmap, 
+                            dev->bitmap_len, &dev->cow_dev);
+               if(err < 0){
+                       printk(KERN_ERR "Failed to read COW bitmap\n");
+                       /* dev->bitmap keeps pointing at the freed area */
+                       vfree(dev->bitmap);
+                       goto out;
+               }
+
+               /* Ask the backing device for its size.  The ioctl is given
+                * a kernel pointer, hence the temporary KERNEL_DS.
+                * NOTE(review): the backing device's ioctl is called with
+                * the inode and file of the cow device - confirm. */
+               dev_ioctl = dev->backing_bdev->bd_op->ioctl;
+               fs = get_fs();
+               set_fs(KERNEL_DS);
+               err = (*dev_ioctl)(inode, filp, BLKGETSIZE, 
+                                  (unsigned long) &sizes[offset]);
+               set_fs(fs);
+               if(err){
+                       printk(KERN_ERR "cow_open - BLKGETSIZE failed, "
+                              "error = %d\n", err);
+                       goto out;
+               }
+
+               /* start the I/O thread and wait until it has upped sem;
+                * the return value of kernel_thread() is not checked */
+               kernel_thread(cow_thread, dev, 
+                             CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+               down(&dev->sem);
+       }
+       dev->count++;
+ out:
+       spin_unlock(&cow_lock);
+       return(err);
+}
+
+/*
+ * release() for a cow unit.  Drops the open count; when it is no longer
+ * positive, both block devices are closed with blkdev_put() (failures
+ * are only logged), released with bdput() and their pointers cleared.
+ * Returns -ENODEV for an out-of-range unit and 0 otherwise.  The bitmap
+ * and the I/O thread are not touched here.
+ */
+static int cow_release(struct inode * inode, struct file * file)
+{
+       struct cow *cow;
+       int unit, ret;
+
+       unit = DEVICE_NR(inode->i_rdev);
+       if(unit >= MAX_DEV)
+               return(-ENODEV);
+       cow = &cow_dev[unit];
+
+       spin_lock(&cow_lock);
+
+       cow->count--;
+       if(cow->count <= 0){
+               /* last user gone: let go of the COW device ... */
+               ret = blkdev_put(cow->cow_bdev, BDEV_RAW);
+               if(ret)
+                       printk("cow_release - blkdev_put of cow device failed, "
+                              "error = %d\n", ret);
+               bdput(cow->cow_bdev);
+               cow->cow_bdev = NULL;
+
+               /* ... and of the backing device */
+               ret = blkdev_put(cow->backing_bdev, BDEV_RAW);
+               if(ret)
+                       printk("cow_release - blkdev_put of backing device failed, "
+                              "error = %d\n", ret);
+               bdput(cow->backing_bdev);
+               cow->backing_bdev = NULL;
+       }
+
+       spin_unlock(&cow_lock);
+       return(0);
+}
+
+/*
+ * ioctl() for a cow unit: every command is passed unchanged to the ioctl
+ * method of the backing block device and its result is returned.
+ * Returns -ENODEV for an out-of-range unit.
+ */
+static int cow_ioctl(struct inode * inode, struct file * file,
+                    unsigned int cmd, unsigned long arg)
+{
+       struct block_device *backing;
+       int unit = DEVICE_NR(inode->i_rdev);
+
+       if(unit >= MAX_DEV)
+               return(-ENODEV);
+
+       backing = cow_dev[unit].backing_bdev;
+       return((*backing->bd_op->ioctl)(inode, file, cmd, arg));
+}
+
+/*
+ * revalidate hook of cow_blops.  Not implemented yet: it only logs a
+ * reminder and reports success.
+ */
+static int cow_revalidate(kdev_t rdev)
+{
+       printk(KERN_ERR "Need to implement cow_revalidate\n");
+       return(0);
+}
+
+/*
+ * Parse a unit specifier at *ptr: either a number (any base understood
+ * by simple_strtoul() with base 0) or a single letter 'a'..'h', which
+ * maps to 0..7.  On success *ptr is advanced past the specifier and the
+ * unit is returned; otherwise -1 is returned and *ptr is left alone.
+ * Numeric units are not range-checked here.
+ */
+static int parse_unit(char **ptr)
+{
+       char *cur = *ptr, *stop;
+       int unit;
+
+       if(isdigit(*cur)){
+               unit = simple_strtoul(cur, &stop, 0);
+               if(stop == cur)
+                       return(-1);
+               *ptr = stop;
+               return(unit);
+       }
+
+       if((*cur < 'a') || (*cur > 'h'))
+               return(-1);
+
+       *ptr = cur + 1;
+       return(*cur - 'a');
+}
+
+/*
+ * Handler for the "cow" boot option.  `str` is what follows "cow" on the
+ * command line and has the form
+ *     <unit>=<cow device>,<backing device>
+ * where <unit> is anything parse_unit() accepts.  The string is split in
+ * place at the comma and the two names are stored, as pointers into
+ * `str`, in the unit's cow_dev[] entry under cow_lock.
+ *
+ * Returns 1 when the unit or the '=' cannot be parsed or the unit is out
+ * of range, and 0 when the backing device name is missing or on success.
+ */
+static int cow_setup(char *str)
+{
+       struct cow *dev;
+       char *cow_name, *backing_name;
+       int unit;
+
+       unit = parse_unit(&str);
+       if(unit < 0){
+               printk(KERN_ERR "cow_setup - Couldn't parse unit number\n");
+               return(1);
+       }
+
+       /* parse_unit() does not bound numeric units; without this check a
+        * large unit number would index past the end of cow_dev[] */
+       if(unit >= MAX_DEV){
+               printk(KERN_ERR "cow_setup - unit number %d is out of range "
+                      "(0 - %d)\n", unit, MAX_DEV - 1);
+               return(1);
+       }
+
+       if(*str != '='){
+               printk(KERN_ERR "cow_setup - Missing '=' after unit "
+                      "number\n");
+               return(1);
+       }
+       str++;
+
+       cow_name = str;
+       backing_name = strchr(str, ',');
+       if(backing_name == NULL){
+               printk(KERN_ERR "cow_setup - missing backing device name\n");
+               return(0);
+       }
+       /* terminate the COW name and step over the comma */
+       *backing_name = '\0';
+       backing_name++;
+
+       spin_lock(&cow_lock);
+
+       dev = &cow_dev[unit];
+       dev->cow_path = cow_name;
+       dev->backing_path = backing_name;
+       
+       spin_unlock(&cow_lock);
+       return(0);
+}
+
+__setup("cow", cow_setup);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index 122664b..e3a40f3 100644 (file)
@@ -35,12 +35,12 @@ struct cow_header_v2 {
        int sectorsize;
 };
 
-/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in
+/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in 
  * case other systems have different values for MAXPATHLEN
  */
 #define PATH_LEN_V3 4096
 
-/* Changes from V2 -
+/* Changes from V2 - 
  *     PATH_LEN_V3 as described above
  *     Explicitly specify field bit lengths for systems with different
  *             lengths for the usual C types.  Not sure whether char or
@@ -90,15 +90,15 @@ union cow_header {
 #define DIV_ROUND(x, len) (((x) + (len) - 1) / (len))
 #define ROUND_UP(x, align) DIV_ROUND(x, align) * (align)
 
-void cow_sizes(int version, __u64 size, int sectorsize, int align,
-              int bitmap_offset, unsigned long *bitmap_len_out,
+void cow_sizes(int version, __u64 size, int sectorsize, int align, 
+              int bitmap_offset, unsigned long *bitmap_len_out, 
               int *data_offset_out)
 {
        if(version < 3){
                *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
 
                *data_offset_out = bitmap_offset + *bitmap_len_out;
-               *data_offset_out = (*data_offset_out + sectorsize - 1) /
+               *data_offset_out = (*data_offset_out + sectorsize - 1) / 
                        sectorsize;
                *data_offset_out *= sectorsize;
        }
@@ -117,7 +117,7 @@ static int absolutize(char *to, int size, char *from)
        int remaining;
 
        if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
-               cow_printf("absolutize : unable to get cwd - errno = %d\n",
+               cow_printf("absolutize : unable to get cwd - errno = %d\n", 
                           errno);
                return(-1);
        }
@@ -126,7 +126,7 @@ static int absolutize(char *to, int size, char *from)
                *slash = '\0';
                if(chdir(from)){
                        *slash = '/';
-                       cow_printf("absolutize : Can't cd to '%s' - "
+                       cow_printf("absolutize : Can't cd to '%s' - " 
                                   "errno = %d\n", from, errno);
                        return(-1);
                }
@@ -158,7 +158,7 @@ static int absolutize(char *to, int size, char *from)
        return(0);
 }
 
-int write_cow_header(char *cow_file, int fd, char *backing_file,
+int write_cow_header(char *cow_file, int fd, char *backing_file, 
                     int sectorsize, int alignment, long long *size)
 {
        struct cow_header_v3 *header;
@@ -183,12 +183,12 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
        err = -EINVAL;
        if(strlen(backing_file) > sizeof(header->backing_file) - 1){
                cow_printf("Backing file name \"%s\" is too long - names are "
-                          "limited to %d characters\n", backing_file,
+                          "limited to %d characters\n", backing_file, 
                           sizeof(header->backing_file) - 1);
                goto out_free;
        }
 
-       if(absolutize(header->backing_file, sizeof(header->backing_file),
+       if(absolutize(header->backing_file, sizeof(header->backing_file), 
                      backing_file))
                goto out_free;
 
@@ -234,10 +234,10 @@ int file_reader(__u64 offset, char *buf, int len, void *arg)
 
 /* XXX Need to sanity-check the values read from the header */
 
-int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
-                   __u32 *version_out, char **backing_file_out,
-                   time_t *mtime_out, __u64 *size_out,
-                   int *sectorsize_out, __u32 *align_out,
+int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, 
+                   __u32 *version_out, char **backing_file_out, 
+                   time_t *mtime_out, __u64 *size_out, 
+                   int *sectorsize_out, __u32 *align_out, 
                    int *bitmap_offset_out)
 {
        union cow_header *header;
@@ -310,7 +310,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
        }
        else {
                cow_printf("read_cow_header - invalid COW version\n");
-               goto out;
+               goto out;               
        }
        err = -ENOMEM;
        *backing_file_out = cow_strdup(file);
@@ -326,18 +326,18 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 }
 
 int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
-                 int alignment, int *bitmap_offset_out,
+                 int alignment, int *bitmap_offset_out, 
                  unsigned long *bitmap_len_out, int *data_offset_out)
 {
        __u64 size, offset;
        char zero = 0;
        int err;
 
-       err = write_cow_header(cow_file, fd, backing_file, sectorsize,
+       err = write_cow_header(cow_file, fd, backing_file, sectorsize, 
                               alignment, &size);
-       if(err)
+       if(err) 
                goto out;
-
+       
        *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment);
        cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out,
                  bitmap_len_out, data_offset_out);
@@ -349,9 +349,9 @@ int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
                goto out;
        }
 
-       /* does not really matter how much we write it is just to set EOF
+       /* does not really matter how much we write it is just to set EOF 
         * this also sets the entire COW bitmap
-        * to zero without having to allocate it
+        * to zero without having to allocate it 
         */
        err = cow_write_file(fd, &zero, sizeof(zero));
        if(err != sizeof(zero)){
index d574278..6223a31 100644 (file)
@@ -214,7 +214,7 @@ static int hostaudio_release(struct inode *inode, struct file *file)
         printk("hostaudio: release called\n");
 #endif
 
-               os_close_file(state->fd);
+       os_close_file(state->fd);
         kfree(state);
 
        return(0);
@@ -271,7 +271,7 @@ static int hostmixer_release(struct inode *inode, struct file *file)
         printk("hostmixer: release called\n");
 #endif
 
-               os_close_file(state->fd);
+       os_close_file(state->fd);
         kfree(state);
 
        return(0);
index c32fa1b..b89fefb 100644 (file)
@@ -4,9 +4,6 @@
  */
 
 #include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include "hostaudio.h"
 ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, 
                            size_t count, loff_t *ppos)
 {
-       ssize_t ret;
-
 #ifdef DEBUG
         printk("hostaudio: read_user called, count = %d\n", count);
 #endif
 
-        ret = read(state->fd, buffer, count);
-
-        if(ret < 0) return(-errno);
-        return(ret);
+       return(os_read_file(state->fd, buffer, count));
 }
 
 ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer,
                             size_t count, loff_t *ppos)
 {
-       ssize_t ret;
-
 #ifdef DEBUG
         printk("hostaudio: write_user called, count = %d\n", count);
 #endif
 
-        ret = write(state->fd, buffer, count);
-
-        if(ret < 0) return(-errno);
-        return(ret);
+       return(os_write_file(state->fd, buffer, count));
 }
 
 int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, 
                         unsigned long arg)
 {
-       int ret;
 #ifdef DEBUG
         printk("hostaudio: ioctl_user called, cmd = %u\n", cmd);
 #endif
 
-        ret = ioctl(state->fd, cmd, arg);
-       
-        if(ret < 0) return(-errno);
-        return(ret);
+       return(os_ioctl_generic(state->fd, cmd, arg));
 }
 
 int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp)
@@ -67,14 +50,15 @@ int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp)
         printk("hostaudio: open_user called\n");
 #endif
 
-        state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
-
-        if(state->fd >= 0) return(0);
+       state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
 
-        printk("hostaudio_open_user failed to open '%s', errno = %d\n",
-              dsp, errno);
+       if(state->fd < 0) {
+               printk("hostaudio_open_user failed to open '%s', err = %d\n",
+                      dsp, -state->fd);
+               return(state->fd); 
+       }
         
-        return(-errno); 
+       return(0);
 }
 
 int hostaudio_release_user(struct hostaudio_state *state)
@@ -82,10 +66,10 @@ int hostaudio_release_user(struct hostaudio_state *state)
 #ifdef DEBUG
         printk("hostaudio: release called\n");
 #endif
-        if(state->fd >= 0){
-               close(state->fd);
-               state->fd=-1;
-        }
+       if(state->fd >= 0){
+               os_close_file(state->fd);
+               state->fd = -1;
+       }
 
         return(0);
 }
@@ -95,15 +79,11 @@ int hostaudio_release_user(struct hostaudio_state *state)
 int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, 
                                unsigned int cmd, unsigned long arg)
 {
-       int ret;
 #ifdef DEBUG
         printk("hostmixer: ioctl_user called cmd = %u\n",cmd);
 #endif
 
-        ret = ioctl(state->fd, cmd, arg);
-       if(ret < 0) 
-               return(-errno);
-       return(ret);
+       return(os_ioctl_generic(state->fd, cmd, arg));
 }
 
 int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w,
@@ -115,12 +95,13 @@ int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w,
 
         state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
 
-        if(state->fd >= 0) return(0);
-
-        printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n",
-              mixer, errno);
+       if(state->fd < 0) {
+               printk("hostaudio_open_mixdev_user failed to open '%s', "
+                      "err = %d\n", mixer, state->fd);
+               return(state->fd); 
+       }
         
-        return(-errno); 
+       return(0);
 }
 
 int hostmixer_release_mixdev_user(struct hostmixer_state *state)
@@ -130,7 +111,7 @@ int hostmixer_release_mixdev_user(struct hostmixer_state *state)
 #endif
 
         if(state->fd >= 0){
-               close(state->fd);
+               os_close_file(state->fd);
                state->fd = -1;
         }
 
index 2e4e1d4..ef8a1a9 100644 (file)
@@ -175,7 +175,6 @@ static int change_tramp(char **argv, char *output, int output_len)
 
        os_close_file(fds[1]);
        read_output(fds[0], output, output_len);
-
        CATCH_EINTR(err = waitpid(pid, NULL, 0));
        return(pid);
 }
index 53fba09..6f1730d 100644 (file)
@@ -783,6 +783,7 @@ int ubd_driver_init(void){
        io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 
                                 &thread_fd);
        if(io_pid < 0){
+               io_pid = -1;
                printk(KERN_ERR 
                       "ubd : Failed to start I/O thread (errno = %d) - "
                       "falling back to synchronous I/O\n", -io_pid);
index 361d930..a331e2b 100644 (file)
@@ -1,21 +1,23 @@
+#include <asm-generic/vmlinux.lds.h>
+
 OUTPUT_FORMAT(ELF_FORMAT)
 OUTPUT_ARCH(ELF_ARCH)
 ENTRY(_start)
 jiffies = jiffies_64;
 
-SEARCH_DIR("/usr/local/i686-pc-linux-gnu/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib"); SEARCH_DIR("/usr/lib");
-/* Do we need any of these for elf?
-   __DYNAMIC = 0;    */
 SECTIONS
 {
   . = START + SIZEOF_HEADERS;
   .interp         : { *(.interp) }
-  . = ALIGN(4096);
   __binary_start = .;
   . = ALIGN(4096);             /* Init code and data */
   _stext = .;
   __init_begin = .;
-  .text.init : { *(.text.init) }
+  .init.text : { 
+       _sinittext = .;
+       *(.init.text)
+       _einittext = .;
+  }
 
   . = ALIGN(4096);
 
@@ -55,7 +57,9 @@ SECTIONS
   } =0x90909090
   .plt            : { *(.plt) }
   .text           : {
-    *(.text .stub .text.* .gnu.linkonce.t.*)
+    *(.text)
+    SCHED_TEXT
+    *(.stub .text.* .gnu.linkonce.t.*)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
   } =0x90909090
@@ -67,7 +71,7 @@ SECTIONS
 
   #include "asm/common.lds.S"
 
-  .data.init : { *(.data.init) }
+  init.data : { *(.init.data) }
 
   /* Ensure the __preinit_array_start label is properly aligned.  We
      could instead move the label definition inside the section, but
diff --git a/arch/um/include/.cvsignore b/arch/um/include/.cvsignore
deleted file mode 100644 (file)
index 6c778ee..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-kern_constants.h
-skas_ptregs.h
-task.h
-uml-config.h
similarity index 50%
rename from arch/um/kernel/skas/include/mmu.h
rename to arch/um/include/aio.h
index cfbc062..6096f4f 100644 (file)
@@ -1,18 +1,27 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2004 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
 
-#ifndef __SKAS_MMU_H
-#define __SKAS_MMU_H
+#ifndef AIO_H__
+#define AIO_H__
 
-#include "linux/list.h"
-#include "linux/spinlock.h"
+enum aio_type { AIO_READ, AIO_WRITE, AIO_MMAP };
 
-struct mmu_context_skas {
-       int mm_fd;
+struct aio_thread_reply {
+       void *data;
+       int err;
 };
 
+struct aio_context {
+       int reply_fd;
+};
+
+#define INIT_AIO_CONTEXT { .reply_fd   = -1 }
+
+extern int submit_aio(enum aio_type type, int fd, char *buf, int len, 
+                     unsigned long long offset, int reply_fd, void *data);
+
 #endif
 
 /*
diff --git a/arch/um/include/filehandle.h b/arch/um/include/filehandle.h
new file mode 100644 (file)
index 0000000..adc5108
--- /dev/null
@@ -0,0 +1,51 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __FILEHANDLE_H__
+#define __FILEHANDLE_H__
+
+#include "linux/list.h"
+#include "linux/fs.h"
+#include "os.h"
+
+struct file_handle {   /* wrapper around one host file descriptor */
+       struct list_head list;  /* links the handle into a list of reclaimable handles */
+       int fd;                 /* host descriptor; -1 once it has been closed */
+       char *(*get_name)(struct inode *);      /* returns a kfree()able name used to reopen the file */
+       struct inode *inode;    /* argument passed to get_name */
+       struct openflags flags; /* flags passed to open_file() when reopening */
+};
+
+extern struct file_handle bad_filehandle;
+
+extern int open_file(char *name, struct openflags flags, int mode);
+extern void *open_dir(char *file);
+extern int open_filehandle(char *name, struct openflags flags, int mode, 
+                          struct file_handle *fh);
+extern int read_file(struct file_handle *fh, unsigned long long offset, 
+                    char *buf, int len);
+extern int write_file(struct file_handle *fh, unsigned long long offset, 
+                     const char *buf, int len);
+extern int truncate_file(struct file_handle *fh, unsigned long long size);
+extern int close_file(struct file_handle *fh);
+extern void not_reclaimable(struct file_handle *fh);
+extern void is_reclaimable(struct file_handle *fh, 
+                          char *(name_proc)(struct inode *),
+                          struct inode *inode);
+extern int filehandle_fd(struct file_handle *fh);
+extern int make_pipe(struct file_handle *fhs);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index f1d82e0..52c393f 100644 (file)
@@ -100,6 +100,16 @@ extern struct uml_param __uml_setup_start, __uml_setup_end;
 #define __uml_postsetup_call   __attribute__ ((unused,__section__ (".uml.postsetup.init")))
 #define __uml_exit_call                __attribute__ ((unused,__section__ (".uml.exitcall.exit")))
 
+#ifndef __KERNEL__
+
+#define __initcall(fn) static initcall_t __initcall_##fn __init_call = fn
+#define __exitcall(fn) static exitcall_t __exitcall_##fn __exit_call = fn
+
+#define __init_call __attribute__ ((unused,__section__ (".initcall.init")))
+#define __exit_call __attribute__ ((unused,__section__ (".exitcall.exit")))
+
+#endif
+
 #endif /* _LINUX_UML_INIT_H */
 
 /*
index 3af52a6..4bcb829 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/* 
  * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -9,7 +9,7 @@
 #include "linux/interrupt.h"
 
 extern int um_request_irq(unsigned int irq, int fd, int type,
-                         irqreturn_t (*handler)(int, void *,
+                         irqreturn_t (*handler)(int, void *, 
                                                 struct pt_regs *),
                          unsigned long irqflags,  const char * devname,
                          void *dev_id);
index cb7e196..699d46d 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/* 
  * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
@@ -11,7 +11,7 @@
 
 struct remapper {
        struct list_head list;
-       int (*proc)(int, unsigned long, int, __u64);
+       int (*proc)(int, unsigned long, int, __u64, int);
 };
 
 extern void register_remapper(struct remapper *info);
index 07340c8..b966b6e 100644 (file)
@@ -52,10 +52,12 @@ struct openflags {
        unsigned int a : 1;     /* O_APPEND */
        unsigned int e : 1;     /* O_EXCL */
        unsigned int cl : 1;    /* FD_CLOEXEC */
+       unsigned int d : 1;     /* O_DIRECT */
 };
 
 #define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \
-                                         .t = 0, .a = 0, .e = 0, .cl = 0 })
+                                         .t = 0, .a = 0, .e = 0, .cl = 0, \
+                                         .d = 0 })
 
 static inline struct openflags of_read(struct openflags flags)
 {
@@ -134,6 +136,16 @@ extern int os_mode_fd(int fd, int mode);
 
 extern int os_seek_file(int fd, __u64 offset);
 extern int os_open_file(char *file, struct openflags flags, int mode);
+extern void *os_open_dir(char *dir, int *err_out);
+extern int os_seek_dir(void *stream, unsigned long long pos);
+extern int os_read_dir(void *stream, unsigned long long *ino_out, 
+                      char **name_out);
+extern int os_tell_dir(void *stream);
+extern int os_close_dir(void *stream);
+extern int os_remove_file(const char *file);
+extern int os_move_file(const char *from, const char *to);
+extern int os_truncate_file(const char *file, unsigned long long len);
+extern int os_truncate_fd(int fd, unsigned long long len);
 extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
 extern int os_file_size(char *file, long long *size_out);
diff --git a/arch/um/include/skas_ptregs.h b/arch/um/include/skas_ptregs.h
new file mode 100644 (file)
index 0000000..afd5fc3
--- /dev/null
@@ -0,0 +1,26 @@
+/* Automatically generated by arch/um/kernel/skas/util/mk_ptregs */
+
+#ifndef __SKAS_PT_REGS_
+#define __SKAS_PT_REGS_
+
+#define HOST_FRAME_SIZE 17
+#define HOST_FP_SIZE 27
+#define HOST_XFP_SIZE 128
+#define HOST_IP 12
+#define HOST_SP 15
+#define HOST_EFLAGS 14
+#define HOST_EAX 6
+#define HOST_EBX 0
+#define HOST_ECX 1
+#define HOST_EDX 2
+#define HOST_ESI 3
+#define HOST_EDI 4
+#define HOST_EBP 5
+#define HOST_CS 13
+#define HOST_SS 16
+#define HOST_DS 7
+#define HOST_FS 9
+#define HOST_ES 8
+#define HOST_GS 10
+
+#endif
diff --git a/arch/um/include/sysdep-i386/.cvsignore b/arch/um/include/sysdep-i386/.cvsignore
deleted file mode 100644 (file)
index 79cc687..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-sc.h
-thread.h
diff --git a/arch/um/kernel/.cvsignore b/arch/um/kernel/.cvsignore
deleted file mode 100644 (file)
index 9775420..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-config.c
-vmlinux.lds
-
diff --git a/arch/um/kernel/filehandle.c b/arch/um/kernel/filehandle.c
new file mode 100644 (file)
index 0000000..a44dccf
--- /dev/null
@@ -0,0 +1,250 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/slab.h"
+#include "linux/list.h"
+#include "linux/spinlock.h"
+#include "linux/fs.h"
+#include "linux/errno.h"
+#include "filehandle.h"
+#include "os.h"
+#include "kern_util.h"
+
+static spinlock_t open_files_lock = SPIN_LOCK_UNLOCKED;
+static struct list_head open_files = LIST_HEAD_INIT(open_files);
+
+#define NUM_RECLAIM 128
+
+/*
+ * Close up to NUM_RECLAIM host descriptors, taking victims from the tail
+ * of open_files.  Each victim is unlinked from the list and has its fd set
+ * to -1; the file_handle itself is left in place so that active_handle()
+ * can reopen it by name later.
+ */
+static void reclaim_fds(void)
+{
+       struct file_handle *fh;
+       int budget;
+
+       spin_lock(&open_files_lock);
+       for(budget = NUM_RECLAIM; budget > 0; budget--){
+               if(list_empty(&open_files))
+                       break;
+
+               /* is_reclaimable() and active_handle() insert at the head,
+                * so the tail entry is the one touched longest ago */
+               fh = list_entry(open_files.prev, struct file_handle, list);
+               list_del_init(&fh->list);
+               os_close_file(fh->fd);
+               fh->fd = -1;
+       }
+       spin_unlock(&open_files_lock);
+}
+
+/*
+ * Open a host file through os_open_file().  If the first attempt returns
+ * -EMFILE, reclaim some descriptors and try exactly once more.  The return
+ * value is whatever the last os_open_file() call returned.
+ */
+int open_file(char *name, struct openflags flags, int mode)
+{
+       int ret = os_open_file(name, flags, mode);
+
+       if(ret == -EMFILE){
+               reclaim_fds();
+               ret = os_open_file(name, flags, mode);
+       }
+
+       return(ret);
+}
+
+/*
+ * Open a host directory through os_open_dir(), retrying once after
+ * reclaim_fds() if the first attempt failed with -EMFILE.  Returns the
+ * stream on success, otherwise ERR_PTR() of the error os_open_dir()
+ * reported.
+ */
+void *open_dir(char *file)
+{
+       void *stream;
+       int err;
+
+       stream = os_open_dir(file, &err);
+       /* err is only looked at when the open actually failed */
+       if((stream == NULL) && (err == -EMFILE)){
+               reclaim_fds();
+               stream = os_open_dir(file, &err);
+       }
+
+       if(stream == NULL)
+               return(ERR_PTR(err));
+       return(stream);
+}
+
+/*
+ * Take a handle out of the set that reclaim_fds() may close.  Handles
+ * without a get_name callback are left untouched.  A handle still on
+ * open_files is simply unlinked; one that is not on the list is reopened
+ * by name and the result stored in fh->fd.
+ *
+ * NOTE(review): when open_file() fails, its negative return value ends up
+ * in fh->fd, while active_handle() only treats -1 as "closed" - confirm
+ * that callers cope with this.
+ */
+void not_reclaimable(struct file_handle *fh)
+{
+       char *name;
+
+       if(fh->get_name == NULL)
+               return;
+
+       if(!list_empty(&fh->list)){
+               spin_lock(&open_files_lock);
+               list_del_init(&fh->list);
+               spin_unlock(&open_files_lock);
+               return;
+       }
+
+       name = (*fh->get_name)(fh->inode);
+       if(name == NULL){
+               printk("File descriptor %d has no name\n", fh->fd);
+               return;
+       }
+
+       fh->fd = open_file(name, fh->flags, 0);
+       kfree(name);
+}
+
+/*
+ * Mark a handle as reclaimable: remember how to regenerate its name
+ * (name_proc applied to inode) and put it at the head of open_files, where
+ * reclaim_fds() can find it.
+ */
+void is_reclaimable(struct file_handle *fh, char *(name_proc)(struct inode *),
+                   struct inode *inode)
+{
+       fh->inode = inode;
+       fh->get_name = name_proc;
+
+       spin_lock(&open_files_lock);
+       list_add(&fh->list, &open_files);
+       spin_unlock(&open_files_lock);
+}
+
+/*
+ * Make sure fh has a usable host descriptor.
+ *
+ * A handle that is on open_files is moved to the head of the list.  If its
+ * descriptor is still open nothing else happens.  Otherwise the file is
+ * reopened by name with fh->flags and the handle is put back on the list
+ * through is_reclaimable().
+ *
+ * Returns 0 on success, -ENOENT if the handle is closed and has no inode
+ * to derive a name from, -ENOMEM if get_name returned NULL, or the
+ * negative value returned by open_file().
+ */
+static int active_handle(struct file_handle *fh)
+{
+       int fd;
+       char *name;
+
+       /* open_files is walked and edited by reclaim_fds() under
+        * open_files_lock, so the move must hold the lock as well */
+       spin_lock(&open_files_lock);
+       if(!list_empty(&fh->list))
+               list_move(&fh->list, &open_files);
+       spin_unlock(&open_files_lock);
+
+       if(fh->fd != -1)
+               return(0);
+
+       if(fh->inode == NULL)
+               return(-ENOENT);
+
+       name = (*fh->get_name)(fh->inode);
+       if(name == NULL)
+               return(-ENOMEM);
+
+       /* open_file() may call reclaim_fds(), which takes the lock itself,
+        * so it must be called with the lock dropped */
+       fd = open_file(name, fh->flags, 0);
+       kfree(name);
+       if(fd < 0)
+               return(fd);
+
+       fh->fd = fd;
+       is_reclaimable(fh, fh->get_name, fh->inode);
+
+       return(0);
+}
+
+/*
+ * Return the host descriptor behind fh, reopening the file first if it
+ * had been closed.  A non-zero result from active_handle() is returned
+ * instead of a descriptor.
+ */
+int filehandle_fd(struct file_handle *fh)
+{
+       int err = active_handle(fh);
+
+       return(err ? err : fh->fd);
+}
+
+/*
+ * Fill in a fresh handle for an already-open descriptor: empty list link,
+ * no name callback, no inode.  The stored flags are a copy of the caller's
+ * with the "c" bit cleared (presumably O_CREAT, so that a later reopen
+ * does not create the file - TODO confirm against struct openflags).
+ */
+static void init_fh(struct file_handle *fh, int fd, struct openflags flags)
+{
+       flags.c = 0;
+
+       INIT_LIST_HEAD(&fh->list);
+       fh->fd = fd;
+       fh->get_name = NULL;
+       fh->inode = NULL;
+       fh->flags = flags;
+}
+
+/*
+ * Open name with open_file() and, on success, initialize fh around the
+ * new descriptor.  Returns 0 on success or the negative value from
+ * open_file(), in which case fh is not touched.
+ */
+int open_filehandle(char *name, struct openflags flags, int mode,
+                   struct file_handle *fh)
+{
+       int fd = open_file(name, flags, mode);
+
+       if(fd >= 0){
+               init_fh(fh, fd, flags);
+               return(0);
+       }
+
+       return(fd);
+}
+
+/*
+ * Unlink fh from open_files, close its host descriptor and mark the
+ * handle closed by setting fd to -1.  Always returns 0.
+ *
+ * list_del_init() is used rather than list_del() so that fh->list is left
+ * as an empty list head: the other functions in this file test
+ * list_empty(&fh->list), which must stay meaningful after a close.
+ */
+int close_file(struct file_handle *fh)
+{
+       spin_lock(&open_files_lock);
+       list_del_init(&fh->list);
+       spin_unlock(&open_files_lock);
+
+       os_close_file(fh->fd);
+
+       fh->fd = -1;
+       return(0);
+}
+
+/*
+ * Read len bytes at offset into buf.  The handle is reactivated first and
+ * the descriptor is then positioned with os_seek_file(); a non-zero
+ * result from either step is returned as is.  Otherwise the result of
+ * os_read_file() is returned.
+ */
+int read_file(struct file_handle *fh, unsigned long long offset, char *buf,
+             int len)
+{
+       int ret;
+
+       ret = active_handle(fh);
+       if(!ret)
+               ret = os_seek_file(fh->fd, offset);
+       if(!ret)
+               ret = os_read_file(fh->fd, buf, len);
+
+       return(ret);
+}
+
+/*
+ * Write len bytes from buf.  The handle is reactivated first.  An offset
+ * of -1 (all bits set) skips the seek, so the write goes wherever the
+ * descriptor currently points; any other offset is passed to
+ * os_seek_file().  A non-zero result from reactivation or the seek is
+ * returned as is, otherwise the result of os_write_file() is returned.
+ */
+int write_file(struct file_handle *fh, unsigned long long offset,
+              const char *buf, int len)
+{
+       int err = active_handle(fh);
+
+       if(err)
+               return(err);
+
+       if(offset != -1){
+               err = os_seek_file(fh->fd, offset);
+               if(err)
+                       return(err);
+       }
+
+       return(os_write_file(fh->fd, buf, len));
+}
+
+/*
+ * Reactivate the handle and pass its descriptor and size to
+ * os_truncate_fd().  Returns the non-zero result of active_handle() if
+ * reactivation failed, otherwise the result of os_truncate_fd().
+ */
+int truncate_file(struct file_handle *fh, unsigned long long size)
+{
+       int err = active_handle(fh);
+
+       return(err ? err : os_truncate_fd(fh->fd, size));
+}
+
+/*
+ * Create a pipe with os_pipe() and wrap its two descriptors in fhs[0] and
+ * fhs[1], so fhs must have room for two handles.  If os_pipe() fails with
+ * -EMFILE, descriptors are reclaimed and the call is retried once.
+ * Returns 0 on success or the non-zero os_pipe() result.
+ */
+int make_pipe(struct file_handle *fhs)
+{
+       int fds[2], err;
+
+       err = os_pipe(fds, 1, 1);
+       if(err == -EMFILE){
+               reclaim_fds();
+               err = os_pipe(fds, 1, 1);
+       }
+       if(err)
+               return(err);
+
+       init_fh(&fhs[0], fds[0], OPENFLAGS());
+       init_fh(&fhs[1], fds[1], OPENFLAGS());
+       return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index 0e3d409..6c71144 100644 (file)
@@ -100,8 +100,13 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv,
                CATCH_EINTR(n = waitpid(pid, NULL, 0));
                pid = -errno;
        }
+       err = pid;
 
-       if(stack_out == NULL) free_stack(stack, 0);
+ out_close:
+       os_close_file(fds[0]);
+ out_free:
+       if(stack_out == NULL) 
+               free_stack(stack, 0);
         else *stack_out = stack;
        return(pid);
 
index 3253bc0..258e158 100644 (file)
@@ -1,14 +1,13 @@
-/*
+/* 
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
 
 #include "linux/mm.h"
-#include "linux/rbtree.h"
+#include "linux/ghash.h"
 #include "linux/slab.h"
 #include "linux/vmalloc.h"
 #include "linux/bootmem.h"
-#include "linux/module.h"
 #include "asm/types.h"
 #include "asm/pgtable.h"
 #include "kern_util.h"
 #include "kern.h"
 #include "init.h"
 
+#if 0
+static pgd_t physmem_pgd[PTRS_PER_PGD];
+
+static struct phys_desc *lookup_mapping(void *addr)
+{
+       pgd = &physmem_pgd[pgd_index(addr)];
+       if(pgd_none(pgd))
+               return(NULL);
+
+       pmd = pmd_offset(pgd, addr);
+       if(pmd_none(pmd))
+               return(NULL);
+
+       pte = pte_offset_kernel(pmd, addr);
+       return((struct phys_desc *) pte_val(pte));
+}
+
+static struct add_mapping(void *addr, struct phys_desc *new)
+{
+}
+#endif
+
+#define PHYS_HASHSIZE (8192)
+
+struct phys_desc;
+
+DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc);
+
 struct phys_desc {
-       struct rb_node rb;
+       struct virtmem_ptrs virt_ptrs;
        int fd;
        __u64 offset;
        void *virt;
@@ -29,48 +56,21 @@ struct phys_desc {
        struct list_head list;
 };
 
-static struct rb_root phys_mappings = RB_ROOT;
+struct virtmem_table virtmem_hash;
 
-static struct rb_node **find_rb(void *virt)
+static int virt_cmp(void *virt1, void *virt2)
 {
-       struct rb_node **n = &phys_mappings.rb_node;
-       struct phys_desc *d;
-
-       while(*n != NULL){
-               d = rb_entry(*n, struct phys_desc, rb);
-               if(d->virt == virt)
-                       return(n);
-
-               if(d->virt > virt)
-                       n = &(*n)->rb_left;
-               else
-                       n = &(*n)->rb_right;
-       }
-
-       return(n);
+       return(virt1 != virt2);
 }
 
-static struct phys_desc *find_phys_mapping(void *virt)
+static int virt_hash(void *virt)
 {
-       struct rb_node **n = find_rb(virt);
-
-       if(*n == NULL)
-               return(NULL);
-
-       return(rb_entry(*n, struct phys_desc, rb));
+       unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT;
+       return(addr % PHYS_HASHSIZE);
 }
 
-static void insert_phys_mapping(struct phys_desc *desc)
-{
-       struct rb_node **n = find_rb(desc->virt);
-
-       if(*n != NULL)
-               panic("Physical remapping for %p already present",
-                     desc->virt);
-
-       rb_link_node(&desc->rb, (*n)->rb_parent, n);
-       rb_insert_color(&desc->rb, &phys_mappings);
-}
+DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, 
+        virt_hash);
 
 LIST_HEAD(descriptor_mappings);
 
@@ -106,7 +106,7 @@ static struct desc_mapping *descriptor_mapping(int fd)
        if(desc == NULL)
                return(NULL);
 
-       *desc = ((struct desc_mapping)
+       *desc = ((struct desc_mapping) 
                { .fd =         fd,
                  .list =       LIST_HEAD_INIT(desc->list),
                  .pages =      LIST_HEAD_INIT(desc->pages) });
@@ -122,27 +122,32 @@ int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w)
        unsigned long phys;
        int err;
 
+       phys = __pa(virt);
+       desc = find_virtmem_hash(&virtmem_hash, (void *) virt);
+       if(desc != NULL){
+               if((virt != desc->virt) || (fd != desc->fd) || 
+                  (offset != desc->offset))
+                       panic("Address 0x%p is already substituted\n", virt);
+               return(0);
+       }
+
        fd_maps = descriptor_mapping(fd);
        if(fd_maps == NULL)
                return(-ENOMEM);
 
-       phys = __pa(virt);
-       desc = find_phys_mapping(virt);
-       if(desc != NULL)
-               panic("Address 0x%p is already substituted\n", virt);
-
        err = -ENOMEM;
        desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
        if(desc == NULL)
                goto out;
 
-       *desc = ((struct phys_desc)
-               { .fd =                 fd,
+       *desc = ((struct phys_desc) 
+               { .virt_ptrs =  { NULL, NULL },
+                 .fd =         fd,
                  .offset =             offset,
                  .virt =               virt,
                  .phys =               __pa(virt),
                  .list =               LIST_HEAD_INIT(desc->list) });
-       insert_phys_mapping(desc);
+       insert_virtmem_hash(&virtmem_hash, desc);
 
        list_add(&desc->list, &fd_maps->pages);
 
@@ -151,7 +156,7 @@ int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w)
        if(!err)
                goto out;
 
-       rb_erase(&desc->rb, &phys_mappings);
+       remove_virtmem_hash(&virtmem_hash, desc);
        kfree(desc);
  out:
        return(err);
@@ -164,7 +169,7 @@ static void remove_mapping(struct phys_desc *desc)
        void *virt = desc->virt;
        int err;
 
-       rb_erase(&desc->rb, &phys_mappings);
+       remove_virtmem_hash(&virtmem_hash, desc);
        list_del(&desc->list);
        kfree(desc);
 
@@ -179,7 +184,7 @@ int physmem_remove_mapping(void *virt)
        struct phys_desc *desc;
 
        virt = (void *) ((unsigned long) virt & PAGE_MASK);
-       desc = find_phys_mapping(virt);
+       desc = find_virtmem_hash(&virtmem_hash, virt);
        if(desc == NULL)
                return(0);
 
@@ -200,6 +205,9 @@ void physmem_forget_descriptor(int fd)
        if(desc == NULL)
                return;
 
+       if(!list_empty(&desc->pages))
+               printk("Still have mapped pages on fd %d\n", fd);
+
        list_for_each_safe(ele, next, &desc->pages){
                page = list_entry(ele, struct phys_desc, list);
                offset = page->offset;
@@ -221,10 +229,6 @@ void physmem_forget_descriptor(int fd)
        kfree(desc);
 }
 
-EXPORT_SYMBOL(physmem_forget_descriptor);
-EXPORT_SYMBOL(physmem_remove_mapping);
-EXPORT_SYMBOL(physmem_subst_mapping);
-
 void arch_free_page(struct page *page, int order)
 {
        void *virt;
@@ -236,11 +240,16 @@ void arch_free_page(struct page *page, int order)
        }
 }
 
-int is_remapped(void *virt)
+int is_remapped(const void *virt, int fd, __u64 offset)
 {
-       struct phys_desc *desc = find_phys_mapping(virt);
+       struct phys_desc *desc;
 
-       return(desc != NULL);
+       desc = find_virtmem_hash(&virtmem_hash, (void *) virt);
+       if(desc == NULL)
+               return(0);
+       if(offset != desc->offset)
+               printk("offset mismatch\n");
+       return(find_virtmem_hash(&virtmem_hash, (void *) virt) != NULL);
 }
 
 /* Changed during early boot */
@@ -279,7 +288,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
 
        if(kmalloc_ok){
                map = kmalloc(total_len, GFP_KERNEL);
-               if(map == NULL)
+               if(map == NULL) 
                        map = vmalloc(total_len);
        }
        else map = alloc_bootmem_low_pages(total_len);
@@ -328,12 +337,12 @@ static unsigned long kmem_top = 0;
 
 unsigned long get_kmem_end(void)
 {
-       if(kmem_top == 0)
+       if(kmem_top == 0) 
                kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas);
        return(kmem_top);
 }
 
-void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
+void map_memory(unsigned long virt, unsigned long phys, unsigned long len, 
                int r, int w, int x)
 {
        __u64 offset;
@@ -341,14 +350,9 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 
        fd = phys_mapping(phys, &offset);
        err = os_map_memory((void *) virt, fd, offset, len, r, w, x);
-       if(err) {
-               if(err == -ENOMEM)
-                       printk("try increasing the host's "
-                              "/proc/sys/vm/max_map_count to <physical "
-                              "memory size>/4096\n");
+       if(err)
                panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, "
                      "err = %d\n", virt, fd, offset, len, r, w, x, err);
-       }
 }
 
 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
@@ -364,7 +368,7 @@ void setup_physmem(unsigned long start, unsigned long reserve_end,
        physmem_fd = create_mem_file(len + highmem);
 
        offset = uml_reserved - uml_physmem;
-       err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
+       err = os_map_memory((void *) uml_reserved, physmem_fd, offset, 
                            len - offset, 1, 1, 0);
        if(err < 0){
                os_print_error(err, "Mapping memory");
@@ -378,7 +382,8 @@ void setup_physmem(unsigned long start, unsigned long reserve_end,
 
 int phys_mapping(unsigned long phys, __u64 *offset_out)
 {
-       struct phys_desc *desc = find_phys_mapping(__va(phys & PAGE_MASK));
+       struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, 
+                                                  __va(phys & PAGE_MASK));
        int fd = -1;
 
        if(desc != NULL){
@@ -391,9 +396,9 @@ int phys_mapping(unsigned long phys, __u64 *offset_out)
        }
        else if(phys < __pa(end_iomem)){
                struct iomem_region *region = iomem_regions;
-
+       
                while(region != NULL){
-                       if((phys >= region->phys) &&
+                       if((phys >= region->phys) && 
                           (phys < region->phys + region->size)){
                                fd = region->fd;
                                *offset_out = phys - region->phys;
@@ -429,7 +434,7 @@ __uml_setup("mem=", uml_mem_setup,
 unsigned long find_iomem(char *driver, unsigned long *len_out)
 {
        struct iomem_region *region = iomem_regions;
-
+       
        while(region != NULL){
                if(!strcmp(region->driver, driver)){
                        *len_out = region->size;
@@ -447,7 +452,7 @@ int setup_iomem(void)
        int err;
 
        while(region != NULL){
-               err = os_map_memory((void *) iomem_start, region->fd, 0,
+               err = os_map_memory((void *) iomem_start, region->fd, 0, 
                                    region->size, 1, 1, 0);
                if(err)
                        printk("Mapping iomem region for driver '%s' failed, "
index d2f0e82..e795546 100644 (file)
@@ -57,7 +57,7 @@ void init_new_thread_signals(int altstack)
 {
        int flags = altstack ? SA_ONSTACK : 0;
 
-       set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags,
+       set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, 
                    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
        set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, 
                    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
index 18255bd..b701cb2 100644 (file)
@@ -19,6 +19,7 @@
 #include "linux/capability.h"
 #include "linux/vmalloc.h"
 #include "linux/spinlock.h"
+#include "linux/vs_cvirt.h"
 #include "linux/proc_fs.h"
 #include "linux/ptrace.h"
 #include "asm/unistd.h"
index e8caff9..a8439e2 100644 (file)
@@ -55,6 +55,8 @@ long sys_ptrace(long request, long pid, long addr, long data)
        read_unlock(&tasklist_lock);
        if (!child)
                goto out;
+       if (!vx_check(vx_task_xid(child), VX_WATCH|VX_IDENT))
+               goto out_tsk;
 
        ret = -EPERM;
        if (pid == 1)           /* you may not mess with init */
diff --git a/arch/um/kernel/skas/exec_user.c b/arch/um/kernel/skas/exec_user.c
deleted file mode 100644 (file)
index d50633a..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <signal.h>
-#include <sched.h>
-#include <sys/wait.h>
-#include <sys/ptrace.h>
-#include "user.h"
-#include "kern_util.h"
-#include "user_util.h"
-#include "os.h"
-#include "time_user.h"
-
-static int user_thread_tramp(void *arg)
-{
-       if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
-               panic("user_thread_tramp - PTRACE_TRACEME failed, "
-                     "errno = %d\n", errno);
-       enable_timer();
-       os_stop_process(os_getpid());
-       return(0);
-}
-
-int user_thread(unsigned long stack, int flags)
-{
-       int pid, status, err;
-
-       pid = clone(user_thread_tramp, (void *) stack_sp(stack), 
-                   flags | CLONE_FILES | SIGCHLD, NULL);
-       if(pid < 0){
-               printk("user_thread - clone failed, errno = %d\n", errno);
-               return(pid);
-       }
-
-       CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED));
-       if(err < 0){
-               printk("user_thread - waitpid failed, errno = %d\n", errno);
-               return(-errno);
-       }
-
-       if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){
-               printk("user_thread - trampoline didn't stop, status = %d\n", 
-                      status);
-               return(-EINVAL);
-       }
-
-       return(pid);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/include/mode.h b/arch/um/kernel/skas/include/mode.h
deleted file mode 100644 (file)
index 285edc5..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_SKAS_H__
-#define __MODE_SKAS_H__
-
-extern unsigned long exec_regs[];
-extern unsigned long exec_fp_regs[];
-extern unsigned long exec_fpx_regs[];
-extern int have_fpx_regs;
-
-extern void user_time_init_skas(void);
-extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs,
-                                 void *from_ptr);
-extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp,
-                               union uml_pt_regs *regs, 
-                               unsigned long fault_addr, int fault_type);
-extern void sig_handler_common_skas(int sig, void *sc_ptr);
-extern void halt_skas(void);
-extern void reboot_skas(void);
-extern void kill_off_processes_skas(void);
-extern int is_skas_winch(int pid, int fd, void *data);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/include/mode_kern.h b/arch/um/kernel/skas/include/mode_kern.h
deleted file mode 100644 (file)
index 3597c09..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_MODE_KERN_H__
-#define __SKAS_MODE_KERN_H__
-
-#include "linux/sched.h"
-#include "asm/page.h"
-#include "asm/ptrace.h"
-
-extern void flush_thread_skas(void);
-extern void *switch_to_skas(void *prev, void *next);
-extern void start_thread_skas(struct pt_regs *regs, unsigned long eip, 
-                             unsigned long esp);
-extern int copy_thread_skas(int nr, unsigned long clone_flags, 
-                           unsigned long sp, unsigned long stack_top, 
-                           struct task_struct *p, struct pt_regs *regs);
-extern void release_thread_skas(struct task_struct *task);
-extern void exit_thread_skas(void);
-extern void initial_thread_cb_skas(void (*proc)(void *), void *arg);
-extern void init_idle_skas(void);
-extern void flush_tlb_kernel_range_skas(unsigned long start, 
-                                       unsigned long end);
-extern void flush_tlb_kernel_vm_skas(void);
-extern void __flush_tlb_one_skas(unsigned long addr);
-extern void flush_tlb_range_skas(struct vm_area_struct *vma, 
-                                unsigned long start, unsigned long end);
-extern void flush_tlb_mm_skas(struct mm_struct *mm);
-extern void force_flush_all_skas(void);
-extern long execute_syscall_skas(void *r);
-extern void before_mem_skas(unsigned long unused);
-extern unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out,
-                                        unsigned long *task_size_out);
-extern int start_uml_skas(void);
-extern int external_pid_skas(struct task_struct *task);
-extern int thread_pid_skas(struct task_struct *task);
-
-#define kmem_end_skas (host_task_size - 1024 * 1024)
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/skas/include/uaccess.h b/arch/um/kernel/skas/include/uaccess.h
deleted file mode 100644 (file)
index 0d6f30b..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_UACCESS_H
-#define __SKAS_UACCESS_H
-
-#include "asm/errno.h"
-
-#define access_ok_skas(type, addr, size) \
-       ((segment_eq(get_fs(), KERNEL_DS)) || \
-        (((unsigned long) (addr) < TASK_SIZE) && \
-         ((unsigned long) (addr) + (size) <= TASK_SIZE)))
-
-static inline int verify_area_skas(int type, const void * addr, 
-                                  unsigned long size)
-{
-       return(access_ok_skas(type, addr, size) ? 0 : -EFAULT);
-}
-
-extern int copy_from_user_skas(void *to, const void *from, int n);
-extern int copy_to_user_skas(void *to, const void *from, int n);
-extern int strncpy_from_user_skas(char *dst, const char *src, int count);
-extern int __clear_user_skas(void *mem, int len);
-extern int clear_user_skas(void *mem, int len);
-extern int strnlen_user_skas(const void *str, int len);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index 77048cd..d7ff0b9 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/* 
  * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
@@ -14,7 +14,7 @@
 #include "kern_util.h"
 #include "user_util.h"
 
-extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
+extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, 
                             pte_t *pte_out);
 
 static unsigned long maybe_map(unsigned long virt, int is_write)
@@ -34,7 +34,7 @@ static unsigned long maybe_map(unsigned long virt, int is_write)
        return((unsigned long) phys);
 }
 
-static int do_op(unsigned long addr, int len, int is_write,
+static int do_op(unsigned long addr, int len, int is_write, 
                 int (*op)(unsigned long addr, int len, void *arg), void *arg)
 {
        struct page *page;
@@ -106,8 +106,8 @@ static int buffer_op(unsigned long addr, int len, int is_write,
                     void *arg)
 {
        int faulted, res;
-
-       faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg,
+       
+       faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg, 
                                 &res);
        if(!faulted)
                return(res);
@@ -166,7 +166,7 @@ static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
        n = strnlen(to, len);
        *to_ptr += n;
 
-       if(n < len)
+       if(n < len) 
                return(1);
        return(0);
 }
@@ -184,7 +184,7 @@ int strncpy_from_user_skas(char *dst, const char *src, int count)
        if(!access_ok_skas(VERIFY_READ, src, 1))
                return(-EFAULT);
 
-       n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user,
+       n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, 
                      &ptr);
        if(n != 0)
                return(-EFAULT);
@@ -209,7 +209,7 @@ int clear_user_skas(void *mem, int len)
                return(0);
        }
 
-       return(access_ok_skas(VERIFY_WRITE, mem, len) ?
+       return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
               buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len);
 }
 
diff --git a/arch/um/kernel/skas/util/.cvsignore b/arch/um/kernel/skas/util/.cvsignore
deleted file mode 100644 (file)
index cf990da..0000000
+++ /dev/null
@@ -1 +0,0 @@
-mk_ptregs
index a549b3b..2afef5c 100644 (file)
@@ -15,6 +15,8 @@
 #include "linux/unistd.h"
 #include "linux/slab.h"
 #include "linux/utime.h"
+#include <linux/vs_cvirt.h>
+
 #include "asm/mman.h"
 #include "asm/uaccess.h"
 #include "asm/ipc.h"
@@ -56,10 +58,9 @@ long sys_vfork(void)
 }
 
 /* common code for old and new mmaps */
-static inline long do_mmap2(
-       unsigned long addr, unsigned long len,
-       unsigned long prot, unsigned long flags,
-       unsigned long fd, unsigned long pgoff)
+long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len,
+             unsigned long prot, unsigned long flags, unsigned long fd,
+             unsigned long pgoff)
 {
        int error = -EBADF;
        struct file * file = NULL;
@@ -71,9 +72,9 @@ static inline long do_mmap2(
                        goto out;
        }
 
-       down_write(&current->mm->mmap_sem);
-       error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-       up_write(&current->mm->mmap_sem);
+       down_write(&mm->mmap_sem);
+       error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
+       up_write(&mm->mmap_sem);
 
        if (file)
                fput(file);
@@ -85,7 +86,7 @@ long sys_mmap2(unsigned long addr, unsigned long len,
               unsigned long prot, unsigned long flags,
               unsigned long fd, unsigned long pgoff)
 {
-       return do_mmap2(addr, len, prot, flags, fd, pgoff);
+       return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
 }
 
 /*
@@ -112,7 +113,8 @@ long old_mmap(unsigned long addr, unsigned long len,
        if (offset & ~PAGE_MASK)
                goto out;
 
-       err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
+       err = do_mmap2(current->mm, addr, len, prot, flags, fd, 
+                      offset >> PAGE_SHIFT);
  out:
        return err;
 }
@@ -224,7 +226,7 @@ long sys_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err=copy_to_user(name, vx_new_utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -232,6 +234,7 @@ long sys_uname(struct old_utsname * name)
 long sys_olduname(struct oldold_utsname * name)
 {
        long error;
+       struct new_utsname *ptr;
 
        if (!name)
                return -EFAULT;
@@ -240,19 +243,20 @@ long sys_olduname(struct oldold_utsname * name)
   
        down_read(&uts_sem);
        
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+       ptr = vx_new_utsname();
+       error = __copy_to_user(&name->sysname,ptr->sysname,
                               __OLD_UTS_LEN);
        error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+       error |= __copy_to_user(&name->nodename,ptr->nodename,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release,&system_utsname.release,
+       error |= __copy_to_user(&name->release,ptr->release,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->release+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version,&system_utsname.version,
+       error |= __copy_to_user(&name->version,ptr->version,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->version+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,
+       error |= __copy_to_user(&name->machine,ptr->machine,
                                __OLD_UTS_LEN);
        error |= __put_user(0,name->machine+__OLD_UTS_LEN);
        
diff --git a/arch/um/kernel/tt/include/mmu.h b/arch/um/kernel/tt/include/mmu.h
deleted file mode 100644 (file)
index 6b146bd..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_MMU_H
-#define __TT_MMU_H
-
-struct mmu_context_tt {
-};
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/include/mode.h b/arch/um/kernel/tt/include/mode.h
deleted file mode 100644 (file)
index 1a64e75..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_TT_H__
-#define __MODE_TT_H__
-
-#include "sysdep/ptrace.h"
-
-enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB };
-
-extern int tracing_pid;
-
-extern int tracer(int (*init_proc)(void *), void *sp);
-extern void user_time_init_tt(void);
-extern int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data);
-extern int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, 
-                             void *data);
-extern void sig_handler_common_tt(int sig, void *sc);
-extern void syscall_handler_tt(int sig, union uml_pt_regs *regs);
-extern void reboot_tt(void);
-extern void halt_tt(void);
-extern int is_tracer_winch(int pid, int fd, void *data);
-extern void kill_off_processes_tt(void);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/include/mode_kern.h b/arch/um/kernel/tt/include/mode_kern.h
deleted file mode 100644 (file)
index a8c3134..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_MODE_KERN_H__
-#define __TT_MODE_KERN_H__
-
-#include "linux/sched.h"
-#include "asm/page.h"
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-
-extern void *switch_to_tt(void *prev, void *next);
-extern void flush_thread_tt(void);
-extern void start_thread_tt(struct pt_regs *regs, unsigned long eip, 
-                          unsigned long esp);
-extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
-                         unsigned long stack_top, struct task_struct *p, 
-                         struct pt_regs *regs);
-extern void release_thread_tt(struct task_struct *task);
-extern void exit_thread_tt(void);
-extern void initial_thread_cb_tt(void (*proc)(void *), void *arg);
-extern void init_idle_tt(void);
-extern void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end);
-extern void flush_tlb_kernel_vm_tt(void);
-extern void __flush_tlb_one_tt(unsigned long addr);
-extern void flush_tlb_range_tt(struct vm_area_struct *vma, 
-                              unsigned long start, unsigned long end);
-extern void flush_tlb_mm_tt(struct mm_struct *mm);
-extern void force_flush_all_tt(void);
-extern long execute_syscall_tt(void *r);
-extern void before_mem_tt(unsigned long brk_start);
-extern unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, 
-                                      unsigned long *task_size_out);
-extern int start_uml_tt(void);
-extern int external_pid_tt(struct task_struct *task);
-extern int thread_pid_tt(struct task_struct *task);
-
-#define kmem_end_tt (host_task_size - ABOVE_KMEM)
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/include/uaccess.h b/arch/um/kernel/tt/include/uaccess.h
deleted file mode 100644 (file)
index 7399836..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_UACCESS_H
-#define __TT_UACCESS_H
-
-#include "linux/string.h"
-#include "linux/sched.h"
-#include "asm/processor.h"
-#include "asm/errno.h"
-#include "asm/current.h"
-#include "asm/a.out.h"
-#include "uml_uaccess.h"
-
-#define ABOVE_KMEM (16 * 1024 * 1024)
-
-extern unsigned long end_vm;
-extern unsigned long uml_physmem;
-
-#define under_task_size(addr, size) \
-       (((unsigned long) (addr) < TASK_SIZE) && \
-         (((unsigned long) (addr) + (size)) < TASK_SIZE))
-
-#define is_stack(addr, size) \
-       (((unsigned long) (addr) < STACK_TOP) && \
-        ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \
-        (((unsigned long) (addr) + (size)) <= STACK_TOP))
-
-#define access_ok_tt(type, addr, size) \
-       ((type == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \
-         (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \
-          (under_task_size(addr, size) || is_stack(addr, size))))
-
-static inline int verify_area_tt(int type, const void * addr, 
-                                unsigned long size)
-{
-       return(access_ok_tt(type, addr, size) ? 0 : -EFAULT);
-}
-
-extern unsigned long get_fault_addr(void);
-
-extern int __do_copy_from_user(void *to, const void *from, int n,
-                              void **fault_addr, void **fault_catcher);
-extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
-                                 void **fault_addr, void **fault_catcher);
-extern int __do_clear_user(void *mem, size_t len, void **fault_addr,
-                          void **fault_catcher);
-extern int __do_strnlen_user(const char *str, unsigned long n,
-                            void **fault_addr, void **fault_catcher);
-
-extern int copy_from_user_tt(void *to, const void *from, int n);
-extern int copy_to_user_tt(void *to, const void *from, int n);
-extern int strncpy_from_user_tt(char *dst, const char *src, int count);
-extern int __clear_user_tt(void *mem, int len);
-extern int clear_user_tt(void *mem, int len);
-extern int strnlen_user_tt(const void *str, int len);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index 0409718..9c84011 100644 (file)
@@ -1,4 +1,4 @@
-/*
+/* 
  * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
@@ -8,7 +8,7 @@
 
 int copy_from_user_tt(void *to, const void *from, int n)
 {
-       if(!access_ok_tt(VERIFY_READ, from, n))
+       if(!access_ok_tt(VERIFY_READ, from, n)) 
                return(n);
 
        return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
@@ -19,7 +19,7 @@ int copy_to_user_tt(void *to, const void *from, int n)
 {
        if(!access_ok_tt(VERIFY_WRITE, to, n))
                return(n);
-
+               
        return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
                                 &current->thread.fault_catcher));
 }
@@ -28,10 +28,10 @@ int strncpy_from_user_tt(char *dst, const char *src, int count)
 {
        int n;
 
-       if(!access_ok_tt(VERIFY_READ, src, 1))
+       if(!access_ok_tt(VERIFY_READ, src, 1)) 
                return(-EFAULT);
 
-       n = __do_strncpy_from_user(dst, src, count,
+       n = __do_strncpy_from_user(dst, src, count, 
                                   &current->thread.fault_addr,
                                   &current->thread.fault_catcher);
        if(n < 0) return(-EFAULT);
diff --git a/arch/um/kernel/user_syms.c b/arch/um/kernel/user_syms.c
deleted file mode 100644 (file)
index 2d32ea3..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <dirent.h>
-#include <errno.h>
-#include <utime.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/vfs.h>
-#include <sys/ioctl.h>
-#include "user_util.h"
-#include "mem_user.h"
-#include "uml-config.h"
-
-/* Had to steal this from linux/module.h because that file can't be included
- * since this includes various user-level headers.
- */
-
-struct module_symbol
-{
-       unsigned long value;
-       const char *name;
-};
-
-/* Indirect stringification.  */
-
-#define __MODULE_STRING_1(x)   #x
-#define __MODULE_STRING(x)     __MODULE_STRING_1(x)
-
-#if !defined(__AUTOCONF_INCLUDED__)
-
-#define __EXPORT_SYMBOL(sym,str)   error config_must_be_included_before_module
-#define EXPORT_SYMBOL(var)        error config_must_be_included_before_module
-#define EXPORT_SYMBOL_NOVERS(var)  error config_must_be_included_before_module
-
-#elif !defined(UML_CONFIG_MODULES)
-
-#define __EXPORT_SYMBOL(sym,str)
-#define EXPORT_SYMBOL(var)
-#define EXPORT_SYMBOL_NOVERS(var)
-
-#else
-
-#define __EXPORT_SYMBOL(sym, str)                      \
-const char __kstrtab_##sym[]                           \
-__attribute__((section(".kstrtab"))) = str;            \
-const struct module_symbol __ksymtab_##sym             \
-__attribute__((section("__ksymtab"))) =                        \
-{ (unsigned long)&sym, __kstrtab_##sym }
-
-#if defined(__MODVERSIONS__) || !defined(UML_CONFIG_MODVERSIONS)
-#define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var))
-#else
-#define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
-#endif
-
-#define EXPORT_SYMBOL_NOVERS(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var))
-
-#endif
-
-EXPORT_SYMBOL(__errno_location);
-
-EXPORT_SYMBOL(access);
-EXPORT_SYMBOL(open);
-EXPORT_SYMBOL(open64);
-EXPORT_SYMBOL(close);
-EXPORT_SYMBOL(read);
-EXPORT_SYMBOL(write);
-EXPORT_SYMBOL(dup2);
-EXPORT_SYMBOL(__xstat);
-EXPORT_SYMBOL(__lxstat);
-EXPORT_SYMBOL(__lxstat64);
-EXPORT_SYMBOL(lseek);
-EXPORT_SYMBOL(lseek64);
-EXPORT_SYMBOL(chown);
-EXPORT_SYMBOL(truncate);
-EXPORT_SYMBOL(utime);
-EXPORT_SYMBOL(chmod);
-EXPORT_SYMBOL(rename);
-EXPORT_SYMBOL(__xmknod);
-
-EXPORT_SYMBOL(symlink);
-EXPORT_SYMBOL(link);
-EXPORT_SYMBOL(unlink);
-EXPORT_SYMBOL(readlink);
-
-EXPORT_SYMBOL(mkdir);
-EXPORT_SYMBOL(rmdir);
-EXPORT_SYMBOL(opendir);
-EXPORT_SYMBOL(readdir);
-EXPORT_SYMBOL(closedir);
-EXPORT_SYMBOL(seekdir);
-EXPORT_SYMBOL(telldir);
-
-EXPORT_SYMBOL(ioctl);
-
-extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes,
-                       __off64_t __offset);
-extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n,
-                        __off64_t __offset);
-EXPORT_SYMBOL(pread64);
-EXPORT_SYMBOL(pwrite64);
-
-EXPORT_SYMBOL(statfs);
-EXPORT_SYMBOL(statfs64);
-
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(getuid);
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strstr);
-
-EXPORT_SYMBOL(find_iomem);
diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S
new file mode 100644 (file)
index 0000000..db0445e
--- /dev/null
@@ -0,0 +1,11 @@
+#include <asm-generic/vmlinux.lds.h>
+       
+OUTPUT_FORMAT(ELF_FORMAT)
+OUTPUT_ARCH(ELF_ARCH)
+ENTRY(_start)
+jiffies = jiffies_64;
+
+SECTIONS
+{
+#include "asm/common.lds.S"
+}
index 3866884..9d06171 100644 (file)
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <signal.h>
+#include <errno.h>
 #include <sys/resource.h>
 #include <sys/mman.h>
 #include <sys/user.h>
@@ -16,6 +17,8 @@
 #include "kern_util.h"
 #include "mem_user.h"
 #include "signal_user.h"
+#include "time_user.h"
+#include "irq_user.h"
 #include "user.h"
 #include "init.h"
 #include "mode.h"
@@ -123,12 +126,14 @@ int main(int argc, char **argv, char **envp)
 
        set_stklim();
 
-       if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){
+       new_argv = malloc((argc + 1) * sizeof(char *));
+       if(new_argv == NULL){
                perror("Mallocing argv");
                exit(1);
        }
        for(i=0;i<argc;i++){
-               if((new_argv[i] = strdup(argv[i])) == NULL){
+               new_argv[i] = strdup(argv[i]);
+               if(new_argv[i] == NULL){
                        perror("Mallocing an arg");
                        exit(1);
                }
@@ -144,7 +149,20 @@ int main(int argc, char **argv, char **envp)
        
        /* Reboot */
        if(ret){
+               int err;
+
                printf("\n");
+
+               /* Let any pending signals fire, then disable them.  This 
+                * ensures that they won't be delivered after the exec, when 
+                * they are definitely not expected.
+                */
+               unblock_signals();
+               disable_timer();
+               err = deactivate_all_fds();
+               if(err)
+                       printf("deactivate_all_fds failed, errno = %d\n", -err);
+
                execvp(new_argv[0], new_argv);
                perror("Failed to exec kernel");
                ret = 1;
@@ -160,10 +178,21 @@ extern void *__real_malloc(int);
 
 void *__wrap_malloc(int size)
 {
-       if(CAN_KMALLOC())
-               return(um_kmalloc(size));
-       else
+       void *ret;
+
+       if(!CAN_KMALLOC())
                return(__real_malloc(size));
+       else if(size <= PAGE_SIZE) /* finding contiguous pages is hard */
+               ret = um_kmalloc(size);
+       else ret = um_vmalloc(size);
+
+       /* glibc people insist that if malloc fails, errno should be
+        * set by malloc as well. So we do.
+        */
+       if(ret == NULL)
+               errno = ENOMEM;
+
+       return(ret);
 }
 
 void *__wrap_calloc(int n, int size)
@@ -177,9 +206,35 @@ void *__wrap_calloc(int n, int size)
 
 extern void __real_free(void *);
 
+extern unsigned long high_physmem;
+
 void __wrap_free(void *ptr)
 {
-       if(CAN_KMALLOC()) kfree(ptr);
+       unsigned long addr = (unsigned long) ptr;
+
+       /* We need to know how the allocation happened, so it can be correctly
+        * freed.  This is done by seeing what region of memory the pointer is
+        * in -
+        *      physical memory - kmalloc/kfree
+        *      kernel virtual memory - vmalloc/vfree
+        *      anywhere else - malloc/free
+        * If kmalloc is not yet possible, then the kernel memory regions
+        * may not be set up yet, and the variables not set up.  So,
+        * free is called.
+        *
+        * CAN_KMALLOC is checked because it would be bad to free a buffer
+        * with kmalloc/vmalloc after they have been turned off during 
+        * shutdown.
+        */
+
+       if((addr >= uml_physmem) && (addr < high_physmem)){
+               if(CAN_KMALLOC())
+                       kfree(ptr);
+       }
+       else if((addr >= start_vm) && (addr < end_vm)){
+               if(CAN_KMALLOC())
+                       vfree(ptr);
+       }
        else __real_free(ptr);
 }
 
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
new file mode 100644 (file)
index 0000000..56b3782
--- /dev/null
@@ -0,0 +1,404 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include "os.h"
+#include "helper.h"
+#include "aio.h"
+#include "init.h"
+#include "user.h"
+#include "mode.h"
+
+struct aio_thread_req {
+       enum aio_type type;
+       int io_fd;
+       unsigned long long offset;
+       char *buf;
+       int len;
+       int reply_fd;
+       void *data;
+};
+
+static int aio_req_fd_r = -1;
+static int aio_req_fd_w = -1;
+
+#if defined(HAVE_AIO_ABI)
+#include <linux/aio_abi.h>
+
+/* If we have the headers, we are going to build with AIO enabled.
+ * If we don't have aio in libc, we define the necessary stubs here.
+ */
+
+#if !defined(HAVE_AIO_LIBC)
+
+#define __NR_io_setup 245
+#define __NR_io_getevents 247
+#define __NR_io_submit 248
+
+static long io_setup(int n, aio_context_t *ctxp)
+{
+  return(syscall(__NR_io_setup, n, ctxp));
+}
+
+static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
+{
+  return(syscall(__NR_io_submit, ctx, nr, iocbpp));
+}
+
+static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
+                        struct io_event *events, struct timespec *timeout)
+{
+  return(syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout));
+}
+
+#endif
+
+/* The AIO_MMAP cases force the mmapped page into memory here
+ * rather than in whatever place first touches the data.  I used
+ * to do this by touching the page, but that's delicate because
+ * gcc is prone to optimizing that away.  So, what's done here
+ * is we read from the descriptor from which the page was 
+ * mapped.  The caller is required to pass an offset which is
+ * inside the page that was mapped.  Thus, when the read 
+ * returns, we know that the page is in the page cache, and
+ * that it now backs the mmapped area.
+ */
+
+static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, 
+                 int len, unsigned long long offset, void *data)
+{
+       struct iocb iocb, *iocbp = &iocb;
+       char c;
+       int err;
+
+       iocb = ((struct iocb) { .aio_data       = (unsigned long) data,
+                               .aio_reqprio    = 0,
+                               .aio_fildes     = fd,
+                               .aio_buf        = (unsigned long) buf,
+                               .aio_nbytes     = len,
+                               .aio_offset     = offset,
+                               .aio_reserved1  = 0,
+                               .aio_reserved2  = 0,
+                               .aio_reserved3  = 0 });
+
+       switch(type){
+       case AIO_READ:
+               iocb.aio_lio_opcode = IOCB_CMD_PREAD;
+               err = io_submit(ctx, 1, &iocbp);
+               break;
+       case AIO_WRITE:
+               iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
+               err = io_submit(ctx, 1, &iocbp);
+               break;
+       case AIO_MMAP:
+               iocb.aio_lio_opcode = IOCB_CMD_PREAD;
+               iocb.aio_buf = (unsigned long) &c;
+               iocb.aio_nbytes = sizeof(c);
+               err = io_submit(ctx, 1, &iocbp);
+               break;
+       default:
+               printk("Bogus op in do_aio - %d\n", type);
+               err = -EINVAL;
+               break;
+       }
+       if(err > 0)
+               err = 0;
+
+       return(err);    
+}
+
+static aio_context_t ctx = 0;
+
+static int aio_thread(void *arg)
+{
+       struct aio_thread_reply reply;
+       struct io_event event;
+       int err, n, reply_fd;
+
+       signal(SIGWINCH, SIG_IGN);
+
+       while(1){
+               n = io_getevents(ctx, 1, 1, &event, NULL);
+               if(n < 0){
+                       if(errno == EINTR)
+                               continue;
+                       printk("aio_thread - io_getevents failed, "
+                              "errno = %d\n", errno);
+               }
+               else {
+                       reply = ((struct aio_thread_reply) 
+                               { .data = (void *) event.data,
+                                 .err  = event.res });
+                       reply_fd = 
+                               ((struct aio_context *) event.data)->reply_fd;
+                       err = os_write_file(reply_fd, &reply, sizeof(reply));
+                       if(err != sizeof(reply))
+                               printk("not_aio_thread - write failed, "
+                                      "fd = %d, err = %d\n", 
+                                      aio_req_fd_r, -err);
+               }
+       }
+       return(0);
+}
+
+#endif
+
+static int do_not_aio(struct aio_thread_req *req)
+{
+       char c;
+       int err;
+
+       switch(req->type){
+       case AIO_READ:
+               err = os_seek_file(req->io_fd, req->offset);
+               if(err)
+                       goto out;
+
+               err = os_read_file(req->io_fd, req->buf, req->len);
+               break;
+       case AIO_WRITE:
+               err = os_seek_file(req->io_fd, req->offset);
+               if(err)
+                       goto out;
+
+               err = os_write_file(req->io_fd, req->buf, req->len);
+               break;
+       case AIO_MMAP:
+               err = os_seek_file(req->io_fd, req->offset);
+               if(err)
+                       goto out;
+
+               err = os_read_file(req->io_fd, &c, sizeof(c));
+               break;
+       default:
+               printk("do_not_aio - bad request type : %d\n", req->type);
+               err = -EINVAL;
+               break;
+       }
+
+ out:
+       return(err);
+}
+
+static int not_aio_thread(void *arg)
+{
+       struct aio_thread_req req;
+       struct aio_thread_reply reply;
+       int err;
+
+       signal(SIGWINCH, SIG_IGN);
+       while(1){
+               err = os_read_file(aio_req_fd_r, &req, sizeof(req));
+               if(err != sizeof(req)){
+                       if(err < 0)
+                               printk("not_aio_thread - read failed, fd = %d, "
+                                      "err = %d\n", aio_req_fd_r, -err);
+                       else {
+                               printk("not_aio_thread - short read, fd = %d, "
+                                      "length = %d\n", aio_req_fd_r, err);
+                       }
+                       continue;
+               }
+               err = do_not_aio(&req);
+               reply = ((struct aio_thread_reply) { .data      = req.data,
+                                                    .err       = err });
+               err = os_write_file(req.reply_fd, &reply, sizeof(reply));
+               if(err != sizeof(reply))
+                       printk("not_aio_thread - write failed, fd = %d, "
+                              "err = %d\n", aio_req_fd_r, -err);
+       }
+}
+
+static int aio_pid = -1;
+
+static int init_aio_24(void)
+{
+       unsigned long stack;
+       int fds[2], err;
+       
+       err = os_pipe(fds, 1, 1);
+       if(err)
+               goto out;
+
+       aio_req_fd_w = fds[0];
+       aio_req_fd_r = fds[1];
+       err = run_helper_thread(not_aio_thread, NULL, 
+                               CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
+       if(err < 0)
+               goto out_close_pipe;
+
+       aio_pid = err;
+       goto out;
+
+ out_close_pipe:
+       os_close_file(fds[0]);
+       os_close_file(fds[1]);
+       aio_req_fd_w = -1;
+       aio_req_fd_r = -1;      
+ out:
+       return(0);
+}
+
+#ifdef HAVE_AIO_ABI
+#define DEFAULT_24_AIO 0
+static int init_aio_26(void)
+{
+       unsigned long stack;
+       int err;
+       
+       if(io_setup(256, &ctx)){
+               printk("aio_thread failed to initialize context, err = %d\n",
+                      errno);
+               return(-errno);
+       }
+
+       err = run_helper_thread(aio_thread, NULL, 
+                               CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
+       if(err < 0)
+               return(-errno);
+
+       aio_pid = err;
+       err = 0;
+ out:
+       return(err);
+}
+
+int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, 
+                 unsigned long long offset, int reply_fd, void *data)
+{
+       struct aio_thread_reply reply;
+       int err;
+
+       ((struct aio_context *) data)->reply_fd = reply_fd;
+
+       err = do_aio(ctx, type, io_fd, buf, len, offset, data);
+       if(err){
+               reply = ((struct aio_thread_reply) { .data = data,
+                                                    .err  = err });
+               err = os_write_file(reply_fd, &reply, sizeof(reply));
+               if(err != sizeof(reply))
+                       printk("submit_aio_26 - write failed, "
+                              "fd = %d, err = %d\n", reply_fd, -err);
+               else err = 0;
+       }
+
+       return(err);
+}
+
+#else
+#define DEFAULT_24_AIO 1
+static int init_aio_26(void)
+{
+       return(-ENOSYS);
+}
+
+int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, 
+                 unsigned long long offset, int reply_fd, void *data)
+{
+       return(-ENOSYS);
+}
+#endif
+
+static int aio_24 = DEFAULT_24_AIO;
+
+static int __init set_aio_24(char *name, int *add)
+{
+       aio_24 = 1;
+       return(0);
+}
+
+__uml_setup("aio=2.4", set_aio_24,
+"aio=2.4\n"
+"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
+"    available.  2.4 AIO is a single thread that handles one request at a\n"
+"    time, synchronously.  2.6 AIO is a thread which uses 2.5 AIO interface\n"
+"    to handle an arbitrary number of pending requests.  2.6 AIO is not\n"
+"    available in tt mode, on 2.4 hosts, or when UML is built with\n"
+"    /usr/include/linux/aio_abi no available.\n\n"
+);
+
+static int init_aio(void)
+{
+       int err;
+
+       CHOOSE_MODE(({ 
+               if(!aio_24){ 
+                       printk("Disabling 2.6 AIO in tt mode\n");
+                       aio_24 = 1;
+               } }), (void) 0);
+
+       if(!aio_24){
+               err = init_aio_26();
+               if(err && (errno == ENOSYS)){
+                       printk("2.6 AIO not supported on the host - "
+                              "reverting to 2.4 AIO\n");
+                       aio_24 = 1;
+               }
+               else return(err);
+       }
+
+       if(aio_24)
+               return(init_aio_24());
+
+       return(0);
+}
+
+__initcall(init_aio);
+
+static void exit_aio(void)
+{
+       if(aio_pid != -1)
+               os_kill_process(aio_pid, 1);
+}
+
+__uml_exitcall(exit_aio);
+
+int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, 
+                 unsigned long long offset, int reply_fd, void *data)
+{
+       struct aio_thread_req req = { .type             = type,
+                                     .io_fd            = io_fd,
+                                     .offset           = offset,
+                                     .buf              = buf,
+                                     .len              = len,
+                                     .reply_fd         = reply_fd,
+                                     .data             = data,
+       };
+       int err;
+
+       err = os_write_file(aio_req_fd_w, &req, sizeof(req));
+       if(err == sizeof(req))
+               err = 0;
+
+       return(err);
+}
+
+int submit_aio(enum aio_type type, int io_fd, char *buf, int len, 
+              unsigned long long offset, int reply_fd, void *data)
+{
+       if(aio_24)
+               return(submit_aio_24(type, io_fd, buf, len, offset, reply_fd, 
+                                    data));
+       else {
+               return(submit_aio_26(type, io_fd, buf, len, offset, reply_fd, 
+                                    data));
+       }
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index 77d4066..53a4a15 100644 (file)
@@ -15,6 +15,8 @@
 #include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/vfs.h>
 #include "os.h"
 #include "user.h"
 #include "kern_util.h"
@@ -266,6 +268,7 @@ int os_open_file(char *file, struct openflags flags, int mode)
        if(flags.c) f |= O_CREAT;
        if(flags.t) f |= O_TRUNC;
        if(flags.e) f |= O_EXCL;
+       if(flags.d) f |= O_DIRECT;
 
        fd = open64(file, f, mode);
        if(fd < 0)
index 2433c9e..ef0fb71 100644 (file)
 
 extern size_t strlen(const char *);
 extern void *memcpy(void *, const void *, size_t);
-extern void *memmove(void *, const void *, size_t);
 extern void *memset(void *, int, size_t);
 extern int printf(const char *, ...);
 
 EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(printf);
 
 EXPORT_SYMBOL(strstr);
 
 /* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
- * However, the modules will use the CRC defined *here*, no matter if it is
+ * However, the modules will use the CRC defined *here*, no matter if it is 
  * good; so the versions of these symbols will always match
  */
 #define EXPORT_SYMBOL_PROTO(sym)       \
diff --git a/arch/um/sys-i386/bitops.c b/arch/um/sys-i386/bitops.c
new file mode 100644 (file)
index 0000000..97db385
--- /dev/null
@@ -0,0 +1,70 @@
+#include <linux/bitops.h>
+#include <linux/module.h>
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search, in bits
+ * @offset: The bit number to start searching at
+ */
+int find_next_bit(const unsigned long *addr, int size, int offset)
+{
+       const unsigned long *p = addr + (offset >> 5);
+       int set = 0, bit = offset & 31, res;
+
+       if (bit) {
+               /*
+                * Look for a set bit in the rest of the first, partial word:
+                */
+               __asm__("bsfl %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movl $32, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (*p >> bit));
+               if (set < (32 - bit))
+                       return set + offset;
+               set = 32 - bit;
+               p++;
+       }
+       /*
+        * No set bit yet; search the remaining whole words starting at p
+        */
+       res = find_first_bit (p, size - 32 * (p - addr));
+       return (offset + set + res);
+}
+EXPORT_SYMBOL(find_next_bit);
+
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search, in bits
+ * @offset: The bit number to start searching at
+ */
+int find_next_zero_bit(const unsigned long *addr, int size, int offset)
+{
+       unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
+       int set = 0, bit = offset & 31, res;
+
+       if (bit) {
+               /*
+                * Look for a zero in the rest of the first, partial word.
+                */
+               __asm__("bsfl %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movl $32, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (~(*p >> bit)));
+               if (set < (32 - bit))
+                       return set + offset;
+               set = 32 - bit;
+               p++;
+       }
+       /*
+        * No zero yet, search remaining full words for a zero
+        */
+       res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
+       return (offset + set + res);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/um/sys-i386/extable.c b/arch/um/sys-i386/extable.c
deleted file mode 100644 (file)
index 946e7ad..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * linux/arch/i386/mm/extable.c
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <asm/uaccess.h>
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
-              const struct exception_table_entry *last,
-              unsigned long value)
-{
-        while (first <= last) {
-               const struct exception_table_entry *mid;
-               long diff;
-
-               mid = (last - first) / 2 + first;
-               diff = mid->insn - value;
-                if (diff == 0)
-                        return mid;
-                else if (diff < 0)
-                        first = mid+1;
-                else
-                        last = mid-1;
-        }
-        return NULL;
-}
diff --git a/arch/um/sys-i386/semaphore.c b/arch/um/sys-i386/semaphore.c
new file mode 100644 (file)
index 0000000..073912c
--- /dev/null
@@ -0,0 +1,301 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@redhat.com>
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleepers"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleepers" and the contention routine ordering is protected
+ * by the spinlock in the semaphore's waitqueue head.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+asmlinkage void __up(struct semaphore *sem)
+{
+       wake_up(&sem->wait);
+}
+
+asmlinkage void __sched __down(struct semaphore * sem)
+{
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       unsigned long flags;
+
+       tsk->state = TASK_UNINTERRUPTIBLE;
+       spin_lock_irqsave(&sem->wait.lock, flags);
+       add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+       sem->sleepers++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock in
+                * the wait_queue_head.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+               schedule();
+
+               spin_lock_irqsave(&sem->wait.lock, flags);
+               tsk->state = TASK_UNINTERRUPTIBLE;
+       }
+       remove_wait_queue_locked(&sem->wait, &wait);
+       wake_up_locked(&sem->wait);
+       spin_unlock_irqrestore(&sem->wait.lock, flags);
+       tsk->state = TASK_RUNNING;
+}
+
+asmlinkage int __sched __down_interruptible(struct semaphore * sem)
+{
+       int retval = 0;
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       unsigned long flags;
+
+       tsk->state = TASK_INTERRUPTIBLE;
+       spin_lock_irqsave(&sem->wait.lock, flags);
+       add_wait_queue_exclusive_locked(&sem->wait, &wait);
+
+       sem->sleepers++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * With signals pending, this turns into
+                * the trylock failure case - we won't be
+                * sleeping, and we can't get the lock as
+                * it has contention. Just correct the count
+                * and exit.
+                */
+               if (signal_pending(current)) {
+                       retval = -EINTR;
+                       sem->sleepers = 0;
+                       atomic_add(sleepers, &sem->count);
+                       break;
+               }
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock in
+                * wait_queue_head. The "-1" is because we're
+                * still hoping to get the semaphore.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+               schedule();
+
+               spin_lock_irqsave(&sem->wait.lock, flags);
+               tsk->state = TASK_INTERRUPTIBLE;
+       }
+       remove_wait_queue_locked(&sem->wait, &wait);
+       wake_up_locked(&sem->wait);
+       spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+       tsk->state = TASK_RUNNING;
+       return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+asmlinkage int __down_trylock(struct semaphore * sem)
+{
+       int sleepers;
+       unsigned long flags;
+
+       spin_lock_irqsave(&sem->wait.lock, flags);
+       sleepers = sem->sleepers + 1;
+       sem->sleepers = 0;
+
+       /*
+        * Add "everybody else" and us into it. They aren't
+        * playing, because we own the spinlock in the
+        * wait_queue_head.
+        */
+       if (!atomic_add_negative(sleepers, &sem->count)) {
+               wake_up_locked(&sem->wait);
+       }
+
+       spin_unlock_irqrestore(&sem->wait.lock, flags);
+       return 1;
+}
+
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allows us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %ecx contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax when used as a return
+ * value.
+ */
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed\n"
+"__down_failed:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %eax\n\t"
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+       "popl %eax\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed_interruptible\n"
+"__down_failed_interruptible:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down_interruptible\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __down_failed_trylock\n"
+"__down_failed_trylock:\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "pushl %ebp\n\t"
+       "movl  %esp,%ebp\n\t"
+#endif
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __down_trylock\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+#if defined(CONFIG_FRAME_POINTER)
+       "movl %ebp,%esp\n\t"
+       "popl %ebp\n\t"
+#endif
+       "ret"
+);
+
+asm(
+".section .sched.text\n"
+".align 4\n"
+".globl __up_wakeup\n"
+"__up_wakeup:\n\t"
+       "pushl %eax\n\t"
+       "pushl %edx\n\t"
+       "pushl %ecx\n\t"
+       "call __up\n\t"
+       "popl %ecx\n\t"
+       "popl %edx\n\t"
+       "popl %eax\n\t"
+       "ret"
+);
+
+/*
+ * rw spinlock fallbacks
+ */
+#if defined(CONFIG_SMP)
+asm(
+".section .sched.text\n"
+".align        4\n"
+".globl        __write_lock_failed\n"
+"__write_lock_failed:\n\t"
+       LOCK "addl      $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"1:    rep; nop\n\t"
+       "cmpl   $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+       "jne    1b\n\t"
+       LOCK "subl      $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+       "jnz    __write_lock_failed\n\t"
+       "ret"
+);
+
+asm(
+".section .sched.text\n"
+".align        4\n"
+".globl        __read_lock_failed\n"
+"__read_lock_failed:\n\t"
+       LOCK "incl      (%eax)\n"
+"1:    rep; nop\n\t"
+       "cmpl   $1,(%eax)\n\t"
+       "js     1b\n\t"
+       LOCK "decl      (%eax)\n\t"
+       "js     __read_lock_failed\n\t"
+       "ret"
+);
+#endif
diff --git a/arch/um/sys-i386/util/.cvsignore b/arch/um/sys-i386/util/.cvsignore
deleted file mode 100644 (file)
index c3413c2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-mk_sc
-mk_thread
index 34860f9..42e7432 100644 (file)
@@ -1,3 +1,5 @@
+host-progs     := mk_sc mk_thread
+always         := $(host-progs)
 
 hostprogs-y    := mk_sc mk_thread
 always         := $(hostprogs-y)
index d182fb5..3c2fdec 100644 (file)
@@ -9,7 +9,6 @@ SECTIONS
 {
   . = START + SIZEOF_HEADERS;
 
-  . = ALIGN(4096);
   __binary_start = .;
 #ifdef MODE_TT
   .thread_private : {
@@ -26,11 +25,16 @@ SECTIONS
   . = ALIGN(4096);             /* Init code and data */
   _stext = .;
   __init_begin = .;
-  .text.init : { *(.text.init) }
+  .init.text : { 
+       _sinittext = .;
+       *(.init.text)
+       _einittext = .;
+  }
   . = ALIGN(4096);
   .text      :
   {
     *(.text)
+    SCHED_TEXT
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
     *(.gnu.linkonce.t*)
@@ -38,7 +42,7 @@ SECTIONS
 
   #include "asm/common.lds.S"
 
-  .data.init : { *(.data.init) }
+  .init.data : { *(.init.data) }       /* leading dot, as in .init.text */
   .data    :
   {
     . = ALIGN(KERNEL_STACK_SIZE);              /* init_task */
diff --git a/arch/um/util/.cvsignore b/arch/um/util/.cvsignore
deleted file mode 100644 (file)
index da37075..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-mk_constants
-mk_task
index 23db325..fd2eb18 100644 (file)
@@ -428,23 +428,6 @@ config UID16
        depends on IA32_EMULATION
        default y
 
-config KEXEC
-       bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
-       help
-         kexec is a system call that implements the ability to shutdown your
-         current kernel, and to start another kernel.  It is like a reboot
-         but it is indepedent of the system firmware.   And like a reboot
-         you can start any kernel with it, not just Linux.
-
-         The name comes from the similiarity to the exec system call.
-
-         It is an ongoing process to be certain the hardware in a machine
-         is properly shutdown, so do not be surprised if this code does not
-         initially work for you.  It may help to enable device hotplugging
-         support.  As of this writing the exact hardware interface is
-         strongly in flux, so no good recommendation can be made.
-
 endmenu
 
 source drivers/Kconfig
index e0405ab..2c0f3af 100644 (file)
@@ -19,7 +19,6 @@ obj-$(CONFIG_SMP)             += smp.o smpboot.o trampoline.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o  nmi.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o \
                genapic.o genapic_cluster.o genapic_flat.o
-obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_PM)               += suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
index 5aac099..cd37a0a 100644 (file)
@@ -145,36 +145,6 @@ void disconnect_bsp_APIC(void)
                outb(0x70, 0x22);
                outb(0x00, 0x23);
        }
-       else {
-               /* Go back to Virtual Wire compatibility mode */
-               unsigned long value;
-
-               /* For the spurious interrupt use vector F, and enable it */
-               value = apic_read(APIC_SPIV);
-               value &= ~APIC_VECTOR_MASK;
-               value |= APIC_SPIV_APIC_ENABLED;
-               value |= 0xf;
-               apic_write_around(APIC_SPIV, value);
-
-               /* For LVT0 make it edge triggered, active high, external and enabled */
-               value = apic_read(APIC_LVT0);
-               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXINT);
-               apic_write_around(APIC_LVT0, value);
-
-               /* For LVT1 make it edge triggered, active high, nmi and enabled */
-               value = apic_read(APIC_LVT1);
-               value &= ~(
-                       APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-               apic_write_around(APIC_LVT1, value);
-       }
 }
 
 void disable_local_APIC(void)
index 72f7a7f..5b65992 100644 (file)
@@ -185,6 +185,8 @@ void __init e820_reserve_resources(void)
        int i;
        for (i = 0; i < e820.nr_map; i++) {
                struct resource *res;
+               if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+                       continue;
                res = alloc_bootmem_low(sizeof(struct resource));
                switch (e820.map[i].type) {
                case E820_RAM:  res->name = "System RAM"; break;
index f8c10a6..7929a2e 100644 (file)
@@ -415,24 +415,10 @@ static int i8259A_suspend(struct sys_device *dev, u32 state)
        return 0;
 }
 
-
-
-static int i8259A_shutdown(struct sys_device *dev)
-{
-       /* Put the i8259A into a quiescent state that
-        * the kernel initialization code can get it
-        * out of.
-        */
-       outb(0xff, 0x21);       /* mask all of 8259A-1 */
-       outb(0xff, 0xA1);       /* mask all of 8259A-1 */
-       return 0;
-}
-
 static struct sysdev_class i8259_sysdev_class = {
        set_kset_name("i8259"),
        .suspend = i8259A_suspend,
        .resume = i8259A_resume,
-       .shutdown = i8259A_shutdown,
 };
 
 static struct sys_device device_i8259A = {
index 28c1cb3..c22a8f0 100644 (file)
@@ -327,7 +327,7 @@ static int find_irq_entry(int apic, int pin, int type)
 /*
  * Find the pin to which IRQ[irq] (ISA) is connected
  */
-static int find_isa_irq_pin(int irq, int type)
+static int __init find_isa_irq_pin(int irq, int type)
 {
        int i;
 
@@ -1125,43 +1125,11 @@ static void __init enable_IO_APIC(void)
  */
 void disable_IO_APIC(void)
 {
-       int pin;
        /*
         * Clear the IO-APIC before rebooting:
         */
        clear_IO_APIC();
 
-       /*
-        * If the i82559 is routed through an IOAPIC
-        * Put that IOAPIC in virtual wire mode
-        * so legacy interrups can be delivered.
-        */
-       pin = find_isa_irq_pin(0, mp_ExtINT);
-       if (pin != -1) {
-               struct IO_APIC_route_entry entry;
-               unsigned long flags;
-
-               memset(&entry, 0, sizeof(entry));
-               entry.mask            = 0; /* Enabled */
-               entry.trigger         = 0; /* Edge */
-               entry.irr             = 0;
-               entry.polarity        = 0; /* High */
-               entry.delivery_status = 0;
-               entry.dest_mode       = 0; /* Physical */
-               entry.delivery_mode   = 7; /* ExtInt */
-               entry.vector          = 0;
-               entry.dest.physical.physical_dest = 0;
-
-
-               /*
-                * Add it to the IO-APIC irq-routing table:
-                */
-               spin_lock_irqsave(&ioapic_lock, flags);
-               io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
-               io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
-               spin_unlock_irqrestore(&ioapic_lock, flags);
-       }
-
        disconnect_bsp_APIC();
 }
 
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
deleted file mode 100644 (file)
index 974d235..0000000
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * machine_kexec.c - handle transition of Linux booting another kernel
- * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/kexec.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/reboot.h>
-#include <asm/pda.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/cpufeature.h>
-#include <asm/hw_irq.h>
-
-#define LEVEL0_SIZE (1UL << 12UL)
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-#define LEVEL3_SIZE (1UL << 39UL)
-#define LEVEL4_SIZE (1UL << 48UL)
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
-#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-static void init_level2_page(
-       uint64_t *level2p, unsigned long addr)
-{
-       unsigned long end_addr;
-       addr &= PAGE_MASK;
-       end_addr = addr + LEVEL2_SIZE;
-       while(addr < end_addr) {
-               *(level2p++) = addr | L1_ATTR;
-               addr += LEVEL1_SIZE;
-       }
-}
-
-static int init_level3_page(struct kimage *image,
-       uint64_t *level3p, unsigned long addr, unsigned long last_addr)
-{
-       unsigned long end_addr;
-       int result;
-       result = 0;
-       addr &= PAGE_MASK;
-       end_addr = addr + LEVEL3_SIZE;
-       while((addr < last_addr) && (addr < end_addr)) {
-               struct page *page;
-               uint64_t *level2p;
-               page = kimage_alloc_control_pages(image, 0);
-               if (!page) {
-                       result = -ENOMEM;
-                       goto out;
-               }
-               level2p = (uint64_t *)page_address(page);
-               init_level2_page(level2p, addr);
-               *(level3p++) = __pa(level2p) | L2_ATTR;
-               addr += LEVEL2_SIZE;
-       }
-       /* clear the unused entries */
-       while(addr < end_addr) {
-               *(level3p++) = 0;
-               addr += LEVEL2_SIZE;
-       }
-out:
-       return result;
-}
-
-
-static int init_level4_page(struct kimage *image,
-       uint64_t *level4p, unsigned long addr, unsigned long last_addr)
-{
-       unsigned long end_addr;
-       int result;
-       result = 0;
-       addr &= PAGE_MASK;
-       end_addr = addr + LEVEL4_SIZE;
-       while((addr < last_addr) && (addr < end_addr)) {
-               struct page *page;
-               uint64_t *level3p;
-               page = kimage_alloc_control_pages(image, 0);
-               if (!page) {
-                       result = -ENOMEM;
-                       goto out;
-               }
-               level3p = (uint64_t *)page_address(page);
-               result = init_level3_page(image, level3p, addr, last_addr);
-               if (result) {
-                       goto out;
-               }
-               *(level4p++) = __pa(level3p) | L3_ATTR;
-               addr += LEVEL3_SIZE;
-       }
-       /* clear the unused entries */
-       while(addr < end_addr) {
-               *(level4p++) = 0;
-               addr += LEVEL3_SIZE;
-       }
- out:
-       return result;
-}
-
-
-static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
-{
-       uint64_t *level4p;
-       level4p = (uint64_t *)__va(start_pgtable);
-       return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
-}
-
-static void set_idt(void *newidt, __u16 limit)
-{
-       unsigned char curidt[10];
-
-       /* x86-64 supports unaliged loads & stores */
-       (*(__u16 *)(curidt)) = limit;
-       (*(__u64 *)(curidt +2)) = (unsigned long)(newidt);
-
-       __asm__ __volatile__ (
-               "lidt %0\n"
-               : "=m" (curidt)
-               );
-};
-
-
-static void set_gdt(void *newgdt, __u16 limit)
-{
-       unsigned char curgdt[10];
-
-       /* x86-64 supports unaligned loads & stores */
-       (*(__u16 *)(curgdt)) = limit;
-       (*(__u64 *)(curgdt +2)) = (unsigned long)(newgdt);
-
-       __asm__ __volatile__ (
-               "lgdt %0\n"
-               : "=m" (curgdt)
-               );
-};
-
-static void load_segments(void)
-{
-       __asm__ __volatile__ (
-               "\tmovl $"STR(__KERNEL_DS)",%eax\n"
-               "\tmovl %eax,%ds\n"
-               "\tmovl %eax,%es\n"
-               "\tmovl %eax,%ss\n"
-               "\tmovl %eax,%fs\n"
-               "\tmovl %eax,%gs\n"
-               );
-#undef STR
-#undef __STR
-}
-
-typedef void (*relocate_new_kernel_t)(
-       unsigned long indirection_page, unsigned long control_code_buffer,
-       unsigned long start_address, unsigned long pgtable);
-
-const extern unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-const extern unsigned long relocate_new_kernel_size;
-
-int machine_kexec_prepare(struct kimage *image)
-{
-       unsigned long start_pgtable, control_code_buffer;
-       int result;
-
-       /* Calculate the offsets */
-       start_pgtable       = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-       control_code_buffer = start_pgtable + 4096UL;
-
-       /* Setup the identity mapped 64bit page table */
-       result = init_pgtable(image, start_pgtable);
-       if (result) {
-               return result;
-       }
-
-       /* Place the code in the reboot code buffer */
-       memcpy(__va(control_code_buffer), relocate_new_kernel, relocate_new_kernel_size);
-
-       return 0;
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-       return;
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
-       unsigned long indirection_page;
-       unsigned long control_code_buffer;
-       unsigned long start_pgtable;
-       relocate_new_kernel_t rnk;
-
-       /* Interrupts aren't acceptable while we reboot */
-       local_irq_disable();
-
-       /* Calculate the offsets */
-       indirection_page    = image->head & PAGE_MASK;
-       start_pgtable       = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-       control_code_buffer = start_pgtable + 4096UL;
-
-       /* Set the low half of the page table to my identity mapped
-        * page table for kexec.  Leave the high half pointing at the
-        * kernel pages.   Don't bother to flush the global pages
-        * as that will happen when I fully switch to my identity mapped
-        * page table anyway.
-        */
-//     memcpy(current->active_mm->pml4, __va(start_pgtable), PAGE_SIZE/2);
-       __flush_tlb();
-
-
-       /* The segment registers are funny things, they are
-        * automatically loaded from a table, in memory wherever you
-        * set them to a specific selector, but this table is never
-        * accessed again unless you set the segment to a different selector.
-        *
-        * The more common model are caches where the behide
-        * the scenes work is done, but is also dropped at arbitrary
-        * times.
-        *
-        * I take advantage of this here by force loading the
-        * segments, before I zap the gdt with an invalid value.
-        */
-       load_segments();
-       /* The gdt & idt are now invalid.
-        * If you want to load them you must set up your own idt & gdt.
-        */
-       set_gdt(phys_to_virt(0),0);
-       set_idt(phys_to_virt(0),0);
-       /* now call it */
-       rnk = (relocate_new_kernel_t) control_code_buffer;
-       (*rnk)(indirection_page, control_code_buffer, image->start, start_pgtable);
-}
index ce78914..3dac339 100644 (file)
@@ -91,54 +91,47 @@ static void reboot_warm(void)
                      [target] "b" (WARMBOOT_TRAMP));
 }
 
-static inline void kb_wait(void)
-{
-       int i;
-  
-       for (i=0; i<0x10000; i++)
-               if ((inb_p(0x64) & 0x02) == 0)
-                       break;
-}
-  
-void machine_shutdown(void)
-{
-       /* Stop the cpus and apics */
 #ifdef CONFIG_SMP
-       int reboot_cpu_id;
-  
-       /* The boot cpu is always logical cpu 0 */
-       reboot_cpu_id = 0;
-
-       /* Make certain the cpu I'm about to reboot on is online */
-       if (!cpu_isset(reboot_cpu_id, cpu_online_map)) {
-               reboot_cpu_id = smp_processor_id();
+static void smp_halt(void)
+{
+       int cpuid = safe_smp_processor_id(); 
+               static int first_entry = 1;
+
+               if (first_entry) { 
+                       first_entry = 0;
+                       smp_call_function((void *)machine_restart, NULL, 1, 0);
+               } 
+                       
+       smp_stop_cpu(); 
+
+       /* AP calling this. Just halt */
+       if (cpuid != boot_cpu_id) { 
+               for (;;) 
+                       asm("hlt");
        }
 
-       /* Make certain I only run on the appropriate processor */
-       set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id));
-
-       /* O.K Now that I'm on the appropriate processor,
-        * stop all of the others.
-        */
-       smp_send_stop();
-#endif
-
-       local_irq_disable();
-  
-#ifndef CONFIG_SMP
-       disable_local_APIC();
+       /* Wait for all other CPUs to have run smp_stop_cpu */
+       while (!cpus_empty(cpu_online_map))
+               rep_nop(); 
+}
 #endif
 
-       disable_IO_APIC();
+static inline void kb_wait(void)
+{
+       int i;
 
-       local_irq_enable();
+       for (i=0; i<0x10000; i++)
+               if ((inb_p(0x64) & 0x02) == 0)
+                       break;
 }
 
 void machine_restart(char * __unused)
 {
        int i;
 
-       machine_shutdown();
+#ifdef CONFIG_SMP
+       smp_halt(); 
+#endif
 
        local_irq_disable();
        
diff --git a/arch/x86_64/kernel/relocate_kernel.S b/arch/x86_64/kernel/relocate_kernel.S
deleted file mode 100644 (file)
index c944e59..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * relocate_kernel.S - put the kernel image in place to boot
- * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/linkage.h>
-
-       /*
-        * Must be relocatable PIC code callable as a C function, that once
-        * it starts can not use the previous processes stack.
-        */
-       .globl relocate_new_kernel
-       .code64
-relocate_new_kernel:
-       /* %rdi indirection_page
-        * %rsi reboot_code_buffer
-        * %rdx start address
-        * %rcx page_table
-        * %r8  arg5
-        * %r9  arg6
-        */
-
-       /* zero out flags, and disable interrupts */
-       pushq $0
-       popfq
-
-       /* set a new stack at the bottom of our page... */
-       lea   4096(%rsi), %rsp
-
-       /* store the parameters back on the stack */
-       pushq   %rdx /* store the start address */
-
-       /* Set cr0 to a known state:
-        * 31 1 == Paging enabled
-        * 18 0 == Alignment check disabled
-        * 16 0 == Write protect disabled
-        * 3  0 == No task switch
-        * 2  0 == Don't do FP software emulation.
-        * 0  1 == Proctected mode enabled
-        */
-       movq    %cr0, %rax
-       andq    $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax
-       orl     $((1<<31)|(1<<0)), %eax
-       movq    %rax, %cr0
-
-       /* Set cr4 to a known state:
-        * 10 0 == xmm exceptions disabled
-        * 9  0 == xmm registers instructions disabled
-        * 8  0 == performance monitoring counter disabled
-        * 7  0 == page global disabled
-        * 6  0 == machine check exceptions disabled
-        * 5  1 == physical address extension enabled
-        * 4  0 == page size extensions disabled
-        * 3  0 == Debug extensions disabled
-        * 2  0 == Time stamp disable (disabled)
-        * 1  0 == Protected mode virtual interrupts disabled
-        * 0  0 == VME disabled
-        */
-
-       movq    $((1<<5)), %rax
-       movq    %rax, %cr4
-
-       jmp 1f
-1:
-
-       /* Switch to the identity mapped page tables,
-        * and flush the TLB.
-       */
-       movq    %rcx, %cr3
-
-       /* Do the copies */
-       movq    %rdi, %rbx      /* Put the indirection page in %rbx */
-       xorq    %rdi, %rdi
-       xorq    %rsi, %rsi
-
-0:     /* top, read another word for the indirection page */
-
-       movq    (%rbx), %rcx
-       addq    $8,     %rbx
-       testq   $0x1,   %rcx  /* is it a destination page? */
-       jz      1f
-       movq    %rcx,   %rdi
-       andq    $0xfffffffffffff000, %rdi
-       jmp     0b
-1:
-       testq   $0x2,   %rcx  /* is it an indirection page? */
-       jz      1f
-       movq    %rcx,   %rbx
-       andq    $0xfffffffffffff000, %rbx
-       jmp     0b
-1:
-       testq   $0x4,   %rcx  /* is it the done indicator? */
-       jz      1f
-       jmp     2f
-1:
-       testq   $0x8,   %rcx  /* is it the source indicator? */
-       jz      0b            /* Ignore it otherwise */
-       movq    %rcx,   %rsi  /* For ever source page do a copy */
-       andq    $0xfffffffffffff000, %rsi
-
-       movq    $512,   %rcx
-       rep ; movsq
-       jmp     0b
-2:
-
-       /* To be certain of avoiding problems with self-modifying code
-        * I need to execute a serializing instruction here.
-        * So I flush the TLB by reloading %cr3 here, it's handy,
-        * and not processor dependent.
-        */
-       movq    %cr3, %rax
-       movq    %rax, %cr3
-
-       /* set all of the registers to known values */
-       /* leave %rsp alone */
-
-       xorq    %rax, %rax
-       xorq    %rbx, %rbx
-       xorq    %rcx, %rcx
-       xorq    %rdx, %rdx
-       xorq    %rsi, %rsi
-       xorq    %rdi, %rdi
-       xorq    %rbp, %rbp
-       xorq    %r8,  %r8
-       xorq    %r9,  %r9
-       xorq    %r10, %r9
-       xorq    %r11, %r11
-       xorq    %r12, %r12
-       xorq    %r13, %r13
-       xorq    %r14, %r14
-       xorq    %r15, %r15
-
-       ret
-relocate_new_kernel_end:
-
-       .globl relocate_new_kernel_size
-relocate_new_kernel_size:
-       .quad relocate_new_kernel_end - relocate_new_kernel
index 9887d02..062b233 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-ac12
-# Tue Feb 15 15:58:28 2005
+# Linux kernel version: 2.6.10-ac11
+# Thu Feb  3 14:51:24 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -583,7 +583,7 @@ CONFIG_CHR_DEV_SG=m
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
 #
-CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 
index 426934d..1172277 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-ac12
-# Tue Feb 15 15:58:28 2005
+# Linux kernel version: 2.6.10-ac11
+# Thu Feb  3 14:51:24 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -574,7 +574,7 @@ CONFIG_CHR_DEV_SG=m
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
 #
-CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 
index bd63671..ef86d9e 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab.2005.04.14
-# Sat May  7 01:45:01 2005
+# Linux kernel version: 2.6.10-1.12_FC2.1.planetlab
+# Wed Feb  9 08:06:26 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -32,16 +32,13 @@ CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_CKRM=y
 CONFIG_RCFS_FS=y
 CONFIG_CKRM_TYPE_TASKCLASS=y
-CONFIG_CKRM_RES_NULL=m
-# CONFIG_CKRM_RES_MEM is not set
-# CONFIG_CKRM_TYPE_SOCKETCLASS is not set
 CONFIG_CKRM_RES_NUMTASKS=y
-# CONFIG_CKRM_RES_NUMTASKS_FORKRATE is not set
 CONFIG_CKRM_CPU_SCHEDULE=y
 # CONFIG_CKRM_RES_BLKIO is not set
+# CONFIG_CKRM_RES_MEM is not set
 CONFIG_CKRM_CPU_SCHEDULE_AT_BOOT=y
+# CONFIG_CKRM_TYPE_SOCKETCLASS is not set
 CONFIG_CKRM_RBCE=y
-# CONFIG_CKRM_CRBCE is not set
 CONFIG_SYSCTL=y
 CONFIG_AUDIT=y
 CONFIG_AUDITSYSCALL=y
@@ -110,6 +107,7 @@ CONFIG_M686=y
 # CONFIG_MWINCHIP3D is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
+CONFIG_X86_HZ=1000
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_XADD=y
@@ -128,7 +126,6 @@ CONFIG_HPET_TIMER=y
 # CONFIG_SMP is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_X86_UP_APIC is not set
-CONFIG_KERNEL_HZ=1000
 CONFIG_X86_TSC=y
 CONFIG_X86_MCE=y
 # CONFIG_X86_MCE_NONFATAL is not set
@@ -156,9 +153,6 @@ CONFIG_HIGHPTE=y
 CONFIG_MTRR=y
 # CONFIG_EFI is not set
 CONFIG_REGPARM=y
-CONFIG_KERN_PHYS_OFFSET=1
-CONFIG_KEXEC=y
-# CONFIG_CRASH_DUMP is not set
 
 #
 # Power management options (ACPI, APM)
@@ -595,7 +589,7 @@ CONFIG_CHR_DEV_SG=m
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
 #
-CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 
@@ -702,7 +696,7 @@ CONFIG_MD_RAID5=m
 CONFIG_MD_RAID6=m
 CONFIG_MD_MULTIPATH=m
 CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
+CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
 CONFIG_DM_MIRROR=m
@@ -791,7 +785,7 @@ CONFIG_INET_IPCOMP=m
 CONFIG_INET_TUNNEL=m
 # CONFIG_ACCEPT_QUEUES is not set
 CONFIG_IP_TCPDIAG=m
-# CONFIG_IP_TCPDIAG_IPV6 is not set
+CONFIG_IP_TCPDIAG_IPV6=y
 
 #
 # IP: Virtual Server Configuration
@@ -827,7 +821,13 @@ CONFIG_IP_VS_NQ=m
 #
 CONFIG_IP_VS_FTP=m
 CONFIG_ICMP_IPOD=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_TUNNEL=m
+CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_DEBUG is not set
 CONFIG_BRIDGE_NETFILTER=y
@@ -904,6 +904,31 @@ CONFIG_IP_NF_ARP_MANGLE=m
 # CONFIG_IP_NF_COMPAT_IPFWADM is not set
 # CONFIG_IP_NF_CT_PROTO_GRE is not set
 
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_PHYSDEV=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_IP6_NF_RAW=m
+
 #
 # Bridge: Netfilter Configuration
 #
@@ -949,7 +974,7 @@ CONFIG_ATM_BR2684=m
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 # CONFIG_DECNET is not set
-CONFIG_LLC=m
+CONFIG_LLC=y
 # CONFIG_LLC2 is not set
 CONFIG_IPX=m
 # CONFIG_IPX_INTERN is not set
@@ -1008,9 +1033,98 @@ CONFIG_NETPOLL=y
 CONFIG_NETPOLL_TRAP=y
 CONFIG_NET_POLL_CONTROLLER=y
 # CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_TUX is not set
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+# CONFIG_IRDA_ULTRA is not set
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+CONFIG_IRDA_FAST_RR=y
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+CONFIG_IRPORT_SIR=m
+
+#
+# Old Serial dongle support
+#
+# CONFIG_DONGLE_OLD is not set
+
+#
+# FIR device drivers
+#
+CONFIG_USB_IRDA=m
+CONFIG_SIGMATEL_FIR=m
+CONFIG_TOSHIBA_FIR=m
+CONFIG_VLSI_FIR=m
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_CMTP=m
+CONFIG_BT_HIDP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUSB=m
+CONFIG_BT_HCIUSB_SCO=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIBTUART=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_TUX=m
+
+#
+# TUX options
+#
+CONFIG_TUX_EXTCGI=y
+CONFIG_TUX_EXTENDED_LOG=y
+# CONFIG_TUX_DEBUG is not set
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_BONDING=m
@@ -1108,7 +1222,13 @@ CONFIG_S2IO_NAPI=y
 #
 # Token Ring devices
 #
-# CONFIG_TR is not set
+CONFIG_TR=y
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+CONFIG_TMS380TR=m
+CONFIG_TMSPCI=m
+CONFIG_ABYSS=m
 
 #
 # Wireless LAN (non-hamradio)
@@ -1174,6 +1294,7 @@ CONFIG_PCMCIA_NMCLAN=m
 CONFIG_PCMCIA_SMC91C92=m
 CONFIG_PCMCIA_XIRC2PS=m
 CONFIG_PCMCIA_AXNET=m
+CONFIG_PCMCIA_IBMTR=m
 
 #
 # Wan interfaces
@@ -1210,9 +1331,20 @@ CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
 # CONFIG_HIPPI is not set
-# CONFIG_PLIP is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
+CONFIG_PLIP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+# CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+# CONFIG_SLIP_MODE_SLIP6 is not set
 CONFIG_NET_FC=y
 # CONFIG_SHAPER is not set
 CONFIG_NETCONSOLE=m
@@ -1886,7 +2018,95 @@ CONFIG_LOGO_LINUX_CLUT224=y
 #
 # Sound
 #
-# CONFIG_SOUND is not set
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_RTCTIMER=m
+# CONFIG_SND_VERBOSE_PRINTK is not set
+# CONFIG_SND_DEBUG is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_VX_LIB=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+# CONFIG_SND_SERIAL_U16550 is not set
+CONFIG_SND_MPU401=m
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_ATIIXP_MODEM=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+# CONFIG_SND_BT87X_OVERCLOCK is not set
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CS4281=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_HDSP=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_YMFPCI=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_FM801=m
+CONFIG_SND_FM801_TEA575X=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_INTEL8X0=m
+CONFIG_SND_INTEL8X0M=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VX222=m
+
+#
+# USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+CONFIG_SND_USB_USX2Y=m
+
+#
+# PCMCIA devices
+#
+
+#
+# Open Sound System
+#
+# CONFIG_SOUND_PRIME is not set
 
 #
 # USB support
@@ -1918,7 +2138,12 @@ CONFIG_USB_SL811_HCD=m
 #
 # USB Device Class drivers
 #
-# CONFIG_USB_BLUETOOTH_TTY is not set
+# CONFIG_USB_AUDIO is not set
+
+#
+# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
 CONFIG_USB_ACM=m
 CONFIG_USB_PRINTER=m
 
@@ -2331,9 +2556,8 @@ CONFIG_STACK_WARN=4096
 #
 # Linux VServer
 #
-CONFIG_VSERVER_FILESHARING=y
 CONFIG_VSERVER_LEGACY=y
-# CONFIG_VSERVER_PROC_SECURE is not set
+CONFIG_VSERVER_PROC_SECURE=y
 # CONFIG_VSERVER_HARDCPU is not set
 # CONFIG_INOXID_NONE is not set
 # CONFIG_INOXID_UID16 is not set
index f1837c1..b020c0d 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-ac12
-# Tue Feb 15 15:58:29 2005
+# Linux kernel version: 2.6.10-ac11
+# Thu Feb  3 14:51:25 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -587,7 +587,7 @@ CONFIG_CHR_DEV_SG=m
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
 #
-CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 
diff --git a/configs/kernel-2.6.10-i686-uml-planetlab.config b/configs/kernel-2.6.10-i686-uml-planetlab.config
deleted file mode 100644 (file)
index c15d03f..0000000
+++ /dev/null
@@ -1,537 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-1.14_FC2.1.planetlab
-# Fri Mar  4 15:18:24 2005
-#
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_USERMODE=y
-CONFIG_MMU=y
-CONFIG_UID16=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-
-#
-# UML-specific options
-#
-CONFIG_MODE_TT=y
-CONFIG_MODE_SKAS=y
-CONFIG_NET=y
-CONFIG_BINFMT_ELF=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HOSTFS=y
-CONFIG_MCONSOLE=y
-# CONFIG_HOST_2G_2G is not set
-# CONFIG_SMP is not set
-CONFIG_NEST_LEVEL=0
-CONFIG_KERNEL_HALF_GIGS=1
-CONFIG_KERNEL_STACK_ORDER=2
-CONFIG_UML_REAL_TIME_CLOCK=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
-CONFIG_BROKEN_ON_SMP=y
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-# CONFIG_BSD_PROCESS_ACCT_V3 is not set
-
-#
-# Class Based Kernel Resource Management
-#
-# CONFIG_CKRM is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_OOM_PANIC=y
-# CONFIG_EMBEDDED is not set
-# CONFIG_DELAY_ACCT is not set
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_FUTEX=y
-CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-# CONFIG_TINY_SHMEM is not set
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_MODULE_SIG is not set
-CONFIG_KMOD=y
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_DEBUG_DRIVER is not set
-
-#
-# Character Devices
-#
-CONFIG_STDIO_CONSOLE=y
-CONFIG_SSL=y
-CONFIG_FD_CHAN=y
-CONFIG_NULL_CHAN=y
-CONFIG_PORT_CHAN=y
-CONFIG_PTY_CHAN=y
-CONFIG_TTY_CHAN=y
-CONFIG_XTERM_CHAN=y
-# CONFIG_NOCONFIG_CHAN is not set
-CONFIG_CON_ZERO_CHAN="fd:0,fd:1"
-CONFIG_CON_CHAN="xterm"
-CONFIG_SSL_CHAN="pty"
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-# CONFIG_WATCHDOG is not set
-# CONFIG_UML_SOUND is not set
-# CONFIG_SOUND is not set
-# CONFIG_HOSTAUDIO is not set
-
-#
-# Block Devices
-#
-CONFIG_BLK_DEV_UBD=y
-CONFIG_BLK_DEV_UBD_SYNC=y
-CONFIG_BLK_DEV_COW_COMMON=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=4096
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-CONFIG_NETDEVICES=y
-
-#
-# UML Network Devices
-#
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
-
-#
-# Networking support
-#
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
-# CONFIG_NETLINK_DEV is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-# CONFIG_IP_PNP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_ACCEPT_QUEUES is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
-CONFIG_ICMP_IPOD=y
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_IP_NF_CONNTRACK=m
-# CONFIG_IP_NF_CT_ACCT is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-CONFIG_IP_NF_CT_PROTO_SCTP=m
-CONFIG_IP_NF_FTP=m
-CONFIG_IP_NF_IRC=m
-CONFIG_IP_NF_TFTP=m
-CONFIG_IP_NF_AMANDA=m
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_LIMIT=m
-CONFIG_IP_NF_MATCH_IPRANGE=m
-CONFIG_IP_NF_MATCH_MAC=m
-CONFIG_IP_NF_MATCH_PKTTYPE=m
-CONFIG_IP_NF_MATCH_MARK=m
-CONFIG_IP_NF_MATCH_MULTIPORT=m
-CONFIG_IP_NF_MATCH_TOS=m
-CONFIG_IP_NF_MATCH_RECENT=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_DSCP=m
-CONFIG_IP_NF_MATCH_AH_ESP=m
-CONFIG_IP_NF_MATCH_LENGTH=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_MATCH_TCPMSS=m
-CONFIG_IP_NF_MATCH_HELPER=m
-CONFIG_IP_NF_MATCH_STATE=m
-CONFIG_IP_NF_MATCH_CONNTRACK=m
-CONFIG_IP_NF_MATCH_OWNER=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_REALM=m
-CONFIG_IP_NF_MATCH_SCTP=m
-CONFIG_IP_NF_MATCH_COMMENT=m
-CONFIG_IP_NF_MATCH_HASHLIMIT=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_TARGET_TCPMSS=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_NAT_NEEDED=y
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_SAME=m
-# CONFIG_IP_NF_NAT_LOCAL is not set
-CONFIG_IP_NF_NAT_SNMP_BASIC=m
-CONFIG_IP_NF_NAT_IRC=m
-CONFIG_IP_NF_NAT_FTP=m
-CONFIG_IP_NF_NAT_TFTP=m
-CONFIG_IP_NF_NAT_AMANDA=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_TOS=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_DSCP=m
-CONFIG_IP_NF_TARGET_MARK=m
-CONFIG_IP_NF_TARGET_CLASSIFY=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_TARGET_NOTRACK=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-# CONFIG_IP_NF_COMPAT_IPCHAINS is not set
-# CONFIG_IP_NF_COMPAT_IPFWADM is not set
-CONFIG_IP_NF_CT_PROTO_GRE=m
-CONFIG_IP_NF_PPTP=m
-CONFIG_IP_NF_NAT_PPTP=m
-CONFIG_IP_NF_NAT_PROTO_GRE=m
-CONFIG_VNET=m
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CLK_JIFFIES=y
-# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
-# CONFIG_NET_SCH_CLK_CPU is not set
-# CONFIG_NET_SCH_CBQ is not set
-CONFIG_NET_SCH_HTB=m
-# CONFIG_NET_SCH_HFSC is not set
-# CONFIG_NET_SCH_PRIO is not set
-# CONFIG_NET_SCH_RED is not set
-# CONFIG_NET_SCH_SFQ is not set
-# CONFIG_NET_SCH_TEQL is not set
-# CONFIG_NET_SCH_TBF is not set
-# CONFIG_NET_SCH_GRED is not set
-# CONFIG_NET_SCH_DSMARK is not set
-# CONFIG_NET_SCH_NETEM is not set
-# CONFIG_NET_SCH_INGRESS is not set
-# CONFIG_NET_QOS is not set
-CONFIG_NET_CLS=y
-# CONFIG_NET_CLS_TCINDEX is not set
-# CONFIG_NET_CLS_ROUTE4 is not set
-CONFIG_NET_CLS_ROUTE=y
-CONFIG_NET_CLS_FW=m
-# CONFIG_NET_CLS_U32 is not set
-# CONFIG_NET_CLS_IND is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_TUX is not set
-CONFIG_DUMMY=m
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-CONFIG_TUN=m
-
-#
-# Ethernet (10 or 100Mbit)
-#
-# CONFIG_NET_ETHERNET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_REISERFS_FS=y
-# CONFIG_REISERFS_CHECK is not set
-# CONFIG_REISERFS_PROC_INFO is not set
-# CONFIG_REISERFS_FS_XATTR is not set
-# CONFIG_JFS_FS is not set
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_QUOTA=y
-# CONFIG_QFMT_V1 is not set
-# CONFIG_QFMT_V2 is not set
-CONFIG_QUOTACTL=y
-CONFIG_DNOTIFY=y
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
-
-#
-# CD-ROM/DVD Filesystems
-#
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-# CONFIG_ZISOFS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-CONFIG_DEVFS_FS=y
-CONFIG_DEVFS_MOUNT=y
-# CONFIG_DEVFS_DEBUG is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
-CONFIG_TMPFS=y
-# CONFIG_TMPFS_XATTR is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
-# CONFIG_EXPORTFS is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="utf-8"
-CONFIG_NLS_CODEPAGE_437=m
-# CONFIG_NLS_CODEPAGE_737 is not set
-# CONFIG_NLS_CODEPAGE_775 is not set
-# CONFIG_NLS_CODEPAGE_850 is not set
-# CONFIG_NLS_CODEPAGE_852 is not set
-# CONFIG_NLS_CODEPAGE_855 is not set
-# CONFIG_NLS_CODEPAGE_857 is not set
-# CONFIG_NLS_CODEPAGE_860 is not set
-# CONFIG_NLS_CODEPAGE_861 is not set
-# CONFIG_NLS_CODEPAGE_862 is not set
-# CONFIG_NLS_CODEPAGE_863 is not set
-# CONFIG_NLS_CODEPAGE_864 is not set
-# CONFIG_NLS_CODEPAGE_865 is not set
-# CONFIG_NLS_CODEPAGE_866 is not set
-# CONFIG_NLS_CODEPAGE_869 is not set
-# CONFIG_NLS_CODEPAGE_936 is not set
-# CONFIG_NLS_CODEPAGE_950 is not set
-# CONFIG_NLS_CODEPAGE_932 is not set
-# CONFIG_NLS_CODEPAGE_949 is not set
-# CONFIG_NLS_CODEPAGE_874 is not set
-# CONFIG_NLS_ISO8859_8 is not set
-# CONFIG_NLS_CODEPAGE_1250 is not set
-# CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
-CONFIG_NLS_ISO8859_1=m
-# CONFIG_NLS_ISO8859_2 is not set
-# CONFIG_NLS_ISO8859_3 is not set
-# CONFIG_NLS_ISO8859_4 is not set
-# CONFIG_NLS_ISO8859_5 is not set
-# CONFIG_NLS_ISO8859_6 is not set
-# CONFIG_NLS_ISO8859_7 is not set
-# CONFIG_NLS_ISO8859_9 is not set
-# CONFIG_NLS_ISO8859_13 is not set
-# CONFIG_NLS_ISO8859_14 is not set
-# CONFIG_NLS_ISO8859_15 is not set
-# CONFIG_NLS_KOI8_R is not set
-# CONFIG_NLS_KOI8_U is not set
-CONFIG_NLS_UTF8=m
-
-#
-# Linux VServer
-#
-CONFIG_VSERVER_FILESHARING=y
-CONFIG_VSERVER_LEGACY=y
-# CONFIG_VSERVER_PROC_SECURE is not set
-# CONFIG_VSERVER_HARDCPU is not set
-# CONFIG_INOXID_NONE is not set
-# CONFIG_INOXID_UID16 is not set
-# CONFIG_INOXID_GID16 is not set
-CONFIG_INOXID_UGID24=y
-# CONFIG_INOXID_INTERN is not set
-# CONFIG_INOXID_RUNTIME is not set
-# CONFIG_VSERVER_DEBUG is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-# CONFIG_INPUT is not set
-
-#
-# Kernel hacking
-#
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_FRAME_POINTER=y
-CONFIG_PT_PROXY=y
-# CONFIG_GPROF is not set
-# CONFIG_GCOV is not set
index 412fe08..dcb6182 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.10-ac12
-# Tue Feb 15 15:58:29 2005
+# Linux kernel version: 2.6.10-ac11
+# Thu Feb  3 14:51:25 2005
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -575,7 +575,7 @@ CONFIG_CHR_DEV_SG=m
 #
 # Some SCSI devices (e.g. CD jukebox) support multiple LUNs
 #
-CONFIG_SCSI_MULTI_LUN=y
+# CONFIG_SCSI_MULTI_LUN is not set
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 
index aeeb1a7..e5245cb 100644 (file)
@@ -23,8 +23,6 @@
 #include <linux/devfs_fs_kernel.h>
 #include <linux/ptrace.h>
 #include <linux/device.h>
-#include <linux/highmem.h>
-#include <linux/crash_dump.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -229,62 +227,6 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
        return 0;
 }
 
-#ifdef CONFIG_CRASH_DUMP
-/*
- * Read memory corresponding to the old kernel.
- * If we are reading from the reserved section, which is
- * actually used by the current kernel, we just return zeroes.
- * Or if we are reading from the first 640k, we return from the
- * backed up area.
- */
-static ssize_t read_oldmem(struct file * file, char * buf,
-                               size_t count, loff_t *ppos)
-{
-       unsigned long pfn;
-       unsigned backup_start, backup_end, relocate_start;
-       size_t read=0, csize;
-
-       backup_start = CRASH_BACKUP_BASE / PAGE_SIZE;
-       backup_end = backup_start + (CRASH_BACKUP_SIZE / PAGE_SIZE);
-       relocate_start = (CRASH_BACKUP_BASE + CRASH_BACKUP_SIZE) / PAGE_SIZE;
-
-       while(count) {
-               pfn = *ppos / PAGE_SIZE;
-
-               csize = (count > PAGE_SIZE) ? PAGE_SIZE : count;
-
-               /* Perform translation (see comment above) */
-               if ((pfn >= backup_start) && (pfn < backup_end)) {
-                       if (clear_user(buf, csize)) {
-                               read = -EFAULT;
-                               goto done;
-                       }
-
-                       goto copy_done;
-               } else if (pfn < (CRASH_RELOCATE_SIZE / PAGE_SIZE))
-                       pfn += relocate_start;
-
-               if (pfn > saved_max_pfn) {
-                       read = 0;
-                       goto done;
-               }
-
-               if (copy_oldmem_page(pfn, buf, csize, 1)) {
-                       read = -EFAULT;
-                       goto done;
-               }
-
-copy_done:
-               buf += csize;
-               *ppos += csize;
-               read += csize;
-               count -= csize;
-       }
-done:
-       return read;
-}
-#endif
-
 extern long vread(char *buf, char *addr, unsigned long count);
 extern long vwrite(char *buf, char *addr, unsigned long count);
 
@@ -591,7 +533,6 @@ static int open_port(struct inode * inode, struct file * filp)
 #define read_full       read_zero
 #define open_mem       open_port
 #define open_kmem      open_mem
-#define open_oldmem    open_mem
 
 static struct file_operations mem_fops = {
        .llseek         = memory_lseek,
@@ -636,13 +577,6 @@ static struct file_operations full_fops = {
        .write          = write_full,
 };
 
-#ifdef CONFIG_CRASH_DUMP
-static struct file_operations oldmem_fops = {
-       .read   = read_oldmem,
-       .open   = open_oldmem,
-};
-#endif
-
 static ssize_t kmsg_write(struct file * file, const char __user * buf,
                          size_t count, loff_t *ppos)
 {
@@ -697,11 +631,6 @@ static int memory_open(struct inode * inode, struct file * filp)
                case 11:
                        filp->f_op = &kmsg_fops;
                        break;
-#ifdef CONFIG_CRASH_DUMP
-               case 12:
-                       filp->f_op = &oldmem_fops;
-                       break;
-#endif
                default:
                        return -ENXIO;
        }
@@ -730,9 +659,6 @@ static const struct {
        {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
        {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
        {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
-#ifdef CONFIG_CRASH_DUMP
-       {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
-#endif
 };
 
 static struct class_simple *mem_class;
index 367f12d..19bacae 100644 (file)
@@ -556,7 +556,7 @@ static int mxser_initbrd(int board, struct mxser_hwconf *hwconf)
        info = &mxvar_table[n];
        /*if (verbose) */  {
                printk(KERN_DEBUG "        ttyM%d - ttyM%d ", n, n + hwconf->ports - 1);
-               printk(" max. baud rate = %d bps.\n", hwconf->MaxCanSetBaudRate[0]);
+               printk(KERN_DEBUG " max. baud rate = %d bps.\n", hwconf->MaxCanSetBaudRate[0]);
        }
 
        for (i = 0; i < hwconf->ports; i++, n++, info++) {
@@ -609,12 +609,18 @@ static int mxser_initbrd(int board, struct mxser_hwconf *hwconf)
        n = board * MXSER_PORTS_PER_BOARD;
        info = &mxvar_table[n];
 
+       spin_lock_irqsave(&info->slock, flags);
        retval = request_irq(hwconf->irq, mxser_interrupt, IRQ_T(info), "mxser", info);
        if (retval) {
+               spin_unlock_irqrestore(&info->slock, flags);
                printk(KERN_ERR "Board %d: %s", board, mxser_brdname[hwconf->board_type - 1]);
                printk("  Request irq fail,IRQ (%d) may be conflit with another device.\n", info->irq);
                return retval;
        }
+
+       spin_unlock_irqrestore(&info->slock, flags);
+
+
        return 0;
 }
 
@@ -2138,9 +2144,10 @@ intr_old:
        mxvar_log.rxcnt[info->port] += cnt;
        info->mon_data.rxcnt += cnt;
        info->mon_data.up_rxcnt += cnt;
+
+       tty->ldisc.receive_buf(tty, tty->flip.char_buf, tty->flip.flag_buf, count);
        spin_unlock_irqrestore(&info->slock, flags);
-       
-       tty_flip_buffer_push(tty);
+
 }
 
 static void mxser_transmit_chars(struct mxser_struct *info)
index 64a7294..a29dffd 100644 (file)
@@ -1143,13 +1143,13 @@ static inline int copy_from_read_buf(struct tty_struct *tty,
 
 {
        int retval;
-       size_t n;
+       ssize_t n;
        unsigned long flags;
 
        retval = 0;
        spin_lock_irqsave(&tty->read_lock, flags);
        n = min(tty->read_cnt, N_TTY_BUF_SIZE - tty->read_tail);
-       n = min(*nr, n);
+       n = min((ssize_t)*nr, n);
        spin_unlock_irqrestore(&tty->read_lock, flags);
        if (n) {
                mb();
index 9935d1c..f61189f 100644 (file)
@@ -1156,8 +1156,8 @@ static inline void pty_line_name(struct tty_driver *driver, int index, char *p)
        int i = index + driver->name_base;
        /* ->name is initialized to "ttyp", but "tty" is expected */
        sprintf(p, "%s%c%x",
-               driver->subtype == PTY_TYPE_SLAVE ? "pty" : driver->name,
-               ptychar[i >> 4 & 0xf], i & 0xf);
+                       driver->subtype == PTY_TYPE_SLAVE ? "tty" : driver->name,
+                       ptychar[i >> 4 & 0xf], i & 0xf);
 }
 
 static inline void tty_line_name(struct tty_driver *driver, int index, char *p)
index 1ab0354..afa0aed 100644 (file)
@@ -305,7 +305,7 @@ int dump_lcrash_add_data(unsigned long loc, unsigned long len)
        buf += sizeof(struct __dump_page);
 
        while (len) {
-               addr = kmap_atomic(page, KM_CRASHDUMP);
+               addr = kmap_atomic(page, KM_DUMP);
                size = bytes = (len > PAGE_SIZE) ? PAGE_SIZE : len;     
                /* check for compression */
                if (dump_allow_compress(page, bytes)) {
@@ -321,7 +321,7 @@ int dump_lcrash_add_data(unsigned long loc, unsigned long len)
                        size = bytes;
                }
                /* memset(buf, 'A', size); temporary: testing only !! */
-               kunmap_atomic(addr, KM_CRASHDUMP);
+               kunmap_atomic(addr, KM_DUMP);
                dp->dp_size += size;
                buf += size;
                len -= bytes;
index 991db8e..5a01e0f 100644 (file)
@@ -314,7 +314,7 @@ __dump_cleanup(void)
        free_dha_stack();
 }
 
-extern int page_is_ram(unsigned long);
+extern int pfn_is_ram(unsigned long);
 
 /*
  * Name: __dump_page_valid()
@@ -326,7 +326,7 @@ __dump_page_valid(unsigned long index)
        if (!pfn_valid(index))
                return 0;
 
-       return page_is_ram(index);
+       return pfn_is_ram(index);
 }
 
 /* 
index b2bb642..1cd700d 100644 (file)
@@ -146,7 +146,7 @@ void dump_mark_map(struct dump_memdev *dev)
                        pr_debug("indirect map[%d] = 0x%lx\n", i, map1[i]);
                        page = pfn_to_page(map1[i]);
                        set_page_count(page, 1);
-                       map2 = kmap_atomic(page, KM_CRASHDUMP);
+                       map2 = kmap_atomic(page, KM_DUMP);
                        for (j = 0 ; (j < DUMP_MAP_SZ) && map2[j] && 
                                (off + j < last); j++) {
                                pr_debug("\t map[%d][%d] = 0x%lx\n", i, j, 
@@ -198,7 +198,7 @@ struct page *dump_mem_lookup(struct dump_memdev *dump_mdev, unsigned long loc)
        }
 
        if (page)
-               map = kmap_atomic(page, KM_CRASHDUMP);
+               map = kmap_atomic(page, KM_DUMP);
        else 
                return NULL;
 
@@ -213,7 +213,7 @@ struct page *dump_mem_lookup(struct dump_memdev *dump_mdev, unsigned long loc)
        } else {
                page = NULL;
        }
-       kunmap_atomic(map, KM_CRASHDUMP);
+       kunmap_atomic(map, KM_DUMP);
 
        return page;
 }
@@ -248,10 +248,10 @@ struct page *dump_mem_next_page(struct dump_memdev *dev)
        };
        
        if (*dev->curr_map) {
-               map = kmap_atomic(pfn_to_page(*dev->curr_map), KM_CRASHDUMP);
+               map = kmap_atomic(pfn_to_page(*dev->curr_map), KM_DUMP);
                if (map[i])
                        page = pfn_to_page(map[i]);
-               kunmap_atomic(map, KM_CRASHDUMP);
+               kunmap_atomic(map, KM_DUMP);
                dev->ddev.curr_offset += PAGE_SIZE;
        };
 
@@ -308,9 +308,9 @@ int dump_mem_add_space(struct dump_memdev *dev, struct page *page)
        /* add data space */
        i = dev->curr_map_offset;
        map_page = pfn_to_page(*dev->curr_map);
-       map = (unsigned long *)kmap_atomic(map_page, KM_CRASHDUMP);
+       map = (unsigned long *)kmap_atomic(map_page, KM_DUMP);
        map[i] = page_to_pfn(page);
-       kunmap_atomic(map, KM_CRASHDUMP);
+       kunmap_atomic(map, KM_DUMP);
        dev->curr_map_offset = ++i;
        dev->last_offset += PAGE_SIZE;
        if (i >= DUMP_MAP_SZ) {
@@ -572,10 +572,10 @@ int dump_mem_write(struct dump_dev *dev, void *buf, unsigned long len)
        page = dump_mem_lookup(dump_mdev, dev->curr_offset >> PAGE_SHIFT);
 
        for (n = len; (n > 0) && page; n -= PAGE_SIZE, buf += PAGE_SIZE ) {
-               addr = kmap_atomic(page, KM_CRASHDUMP);
+               addr = kmap_atomic(page, KM_DUMP);
                /* memset(addr, 'x', PAGE_SIZE); */
                memcpy(addr, buf, PAGE_SIZE);
-               kunmap_atomic(addr, KM_CRASHDUMP);
+               kunmap_atomic(addr, KM_DUMP);
                /* dev->curr_offset += PAGE_SIZE; */
                page = dump_mem_next_page(dump_mdev);
        }
index a23f1b2..8e10b78 100644 (file)
@@ -481,7 +481,7 @@ int dump_saved_data_iterator(int pass, int (*action)(unsigned long,
                                else
                                        count++;
                                /* clear the contents of page */
-                               /* fixme: consider using KM_CRASHDUMP instead */
+                               /* fixme: consider using KM_DUMP instead */
                                clear_highpage(page);
                        
                        }
@@ -679,10 +679,10 @@ int dump_copy_pages(void *buf, struct page *page, unsigned long sz)
        void *addr;
 
        while (len < sz) {
-               addr = kmap_atomic(page, KM_CRASHDUMP);
+               addr = kmap_atomic(page, KM_DUMP);
                bytes = (sz > len + PAGE_SIZE) ? PAGE_SIZE : sz - len;  
                memcpy(buf, addr, bytes); 
-               kunmap_atomic(addr, KM_CRASHDUMP);
+               kunmap_atomic(addr, KM_DUMP);
                buf += bytes;
                len += bytes;
                page++;
index 338f723..668b2d0 100644 (file)
@@ -740,7 +740,9 @@ static inline void
 dump_sysrq_register(void) 
 {
 #ifdef CONFIG_MAGIC_SYSRQ
-       register_sysrq_key(DUMP_SYSRQ_KEY, &sysrq_crashdump_op);
+       __sysrq_lock_table();
+       __sysrq_put_key_op(DUMP_SYSRQ_KEY, &sysrq_crashdump_op);
+       __sysrq_unlock_table();
 #endif
 }
 
@@ -748,7 +750,10 @@ static inline void
 dump_sysrq_unregister(void)
 {
 #ifdef CONFIG_MAGIC_SYSRQ
-       unregister_sysrq_key(DUMP_SYSRQ_KEY, &sysrq_crashdump_op);
+       __sysrq_lock_table();
+       if (__sysrq_get_key_op(DUMP_SYSRQ_KEY) == &sysrq_crashdump_op)
+               __sysrq_put_key_op(DUMP_SYSRQ_KEY, NULL);
+       __sysrq_unlock_table();
 #endif
 }
 
index 70cf372..7b5303b 100644 (file)
@@ -1100,18 +1100,15 @@ static void set_rx_mode(struct net_device *dev)
                        entry = tp->cur_tx++ % TX_RING_SIZE;
 
                        if (entry != 0) {
-                               /* Avoid a chip errata by prefixing a dummy entry. Don't do
-                                  this on the ULI526X as it triggers a different problem */
-                               if (!(tp->chip_id == ULI526X && (tp->revision = 0x40 || tp->revision == 0x50))) {
-                                       tp->tx_buffers[entry].skb = NULL;
-                                       tp->tx_buffers[entry].mapping = 0;
-                                       tp->tx_ring[entry].length =
-                                               (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0;
-                                       tp->tx_ring[entry].buffer1 = 0;
-                                       /* Must set DescOwned later to avoid race with chip */
-                                       dummy = entry;
-                                       entry = tp->cur_tx++ % TX_RING_SIZE;
-                               }
+                               /* Avoid a chip errata by prefixing a dummy entry. */
+                               tp->tx_buffers[entry].skb = NULL;
+                               tp->tx_buffers[entry].mapping = 0;
+                               tp->tx_ring[entry].length =
+                                       (entry == TX_RING_SIZE-1) ? cpu_to_le32(DESC_RING_WRAP) : 0;
+                               tp->tx_ring[entry].buffer1 = 0;
+                               /* Must set DescOwned later to avoid race with chip */
+                               dummy = entry;
+                               entry = tp->cur_tx++ % TX_RING_SIZE;
                        }
 
                        tp->tx_buffers[entry].skb = NULL;
index 4347594..9b340d9 100644 (file)
@@ -121,10 +121,7 @@ static struct {
        {"3PARdata", "VV", NULL, BLIST_REPORTLUN2},
        {"ADAPTEC", "AACRAID", NULL, BLIST_FORCELUN},
        {"ADAPTEC", "Adaptec 5400S", NULL, BLIST_FORCELUN},
-       {"AFT PRO", "-IX CF", "0.0>", BLIST_FORCELUN},
-       {"BELKIN", "USB 2 HS-CF", "1.95",  BLIST_FORCELUN | BLIST_INQUIRY_36},
        {"CANON", "IPUBJD", NULL, BLIST_SPARSELUN},
-       {"CBOX3", "USB Storage-SMC", "300A", BLIST_FORCELUN | BLIST_INQUIRY_36},
        {"CMD", "CRA-7280", NULL, BLIST_SPARSELUN},     /* CMD RAID Controller */
        {"CNSI", "G7324", NULL, BLIST_SPARSELUN},       /* Chaparral G7324 RAID */
        {"CNSi", "G8324", NULL, BLIST_SPARSELUN},       /* Chaparral G8324 RAID */
@@ -145,9 +142,6 @@ static struct {
        {"EMC", "SYMMETRIX", NULL, BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN},
        {"EMULEX", "MD21/S2     ESDI", NULL, BLIST_SINGLELUN},
        {"FSC", "CentricStor", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
-       {"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
-       {"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN | BLIST_INQUIRY_36},
-       {"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN | BLIST_INQUIRY_36},
        {"HITACHI", "DF400", "*", BLIST_SPARSELUN},
        {"HITACHI", "DF500", "*", BLIST_SPARSELUN},
        {"HITACHI", "DF600", "*", BLIST_SPARSELUN},
@@ -178,18 +172,17 @@ static struct {
        {"NEC", "PD-1 ODX654P", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"NRC", "MBR-7", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"NRC", "MBR-7.4", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
+       {"OTi", "CF CARD Reader", "2.00", BLIST_FORCELUN | BLIST_SINGLELUN},
        {"PIONEER", "CD-ROM DRM-600", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"PIONEER", "CD-ROM DRM-602X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"PIONEER", "CD-ROM DRM-604X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
        {"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
-       {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
        {"SEAGATE", "ST34555N", "0930", BLIST_NOTQ},    /* Chokes on tagged INQUIRY */
        {"SEAGATE", "ST3390N", "9546", BLIST_NOTQ},
        {"SGI", "RAID3", "*", BLIST_SPARSELUN},
        {"SGI", "RAID5", "*", BLIST_SPARSELUN},
        {"SGI", "TP9100", "*", BLIST_REPORTLUN2},
        {"SGI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
-       {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
        {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
        {"SONY", "TSL", NULL, BLIST_FORCELUN},          /* DDS3 & DDS4 autoloaders */
        {"SUN", "T300", "*", BLIST_SPARSELUN},
@@ -197,13 +190,48 @@ static struct {
        {"TEXEL", "CD-ROM", "1.06", BLIST_BORKEN},
        {"TOSHIBA", "CDROM", NULL, BLIST_ISROM},
        {"TOSHIBA", "CD-ROM", NULL, BLIST_ISROM},
-       {"USB2.0", "SMARTMEDIA/XD", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
        {"WangDAT", "Model 2600", "01.7", BLIST_SELECT_NO_ATN},
        {"WangDAT", "Model 3200", "02.2", BLIST_SELECT_NO_ATN},
        {"WangDAT", "Model 1300", "02.4", BLIST_SELECT_NO_ATN},
        {"XYRATEX", "RS", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
        {"Zzyzx", "RocketStor 500S", NULL, BLIST_SPARSELUN},
        {"Zzyzx", "RocketStor 2000", NULL, BLIST_SPARSELUN},
+
+       /*
+        * USB multi card readers.
+        */
+       {"AFT", "CF  PRO-9XP", "9144", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"AFT PRO", "-IX CF", "0.0>", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"BELKIN", "USB 2 HS-CF", "1.95",  BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"CBOX3", "USB Storage-SMC", "300A", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"DMI", "MultiFlash", "3.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"eUSB", "Compact Flash", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"GENERIC", "Card Reader   CF", "v26F", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"General", "USB Disk Drive","1.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Generic", "USB SD Reader", "1.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Generic", "USB Storage-SMC", "0090", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Generic", "USB Storage-SMC", "0180", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Generic", "USB Storage-SMC", "0207", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"generic", "USB Storage-SMC", "0207", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"IC", "USB Storage-CFC", "322E", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"ICSI", "SD Card", "2.7C", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"IOI", "Media Bay", "*", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Lexar", "Media Inc. SM/xD", "009E", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Lexar", "USB Storage-SMC", "I18A", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Medion", "Flash XL  MMC/SD", "2.6D", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"OEI-USB", "CompactFlash", "1.01", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"OEI-USB2", "CompactFlash", "2.00", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"SMSC", "223 U HS-CF", "1.95", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"SMSC", "USB 2 HS-CF", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"TwinMOS", "7-in-1 Card RWCF", "0100", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"USB2.0", "CardReader CF RW", "0.0>", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"USB2.0", "CardReader SM RW", "0814", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"USB2.0", "CF  CardReader", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"USB2.0", "SMARTMEDIA/XD", NULL, BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Y-E DATA", "CF Card Reader", "1.03", BLIST_FORCELUN | BLIST_INQUIRY_36},
+       {"Zynet", "USB Storage-SMC", "I03A", BLIST_FORCELUN | BLIST_INQUIRY_36},
+
        { NULL, NULL, NULL, 0 },
 };
 
index 0d512de..161c52f 100644 (file)
@@ -75,7 +75,7 @@ module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(old_scheme_first,
                 "start with the old device initialization scheme");
 
-static int use_both_schemes = 1;
+static int use_both_schemes = 0;
 module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_both_schemes,
                "try the other device initialization scheme if the "
index c588782..82730d5 100644 (file)
@@ -93,6 +93,7 @@ obj-$(CONFIG_JFS_FS)          += jfs/
 obj-$(CONFIG_XFS_FS)           += xfs/
 obj-$(CONFIG_AFS_FS)           += afs/
 obj-$(CONFIG_BEFS_FS)          += befs/
+obj-$(CONFIG_EXTERNFS)         += hostfs/
 obj-$(CONFIG_RCFS_FS)          += rcfs/
 obj-$(CONFIG_HOSTFS)           += hostfs/
 obj-$(CONFIG_HPPFS)            += hppfs/
index 6b523da..7a9c7a1 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -564,7 +564,7 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
  *     (Note: this routine is intended to be called only
  *     from a kernel thread context)
  */
-void use_mm(struct mm_struct *mm)
+static void use_mm(struct mm_struct *mm)
 {
        struct mm_struct *active_mm;
        struct task_struct *tsk = current;
index fed1192..5f78d75 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,9 +14,9 @@
 #include <linux/fcntl.h>
 #include <linux/quotaops.h>
 #include <linux/security.h>
+#include <linux/vs_base.h>
 #include <linux/proc_fs.h>
 #include <linux/devpts_fs.h>
-#include <linux/vserver/debug.h>
 
 /* Taken over from the old code... */
 
@@ -64,19 +64,22 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
                goto fine;
 
        if (IS_BARRIER(inode)) {
-               vxwprintk(1, "xid=%d messing with the barrier.",
+               printk(KERN_WARNING
+                       "VSW: xid=%d messing with the barrier.\n",
                        vx_current_xid());
                goto error;
        }
        switch (inode->i_sb->s_magic) {
                case PROC_SUPER_MAGIC:
-                       vxwprintk(1, "xid=%d messing with the procfs.",
+                       printk(KERN_WARNING
+                               "VSW: xid=%d messing with the procfs.\n",
                                vx_current_xid());
                        goto error;
                case DEVPTS_SUPER_MAGIC:
                        if (vx_check(inode->i_xid, VX_IDENT))
                                goto fine;
-                       vxwprintk(1, "xid=%d messing with the devpts.",
+                       printk(KERN_WARNING
+                               "VSW: xid=%d messing with the devpts.\n",
                                vx_current_xid());
                        goto error;
        }
index 55235a6..42de9c8 100644 (file)
@@ -776,7 +776,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
         * Turn off the CS limit completely if exec-shield disabled or
         * NX active:
         */
-       if (!exec_shield || executable_stack != EXSTACK_DISABLE_X || nx_enabled)
+       if (!exec_shield || executable_stack != EXSTACK_DISABLE_X)
                arch_add_exec_range(current->mm, -1);
 #endif
 
@@ -798,8 +798,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
-       if (exec_shield != 2 &&
-                       elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
+       if (elf_read_implies_exec(loc->elf_ex, have_pt_gnu_stack))
                current->personality |= READ_IMPLIES_EXEC;
 
        arch_pick_mmap_layout(current->mm);
index 004d7ac..6fb3d1f 100644 (file)
@@ -32,25 +32,6 @@ static struct xattr_handler *devpts_xattr_handlers[] = {
        NULL
 };
 
-static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-       int ret = -EACCES;
-
-       if (vx_check(inode->i_xid, VX_IDENT))
-               ret = generic_permission(inode, mask, NULL);
-       return ret;
-}
-
-struct inode_operations devpts_file_inode_operations = {
-#ifdef CONFIG_DEVPTS_FS_XATTR
-       .setxattr       = generic_setxattr,
-       .getxattr       = generic_getxattr,
-       .listxattr      = generic_listxattr,
-       .removexattr    = generic_removexattr,
-#endif
-       .permission     = devpts_permission,
-};
-
 static struct vfsmount *devpts_mnt;
 static struct dentry *devpts_root;
 
@@ -227,6 +208,26 @@ static struct dentry *get_node(int num)
        return lookup_one_len(s, root, sprintf(s, "%d", num));
 }
 
+#ifdef CONFIG_DEVPTS_FS_XATTR
+static int devpts_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+       int ret = -EACCES;
+
+       if (vx_check(inode->i_xid, VX_IDENT))
+               ret = generic_permission(inode, mask, NULL);
+       return ret;
+}
+#endif
+
+struct inode_operations devpts_file_inode_operations = {
+#ifdef CONFIG_DEVPTS_FS_XATTR
+       .setxattr       = generic_setxattr,
+       .getxattr       = generic_getxattr,
+       .listxattr      = generic_listxattr,
+       .removexattr    = generic_removexattr,
+       .permission     = devpts_permission,
+#endif
+};
 
 int devpts_pty_new(struct tty_struct *tty)
 {
index 95ae49b..b9888ba 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -47,9 +47,9 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
-#include <linux/ckrm_events.h>
-#include <linux/ckrm_mem_inline.h>
+#include <linux/ckrm.h>
 #include <linux/vs_memory.h>
+#include <linux/ckrm_mem.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -564,7 +564,18 @@ static int exec_mmap(struct mm_struct *mm)
        activate_mm(active_mm, mm);
        task_unlock(tsk);
        arch_pick_mmap_layout(mm);
-       ckrm_task_change_mm(tsk, old_mm, mm);
+#ifdef CONFIG_CKRM_RES_MEM
+       if (old_mm) {
+               spin_lock(&old_mm->peertask_lock);
+               list_del(&tsk->mm_peers);
+               ckrm_mem_evaluate_mm(old_mm);
+               spin_unlock(&old_mm->peertask_lock);
+       }
+       spin_lock(&mm->peertask_lock);
+       list_add_tail(&tsk->mm_peers, &mm->tasklist);
+       ckrm_mem_evaluate_mm(mm);
+       spin_unlock(&mm->peertask_lock);
+#endif
        if (old_mm) {
                if (active_mm != old_mm) BUG();
                mmput(old_mm);
@@ -739,11 +750,14 @@ no_thread_group:
                atomic_set(&newsighand->count, 1);
                memcpy(newsighand->action, oldsighand->action,
                       sizeof(newsighand->action));
+
                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
                spin_lock(&newsighand->siglock);
+
                current->sighand = newsighand;
                recalc_sigpending();
+
                spin_unlock(&newsighand->siglock);
                spin_unlock(&oldsighand->siglock);
                write_unlock_irq(&tasklist_lock);
index 9c3e032..c6e8f53 100644 (file)
@@ -282,24 +282,19 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        return error;
 }
 
-static int
-ext2_check_acl(struct inode *inode, int mask)
-{
-       struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
-
-       if (acl) {
-               int error = posix_acl_permission(inode, acl, mask);
-               posix_acl_release(acl);
-               return error;
-       }
-
-       return -EAGAIN;
-}
-
 int
 ext2_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
-       return generic_permission(inode, mask, ext2_check_acl);
+       int mode = inode->i_mode;
+
+#warning MEF Get new BME patch, which I believe pushes these checks higher
+       /* Nobody gets write access to a read-only fs */
+       if ((mask & MAY_WRITE) && (IS_RDONLY(inode) ||
+           (nd && MNT_IS_RDONLY(nd->mnt))) &&
+           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+               return -EROFS;
+
+       return generic_permission(inode, mask, 0);
 }
 
 /*
index 2aa5850..5fbe1ca 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/quotaops.h>
 #include <linux/sched.h>
 #include <linux/buffer_head.h>
+#include <linux/vs_base.h>
 #include <linux/vs_dlimit.h>
 
 /*
index 3272b02..3d9fa57 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/random.h>
-
+#include <linux/vs_base.h>
 #include <linux/vs_dlimit.h>
 
 #include "ext2.h"
@@ -470,7 +470,7 @@ struct inode *ext2_new_inode(struct inode *dir, int mode)
                return ERR_PTR(-ENOMEM);
 
        if (sb->s_flags & MS_TAGXID)
-               inode->i_xid = vx_current_xid();
+               inode->i_xid = current->xid;
        else
                inode->i_xid = 0;
 
index ffd30ed..ba3cc99 100644 (file)
@@ -1191,7 +1191,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
-#ifdef CONFIG_INOXID_INTERN
+#ifdef CONFIG_INOXID_GID32
        raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
 #endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
index 96bfa89..594c16c 100644 (file)
@@ -50,11 +50,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if ((oldflags & EXT2_IMMUTABLE_FL) ||
-                       ((flags ^ oldflags) & (EXT2_APPEND_FL |
-                       EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL))) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               return -EPERM;
+               if (((oldflags & EXT2_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) &
+                        (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL | EXT2_IUNLINK_FL)))
+                   && !capable(CAP_LINUX_IMMUTABLE)) {
+                       return -EPERM;          
                }
 
                flags = flags & EXT2_FL_USER_MODIFIABLE;
index bb62484..4c61667 100644 (file)
@@ -31,7 +31,6 @@
  */
 
 #include <linux/pagemap.h>
-#include <linux/vserver/xid.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
@@ -82,7 +81,6 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
                inode = iget(dir->i_sb, ino);
                if (!inode)
                        return ERR_PTR(-EACCES);
-               vx_propagate_xid(nd, inode);
        }
        if (inode)
                return d_splice_alias(inode, dentry);
index 0425fc8..a85b755 100644 (file)
@@ -287,24 +287,19 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
        return error;
 }
 
-static int
-ext3_check_acl(struct inode *inode, int mask)
-{
-       struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
-
-       if (acl) {
-               int error = posix_acl_permission(inode, acl, mask);
-               posix_acl_release(acl);
-               return error;
-       }
-
-       return -EAGAIN;
-}
-
 int
 ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
-       return generic_permission(inode, mask, ext3_check_acl);
+       int mode = inode->i_mode;
+
+#warning MEF Need new BME patch for 2.6.10
+       /* Nobody gets write access to a read-only fs */
+       if ((mask & MAY_WRITE) && (IS_RDONLY(inode) ||
+           (nd && MNT_IS_RDONLY(nd->mnt))) &&
+           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+               return -EROFS;
+
+       return generic_permission(inode, mask, 0);
 }
 
 /*
index 47fff3b..4839138 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/ext3_jbd.h>
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
+#include <linux/vs_base.h>
 #include <linux/vs_dlimit.h>
 
 /*
index b7d4e57..8c6456a 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/random.h>
 #include <linux/vs_dlimit.h>
 #include <linux/bitops.h>
-#include <linux/vs_dlimit.h>
 
 #include <asm/byteorder.h>
 
@@ -448,7 +447,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
                return ERR_PTR(-ENOMEM);
 
        if (sb->s_flags & MS_TAGXID)
-               inode->i_xid = vx_current_xid();
+               inode->i_xid = current->xid;
        else
                inode->i_xid = 0;
 
index 2a45280..fac1e98 100644 (file)
@@ -2582,7 +2582,7 @@ static int ext3_do_update_inode(handle_t *handle,
                raw_inode->i_uid_high = 0;
                raw_inode->i_gid_high = 0;
        }
-#ifdef CONFIG_INOXID_INTERN
+#ifdef CONFIG_INOXID_GID32
        raw_inode->i_raw_xid = cpu_to_le16(inode->i_xid);
 #endif
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
index aaf679c..a040edf 100644 (file)
@@ -60,11 +60,11 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if ((oldflags & EXT3_IMMUTABLE_FL) ||
-                       ((flags ^ oldflags) & (EXT3_APPEND_FL |
-                       EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL))) {
-                       if (!capable(CAP_LINUX_IMMUTABLE))
-                               return -EPERM;
+               if (((oldflags & EXT3_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) &
+                        (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL | EXT3_IUNLINK_FL)))
+                   && !capable(CAP_LINUX_IMMUTABLE)) {
+                       return -EPERM;          
                }
 
                /*
@@ -156,6 +156,38 @@ flags_err:
                        remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
                        return ret;
                }
+#endif
+#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
+       case EXT3_IOC_SETXID: {
+               handle_t *handle;
+               struct ext3_iloc iloc;
+               int xid;
+               int err;
+
+               /* fixme: if stealth, return -ENOTTY */
+               if (!capable(CAP_CONTEXT))
+                       return -EPERM;
+               if (IS_RDONLY(inode))
+                       return -EROFS;
+               if (!(inode->i_sb->s_flags & MS_TAGXID))
+                       return -ENOSYS;
+               if (get_user(xid, (int *) arg))
+                       return -EFAULT;
+
+               handle = ext3_journal_start(inode, 1);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               err = ext3_reserve_inode_write(handle, inode, &iloc);
+               if (err)
+                       return err;
+
+               inode->i_xid = (xid & 0xFFFF);
+               inode->i_ctime = CURRENT_TIME;
+
+               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+               ext3_journal_stop(handle);
+               return err;
+       }
 #endif
        case EXT3_IOC_GETRSVSZ:
                if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode)) {
@@ -224,39 +256,6 @@ flags_err:
                return err;
        }
 
-#if defined(CONFIG_VSERVER_LEGACY) && !defined(CONFIG_INOXID_NONE)
-       case EXT3_IOC_SETXID: {
-               handle_t *handle;
-               struct ext3_iloc iloc;
-               int xid;
-               int err;
-
-               /* fixme: if stealth, return -ENOTTY */
-               if (!capable(CAP_CONTEXT))
-                       return -EPERM;
-               if (IS_RDONLY(inode))
-                       return -EROFS;
-               if (!(inode->i_sb->s_flags & MS_TAGXID))
-                       return -ENOSYS;
-               if (get_user(xid, (int *) arg))
-                       return -EFAULT;
-
-               handle = ext3_journal_start(inode, 1);
-               if (IS_ERR(handle))
-                       return PTR_ERR(handle);
-               err = ext3_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       return err;
-
-               inode->i_xid = (xid & 0xFFFF);
-               inode->i_ctime = CURRENT_TIME;
-
-               err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-               ext3_journal_stop(handle);
-               return err;
-       }
-#endif
-
        default:
                return -ENOTTY;
        }
index b0b8e10..bfaf8a4 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/quotaops.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
-#include <linux/vserver/xid.h>
 #include "xattr.h"
 #include "acl.h"
 
@@ -990,7 +989,6 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 
                if (!inode)
                        return ERR_PTR(-EACCES);
-               vx_propagate_xid(nd, inode);
        }
        if (inode)
                return d_splice_alias(inode, dentry);
index d68ac33..75c94a4 100644 (file)
@@ -88,7 +88,7 @@ static int old_max;
                        /* f->f_version: 0 */
                        INIT_LIST_HEAD(&f->f_list);
                        // set_vx_info(&f->f_vx_info, current->vx_info);
-                       f->f_xid = vx_current_xid();
+                       f->f_xid = current->xid;
                        vx_files_inc(f);
                        return f;
                }
diff --git a/fs/hostfs/externfs.c b/fs/hostfs/externfs.c
new file mode 100644 (file)
index 0000000..884c33c
--- /dev/null
@@ -0,0 +1,1317 @@
+/* 
+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/stddef.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/blkdev.h>
+#include <linux/statfs.h>
+#include <asm/uaccess.h>
+#include "hostfs.h"
+#include "kern_util.h"
+#include "kern.h"
+#include "user_util.h"
+#include "2_5compat.h"
+#include "mem.h"
+#include "filehandle.h"
+
+/* Per-filesystem-type record: a list node, the mount operations and the
+ * embedded VFS file_system_type.
+ * NOTE(review): presumably one of these exists per registered externfs
+ * flavour - confirm against the registration code.
+ */
+struct externfs {
+       struct list_head list;
+       struct externfs_mount_ops *mount_ops;
+       struct file_system_type type;
+};
+
+/* Map a VFS inode back to the externfs_inode that embeds it as vfs_inode. */
+static inline struct externfs_inode *EXTERNFS_I(struct inode *inode)
+{
+       struct externfs_inode *ext;
+
+       ext = container_of(inode, struct externfs_inode, vfs_inode);
+       return ext;
+}
+
+/* The externfs_inode behind the dentry of an open file */
+#define file_externfs_i(file) EXTERNFS_I((file)->f_dentry->d_inode)
+
+/* Dentry delete hook; unconditionally answers 1 for every dentry. */
+int externfs_d_delete(struct dentry *dentry)
+{
+       return 1;
+}
+
+/* Dentry operations installed on dentries by externfs_lookup().  The table
+ * is empty, so every hook is left at its default.
+ * NOTE(review): externfs_d_delete() is defined just above but is not wired
+ * in here - confirm whether .d_delete was meant to be set.
+ */
+struct dentry_operations externfs_dentry_ops = {
+};
+
+/* Value reported as f_type by externfs_statfs() */
+#define EXTERNFS_SUPER_MAGIC 0x00c0ffee
+
+/* Forward declarations of operation tables that are referenced before they
+ * are defined.
+ */
+static struct inode_operations externfs_iops;
+static struct inode_operations externfs_dir_iops;
+static struct address_space_operations externfs_link_aops;
+
+/*
+ * Build the path of a dentry by walking d_parent links up to the dentry
+ * that is its own parent.  Each component is emitted as "/" followed by
+ * its name, so the top dentry itself yields the empty string.
+ *
+ * The buffer is kmalloc'd with 'extra' spare bytes beyond the terminating
+ * NUL so that callers can append or prepend to it.  Returns NULL when the
+ * allocation fails; the caller kfree()s the result.
+ */
+static char *dentry_name(struct dentry *dentry, int extra)
+{
+       struct dentry *d;
+       char *path;
+       int pos = 0;
+
+       /* First pass: total up "/" plus the name for every level */
+       for(d = dentry; d->d_parent != d; d = d->d_parent)
+               pos += d->d_name.len + 1;
+
+       path = kmalloc(pos + extra + 1, GFP_KERNEL);
+       if(path == NULL)
+               return NULL;
+       path[pos] = '\0';
+
+       /* Second pass: fill the components in from the leaf backwards */
+       for(d = dentry; d->d_parent != d; d = d->d_parent){
+               pos -= d->d_name.len + 1;
+               path[pos] = '/';
+               strncpy(&path[pos + 1], d->d_name.name, d->d_name.len);
+       }
+
+       return path;
+}
+
+/*
+ * Path of an inode, taken from the first dentry on its i_dentry alias
+ * list.  'extra' and the return value are as for dentry_name().
+ */
+char *inode_name(struct inode *ino, int extra)
+{
+       struct dentry *alias = list_entry(ino->i_dentry.next, struct dentry,
+                                         d_alias);
+
+       return dentry_name(alias, extra);
+}
+
+/*
+ * Path of an inode with 'prefix' glued on the front.  Returns a kmalloc'd
+ * string, or NULL if the path could not be allocated.
+ */
+char *inode_name_prefix(struct inode *inode, char *prefix)
+{
+       int plen = strlen(prefix);
+       char *path = inode_name(inode, plen);
+
+       if(path != NULL){
+               /* Slide the path (and its NUL) right, then drop the prefix
+                * into the gap that opens up.
+                */
+               memmove(path + plen, path, strlen(path) + 1);
+               memcpy(path, prefix, strlen(prefix));
+       }
+       return path;
+}
+
+/*
+ * Stat 'name' through the filesystem's stat_file operation and copy the
+ * results into the inode.  Returns 0, or the error from stat_file, in
+ * which case only i_uid/i_gid may have been touched.
+ */
+static int read_name(struct inode *ino, char *name)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       /* stat_file fills in plain ints and longs which are then assigned
+        * to the inode; handing it pointers to the real inode fields and
+        * having them treated as int * breaks on big-endian machines.
+        */
+       unsigned long long size, inum, blocks;
+       unsigned long atime, mtime, ctime;
+       int mode, nlink, blksize;
+       dev_t rdev;
+       int err;
+
+       err = (*ops->stat_file)(name, ino->i_sb->s_fs_info, &rdev, &inum,
+                               &mode, &nlink, &ino->i_uid, &ino->i_gid,
+                               &size, &atime, &mtime, &ctime, &blksize,
+                               &blocks);
+       if(err)
+               return err;
+
+       /* Only whole seconds are available */
+       ino->i_atime.tv_sec = atime;
+       ino->i_atime.tv_nsec = 0;
+       ino->i_ctime.tv_sec = ctime;
+       ino->i_ctime.tv_nsec = 0;
+       ino->i_mtime.tv_sec = mtime;
+       ino->i_mtime.tv_nsec = 0;
+
+       ino->i_ino = inum;
+       ino->i_rdev = rdev;
+       ino->i_mode = mode;
+       ino->i_nlink = nlink;
+       ino->i_size = size;
+       ino->i_blksize = blksize;
+       ino->i_blocks = blocks;
+       return 0;
+}
+
+/*
+ * Read the target of the symlink 'link' with do_read_link, doubling the
+ * buffer until the result is shorter than the buffer.  An absolute target
+ * is returned as is; a relative one is appended to the directory part of
+ * 'link'.  Note that 'link' is truncated in place after its last '/'.
+ *
+ * Returns a kmalloc'd string or an ERR_PTR().
+ */
+static char *follow_link(char *link, 
+                        int (*do_read_link)(char *path, int uid, int gid,
+                                            char *buf, int size, 
+                                            struct externfs_data *ed),
+                        int uid, int gid, struct externfs_data *ed)
+{
+       int size = 64, n;
+       char *target, *resolved, *slash;
+
+       for(;;){
+               target = kmalloc(size, GFP_KERNEL);
+               if(target == NULL)
+                       return ERR_PTR(-ENOMEM);
+
+               n = (*do_read_link)(link, uid, gid, target, size, ed);
+               if(n < size)
+                       break;
+
+               /* Possibly truncated - try again with twice the room */
+               kfree(target);
+               size *= 2;
+       }
+       if(n < 0)
+               goto fail;
+
+       if(*target == '/')
+               return target;
+
+       slash = strrchr(link, '/');
+       if(slash == NULL)
+               return target;
+
+       /* Keep only the directory part of the link's own path */
+       slash[1] = '\0';
+       size = strlen(link) + strlen(target) + 1;
+
+       resolved = kmalloc(size, GFP_KERNEL);
+       if(resolved == NULL){
+               n = -ENOMEM;
+               goto fail;
+       }
+
+       sprintf(resolved, "%s%s", link, target);
+       kfree(target);
+       return resolved;
+
+ fail:
+       kfree(target);
+       return ERR_PTR(n);
+}
+
+/*
+ * Refresh an inode's attributes from the file it names, going through
+ * follow_link() first when that file is a symlink.  Does nothing (and
+ * returns 0) while the inode has no dentry attached.
+ */
+static int read_inode(struct inode *ino)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *name, *target;
+       int err, type;
+
+       /* iget() calls us before any dentry has been allocated, so there
+        * is no name to work from yet.
+        */
+       if(list_empty(&ino->i_dentry))
+               return 0;
+
+       name = inode_name(ino, 0);
+       if(name == NULL)
+               return -ENOMEM;
+
+       type = (*ops->file_type)(name, NULL, ed);
+       if(type < 0){
+               err = type;
+               goto done;
+       }
+
+       if(type == OS_TYPE_SYMLINK){
+               target = follow_link(name, ops->read_link, current->fsuid,
+                                    current->fsgid, ed);
+               if(IS_ERR(target)){
+                       err = PTR_ERR(target);
+                       goto done;
+               }
+               kfree(name);
+               name = target;
+       }
+
+       err = read_name(ino, name);
+ done:
+       kfree(name);
+       return err;
+}
+
+/*
+ * statfs superblock operation: ask the filesystem's statfs hook and report
+ * EXTERNFS_SUPER_MAGIC as the type.  Returns 0 or the hook's error.
+ */
+int externfs_statfs(struct super_block *sb, struct kstatfs *sf)
+{
+       struct externfs_data *ed = sb->s_fs_info;
+       /* The hook reports these as long long; they are collected in
+        * temporaries and then assigned to the kstatfs fields, which may
+        * be narrower.
+        */
+       long long blocks, bfree, bavail, files, ffree;
+       int err;
+
+       err = (*ed->file_ops->statfs)(&sf->f_bsize, &blocks, &bfree,
+                                     &bavail, &files, &ffree,
+                                     &sf->f_fsid, sizeof(sf->f_fsid),
+                                     &sf->f_namelen, sf->f_spare, ed);
+       if(err)
+               return err;
+
+       sf->f_type = EXTERNFS_SUPER_MAGIC;
+       sf->f_blocks = blocks;
+       sf->f_bfree = bfree;
+       sf->f_bavail = bavail;
+       sf->f_files = files;
+       sf->f_ffree = ffree;
+       return 0;
+}
+
+/*
+ * alloc_inode superblock operation.  The mount's init_file hook supplies
+ * the externfs_inode; it is then reset so that only .ops is set, and the
+ * embedded VFS inode is initialized.  Returns NULL if init_file fails.
+ */
+static struct inode *externfs_alloc_inode(struct super_block *sb)
+{
+       struct externfs_data *ed = sb->s_fs_info;
+       struct externfs_inode *ext = (*ed->mount_ops->init_file)(ed);
+
+       if(ext != NULL){
+               *ext = ((struct externfs_inode) { .ops  = ed->file_ops });
+               inode_init_once(&ext->vfs_inode);
+               return &ext->vfs_inode;
+       }
+       return NULL;
+}
+
+/* destroy_inode superblock operation: hand the inode, along with its
+ * current size, to the filesystem's close_file hook.
+ */
+static void externfs_destroy_inode(struct inode *inode)
+{
+       struct externfs_inode *ei = EXTERNFS_I(inode);
+       struct externfs_file_ops *ops = ei->ops;
+
+       (*ops->close_file)(ei, inode->i_size);
+}
+
+/* read_inode superblock operation; the result of read_inode() is dropped
+ * because this hook returns void.
+ */
+static void externfs_read_inode(struct inode *inode)
+{
+       (void) read_inode(inode);
+}
+
+/* Superblock operations: inode allocation and teardown go through the
+ * externfs hooks above, as do read_inode and statfs.
+ */
+static struct super_operations externfs_sbops = { 
+       .alloc_inode    = externfs_alloc_inode,
+       .destroy_inode  = externfs_destroy_inode,
+       .read_inode     = externfs_read_inode,
+       .statfs         = externfs_statfs,
+};
+
+/*
+ * readdir file operation.  Opens the directory through open_dir, feeds
+ * entries to filldir starting at file->f_pos and advances f_pos past each
+ * entry that filldir accepted.  Returns 0 once the directory is exhausted
+ * or filldir refuses an entry, -ENOMEM or the open_dir error otherwise.
+ */
+int externfs_readdir(struct file *file, void *ent, filldir_t filldir)
+{
+       struct externfs_file_ops *ops = file_externfs_i(file)->ops;
+       struct externfs_data *ed = file->f_dentry->d_inode->i_sb->s_fs_info;
+       unsigned long long pos, ino;
+       char *name;
+       void *dir;
+       int len;
+
+       name = dentry_name(file->f_dentry, 0);
+       if(name == NULL)
+               return -ENOMEM;
+
+       dir = (*ops->open_dir)(name, current->fsuid, current->fsgid, ed);
+       kfree(name);
+       if(IS_ERR(dir))
+               return PTR_ERR(dir);
+
+       pos = file->f_pos;
+       for(;;){
+               name = (*ops->read_dir)(dir, &pos, &ino, &len, ed);
+               if(name == NULL)
+                       break;
+
+               /* A non-zero answer from filldir ends the walk */
+               if((*filldir)(ent, name, len, file->f_pos, ino, DT_UNKNOWN))
+                       break;
+               file->f_pos = pos;
+       }
+
+       (*ops->close_dir)(dir, ed);
+       return 0;
+}
+
+/* open file operation: bumps the inode's link count and always succeeds. */
+int externfs_file_open(struct inode *ino, struct file *file)
+{
+       ino->i_nlink += 1;
+       return 0;
+}
+
+/* fsync file operation: invokes the truncate_file hook with the inode's
+ * current size and returns its result.
+ */
+int externfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+       struct externfs_file_ops *ops = file_externfs_i(file)->ops;
+       struct inode *inode = dentry->d_inode;
+       struct externfs_inode *ei = EXTERNFS_I(inode);
+       struct externfs_data *ed = inode->i_sb->s_fs_info;
+
+       return (*ops->truncate_file)(ei, inode->i_size, ed);
+}
+
+/* File operations for non-directory inodes: data access goes through the
+ * generic page-cache helpers; only open and fsync are externfs-specific.
+ */
+static struct file_operations externfs_file_fops = {
+       .llseek         = generic_file_llseek,
+       .read           = generic_file_read,
+       .write          = generic_file_write,
+       .mmap           = generic_file_mmap,
+       .open           = externfs_file_open,
+       .release        = NULL,
+       .fsync          = externfs_fsync,
+};
+
+/* File operations for directory inodes */
+static struct file_operations externfs_dir_fops = {
+       .readdir        = externfs_readdir,
+       .read           = generic_read_dir,
+};
+
+/* State carried from externfs_writepage() to externfs_finish_writepage() */
+struct wp_info {
+       struct page *page;              /* page being written */
+       int count;                      /* bytes the write was asked for */
+       unsigned long long start;       /* file offset of the page */
+       unsigned long long size;        /* i_size when the write was issued */
+       /* truncate_file hook, used when the write ran past 'size' */
+       int (*truncate)(struct externfs_inode *ext, __u64 size, 
+                       struct externfs_data *ed);
+       struct externfs_inode *ei;
+       struct externfs_data *ed;
+};
+
+/*
+ * Completion callback for externfs_writepage().  'res' is the write's
+ * result and 'arg' the wp_info allocated by externfs_writepage(); the
+ * page is unmapped and unlocked and the wp_info freed here.
+ */
+static void externfs_finish_writepage(char *buffer, int res, void *arg)
+{
+       struct wp_info *info = arg;
+       struct page *page = info->page;
+
+       if(res != info->count){
+               /* Failed or short write */
+               SetPageError(page);
+               ClearPageUptodate(page);
+       }
+       else {
+               ClearPageError(page);
+               /* The write went past the recorded size, so cut the file
+                * back to that size.
+                */
+               if(info->start + res > info->size)
+                       (*info->truncate)(info->ei, info->size, info->ed);
+       }
+
+       kunmap(page);
+       unlock_page(page);
+       kfree(info);
+}
+
+/*
+ * writepage address_space operation.  Starts a write of one full page via
+ * the write_file hook, with externfs_finish_writepage() as the completion
+ * callback.  Returns -EIO for a page entirely beyond i_size, -ENOMEM if the
+ * completion record cannot be allocated, otherwise whatever write_file
+ * returns.
+ */
+int externfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+       struct address_space *mapping = page->mapping;
+       struct inode *inode = mapping->host;
+       struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops;
+       struct wp_info *wp;
+       struct externfs_data *ed = inode->i_sb->s_fs_info;
+       char *buffer;
+       unsigned long long base;
+       int count = PAGE_CACHE_SIZE;
+       int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+       int err, offset;
+
+       base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
+
+       /* If we are entirely outside the file, then return an error */
+       err = -EIO;
+       offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+       if (page->index > end_index || 
+           ((page->index == end_index) && !offset))
+               goto out_unlock;
+
+       err = -ENOMEM;
+       wp = kmalloc(sizeof(*wp), GFP_KERNEL);
+       if(wp == NULL)
+               goto out_unlock;
+
+       /* Everything the completion callback needs; it frees this record */
+       *wp = ((struct wp_info) { .page         = page,
+                                 .count        = count,
+                                 .start        = base,
+                                 .size         = inode->i_size,
+                                 .truncate     = ops->truncate_file,
+                                 .ei           = EXTERNFS_I(inode),
+                                 .ed           = ed });
+
+       /* The page stays mapped and locked until the callback runs.
+        * NOTE(review): this relies on write_file always invoking the
+        * callback, including when it returns an error - confirm.
+        */
+       buffer = kmap(page);
+       err = (*ops->write_file)(EXTERNFS_I(inode), base, buffer, 0, 
+                                count, externfs_finish_writepage, wp, ed);
+
+       return err;
+
+ out_unlock:
+       unlock_page(page);
+       return(err);
+}
+
+/*
+ * Completion callback for externfs_readpage().  'res' is the number of
+ * bytes read (negative on error) and 'arg' the page.  On success the rest
+ * of the page is zeroed and the page marked up to date; in either case the
+ * page is unmapped and unlocked.
+ */
+static void externfs_finish_readpage(char *buffer, int res, void *arg)
+{
+       struct page *page = arg;
+       struct inode *inode;
+
+       if(res >= 0){
+               inode = page->mapping->host;
+
+               /* On the page holding EOF, valid data stops at i_size */
+               if(inode->i_size >> PAGE_CACHE_SHIFT == page->index)
+                       res = inode->i_size % PAGE_CACHE_SIZE;
+
+               memset(&buffer[res], 0, PAGE_CACHE_SIZE - res);
+
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+               if(PageError(page))
+                       ClearPageError(page);
+       }
+       else SetPageError(page);
+
+       kunmap(page);
+       unlock_page(page);
+}
+
+/*
+ * readpage address_space operation.  If the filesystem provides
+ * map_file_page, the page is mapped through that hook; otherwise a read of
+ * one page is started via read_file with externfs_finish_readpage() as the
+ * completion callback.  Positive results are folded to 0; negative ones
+ * are returned unchanged.
+ */
+static int externfs_readpage(struct file *file, struct page *page)
+{
+       struct inode *ino = page->mapping->host;
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *buffer;
+       long long start;
+       int err = 0;
+
+       start = (long long) page->index << PAGE_CACHE_SHIFT;
+       buffer = kmap(page);
+
+       if(ops->map_file_page != NULL){
+               /* XXX What happens when PAGE_SIZE != PAGE_CACHE_SIZE? */
+               /* NOTE(review): nothing on this path unlocks or kunmaps the
+                * page or marks it up to date; presumably map_file_page
+                * takes care of that - confirm.
+                */
+               err = (*ops->map_file_page)(file_externfs_i(file), start, 
+                                           buffer, file->f_mode & FMODE_WRITE,
+                                           ed);
+               if(!err)
+                       err = PAGE_CACHE_SIZE;
+       }
+       else err = (*ops->read_file)(file_externfs_i(file), start, buffer,
+                                    PAGE_CACHE_SIZE, 0, 0, 
+                                    externfs_finish_readpage, page, ed);
+
+       if(err > 0)
+               err = 0;
+       return(err);
+}
+
+/* Used by externfs_prepare_write() to wait for its read to complete */
+struct writepage_info {
+       struct semaphore sem;   /* released by externfs_finish_prepare() */
+       int res;                /* result passed to the completion callback */
+};
+
+/* Completion callback for the read started by externfs_prepare_write():
+ * record the result and wake the waiter.
+ */
+static void externfs_finish_prepare(char *buffer, int res, void *arg)
+{
+       struct writepage_info *info = arg;
+
+       info->res = res;
+       up(&info->sem);
+}
+
+/*
+ * prepare_write address_space operation.  Nothing to do if the page is
+ * already up to date.  Otherwise the page is either mapped through
+ * map_file_page, or read via read_file and waited for synchronously.
+ * Returns 0 on success or a negative error.
+ */
+int externfs_prepare_write(struct file *file, struct page *page, 
+                        unsigned int from, unsigned int to)
+{
+       struct address_space *mapping = page->mapping;
+       struct inode *inode = mapping->host;
+       struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops;
+       struct externfs_data *ed = inode->i_sb->s_fs_info;
+       char *buffer;
+       long long start;
+       int err;
+       struct writepage_info wp;
+
+       if(PageUptodate(page))
+               return(0);
+
+       start = (long long) page->index << PAGE_CACHE_SHIFT;
+       buffer = kmap(page);
+
+       if(ops->map_file_page != NULL){
+               err = (*ops->map_file_page)(file_externfs_i(file), start, 
+                                           buffer, file->f_mode & FMODE_WRITE,
+                                           ed);
+               goto out;
+               
+       }
+
+       /* The semaphore starts out held; externfs_finish_prepare() releases
+        * it, so down() below blocks until the read has completed.
+        * NOTE(review): down() is reached whatever read_file returns, which
+        * assumes the callback always runs, even on failure - confirm.
+        */
+       init_MUTEX_LOCKED(&wp.sem);
+       err = (*ops->read_file)(file_externfs_i(file), start, buffer,
+                               PAGE_CACHE_SIZE, from, to, 
+                               externfs_finish_prepare, &wp, ed);
+       down(&wp.sem);
+       if(err < 0) 
+               goto out;
+
+       err = wp.res;
+       if(err < 0)
+               goto out;
+
+       /* Zero the parts of the page outside [from, to) */
+       if(from > 0)
+               memset(buffer, 0, from);
+       if(to < PAGE_CACHE_SIZE)
+               memset(buffer + to, 0, PAGE_CACHE_SIZE - to);
+
+       SetPageUptodate(page);
+       err = 0;
+ out:
+       kunmap(page);
+       return(err);
+}
+
+/*
+ * commit_write address_space operation.  When the filesystem does not map
+ * pages directly, i_size is grown to cover the bytes just written.  The
+ * page is then marked dirty.
+ *
+ * Returns the number of bytes committed (to - from).
+ */
+static int externfs_commit_write(struct file *file, struct page *page, 
+                              unsigned from, unsigned to)
+{
+       struct address_space *mapping = page->mapping;
+       struct inode *inode = mapping->host;
+       struct externfs_file_ops *ops = EXTERNFS_I(inode)->ops;
+       unsigned long long size;
+       long long start;
+
+       /* Widen the index before shifting, as externfs_readpage() and
+        * externfs_prepare_write() do; shifting first truncates the offset
+        * when page->index is 32 bits wide.
+        */
+       start = (long long) page->index << PAGE_CACHE_SHIFT;
+
+       if(ops->map_file_page == NULL){
+               size = start + to;
+               if(size > inode->i_size){
+                       inode->i_size = size;
+                       mark_inode_dirty(inode);
+               }
+       }
+
+       set_page_dirty(page);
+       return(to - from);
+}
+
+/* releasepage address_space operation: drops the physmem mapping for the
+ * page's address and returns 0.
+ */
+static int externfs_removepage(struct page *page, int gfpmask)
+{
+       void *addr = page_address(page);
+
+       physmem_remove_mapping(addr);
+       return 0;
+}
+
+/* Address space operations used for everything except symlinks (see
+ * init_inode()).
+ */
+static struct address_space_operations externfs_aops = {
+       .writepage      = externfs_writepage,
+       .readpage       = externfs_readpage,
+       .releasepage    = externfs_removepage,
+/*     .set_page_dirty = __set_page_dirty_nobuffers, */
+       .prepare_write  = externfs_prepare_write,
+       .commit_write   = externfs_commit_write
+};
+
+/*
+ * Set up a freshly obtained inode: pick its inode, file and address-space
+ * operation tables according to the type reported by file_type for the
+ * dentry's path, then call the filesystem's open_file hook.  A NULL dentry
+ * is treated as a directory.
+ *
+ * Returns 0 when open_file reports -EISDIR, -ENOENT or -ENXIO, -ENOMEM if
+ * the path cannot be built, and otherwise the open_file result.
+ */
+static int init_inode(struct inode *inode, struct dentry *dentry)
+{
+       char *name = NULL;
+       int type, err = -ENOMEM, rdev;
+       struct externfs_inode *ext = EXTERNFS_I(inode);
+       struct externfs_file_ops *ops = ext->ops;
+       struct externfs_data *ed = inode->i_sb->s_fs_info;      
+
+       if(dentry){
+               name = dentry_name(dentry, 0);
+               if(name == NULL)
+                       goto out;
+               /* A negative (error) result is not checked here; it falls
+                * through to the non-directory, non-symlink defaults below.
+                */
+               type = (*ops->file_type)(name, &rdev, ed);
+       }
+       else type = OS_TYPE_DIR;
+
+       err = 0;
+       if(type == OS_TYPE_SYMLINK)
+               inode->i_op = &page_symlink_inode_operations;
+       else if(type == OS_TYPE_DIR)
+               inode->i_op = &externfs_dir_iops;
+       else inode->i_op = &externfs_iops;
+
+       if(type == OS_TYPE_DIR) inode->i_fop = &externfs_dir_fops;
+       else inode->i_fop = &externfs_file_fops;
+
+       if(type == OS_TYPE_SYMLINK) 
+               inode->i_mapping->a_ops = &externfs_link_aops;
+       else inode->i_mapping->a_ops = &externfs_aops;
+
+       /* rdev is only set by file_type, and only read for device nodes */
+       switch (type) {
+       case OS_TYPE_CHARDEV:
+               init_special_inode(inode, S_IFCHR, rdev);
+               break;
+       case OS_TYPE_BLOCKDEV:
+               init_special_inode(inode, S_IFBLK, rdev);
+               break;
+       case OS_TYPE_FIFO:
+               init_special_inode(inode, S_IFIFO, 0);
+               break;
+       case OS_TYPE_SOCK:
+               init_special_inode(inode, S_IFSOCK, 0);
+               break;
+       case OS_TYPE_SYMLINK:
+               inode->i_mode = S_IFLNK | S_IRWXUGO;
+       }
+
+       /* NOTE(review): any result other than -EISDIR, -ENOENT or -ENXIO,
+        * including 0, takes the out_put path, which drops an inode
+        * reference and returns that result.  Callers in this file also
+        * iput() the inode when this function returns non-zero - confirm
+        * the reference counting is what was intended.
+        */
+       err = (*ops->open_file)(ext, name, current->fsuid, current->fsgid, 
+                               inode, ed);
+       if((err != -EISDIR) && (err != -ENOENT) && (err != -ENXIO))
+               goto out_put;
+
+       err = 0;
+
+ out_free:
+       kfree(name);
+ out:
+       return(err);
+
+ out_put:
+       iput(inode);
+       goto out_free;
+}
+
+/*
+ * create inode operation: make a new file for 'dentry' in directory 'dir'.
+ * A fresh inode is set up with init_inode(), the file is created through
+ * the create_file hook, its attributes are read back with read_name(), and
+ * the dentry is instantiated.  If reading the attributes fails the file is
+ * unlinked again.
+ *
+ * Returns 0 on success or a negative error.
+ */
+int externfs_create(struct inode *dir, struct dentry *dentry, int mode, 
+                 struct nameidata *nd)
+{
+       struct externfs_inode *ext = EXTERNFS_I(dir);
+       struct externfs_file_ops *ops = ext->ops;
+       struct inode *inode;
+       struct externfs_data *ed = dir->i_sb->s_fs_info;
+       char *name;
+       int err = -ENOMEM;
+
+       inode = iget(dir->i_sb, 0);
+       if(inode == NULL) 
+               goto out;
+
+       err = init_inode(inode, dentry);
+       if(err) 
+               goto out_put;
+       
+       err = -ENOMEM;
+       name = dentry_name(dentry, 0);
+       if(name == NULL)
+               goto out_put;
+
+       /* Pass the caller's uid and gid, as every other hook call in this
+        * file does; the gid slot used to be given fsuid.
+        */
+       err = (*ops->create_file)(ext, name, mode, current->fsuid, 
+                                 current->fsgid, inode, ed);
+       if(err)
+               goto out_free;
+
+       err = read_name(inode, name);
+       if(err)
+               goto out_rm;
+
+       inode->i_nlink++;
+       d_instantiate(dentry, inode);
+       kfree(name);
+ out:
+       return(err);
+
+ out_rm:
+       (*ops->unlink_file)(name, ed);
+ out_free:
+       kfree(name);
+ out_put:
+       inode->i_nlink = 0;
+       iput(inode);
+       goto out;
+}
+/*
+ * lookup inode operation.  Sets up an inode for 'dentry' and reads its
+ * attributes; if the file does not exist (-ENOENT from read_name) the
+ * dentry is added as a negative one.  Returns NULL on success (including
+ * the negative case) or an ERR_PTR().
+ */
+struct dentry *externfs_lookup(struct inode *ino, struct dentry *dentry, 
+                              struct nameidata *nd)
+{
+       struct inode *inode;
+       char *name;
+       int err;
+
+       inode = iget(ino->i_sb, 0);
+       if(inode == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       err = init_inode(inode, dentry);
+       if(err)
+               goto drop;
+
+       name = dentry_name(dentry, 0);
+       if(name == NULL){
+               err = -ENOMEM;
+               goto drop;
+       }
+
+       err = read_name(inode, name);
+       kfree(name);
+       if(err && (err != -ENOENT))
+               goto drop;
+
+       if(err){
+               /* No such file - release the inode, keep a negative dentry */
+               inode->i_nlink = 0;
+               iput(inode);
+               inode = NULL;
+       }
+       d_add(dentry, inode);
+       dentry->d_op = &externfs_dentry_ops;
+       return NULL;
+
+ drop:
+       inode->i_nlink = 0;
+       iput(inode);
+       return ERR_PTR(err);
+}
+
+/*
+ * Path of 'dentry' expressed as the path of directory inode 'ino' plus
+ * "/" plus the dentry's own name.  Returns a kmalloc'd string, or NULL if
+ * the allocation fails.
+ */
+static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
+{
+       char *path;
+       int dirlen;
+
+       /* Reserve room for the separator and the final component */
+       path = inode_name(ino, dentry->d_name.len + 1);
+       if(path == NULL)
+               return NULL;
+
+       strcat(path, "/");
+       dirlen = strlen(path);
+       strncat(path, dentry->d_name.name, dentry->d_name.len);
+       path[dirlen + dentry->d_name.len] = '\0';
+       return path;
+}
+
+/*
+ * link inode operation: create 'from' (a new name in directory 'ino') as
+ * a hard link to the inode behind 'to'.  On success the new dentry is
+ * instantiated with that inode and its link and reference counts are
+ * raised.  Returns 0 or a negative error.
+ */
+int externfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *from_name, *to_name;
+       int err;
+
+       from_name = inode_dentry_name(ino, from);
+       if(from_name == NULL)
+               return -ENOMEM;
+
+       to_name = dentry_name(to, 0);
+       if(to_name == NULL){
+               kfree(from_name);
+               return -ENOMEM;
+       }
+
+       err = (*ops->link_file)(to_name, from_name, current->fsuid,
+                               current->fsgid, ed);
+       if(!err){
+               d_instantiate(from, to->d_inode);
+               to->d_inode->i_nlink++;
+               atomic_inc(&to->d_inode->i_count);
+       }
+
+       kfree(to_name);
+       kfree(from_name);
+       return err;
+}
+
+/*
+ * unlink inode operation: remove 'dentry' from directory 'ino' through the
+ * unlink_file hook.  If this is the inode's last link and the filesystem
+ * provides an 'invisible' hook, that hook is called first.
+ *
+ * Returns 0 on success, -ENOMEM if the path cannot be built, or the error
+ * from unlink_file.  The link count is only dropped when the unlink
+ * actually succeeded.
+ */
+int externfs_unlink(struct inode *ino, struct dentry *dentry)
+{
+       struct inode *inode;
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *file;
+       int err;
+
+       file = inode_dentry_name(ino, dentry);
+       if(file == NULL) 
+               return(-ENOMEM);
+
+       inode = dentry->d_inode;
+       if((inode->i_nlink == 1) && (ops->invisible != NULL))
+               (*ops->invisible)(EXTERNFS_I(inode));
+
+       err = (*ops->unlink_file)(file, ed);
+       kfree(file);
+
+       /* A failed unlink leaves the name in place, so the count must not
+        * change.
+        */
+       if(!err)
+               inode->i_nlink--;
+
+       return(err);
+}
+
+/*
+ * symlink inode operation: create 'dentry' in directory 'ino' as a
+ * symlink to 'to' via the make_symlink hook, then set up and attach an
+ * inode for it.  Returns 0 or a negative error.
+ */
+int externfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       struct inode *inode;
+       char *path;
+       int err;
+
+       path = inode_dentry_name(ino, dentry);
+       if(path == NULL)
+               return -ENOMEM;
+
+       err = (*ops->make_symlink)(path, to, current->fsuid, current->fsgid,
+                                  ed);
+       kfree(path);
+       if(err)
+               return err;
+
+       inode = iget(ino->i_sb, 0);
+       if(inode == NULL)
+               return -ENOMEM;
+
+       err = init_inode(inode, dentry);
+       if(err){
+               iput(inode);
+               return err;
+       }
+
+       d_instantiate(dentry, inode);
+       inode->i_nlink++;
+       return err;
+}
+
+/*
+ * mkdir inode operation: create directory 'dentry' inside directory 'ino'
+ * through the make_dir hook, then set up an inode for it, read its
+ * attributes and attach it to the dentry.  The parent's link count is
+ * raised on success.
+ *
+ * Returns 0 on success or a negative error, including any error reported
+ * by make_dir itself.
+ */
+int externfs_make_dir(struct inode *ino, struct dentry *dentry, int mode)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct inode *inode;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *file;
+       int err = -ENOMEM;
+
+       file = inode_dentry_name(ino, dentry);
+       if(file == NULL) 
+               goto out;
+       err = (*ops->make_dir)(file, mode, current->fsuid, current->fsgid, ed);
+       /* Stop here if the directory could not be created; this result
+        * used to be overwritten without being looked at.
+        */
+       if(err)
+               goto out_free;
+
+       err = -ENOMEM;
+       inode = iget(ino->i_sb, 0);
+       if(inode == NULL) 
+               goto out_free;
+
+       err = init_inode(inode, dentry);
+       if(err) 
+               goto out_put;
+       
+       err = read_name(inode, file);
+       if(err)
+               goto out_put;
+
+       kfree(file);
+       d_instantiate(dentry, inode);
+       inode->i_nlink = 2;
+       inode->i_mode = S_IFDIR | mode;
+
+       ino->i_nlink++;
+ out:
+       return(err);
+ out_put:
+       inode->i_nlink = 0;
+       iput(inode);
+ out_free:
+       kfree(file);
+       goto out;       
+}
+
+/*
+ * rmdir inode operation: remove directory 'dentry' from directory 'ino'
+ * through the remove_dir hook.
+ *
+ * Returns 0 on success, -ENOMEM if the path cannot be built, or the error
+ * from remove_dir.  Link counts are only adjusted when the removal
+ * actually succeeded.
+ */
+int externfs_remove_dir(struct inode *ino, struct dentry *dentry)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *file;
+       int err;
+
+       file = inode_dentry_name(ino, dentry);
+       if(file == NULL) 
+               return(-ENOMEM);
+       err = (*ops->remove_dir)(file, current->fsuid, current->fsgid, ed);
+       kfree(file);
+       /* A failed rmdir leaves the directory in place */
+       if(err)
+               return(err);
+
+       dentry->d_inode->i_nlink = 0;
+       ino->i_nlink--;
+       return(0);
+}
+
+/*
+ * mknod inode operation: create a special file for 'dentry' in directory
+ * 'dir' through the make_node hook, read its attributes back and attach
+ * the inode.  If reading the attributes fails, the node is unlinked again.
+ * Returns 0 or a negative error.
+ */
+int externfs_make_node(struct inode *dir, struct dentry *dentry, int mode, 
+                    dev_t dev)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(dir)->ops;
+       struct externfs_data *ed = dir->i_sb->s_fs_info;
+       struct inode *inode;
+       char *path;
+       int err;
+
+       inode = iget(dir->i_sb, 0);
+       if(inode == NULL)
+               return -ENOMEM;
+
+       err = init_inode(inode, dentry);
+       if(err)
+               goto drop_inode;
+
+       path = dentry_name(dentry, 0);
+       if(path == NULL){
+               err = -ENOMEM;
+               goto drop_inode;
+       }
+
+       init_special_inode(inode, mode, dev);
+       err = (*ops->make_node)(path, mode & S_IRWXUGO, current->fsuid,
+                               current->fsgid, mode & S_IFMT, MAJOR(dev),
+                               MINOR(dev), ed);
+       if(err)
+               goto free_path;
+
+       err = read_name(inode, path);
+       if(err)
+               goto remove_node;
+
+       inode->i_nlink++;
+       d_instantiate(dentry, inode);
+       kfree(path);
+       return err;
+
+ remove_node:
+       (*ops->unlink_file)(path, ed);
+ free_path:
+       kfree(path);
+ drop_inode:
+       inode->i_nlink = 0;
+       iput(inode);
+       return err;
+}
+
+/*
+ * rename inode operation: move 'from' (in directory 'from_ino') to 'to'
+ * (in directory 'to_ino') through the rename_file hook.
+ *
+ * Returns 0 on success, -ENOMEM if a path cannot be built, or the error
+ * from rename_file.  The directories' link counts are only adjusted when
+ * the rename actually succeeded.
+ */
+int externfs_rename(struct inode *from_ino, struct dentry *from,
+                 struct inode *to_ino, struct dentry *to)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(from_ino)->ops;
+       struct externfs_data *ed = from_ino->i_sb->s_fs_info;
+       char *from_name, *to_name;
+       int err;
+
+       from_name = inode_dentry_name(from_ino, from);
+       if(from_name == NULL)
+               return(-ENOMEM);
+       to_name = inode_dentry_name(to_ino, to);
+       if(to_name == NULL){
+               kfree(from_name);
+               return(-ENOMEM);
+       }
+       err = (*ops->rename_file)(from_name, to_name, ed);
+       kfree(from_name);
+       kfree(to_name);
+       /* Nothing moved, so leave the counts alone */
+       if(err)
+               return(err);
+
+       from_ino->i_nlink--;
+       to_ino->i_nlink++;
+       return(0);
+}
+
+/* truncate inode operation: pass the inode's current i_size to the
+ * truncate_file hook; the hook's result is not reported.
+ */
+void externfs_truncate(struct inode *ino)
+{
+       struct externfs_inode *ei = EXTERNFS_I(ino);
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+
+       (*ei->ops->truncate_file)(ei, ino->i_size, ed);
+}
+
+/*
+ * permission inode operation.  Without an access_file hook this is just
+ * vfs_permission().  With one, the hook is asked first and
+ * vfs_permission() is only consulted if the hook allows the access.
+ * Returns 0 if access is allowed, a negative error otherwise.
+ */
+int externfs_permission(struct inode *ino, int desired, struct nameidata *nd)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *path;
+       int r, w, x, err;
+
+       if(ops->access_file == NULL)
+               return vfs_permission(ino, desired);
+
+       r = (desired & MAY_READ) ? 1 : 0;
+       w = (desired & MAY_WRITE) ? 1 : 0;
+       x = (desired & MAY_EXEC) ? 1 : 0;
+
+       path = inode_name(ino, 0);
+       if(path == NULL)
+               return -ENOMEM;
+
+       err = (*ops->access_file)(path, r, w, x, current->fsuid,
+                                 current->fsgid, ed);
+       kfree(path);
+       if(err)
+               return err;
+
+       return vfs_permission(ino, desired);
+}
+
+/*
+ * setattr inode operation: translate the VFS iattr into an externfs_iattr,
+ * apply it through the set_attr hook and, if that succeeds, update the
+ * in-core inode with inode_setattr().
+ *
+ * Returns 0 on success, -ENOMEM if the path cannot be built, or the error
+ * from set_attr / inode_setattr.
+ */
+int externfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+       struct externfs_file_ops *ops = EXTERNFS_I(dentry->d_inode)->ops;
+       struct externfs_data *ed = dentry->d_inode->i_sb->s_fs_info;
+       struct externfs_iattr attrs;
+       char *name;
+       int err;
+       
+       attrs.ia_valid = 0;
+       if(attr->ia_valid & ATTR_MODE){
+               attrs.ia_valid |= EXTERNFS_ATTR_MODE;
+               attrs.ia_mode = attr->ia_mode;
+       }
+       if(attr->ia_valid & ATTR_UID){
+               attrs.ia_valid |= EXTERNFS_ATTR_UID;
+               attrs.ia_uid = attr->ia_uid;
+       }
+       if(attr->ia_valid & ATTR_GID){
+               attrs.ia_valid |= EXTERNFS_ATTR_GID;
+               attrs.ia_gid = attr->ia_gid;
+       }
+       if(attr->ia_valid & ATTR_SIZE){
+               attrs.ia_valid |= EXTERNFS_ATTR_SIZE;
+               attrs.ia_size = attr->ia_size;
+       }
+       /* Only whole seconds are handed to set_attr */
+       if(attr->ia_valid & ATTR_ATIME){
+               attrs.ia_valid |= EXTERNFS_ATTR_ATIME;
+               attrs.ia_atime = attr->ia_atime.tv_sec;
+       }
+       if(attr->ia_valid & ATTR_MTIME){
+               attrs.ia_valid |= EXTERNFS_ATTR_MTIME;
+               attrs.ia_mtime = attr->ia_mtime.tv_sec;
+       }
+       if(attr->ia_valid & ATTR_CTIME){
+               attrs.ia_valid |= EXTERNFS_ATTR_CTIME;
+               attrs.ia_ctime = attr->ia_ctime.tv_sec;
+       }
+       if(attr->ia_valid & ATTR_ATIME_SET){
+               attrs.ia_valid |= EXTERNFS_ATTR_ATIME_SET;
+               attrs.ia_atime = attr->ia_atime.tv_sec;
+       }
+       if(attr->ia_valid & ATTR_MTIME_SET){
+               attrs.ia_valid |= EXTERNFS_ATTR_MTIME_SET;
+               /* Mirror the ATIME_SET case so ia_mtime is never passed
+                * on uninitialised when only this flag is set.
+                */
+               attrs.ia_mtime = attr->ia_mtime.tv_sec;
+       }
+       name = dentry_name(dentry, 0);
+       if(name == NULL) 
+               return(-ENOMEM);
+       err = (*ops->set_attr)(name, &attrs, ed);
+       kfree(name);
+       if(err)
+               return(err);
+
+       return(inode_setattr(dentry->d_inode, attr));
+}
+
+/*
+ * Fill *stat from the in-core inode attached to dentry.  mnt is not used.
+ * Always returns 0.
+ */
+int externfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+                    struct kstat *stat)
+{
+       struct inode *inode = dentry->d_inode;
+
+       generic_fillattr(inode, stat);
+       return 0;
+}
+
+/*
+ * Inode operations table.  It carries the same entries as
+ * externfs_dir_iops except that it has no .lookup method.
+ */
+static struct inode_operations externfs_iops = {
+       .create         = externfs_create,
+       .link           = externfs_link,
+       .unlink         = externfs_unlink,
+       .symlink        = externfs_symlink,
+       .mkdir          = externfs_make_dir,
+       .rmdir          = externfs_remove_dir,
+       .mknod          = externfs_make_node,
+       .rename         = externfs_rename,
+       .truncate       = externfs_truncate,
+       .permission     = externfs_permission,
+       .setattr        = externfs_setattr,
+       .getattr        = externfs_getattr,
+};
+
+/*
+ * Inode operations table that additionally provides .lookup
+ * (externfs_lookup); presumably installed on directory inodes - confirm
+ * where the table is assigned.
+ */
+static struct inode_operations externfs_dir_iops = {
+       .create         = externfs_create,
+       .lookup         = externfs_lookup,
+       .link           = externfs_link,
+       .unlink         = externfs_unlink,
+       .symlink        = externfs_symlink,
+       .mkdir          = externfs_make_dir,
+       .rmdir          = externfs_remove_dir,
+       .mknod          = externfs_make_node,
+       .rename         = externfs_rename,
+       .truncate       = externfs_truncate,
+       .permission     = externfs_permission,
+       .setattr        = externfs_setattr,
+       .getattr        = externfs_getattr,
+};
+
+/*
+ * readpage implementation that fills a page with the target of a symbolic
+ * link, obtained through the filesystem's read_link operation.
+ *
+ * The page is always unmapped and unlocked before returning, including on
+ * the error paths.
+ *
+ * Returns 0 when the link was read, -ENOMEM if the inode's name cannot be
+ * built, -E2BIG if read_link filled the whole page, or whatever non-positive
+ * value read_link returned.
+ */
+int externfs_link_readpage(struct file *file, struct page *page)
+{
+       struct inode *ino = page->mapping->host;
+       struct externfs_file_ops *ops = EXTERNFS_I(ino)->ops;
+       struct externfs_data *ed = ino->i_sb->s_fs_info;
+       char *buffer, *name;
+       int err;
+
+       buffer = kmap(page);
+       name = inode_name(ino, 0);
+       if(name == NULL){
+               /* Still has to drop the mapping and the page lock */
+               err = -ENOMEM;
+               goto out;
+       }
+
+       err = (*ops->read_link)(name, current->fsuid, current->fsgid, buffer,
+                               PAGE_CACHE_SIZE, ed);
+
+       kfree(name);
+       if(err == PAGE_CACHE_SIZE)
+               err = -E2BIG;
+       else if(err > 0){
+               flush_dcache_page(page);
+               SetPageUptodate(page);
+               if (PageError(page)) ClearPageError(page);
+               err = 0;
+       }
+ out:
+       kunmap(page);
+       unlock_page(page);
+       return(err);
+}
+
+/*
+ * invalidatepage hook: passes the page to externfs_writepage() with a NULL
+ * second argument and returns its result.  offset is ignored.
+ */
+static int externfs_flushpage(struct page *page, unsigned long offset)
+{
+       int err = externfs_writepage(page, NULL);
+
+       return err;
+}
+
+/* Return the per-superblock externfs data of the filesystem inode lives on. */
+struct externfs_data *inode_externfs_info(struct inode *inode)
+{
+       struct super_block *sb = inode->i_sb;
+
+       return sb->s_fs_info;
+}
+
+/*
+ * Address space operations whose readpage fetches a symbolic link's target
+ * (externfs_link_readpage); release and invalidate use the generic externfs
+ * page helpers.
+ */
+static struct address_space_operations externfs_link_aops = {
+       .readpage       = externfs_link_readpage,
+       .releasepage    = externfs_removepage,
+       .invalidatepage = externfs_flushpage,
+};
+
+/*
+ * externfses is the list of registered externfs filesystems (struct externfs,
+ * linked through ->list); externfs_sem is taken around walks of that list.
+ */
+DECLARE_MUTEX(externfs_sem);
+struct list_head externfses = LIST_HEAD_INIT(externfses);
+
+/*
+ * Look up the registered externfs whose embedded file_system_type is type.
+ * The list is walked with externfs_sem held.  Returns NULL if there is no
+ * match.
+ */
+static struct externfs *find_externfs(struct file_system_type *type)
+{
+       struct externfs *found = NULL;
+       struct list_head *pos;
+
+       down(&externfs_sem);
+       list_for_each(pos, &externfses){
+               struct externfs *cur = list_entry(pos, struct externfs, list);
+
+               if (&cur->type == type) {
+                       found = cur;
+                       break;
+               }
+       }
+       up(&externfs_sem);
+
+       return found;
+}
+
+/* Root used when the mount argument is missing or empty */
+#define DEFAULT_ROOT "/"
+
+/*
+ * Return a uml_strdup() copy of mount_arg, or of DEFAULT_ROOT when mount_arg
+ * is NULL or the empty string.
+ */
+char *host_root_filename(char *mount_arg)
+{
+       if ((mount_arg == NULL) || (*mount_arg == '\0'))
+               return uml_strdup(DEFAULT_ROOT);
+
+       return uml_strdup(mount_arg);
+}
+
+/*
+ * Fill in a freshly allocated superblock for an externfs mount.
+ *
+ * Finds the externfs registered for sb->s_type, calls its mount operation
+ * with the mount data, then creates and reads the root inode and its dentry.
+ *
+ * Returns 0 on success; -EINVAL if no externfs is registered for the type;
+ * the error encoded in the mount operation's result; -ENOMEM if the root
+ * inode or dentry cannot be allocated; or the error from init_inode() or
+ * read_inode().
+ *
+ * NOTE(review): the data returned by the mount operation is not released on
+ * the failure paths after it - how it should be freed is not visible here.
+ */
+static int externfs_fill_sb(struct super_block *sb, void *data, int silent)
+{
+       struct externfs *fs;
+       struct inode *root_inode;
+       struct externfs_data *sb_data;
+       int err = -EINVAL;
+
+       sb->s_blocksize = 1024;
+       sb->s_blocksize_bits = 10;
+       sb->s_magic = EXTERNFS_SUPER_MAGIC;
+       sb->s_op = &externfs_sbops;
+
+       fs = find_externfs(sb->s_type);
+       if(fs == NULL){
+               printk("Couldn't find externfs for filesystem '%s'\n",
+                      sb->s_type->name);
+               goto out;
+       }
+
+       sb_data = (*fs->mount_ops->mount)(data);
+       if(IS_ERR(sb_data)){
+               err = PTR_ERR(sb_data);
+               goto out;
+       }
+
+       sb->s_fs_info = sb_data;
+       sb_data->mount_ops = fs->mount_ops;
+
+       /* A NULL inode is an allocation failure, not an invalid argument */
+       err = -ENOMEM;
+       root_inode = iget(sb, 0);
+       if(root_inode == NULL)
+               goto out;
+
+       err = init_inode(root_inode, NULL);
+       if(err)
+               goto out_put;
+
+       err = -ENOMEM;
+       sb->s_root = d_alloc_root(root_inode);
+       if(sb->s_root == NULL)
+               goto out_put;
+
+       err = read_inode(root_inode);
+       if(err){
+               /*
+                * The root dentry owns the inode reference now, so drop the
+                * dentry (which releases the inode) instead of calling iput,
+                * and clear s_root so that superblock teardown does not
+                * release it a second time.
+                */
+               dput(sb->s_root);
+               sb->s_root = NULL;
+       }
+
+ out:
+       return(err);
+
+ out_put:
+       iput(root_inode);
+       goto out;
+}
+
+/*
+ * get_sb method for externfs filesystem types: obtains a device-less
+ * superblock from get_sb_nodev(), filled in by externfs_fill_sb().
+ * dev_name is not used.
+ */
+struct super_block *externfs_read_super(struct file_system_type *type,
+                                       int flags, const char *dev_name,
+                                       void *data)
+{
+       struct super_block *sb;
+
+       sb = get_sb_nodev(type, flags, data, externfs_fill_sb);
+       return sb;
+}
+
+/* Record ops as the file operations table of the externfs mount data ed. */
+void init_externfs(struct externfs_data *ed, struct externfs_file_ops *ops)
+{
+       ed->file_ops = ops;
+}
+
+/*
+ * Register a new externfs-based filesystem type called name whose mounts are
+ * set up through mount_ops.
+ *
+ * A struct externfs is allocated, put on the externfses list and its embedded
+ * file_system_type is registered with the VFS.  The entry goes on the list
+ * before registration so that find_externfs() can already see it when the
+ * type becomes mountable.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * register_filesystem() (in which case the entry is unlinked and freed).
+ */
+int register_externfs(char *name, struct externfs_mount_ops *mount_ops)
+{
+       struct externfs *new;
+       int err = -ENOMEM;
+
+       new = kmalloc(sizeof(*new), GFP_KERNEL);
+       if(new == NULL)
+               goto out;
+
+       memset(new, 0, sizeof(*new));
+       *new = ((struct externfs) { .list       = LIST_HEAD_INIT(new->list),
+                                   .mount_ops  = mount_ops,
+                                   .type = { .name     = name,
+                                             .get_sb   = externfs_read_super,
+                                             .kill_sb  = kill_anon_super,
+                                             .fs_flags = 0,
+                                             .owner    = THIS_MODULE } });
+
+       /* The list is walked under externfs_sem, so modify it under it too */
+       down(&externfs_sem);
+       list_add(&new->list, &externfses);
+       up(&externfs_sem);
+
+       err = register_filesystem(&new->type);
+       if(err)
+               goto out_del;
+       return(0);
+
+ out_del:
+       down(&externfs_sem);
+       list_del(&new->list);
+       up(&externfs_sem);
+       kfree(new);
+ out:
+       return(err);
+}
+
+/*
+ * Undo register_externfs() for the filesystem called name: take it off the
+ * externfses list, unregister its file_system_type and free it.
+ *
+ * A message is printed if no filesystem of that name is registered.
+ *
+ * NOTE(review): freeing the entry assumes no superblock of this type is
+ * still mounted (e.g. the call comes from module exit) - confirm against the
+ * callers.
+ */
+void unregister_externfs(char *name)
+{
+       struct externfs *fs, *found = NULL;
+       struct list_head *ele;
+       int err;
+
+       down(&externfs_sem);
+       list_for_each(ele, &externfses){
+               fs = list_entry(ele, struct externfs, list);
+               if(!strcmp(fs->type.name, name)){
+                       list_del(ele);
+                       found = fs;
+                       break;
+               }
+       }
+       up(&externfs_sem);
+
+       if(found == NULL){
+               printk("Unregister_externfs - filesystem '%s' not found\n",
+                      name);
+               return;
+       }
+
+       err = unregister_filesystem(&found->type);
+       if(err){
+               /* The VFS may still reference the type, so do not free it */
+               printk("Unregister_externfs - unregistering '%s' failed, "
+                      "err = %d\n", name, err);
+               return;
+       }
+
+       kfree(found);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/hostfs/host_file.c b/fs/hostfs/host_file.c
new file mode 100644 (file)
index 0000000..e8eb901
--- /dev/null
@@ -0,0 +1,442 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/stddef.h"
+#include "linux/string.h"
+#include "linux/errno.h"
+#include "linux/types.h"
+#include "linux/slab.h"
+#include "linux/fs.h"
+#include "asm/fcntl.h"
+#include "hostfs.h"
+#include "filehandle.h"
+
+extern int append;
+
+/*
+ * Join the NULL-terminated array of path components into a single string.
+ *
+ * A '/' is inserted after every component except the last, unless the
+ * component is empty or already ends in '/'.  The result is built in buf if
+ * it fits in size bytes (terminator included); otherwise a buffer is
+ * kmalloc'ed.  Callers release the result with free_path(), passing the same
+ * buf, which frees it only if it was allocated here.
+ *
+ * Returns the buffer holding the joined path, or NULL if the allocation
+ * fails.  An empty component array yields an empty string.
+ */
+char *get_path(const char *path[], char *buf, int size)
+{
+       const char **s;
+       char *p;
+       int len, new = 1;
+
+       /* First pass: space needed, starting at 1 for the terminator */
+       for(s = path; *s != NULL; s++){
+               len = strlen(*s);
+               new += len;
+               if((*(s + 1) != NULL) && (len > 0) && ((*s)[len - 1] != '/'))
+                       new++;
+       }
+
+       if(new > size){
+               buf = kmalloc(new, GFP_KERNEL);
+               if(buf == NULL)
+                       return(NULL);
+       }
+
+       /* Terminate up front so an empty component list gives "" */
+       p = buf;
+       *p = '\0';
+       for(s = path; *s != NULL; s++){
+               len = strlen(*s);
+               strcpy(p, *s);
+               p += len;
+               if((*(s + 1) != NULL) && (len > 0) && ((*s)[len - 1] != '/'))
+                       strcpy(p++, "/");
+       }
+
+       return(buf);
+}
+
+/*
+ * Release a path obtained from get_path().  Nothing is freed when buf is
+ * NULL or is the caller's own buffer tmp.
+ */
+void free_path(const char *buf, char *tmp)
+{
+       if ((buf == NULL) || (buf == tmp))
+               return;
+
+       kfree((char *) buf);
+}
+
+/*
+ * Open the file named by the path components into fh.
+ *
+ * r and w select read and write access; the append flag is added when the
+ * global append setting is on.  Returns -ENOMEM if the path cannot be built,
+ * otherwise the result of open_filehandle().
+ */
+int host_open_file(const char *path[], int r, int w, struct file_handle *fh)
+{
+       struct openflags flags = OPENFLAGS();
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       if (r)
+               flags = of_read(flags);
+       if (w)
+               flags = of_write(flags);
+       if (append)
+               flags = of_append(flags);
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = open_filehandle(file, flags, 0, fh);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Open the directory named by the path components.  Returns what open_dir()
+ * returns, or ERR_PTR(-ENOMEM) if the path cannot be built.
+ */
+void *host_open_dir(const char *path[])
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       void *dir;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       dir = open_dir(file);
+       free_path(file, tmp);
+       return dir;
+}
+
+/*
+ * Read the directory entry at position *pos of stream.
+ *
+ * On success the entry name is returned, its length is stored in *len_out
+ * and *pos is advanced to the position reported by os_tell_dir().  NULL is
+ * returned when os_read_dir() produces no name, and an ERR_PTR() when
+ * seeking or reading fails.
+ */
+char *host_read_dir(void *stream, unsigned long long *pos,
+                   unsigned long long *ino_out, int *len_out)
+{
+       char *entry;
+       int rc;
+
+       rc = os_seek_dir(stream, *pos);
+       if (rc == 0)
+               rc = os_read_dir(stream, ino_out, &entry);
+       if (rc != 0)
+               return ERR_PTR(rc);
+
+       if (entry != NULL) {
+               *len_out = strlen(entry);
+               *pos = os_tell_dir(stream);
+       }
+       return entry;
+}
+
+/*
+ * Return the type of the file named by the path components, as reported by
+ * os_file_type().  When rdev is non-NULL the file is lstat'ed first and its
+ * device number is stored in *rdev.
+ *
+ * Returns -ENOMEM if the path cannot be built, or the os_lstat_file() error.
+ */
+int host_file_type(const char *path[], int *rdev)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       struct uml_stat st;
+       char *file;
+       int ret;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       if (rdev != NULL) {
+               ret = os_lstat_file(file, &st);
+               if (ret != 0)
+                       goto out;
+               *rdev = MKDEV(st.ust_rmajor, st.ust_rminor);
+       }
+
+       ret = os_file_type(file);
+ out:
+       free_path(file, tmp);
+       return ret;
+}
+
+/*
+ * Create the file named by the path components with the given mode and open
+ * it read-write into fh.  Returns -ENOMEM if the path cannot be built,
+ * otherwise the result of open_filehandle().
+ */
+int host_create_file(const char *path[], int mode, struct file_handle *fh)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = open_filehandle(file, of_create(of_rdwr(OPENFLAGS())), mode, fh);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * lstat path on the host and copy the requested fields out.  Every output
+ * pointer is optional: NULL means the caller does not want that field.
+ *
+ * Returns 0 on success or the negative error from os_lstat_file(), in which
+ * case no output is written.
+ */
+static int do_stat_file(const char *path, int *dev_out,
+                       unsigned long long *inode_out, int *mode_out,
+                       int *nlink_out, int *uid_out, int *gid_out,
+                       unsigned long long *size_out, unsigned long *atime_out,
+                       unsigned long *mtime_out, unsigned long *ctime_out,
+                       int *blksize_out, unsigned long long *blocks_out)
+{
+       struct uml_stat st;
+       int err;
+
+       err = os_lstat_file(path, &st);
+       if (err < 0)
+               return err;
+
+       if (dev_out != NULL)
+               *dev_out = MKDEV(st.ust_major, st.ust_minor);
+       if (inode_out != NULL)
+               *inode_out = st.ust_ino;
+       if (mode_out != NULL)
+               *mode_out = st.ust_mode;
+       if (nlink_out != NULL)
+               *nlink_out = st.ust_nlink;
+       if (uid_out != NULL)
+               *uid_out = st.ust_uid;
+       if (gid_out != NULL)
+               *gid_out = st.ust_gid;
+       if (size_out != NULL)
+               *size_out = st.ust_size;
+       if (atime_out != NULL)
+               *atime_out = st.ust_atime;
+       if (mtime_out != NULL)
+               *mtime_out = st.ust_mtime;
+       if (ctime_out != NULL)
+               *ctime_out = st.ust_ctime;
+       if (blksize_out != NULL)
+               *blksize_out = st.ust_blksize;
+       if (blocks_out != NULL)
+               *blocks_out = st.ust_blocks;
+
+       return 0;
+}
+
+/*
+ * Stat the file named by the path components; the output pointers are passed
+ * straight to do_stat_file().  Returns -ENOMEM if the path cannot be built,
+ * otherwise the result of do_stat_file().
+ */
+int host_stat_file(const char *path[], int *dev_out,
+                  unsigned long long *inode_out, int *mode_out,
+                  int *nlink_out, int *uid_out, int *gid_out,
+                  unsigned long long *size_out, unsigned long *atime_out,
+                  unsigned long *mtime_out, unsigned long *ctime_out,
+                  int *blksize_out, unsigned long long *blocks_out)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = do_stat_file(file, dev_out, inode_out, mode_out, nlink_out,
+                          uid_out, gid_out, size_out, atime_out, mtime_out,
+                          ctime_out, blksize_out, blocks_out);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Apply the attribute changes described by attrs to the host file named by
+ * the path components.
+ *
+ * Each requested attribute is applied with its own host call, in the order
+ * mode, owner, group, size, times; the first failure stops processing and
+ * is returned, leaving earlier changes in place.
+ *
+ * Returns 0 on success, -EPERM for a size change while the append setting is
+ * on, -ENOMEM if the path cannot be built, or the error from the failing
+ * host call.
+ */
+int host_set_attr(const char *path[], struct externfs_iattr *attrs)
+{
+       char tmp[HOSTFS_BUFSIZE], *file;
+       unsigned long time;
+       int err = 0, ma;
+
+       /* Size changes are refused outright in append mode */
+       if(append && (attrs->ia_valid & EXTERNFS_ATTR_SIZE))
+               return(-EPERM);
+
+       err = -ENOMEM;
+       file = get_path(path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       if(attrs->ia_valid & EXTERNFS_ATTR_MODE){
+               err = os_set_file_perms(file, attrs->ia_mode);
+               if(err < 0)
+                       goto out;
+       }
+       /* Owner and group are changed separately, -1 standing in for the other */
+       if(attrs->ia_valid & EXTERNFS_ATTR_UID){
+               err = os_set_file_owner(file, attrs->ia_uid, -1);
+               if(err < 0)
+                       goto out;
+       }
+       if(attrs->ia_valid & EXTERNFS_ATTR_GID){
+               err = os_set_file_owner(file, -1, attrs->ia_gid);
+               if(err < 0)
+                       goto out;
+       }
+       if(attrs->ia_valid & EXTERNFS_ATTR_SIZE){
+               err = os_truncate_file(file, attrs->ia_size);
+               if(err < 0)
+                       goto out;
+       }
+       /* Both times given explicitly: set them with a single call */
+       ma = EXTERNFS_ATTR_ATIME_SET | EXTERNFS_ATTR_MTIME_SET;
+       if((attrs->ia_valid & ma) == ma){
+               err = os_set_file_time(file, attrs->ia_atime, attrs->ia_mtime);
+               if(err)
+                       goto out;
+       }
+       else {
+               /*
+                * Only one time given: fetch the file's current value of the
+                * other one so that it is passed back unchanged.
+                */
+               if(attrs->ia_valid & EXTERNFS_ATTR_ATIME_SET){
+                       err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+                                          NULL, NULL, NULL, &time, 
+                                          NULL, NULL, NULL);
+                       if(err != 0)
+                               goto out;
+
+                       err = os_set_file_time(file, attrs->ia_atime, time);
+                       if(err)
+                               goto out;
+               }
+               if(attrs->ia_valid & EXTERNFS_ATTR_MTIME_SET){
+                       err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+                                          NULL, NULL, &time, NULL, 
+                                          NULL, NULL, NULL);
+                       if(err != 0)
+                               goto out;
+
+                       err = os_set_file_time(file, time, attrs->ia_mtime);
+                       if(err)
+                               goto out;
+               }
+       }
+       /* Nothing is done for a ctime request */
+       if(attrs->ia_valid & EXTERNFS_ATTR_CTIME) ;
+       /* Report the file's resulting access and modify times back in attrs */
+       if(attrs->ia_valid & (EXTERNFS_ATTR_ATIME | EXTERNFS_ATTR_MTIME)){
+               err = do_stat_file(file, NULL, NULL, NULL, NULL, NULL, 
+                                  NULL, NULL, &attrs->ia_atime, 
+                                  &attrs->ia_mtime, NULL, NULL, NULL);
+               if(err != 0)
+                       goto out;
+       }
+
+       err = 0;
+ out:
+       free_path(file, tmp);
+       return(err);
+}
+
+/*
+ * Create a symbolic link at the location named by the from components whose
+ * target is the string to.  Returns -ENOMEM if the path cannot be built,
+ * otherwise the result of os_make_symlink().
+ */
+int host_make_symlink(const char *from[], const char *to)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(from, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = os_make_symlink(to, file);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Remove the file named by the path components.  Refused with -EPERM while
+ * the append setting is on; returns -ENOMEM if the path cannot be built,
+ * otherwise the result of os_remove_file().
+ */
+int host_unlink_file(const char *path[])
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       if (append)
+               return -EPERM;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = os_remove_file(file);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Create the directory named by the path components with the given mode.
+ * Returns -ENOMEM if the path cannot be built, otherwise the result of
+ * os_make_dir().
+ */
+int host_make_dir(const char *path[], int mode)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = os_make_dir(file, mode);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Remove the directory named by the path components.  Returns -ENOMEM if the
+ * path cannot be built, otherwise the result of os_remove_dir().
+ */
+int host_remove_dir(const char *path[])
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = os_remove_dir(file);
+       free_path(file, tmp);
+       return err;
+}
+
+/*
+ * Hard-link the two files named by the to and from component arrays via
+ * os_link_file().  Returns -ENOMEM if either path cannot be built, otherwise
+ * the result of os_link_file().
+ */
+int host_link_file(const char *to[], const char *from[])
+{
+       char from_tmp[HOSTFS_BUFSIZE], to_tmp[HOSTFS_BUFSIZE];
+       char *f, *t;
+       int err = -ENOMEM;
+
+       f = get_path(from, from_tmp, sizeof(from_tmp));
+       t = get_path(to, to_tmp, sizeof(to_tmp));
+       if ((f != NULL) && (t != NULL))
+               err = os_link_file(t, f);
+
+       /* Both are released whichever of them failed */
+       free_path(f, from_tmp);
+       free_path(t, to_tmp);
+       return err;
+}
+
+/*
+ * Read the target of the symbolic link named by the path components into
+ * buf, which holds size bytes.
+ *
+ * The result is NUL-terminated when it is shorter than size.  Returns the
+ * value of os_read_symlink() (the length read, or a negative error), or
+ * -ENOMEM if the path cannot be built.
+ */
+int host_read_link(const char *path[], char *buf, int size)
+{
+       char tmp[HOSTFS_BUFSIZE], *file;
+       int n = -ENOMEM;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       n = os_read_symlink(file, buf, size);
+       /* A negative n is an error code and must not be used as an index */
+       if((n >= 0) && (n < size))
+               buf[n] = '\0';
+ out:
+       free_path(file, tmp);
+       return(n);
+}
+
+/*
+ * Rename the file named by the from components to the name given by the to
+ * components.  Returns -ENOMEM if either path cannot be built, otherwise the
+ * result of os_move_file().
+ */
+int host_rename_file(const char *from[], const char *to[])
+{
+       char from_tmp[HOSTFS_BUFSIZE], to_tmp[HOSTFS_BUFSIZE];
+       char *f, *t;
+       int err = -ENOMEM;
+
+       f = get_path(from, from_tmp, sizeof(from_tmp));
+       t = get_path(to, to_tmp, sizeof(to_tmp));
+       if ((f != NULL) && (t != NULL))
+               err = os_move_file(f, t);
+
+       /* Both are released whichever of them failed */
+       free_path(f, from_tmp);
+       free_path(t, to_tmp);
+       return err;
+}
+
+/*
+ * Query filesystem statistics for the location named by the path components;
+ * the output arguments are passed straight to os_stat_filesystem().  Returns
+ * -ENOMEM if the path cannot be built, otherwise the result of
+ * os_stat_filesystem().
+ */
+int host_stat_fs(const char *path[], long *bsize_out, long long *blocks_out,
+                long long *bfree_out, long long *bavail_out,
+                long long *files_out, long long *ffree_out, void *fsid_out,
+                int fsid_size, long *namelen_out, long *spare_out)
+{
+       char tmp[HOSTFS_BUFSIZE];
+       char *file;
+       int err;
+
+       file = get_path(path, tmp, sizeof(tmp));
+       if (file == NULL)
+               return -ENOMEM;
+
+       err = os_stat_filesystem(file, bsize_out, blocks_out, bfree_out,
+                                bavail_out, files_out, ffree_out, fsid_out,
+                                fsid_size, namelen_out, spare_out);
+       free_path(file, tmp);
+       return err;
+}
+
+/* Same as host_read_dir(); the extra mount argument is ignored. */
+char *generic_host_read_dir(void *stream, unsigned long long *pos,
+                           unsigned long long *ino_out, int *len_out,
+                           void *mount)
+{
+       char *name = host_read_dir(stream, pos, ino_out, len_out);
+
+       return name;
+}
+
+/* Truncate the open file fh to size via truncate_file(); m is ignored. */
+int generic_host_truncate_file(struct file_handle *fh, __u64 size, void *m)
+{
+       int err = truncate_file(fh, size);
+
+       return err;
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/hostfs/host_fs.c b/fs/hostfs/host_fs.c
new file mode 100644 (file)
index 0000000..c059539
--- /dev/null
@@ -0,0 +1,467 @@
+/* 
+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/stddef.h"
+#include "linux/string.h"
+#include "linux/types.h"
+#include "linux/errno.h"
+#include "linux/slab.h"
+#include "linux/init.h"
+#include "linux/fs.h"
+#include "linux/stat.h"
+#include "hostfs.h"
+#include "kern.h"
+#include "init.h"
+#include "kern_util.h"
+#include "filehandle.h"
+#include "os.h"
+
+/*
+ * Changed in hostfs_args before the kernel starts running.
+ *
+ * jail_dir is the first component of every host path built in this file;
+ * append is set by the "append" flag of the hostfs= option.
+ */
+static char *jail_dir = "/";
+int append = 0;
+
+/*
+ * Parse the value of the "hostfs=" option, of the form
+ * <root dir>,<flag>,... .
+ *
+ * The string is split in place at the commas.  A non-empty first field
+ * becomes jail_dir; of the remaining fields "append" turns the append
+ * setting on, empty fields are skipped and anything else is reported as
+ * unsupported.  add is not used.  Always returns 0.
+ */
+static int __init hostfs_args(char *options, int *add)
+{
+       char *next;
+
+       next = strchr(options, ',');
+       if (next != NULL)
+               *next++ = '\0';
+       if (*options != '\0')
+               jail_dir = options;
+
+       for (options = next; options != NULL; options = next) {
+               next = strchr(options, ',');
+               if (next != NULL)
+                       *next++ = '\0';
+
+               if (*options == '\0')
+                       continue;
+
+               if (strcmp(options, "append") == 0)
+                       append = 1;
+               else
+                       printf("hostfs_args - unsupported option - %s\n",
+                              options);
+       }
+       return 0;
+}
+
+/*
+ * Ties the "hostfs=" option to hostfs_args, together with the help text
+ * describing it.
+ */
+__uml_setup("hostfs=", hostfs_args,
+"hostfs=<root dir>,<flags>,...\n"
+"    This is used to set hostfs parameters.  The root directory argument\n"
+"    is used to confine all hostfs mounts to within the specified directory\n"
+"    tree on the host.  If this isn't specified, then a user inside UML can\n"
+"    mount anything on the host that's accessible to the user that's running\n"
+"    it.\n"
+"    The only flag currently supported is 'append', which specifies that all\n"
+"    files opened by hostfs will be opened in append mode.\n\n"
+);
+
+/*
+ * Per-mount hostfs state.  The operations below receive a pointer to the
+ * embedded ext member and get back to this structure with container_of().
+ */
+struct hostfs_data {
+       struct externfs_data ext;
+       /* Path component placed between jail_dir and the file name */
+       char *mount;
+};
+
+/*
+ * Per-file hostfs state, allocated by hostfs_init_file() and recovered from
+ * the embedded ext member with container_of().
+ */
+struct hostfs_file {
+       struct externfs_inode ext;
+       /* Handle on the host file; fh.fd is -1 until the file is opened */
+       struct file_handle fh;
+};
+
+/*
+ * Check with os_access() whether the host file may be accessed for reading
+ * (r), writing (w) and/or execution (x).  uid and gid are not used.
+ *
+ * Returns -ENOMEM if the host path cannot be built, otherwise the result of
+ * os_access().
+ */
+static int hostfs_access_file(char *file, int uid, int w, int x, int gid,
+                             int r, struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+       char tmp[HOSTFS_BUFSIZE];
+       char *full;
+       int mode = 0, err;
+
+       if (r)
+               mode = OS_ACC_R_OK;
+       if (w)
+               mode |= OS_ACC_W_OK;
+       if (x)
+               mode |= OS_ACC_X_OK;
+
+       full = get_path(parts, tmp, sizeof(tmp));
+       if (full == NULL)
+               return -ENOMEM;
+
+       err = os_access(full, mode);
+       free_path(full, tmp);
+       return err;
+}
+
+/*
+ * Create a device node on the host with os_make_dev().  The node type is
+ * OR-ed into mode before the call; uid and gid are not used.
+ *
+ * Returns -ENOMEM if the host path cannot be built, otherwise the result of
+ * os_make_dev().
+ */
+static int hostfs_make_node(const char *file, int mode, int uid, int gid,
+                           int type, int major, int minor,
+                           struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+       char tmp[HOSTFS_BUFSIZE];
+       const char *full;
+       int err;
+
+       full = get_path(parts, tmp, sizeof(tmp));
+       if (full == NULL)
+               return -ENOMEM;
+
+       /* XXX Pass type in an OS-independent way */
+       err = os_make_dev(full, mode | type, major, minor);
+       free_path(full, tmp);
+       return err;
+}
+
+/*
+ * Stat a host file through host_stat_file().  The owner and group are not
+ * taken from the host: *uid_out and *gid_out are always set to 0.
+ */
+static int hostfs_stat_file(const char *file, struct externfs_data *ed,
+                           dev_t *dev_out, unsigned long long *inode_out,
+                           int *mode_out, int *nlink_out, int *uid_out,
+                           int *gid_out, unsigned long long *size_out,
+                           unsigned long *atime_out, unsigned long *mtime_out,
+                           unsigned long *ctime_out, int *blksize_out,
+                           unsigned long long *blocks_out)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+       int err;
+
+       /* XXX Why pretend everything is owned by root? */
+       *uid_out = 0;
+       *gid_out = 0;
+
+       err = host_stat_file(parts, dev_out, inode_out, mode_out, nlink_out,
+                            NULL, NULL, size_out, atime_out, mtime_out,
+                            ctime_out, blksize_out, blocks_out);
+       return err;
+}
+
+/* Return host_file_type() for file below this mount; see that function. */
+static int hostfs_file_type(const char *file, int *rdev,
+                           struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_file_type(parts, rdev);
+}
+
+/*
+ * Return inode_name_prefix() of the inode, using the mount string of the
+ * hostfs mount the inode belongs to as the prefix.
+ */
+static char *hostfs_name(struct inode *inode)
+{
+       struct externfs_data *ed = inode_externfs_info(inode);
+       struct hostfs_data *hd = container_of(ed, struct hostfs_data, ext);
+
+       return inode_name_prefix(inode, hd->mount);
+}
+
+/*
+ * Allocate the per-file state for a hostfs file, with its host descriptor
+ * marked as not open (-1).  Returns the embedded externfs_inode, or NULL if
+ * the allocation fails.  ed is not used.
+ */
+static struct externfs_inode *hostfs_init_file(struct externfs_data *ed)
+{
+       struct hostfs_file *hf = kmalloc(sizeof(*hf), GFP_KERNEL);
+
+       if (hf != NULL) {
+               hf->fh.fd = -1;
+               return &hf->ext;
+       }
+
+       return NULL;
+}
+
+/*
+ * Open the host file behind ext.  A read-write open is tried first; if that
+ * is refused with -EACCES the file is opened read-only instead.  On success
+ * the handle is passed to is_reclaimable() together with hostfs_name and the
+ * inode.  uid and gid are not used.
+ *
+ * Returns 0 on success or the error from host_open_file().
+ */
+static int hostfs_open_file(struct externfs_inode *ext, char *file,
+                           int uid, int gid, struct inode *inode,
+                           struct externfs_data *ed)
+{
+       struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext);
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+       int err;
+
+       err = host_open_file(parts, 1, 1, &hf->fh);
+       if (err == -EISDIR)
+               return err;
+
+       if (err == -EACCES)
+               err = host_open_file(parts, 1, 0, &hf->fh);
+       if (err != 0)
+               return err;
+
+       is_reclaimable(&hf->fh, hostfs_name, inode);
+       return 0;
+}
+
+/* Open the host directory for file via host_open_dir(); uid/gid unused. */
+static void *hostfs_open_dir(char *file, int uid, int gid,
+                            struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_open_dir(parts);
+}
+
+/* Close a directory stream with os_close_dir(); ed is not used. */
+static void hostfs_close_dir(void *stream, struct externfs_data *ed)
+{
+       os_close_dir(stream);
+}
+
+/*
+ * Read the next directory entry through generic_host_read_dir(), handing it
+ * this mount's mount string as its last argument.
+ */
+static char *hostfs_read_dir(void *stream, unsigned long long *pos,
+                            unsigned long long *ino_out, int *len_out,
+                            struct externfs_data *ed)
+{
+       struct hostfs_data *hd = container_of(ed, struct hostfs_data, ext);
+
+       return generic_host_read_dir(stream, pos, ino_out, len_out, hd->mount);
+}
+
+/*
+ * Read into buf the parts of a len-byte region at offset that lie outside
+ * [ignore_start, ignore_end): the first ignore_start bytes and the bytes from
+ * ignore_end up to len.  The tail is not read if reading the head failed.
+ *
+ * completion is always called with buf, the result of the last read_file()
+ * call (0 if none was made) and arg.  Returns 0 unless that result is
+ * negative, in which case it is returned.  ed is not used.
+ */
+static int hostfs_read_file(struct externfs_inode *ext,
+                           unsigned long long offset, char *buf, int len,
+                           int ignore_start, int ignore_end,
+                           void (*completion)(char *, int, void *), void *arg,
+                           struct externfs_data *ed)
+{
+       struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext);
+       int n = 0;
+
+       if (ignore_start != 0)
+               n = read_file(&hf->fh, offset, buf, ignore_start);
+
+       if ((n >= 0) && (ignore_end != len))
+               n = read_file(&hf->fh, offset + ignore_end, buf + ignore_end,
+                             len - ignore_end);
+
+       (*completion)(buf, n, arg);
+
+       return (n > 0) ? 0 : n;
+}
+
+/*
+ * Write len bytes taken from buf + start to the host file at offset + start.
+ *
+ * completion is called with buf, the result of write_file() and arg.
+ * Returns 0 unless that result is negative, in which case it is returned.
+ * ed is not used.
+ */
+static int hostfs_write_file(struct externfs_inode *ext,
+                            unsigned long long offset, const char *buf,
+                            int start, int len,
+                            void (*completion)(char *, int, void *),
+                            void *arg, struct externfs_data *ed)
+{
+       struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext);
+       int n;
+
+       n = write_file(&hf->fh, offset + start, buf + start, len);
+       (*completion)((char *) buf, n, arg);
+
+       return (n > 0) ? 0 : n;
+}
+
+/*
+ * Create and open the host file behind ext with host_create_file().  On
+ * success the handle is passed to is_reclaimable() together with hostfs_name
+ * and the inode.  uid and gid are not used.
+ *
+ * Returns 0 on success or the error from host_create_file().
+ */
+static int hostfs_create_file(struct externfs_inode *ext, char *file, int mode,
+                             int uid, int gid, struct inode *inode,
+                             struct externfs_data *ed)
+{
+       struct hostfs_file *hf = container_of(ext, struct hostfs_file, ext);
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+       int err;
+
+       err = host_create_file(parts, mode, &hf->fh);
+       if (err != 0)
+               return err;
+
+       is_reclaimable(&hf->fh, hostfs_name, inode);
+       return 0;
+}
+
+/* Apply attrs to the host file via host_set_attr(); see that function. */
+static int hostfs_set_attr(const char *file, struct externfs_iattr *attrs,
+                          struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_set_attr(parts, attrs);
+}
+
+/*
+ * Create the symbolic link from (below this mount) with target to, via
+ * host_make_symlink().  uid and gid are not used.
+ */
+static int hostfs_make_symlink(const char *from, const char *to, int uid,
+                              int gid, struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, from, NULL };
+
+       return host_make_symlink(parts, to);
+}
+
+/*
+ * Hard-link to and from, both below this mount, via host_link_file().  uid
+ * and gid are not used.
+ */
+static int hostfs_link_file(const char *to, const char *from, int uid, int gid,
+                           struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *from_parts[] = { jail_dir, mnt, from, NULL };
+       const char *to_parts[] = { jail_dir, mnt, to, NULL };
+
+       return host_link_file(to_parts, from_parts);
+}
+
+/* Remove the host file via host_unlink_file(); see that function. */
+static int hostfs_unlink_file(const char *file, struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_unlink_file(parts);
+}
+
+/* Create the host directory via host_make_dir(); uid and gid are unused. */
+static int hostfs_make_dir(const char *file, int mode, int uid, int gid,
+                          struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_make_dir(parts, mode);
+}
+
+/* Remove the host directory via host_remove_dir(); uid/gid are unused. */
+static int hostfs_remove_dir(const char *file, int uid, int gid,
+                            struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_remove_dir(parts);
+}
+
+/*
+ * Read the target of the host symbolic link into buf (size bytes) via
+ * host_read_link().  uid and gid are not used.
+ */
+static int hostfs_read_link(char *file, int uid, int gid, char *buf, int size,
+                           struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *parts[] = { jail_dir, mnt, file, NULL };
+
+       return host_read_link(parts, buf, size);
+}
+
+/* Rename from to to, both below this mount, via host_rename_file(). */
+static int hostfs_rename_file(char *from, char *to, struct externfs_data *ed)
+{
+       char *mnt = container_of(ed, struct hostfs_data, ext)->mount;
+       const char *from_parts[] = { jail_dir, mnt, from, NULL };
+       const char *to_parts[] = { jail_dir, mnt, to, NULL };
+
+       return host_rename_file(from_parts, to_parts);
+}
+
+/*
+ * statfs wrapper: queries the host filesystem at the mount root (under
+ * jail_dir) and passes every output pointer straight to host_stat_fs().
+ */
+static int hostfs_stat_fs(long *bsize_out, long long *blocks_out, 
+                         long long *bfree_out, long long *bavail_out, 
+                         long long *files_out, long long *ffree_out,
+                         void *fsid_out, int fsid_size, long *namelen_out, 
+                         long *spare_out, struct externfs_data *ed)
+{
+       struct hostfs_data *hd = container_of(ed, struct hostfs_data, ext);
+       const char *root_elems[] = { jail_dir, hd->mount, NULL };
+
+       return host_stat_fs(root_elems, bsize_out, blocks_out, bfree_out,
+                           bavail_out, files_out, ffree_out, fsid_out,
+                           fsid_size, namelen_out, spare_out);
+}
+
+/*
+ * Release a hostfs file.  If a host descriptor is open (fd != -1) the
+ * file is truncated to "size" and closed first; the hostfs_file that
+ * embeds "ext" is freed in every case.
+ */
+void hostfs_close_file(struct externfs_inode *ext,
+                      unsigned long long size)
+{
+       struct hostfs_file *file = container_of(ext, struct hostfs_file, ext);
+       int is_open = (file->fh.fd != -1);
+
+       if(is_open){
+               truncate_file(&file->fh, size);
+               close_file(&file->fh);
+       }
+
+       kfree(file);
+}
+
+/* Truncate the host file behind "ext" to "size" bytes; "ed" is unused. */
+int hostfs_truncate_file(struct externfs_inode *ext, __u64 size, 
+                        struct externfs_data *ed)
+{
+       struct hostfs_file *file = container_of(ext, struct hostfs_file, ext);
+       struct file_handle *fh = &file->fh;
+
+       return truncate_file(fh, size);
+}
+
+/*
+ * externfs operations implemented by hostfs.  map_file_page and
+ * invisible are explicitly left unimplemented (NULL).
+ */
+static struct externfs_file_ops hostfs_file_ops = {
+       /* Inspection */
+       .stat_file              = hostfs_stat_file,
+       .file_type              = hostfs_file_type,
+       .access_file            = hostfs_access_file,
+       .statfs                 = hostfs_stat_fs,
+       .read_link              = hostfs_read_link,
+
+       /* Regular file I/O */
+       .open_file              = hostfs_open_file,
+       .read_file              = hostfs_read_file,
+       .write_file             = hostfs_write_file,
+       .truncate_file          = hostfs_truncate_file,
+       .close_file             = hostfs_close_file,
+       .map_file_page          = NULL,
+       .invisible              = NULL,
+
+       /* Directory iteration */
+       .open_dir               = hostfs_open_dir,
+       .read_dir               = hostfs_read_dir,
+       .close_dir              = hostfs_close_dir,
+
+       /* Namespace changes */
+       .create_file            = hostfs_create_file,
+       .set_attr               = hostfs_set_attr,
+       .make_symlink           = hostfs_make_symlink,
+       .link_file              = hostfs_link_file,
+       .unlink_file            = hostfs_unlink_file,
+       .rename_file            = hostfs_rename_file,
+       .make_dir               = hostfs_make_dir,
+       .remove_dir             = hostfs_remove_dir,
+       .make_node              = hostfs_make_node
+};
+
+/*
+ * Mount callback for hostfs.  Allocates the per-mount hostfs_data,
+ * resolves the host root from mount_arg via host_root_filename(), and
+ * initializes the embedded externfs_data with the hostfs operations.
+ *
+ * Returns the embedded externfs_data on success, or ERR_PTR(-ENOMEM) if
+ * either the allocation or host_root_filename() fails.
+ */
+static struct externfs_data *mount_fs(char *mount_arg)
+{
+       struct hostfs_data *hd = kmalloc(sizeof(*hd), GFP_KERNEL);
+
+       if(hd == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       hd->mount = host_root_filename(mount_arg);
+       if(hd->mount == NULL){
+               kfree(hd);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       init_externfs(&hd->ext, &hostfs_file_ops);
+       return &hd->ext;
+}
+
+/* Mount-level callbacks handed to register_externfs() for "hostfs". */
+static struct externfs_mount_ops hostfs_mount_ops = {
+       .init_file              = hostfs_init_file,
+       .mount                  = mount_fs,
+};
+
+/* Register hostfs with the externfs layer; returns its result. */
+static int __init init_hostfs(void)
+{
+       int err = register_externfs("hostfs", &hostfs_mount_ops);
+
+       return err;
+}
+
+/* Undo init_hostfs(): drop the "hostfs" registration from externfs. */
+static void __exit exit_hostfs(void)
+{
+       unregister_externfs("hostfs");
+       return;
+}
+
+/* hostfs is hooked up through the initcall/exitcall tables. */
+__initcall(init_hostfs);
+__exitcall(exit_hostfs);
+
+/*
+ * Module-style registration is compiled out; NOTE(review): the reason is
+ * not visible in this file - confirm before re-enabling.
+ */
+#if 0
+module_init(init_hostfs)
+module_exit(exit_hostfs)
+MODULE_LICENSE("GPL");
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/hostfs/humfs.c b/fs/hostfs/humfs.c
new file mode 100644 (file)
index 0000000..7878be5
--- /dev/null
@@ -0,0 +1,1026 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/kdev_t.h>
+#include "linux/init.h"
+#include "linux/workqueue.h"
+#include <asm/irq.h>
+#include "hostfs.h"
+#include "mem.h"
+#include "os.h"
+#include "mode.h"
+#include "aio.h"
+#include "irq_user.h"
+#include "irq_kern.h"
+#include "filehandle.h"
+#include "metadata.h"
+
+#define HUMFS_VERSION 2
+
+/*
+ * Stat a humfs file.
+ *
+ * Size, link count, timestamps, inode number and block information come
+ * from the backing file under mount->data.  Ownership, permission bits,
+ * and - for special files - the node type and device numbers come from
+ * the metadata layer (mount->meta->ownerships), because the backing
+ * file is not itself a device/FIFO/socket node.
+ *
+ * The permission bits of *mode_out are replaced by the metadata
+ * permissions; the S_IFMT bits are replaced only when the metadata
+ * records a special type ('c', 'b', 'p' or 's').  *dev_out is written
+ * only for character and block devices, and only if dev_out is non-NULL.
+ *
+ * Returns 0 on success or the error from host_stat_file() or the
+ * metadata lookup.
+ */
+static int humfs_stat_file(const char *path, struct externfs_data *ed, 
+                          dev_t *dev_out, unsigned long long *inode_out, 
+                          int *mode_out, int *nlink_out, int *uid_out, 
+                          int *gid_out, unsigned long long *size_out, 
+                          unsigned long *atime_out, unsigned long *mtime_out, 
+                          unsigned long *ctime_out, int *blksize_out, 
+                          unsigned long long *blocks_out)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       int err, mode, perms, major, minor;
+       char type;
+
+       err = host_stat_file(data_path, NULL, inode_out, mode_out, 
+                            nlink_out, NULL, NULL, size_out, atime_out, 
+                            mtime_out, ctime_out, blksize_out, blocks_out);
+       if(err)
+               return(err);
+
+       err = (*mount->meta->ownerships)(path, &perms, uid_out, gid_out, 
+                                        &type, &major, &minor, mount);
+       if(err)
+               return(err);
+
+       *mode_out = (*mode_out & ~S_IRWXUGO) | perms;
+
+       mode = 0;
+       switch(type){
+       case 'c':
+               mode = S_IFCHR;
+               if(dev_out)
+                       *dev_out = MKDEV(major, minor);
+               break;
+       case 'b':
+               mode = S_IFBLK;
+               if(dev_out)
+                       *dev_out = MKDEV(major, minor);
+               break;
+       case 'p':
+               /* FIFOs are recorded as 'p' by humfs_make_node() and
+                * recognized by meta_type(); report them here as well.
+                */
+               mode = S_IFIFO;
+               break;
+       case 's':
+               mode = S_IFSOCK;
+               break;
+       default:
+               break;
+       }
+
+       if(mode != 0)
+               *mode_out = (*mode_out & ~S_IFMT) | mode;
+
+       return(0);
+}
+
+/*
+ * Look up the node type recorded in the metadata for "path".
+ *
+ * Returns the error from the metadata lookup if it fails, 0 if the
+ * metadata records no special type (type character 0), an OS_TYPE_*
+ * value for 'c', 'b', 'p' and 's', and -EINVAL for any other character.
+ * When a type is recorded and dev_out is non-NULL, *dev_out receives
+ * MKDEV(major, minor) regardless of which type it is.
+ */
+static int meta_type(const char *path, int *dev_out, void *m)
+{
+       struct humfs *mount = m;
+       int major, minor, ret;
+       char kind;
+
+       ret = (*mount->meta->ownerships)(path, NULL, NULL, NULL, &kind,
+                                        &major, &minor, mount);
+       if(ret)
+               return ret;
+
+       if(kind == 0)
+               return 0;
+
+       if(dev_out != NULL)
+               *dev_out = MKDEV(major, minor);
+
+       switch(kind){
+       case 'c':
+               return OS_TYPE_CHARDEV;
+       case 'b':
+               return OS_TYPE_BLOCKDEV;
+       case 'p':
+               return OS_TYPE_FIFO;
+       case 's':
+               return OS_TYPE_SOCK;
+       default:
+               return -EINVAL;
+       }
+}
+
+/*
+ * Determine the type of "path".  The metadata is consulted first; any
+ * non-zero answer from meta_type() (a type or an error) is returned as
+ * is.  Otherwise the type of the backing data file is used.
+ */
+static int humfs_file_type(const char *path, int *dev_out, 
+                          struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       int meta_result = meta_type(path, dev_out, mount);
+
+       if(meta_result == 0)
+               return host_file_type(data_path, dev_out);
+
+       return meta_result;
+}
+
+/* Name of the inode's backing file, prefixed with the mount's data dir. */
+static char *humfs_data_name(struct inode *inode)
+{
+       struct humfs *mount;
+
+       mount = container_of(inode_externfs_info(inode), struct humfs, ext);
+       return inode_name_prefix(inode, mount->data);
+}
+
+/*
+ * Obtain a new humfs_file from the metadata layer and mark its data
+ * file handle as not open (fd = -1).  Returns the embedded
+ * externfs_inode, or NULL if the metadata layer returned NULL.
+ */
+static struct externfs_inode *humfs_init_file(struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       struct humfs_file *file = (*mount->meta->init_file)();
+
+       if(file != NULL){
+               file->data.fd = -1;
+               return &file->ext;
+       }
+
+       return NULL;
+}
+
+/*
+ * Open the metadata and the backing data file for "path".
+ *
+ * The data file is opened read-write (with the direct flag if the mount
+ * asks for it); if that fails with -EPERM it is retried read-only.  On
+ * success the file handle is registered as reclaimable.
+ *
+ * Returns 0 on success, -ENOMEM if the host path cannot be built, the
+ * metadata open error, or the data open error.  -EISDIR from the data
+ * open is returned without closing the metadata, as the original code
+ * did; the path buffer is released on that path too.
+ * uid and gid are not used here.
+ */
+static int humfs_open_file(struct externfs_inode *ext, char *path, int uid, 
+                          int gid, struct inode *inode, 
+                          struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       struct humfs_file *hf = container_of(ext, struct humfs_file, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       struct openflags flags;
+       char tmp[HOSTFS_BUFSIZE], *file;
+       int err = -ENOMEM;
+
+       file = get_path(data_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       flags = of_rdwr(OPENFLAGS());
+       if(mount->direct)
+               flags = of_direct(flags);
+
+       /* The metadata layer is always given a string, never NULL */
+       if(path == NULL)
+               path = "";
+       err = (*mount->meta->open_file)(hf, path, inode, mount);
+       if(err)
+               goto out_free;
+
+       err = open_filehandle(file, flags, 0, &hf->data);
+       if(err == -EISDIR)
+               /* Previously jumped to "out", skipping free_path() */
+               goto out_free;
+       else if(err == -EPERM){
+               /* Not writable on the host - fall back to read-only */
+               flags = of_set_rw(flags, 1, 0);
+               err = open_filehandle(file, flags, 0, &hf->data);
+       }
+       
+       if(err)
+               goto out_close;
+
+       hf->mount = mount;
+       is_reclaimable(&hf->data, humfs_data_name, inode);
+
+ out_free:
+       free_path(file, tmp);
+ out:
+       return(err);
+       
+ out_close:
+       (*mount->meta->close_file)(hf);
+       goto out_free;
+}
+
+/*
+ * Open the backing directory for "path" under the mount's data dir and
+ * return whatever host_open_dir() returns.  uid and gid are not used.
+ */
+static void *humfs_open_dir(char *path, int uid, int gid, 
+                           struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *dir_elems[3] = { mount->data, path, NULL };
+       void *stream = host_open_dir(dir_elems);
+
+       return stream;
+}
+
+/* Close a directory stream from humfs_open_dir(); "ed" is not used. */
+static void humfs_close_dir(void *stream, struct externfs_data *ed)
+{
+       os_close_dir(stream);
+}
+
+/*
+ * Read the next directory entry by delegating to
+ * generic_host_read_dir(), passing the humfs mount as its last argument.
+ */
+static char *humfs_read_dir(void *stream, unsigned long long *pos, 
+                           unsigned long long *ino_out, int *len_out, 
+                           struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       char *name;
+
+       name = generic_host_read_dir(stream, pos, ino_out, len_out, mount);
+       return name;
+}
+
+/*
+ * Completed AIO requests, queued by humfs_interrupt() and drained by
+ * humfs_work_proc().
+ */
+LIST_HEAD(humfs_replies);
+
+/* One in-flight humfs AIO request; allocated in humfs_aio(). */
+struct humfs_aio {
+       struct aio_context aio;
+       struct list_head list;          /* link on humfs_replies */
+       void (*completion)(char *, int, void *);
+       char *buf;                      /* passed back to completion */
+       int real_len;                   /* reported to completion on success */
+       int err;                        /* set from the reply in the IRQ handler */
+       void *data;                     /* opaque argument for completion */
+};
+
+/* Descriptor passed to submit_aio() as the reply fd; -1 until set up */
+static int humfs_reply_fd = -1;
+
+/*
+ * Snapshots of the most recent request seen by the work procedure and by
+ * the interrupt handler; nothing in the visible code reads them back -
+ * presumably debugging aids.
+ */
+struct humfs_aio last_task_aio, last_intr_aio;
+struct humfs_aio *last_task_aio_ptr, *last_intr_aio_ptr;
+
+/*
+ * Work procedure that finishes AIO requests queued by humfs_interrupt().
+ *
+ * Each request is unlinked from humfs_replies with interrupts disabled
+ * (the interrupt handler adds to the same list), then its completion
+ * callback is run with interrupts restored and the request is freed.
+ * A non-negative err is replaced by real_len before the callback runs.
+ */
+void humfs_work_proc(void *unused)
+{
+       struct humfs_aio *aio;
+       unsigned long flags;
+
+       while(!list_empty(&humfs_replies)){
+               local_irq_save(flags);
+               aio = list_entry(humfs_replies.next, struct humfs_aio, list);
+
+               /* Record the request being processed (see last_task_aio) */
+               last_task_aio = *aio;
+               last_task_aio_ptr = aio;
+
+               list_del(&aio->list);
+               local_irq_restore(flags);
+
+               if(aio->err >= 0)
+                       aio->err = aio->real_len;
+               (*aio->completion)(aio->buf, aio->err, aio->data);
+               kfree(aio);
+       }
+}
+
+DECLARE_WORK(humfs_work, humfs_work_proc, NULL);
+
+/*
+ * IRQ handler for the AIO reply descriptor.
+ *
+ * dev_id carries the descriptor number (see init_humfs_aio()).  Replies
+ * are read until os_read_file() returns -EAGAIN; reply.data is taken to
+ * be the struct humfs_aio that humfs_aio() handed to submit_aio().  Each
+ * request gets its err set from the reply and is added at the head of
+ * humfs_replies, then humfs_work is scheduled to run the completions.
+ *
+ * NOTE(review): on any other read error the handler returns without
+ * scheduling work for replies already queued in this pass and without
+ * calling reactivate_fd() - confirm that this is intended.
+ * NOTE(review): only negative results are checked; a zero-length or
+ * short read would be processed as a full reply - confirm against
+ * os_read_file() semantics.
+ */
+static irqreturn_t humfs_interrupt(int irq, void *dev_id, 
+                                  struct pt_regs *unused)
+{
+       struct aio_thread_reply reply;
+       struct humfs_aio *aio;
+       int err, fd = (int) dev_id;
+
+       while(1){
+               err = os_read_file(fd, &reply, sizeof(reply));
+               if(err < 0){
+                       if(err == -EAGAIN)
+                               break;
+                       printk("humfs_interrupt - read returned err %d\n", 
+                              -err);
+                       return(IRQ_HANDLED);
+               }
+               aio = reply.data;
+               aio->err = reply.err;
+               list_add(&aio->list, &humfs_replies);
+               /* Record the request just queued (see last_intr_aio) */
+               last_intr_aio = *aio;
+               last_intr_aio_ptr = aio;
+       }
+
+       if(!list_empty(&humfs_replies))
+               schedule_work(&humfs_work);
+       reactivate_fd(fd, HUMFS_IRQ);
+       return(IRQ_HANDLED);
+}
+
+/*
+ * Set up the AIO reply channel: create a pipe, attach humfs_interrupt()
+ * to its read end as HUMFS_IRQ, and remember the write end in
+ * humfs_reply_fd for submit_aio().
+ *
+ * Returns 0 on success, or the error from os_pipe() or
+ * um_request_irq().  Previously the error was logged but 0 was always
+ * returned.  On IRQ registration failure both pipe ends are closed and
+ * humfs_reply_fd stays at -1.
+ */
+static int init_humfs_aio(void)
+{
+       int fds[2], err;
+
+       err = os_pipe(fds, 1, 1);
+       if(err){
+               printk("init_humfs_aio - pipe failed, err = %d\n", -err);
+               goto out;
+       }
+
+       /* The read end doubles as dev_id so the handler knows its fd */
+       err = um_request_irq(HUMFS_IRQ, fds[0], IRQ_READ, humfs_interrupt,
+                            SA_INTERRUPT | SA_SAMPLE_RANDOM, "humfs", 
+                            (void *) fds[0]);
+       if(err){
+               printk("init_humfs_aio - : um_request_irq failed, err = %d\n",
+                      err);
+               goto out_close;
+       }
+
+       humfs_reply_fd = fds[1];
+       goto out;
+       
+ out_close:
+       os_close_file(fds[0]);
+       os_close_file(fds[1]);
+ out:
+       return(err);
+}
+
+__initcall(init_humfs_aio);
+
+/*
+ * Submit one asynchronous read or write on "fd".
+ *
+ * A humfs_aio request is allocated to carry the completion callback,
+ * its argument, the buffer and real_len (the length reported to the
+ * callback on success; see humfs_work_proc()).  Once submit_aio()
+ * accepts the request it is freed by humfs_work_proc() after the
+ * callback has run.
+ *
+ * Returns 0 on successful submission.  Returns -ENOMEM, without calling
+ * the callback, if the request cannot be allocated.  If submit_aio()
+ * fails, the callback is invoked with the error, the request is freed
+ * here (it used to be leaked), and the error is returned.
+ */
+static int humfs_aio(enum aio_type type, int fd, unsigned long long offset,
+                    char *buf, int len, int real_len,
+                    void (*completion)(char *, int, void *), void *arg)
+{
+       struct humfs_aio *aio;
+       int err = -ENOMEM;
+
+       aio = kmalloc(sizeof(*aio), GFP_KERNEL);
+       if(aio == NULL)
+               goto out;
+       *aio = ((struct humfs_aio) { .aio       = INIT_AIO_CONTEXT,
+                                    .list      = LIST_HEAD_INIT(aio->list),
+                                    .completion= completion,
+                                    .buf       = buf,
+                                    .err       = 0,
+                                    .real_len  = real_len,
+                                    .data      = arg });
+
+       err = submit_aio(type, fd, buf, len, offset, humfs_reply_fd, aio);
+       if(err){
+               /* No reply will arrive for a request that was never
+                * submitted, so nobody else will free it.
+                */
+               (*completion)(buf, err, arg);
+               kfree(aio);
+       }
+
+ out:
+       return(err);
+}
+
+/*
+ * Start an asynchronous read of "len" bytes at "offset" into buf.
+ * If no descriptor can be obtained for the data file, the completion
+ * callback is called immediately with the (negative) descriptor value,
+ * which is also returned.  ignore_start, ignore_end and ed are unused.
+ */
+static int humfs_read_file(struct externfs_inode *ext,
+                          unsigned long long offset, char *buf, int len,
+                          int ignore_start, int ignore_end,
+                          void (*completion)(char *, int, void *), void *arg, 
+                          struct externfs_data *ed)
+{
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+       int fd;
+
+       fd = filehandle_fd(&file->data);
+       if(fd >= 0)
+               return humfs_aio(AIO_READ, fd, offset, buf, len, len,
+                                completion, arg);
+
+       (*completion)(buf, fd, arg);
+       return fd;
+}
+
+/*
+ * Start an asynchronous write.
+ *
+ * On a "direct" mount a full PAGE_SIZE is written from the start of buf
+ * at the unmodified offset; otherwise "len" bytes are written starting
+ * "start" bytes into both the buffer and the file offset.  The length
+ * reported to the completion callback is always the caller's "len".
+ *
+ * Returns a negative error from the descriptor lookup or humfs_aio()
+ * (the callback is invoked directly in the descriptor case), "len" on a
+ * direct mount, and humfs_aio()'s result otherwise.
+ */
+static int humfs_write_file(struct externfs_inode *ext,
+                           unsigned long long offset, 
+                           const char *buf, int start, int len, 
+                           void (*completion)(char *, int, void *), void *arg,
+                           struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+       int fd = filehandle_fd(&file->data);
+       int io_len = len, ret;
+       char *data = (char *) buf;
+
+       if(fd < 0){
+               (*completion)(data, fd, arg);
+               return fd;
+       }
+
+       if(!mount->direct){
+               offset += start;
+               data += start;
+       }
+       else io_len = PAGE_SIZE;
+
+       ret = humfs_aio(AIO_WRITE, fd, offset, data, io_len, len,
+                       completion, arg);
+       if(ret < 0)
+               return ret;
+
+       return mount->direct ? len : ret;
+}
+
+/*
+ * Substitute a mapping of the data file at "offset" for the page at
+ * buf.  The data file is first extended so that it covers
+ * offset + PAGE_SIZE if it is currently shorter.
+ * Returns the first error encountered, else the result of
+ * physmem_subst_mapping().
+ */
+static int humfs_map_file_page(struct externfs_inode *ext, 
+                              unsigned long long offset, char *buf, int w, 
+                              struct externfs_data *ed)
+{
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+       unsigned long long cur_size, min_size = offset + PAGE_SIZE;
+       int fd = filehandle_fd(&file->data), err;
+
+       if(fd < 0)
+               return fd;
+
+       err = os_fd_size(fd, &cur_size);
+       if(err)
+               return err;
+
+       if(cur_size < min_size){
+               err = os_truncate_fd(fd, min_size);
+               if(err)
+                       return err;
+       }
+
+       return physmem_subst_mapping(buf, fd, offset, w);
+}
+
+/*
+ * Close a humfs file: drop any physmem mappings of its descriptor,
+ * truncate the data file to "size", close it, and let the metadata
+ * layer close its side.
+ *
+ * NOTE(review): when the data handle was never opened (fd == -1) this
+ * returns without calling meta->close_file(); if that callback is what
+ * releases the humfs_file, it is leaked on this path - confirm.
+ * NOTE(review): the result of filehandle_fd() is not checked before
+ * being passed to physmem_forget_descriptor() - confirm it cannot fail
+ * here.
+ */
+static void humfs_close_file(struct externfs_inode *ext,
+                            unsigned long long size)
+{
+       struct humfs_file *hf = container_of(ext, struct humfs_file, ext);
+       int fd;
+
+       if(hf->data.fd == -1)
+               return;
+
+       fd = filehandle_fd(&hf->data);
+       physmem_forget_descriptor(fd);
+       truncate_file(&hf->data, size);
+       close_file(&hf->data);
+
+       (*hf->mount->meta->close_file)(hf);
+}
+
+/*
+ * Create a special file (char/block device, FIFO or socket).
+ *
+ * A placeholder file is created in the data directory and immediately
+ * closed; the node type, device numbers, mode and ownership are
+ * recorded through the metadata layer.  If the type is not one of the
+ * four special types (-EINVAL) or the metadata call fails, the
+ * placeholder is unlinked again.
+ *
+ * XXX Assumes that you can't make a normal file
+ */
+static int humfs_make_node(const char *path, int mode, int uid, int gid, 
+                          int type, int major, int minor, 
+                          struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       struct file_handle placeholder;
+       char type_char;
+       int ret;
+
+       ret = host_create_file(data_path, S_IRWXUGO, &placeholder);
+       if(ret)
+               return ret;
+
+       close_file(&placeholder);
+
+       switch(type){
+       case S_IFCHR:
+               type_char = 'c';
+               break;
+       case S_IFBLK:
+               type_char = 'b';
+               break;
+       case S_IFIFO:
+               type_char = 'p';
+               break;
+       case S_IFSOCK:
+               type_char = 's';
+               break;
+       default:
+               printk("make_node - bad node type : %d\n", type);
+               host_unlink_file(data_path);
+               return -EINVAL;
+       }
+
+       ret = (*mount->meta->make_node)(path, mode, uid, gid, type_char,
+                                       major, minor, mount);
+       if(ret)
+               host_unlink_file(data_path);
+
+       return ret;
+}
+               
+/*
+ * Create a regular file: metadata first, then the backing data file
+ * (always created with S_IRWXUGO on the host; the requested mode goes
+ * to the metadata).  If the data file cannot be created, the metadata
+ * entry is removed and the metadata side of the file is closed.
+ * On success the data handle is registered as reclaimable.
+ */
+static int humfs_create_file(struct externfs_inode *ext, char *path, int mode,
+                            int uid, int gid, struct inode *inode, 
+                            struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       int ret;
+
+       ret = (*mount->meta->create_file)(file, path, mode, uid, gid, inode,
+                                         mount);
+       if(ret)
+               return ret;
+
+       ret = host_create_file(data_path, S_IRWXUGO, &file->data);
+       if(ret){
+               (*mount->meta->remove_file)(path, mount);
+               (*mount->meta->close_file)(file);
+               return ret;
+       }
+
+       is_reclaimable(&file->data, humfs_data_name, inode);
+       return 0;
+}
+
+/*
+ * Apply attribute changes.  Mode, uid and gid are stored through the
+ * metadata layer (one change_ownerships call each, -1 meaning "leave
+ * alone"), then cleared from ia_valid so that host_set_attr() applies
+ * only the remaining attributes to the backing file.  The first
+ * metadata failure is returned and nothing further is applied.
+ */
+static int humfs_set_attr(const char *path, struct externfs_iattr *attrs, 
+                         struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       int (*set_owner)(const char *, int, int, int, struct humfs *);
+       int ret = 0;
+
+       set_owner = mount->meta->change_ownerships;
+
+       if(attrs->ia_valid & EXTERNFS_ATTR_MODE)
+               ret = (*set_owner)(path, attrs->ia_mode, -1, -1, mount);
+       if(!ret && (attrs->ia_valid & EXTERNFS_ATTR_UID))
+               ret = (*set_owner)(path, -1, attrs->ia_uid, -1, mount);
+       if(!ret && (attrs->ia_valid & EXTERNFS_ATTR_GID))
+               ret = (*set_owner)(path, -1, -1, attrs->ia_gid, mount);
+       if(ret)
+               return ret;
+
+       attrs->ia_valid &= ~(EXTERNFS_ATTR_MODE | EXTERNFS_ATTR_UID | 
+                            EXTERNFS_ATTR_GID);
+
+       return host_set_attr(data_path, attrs);
+}
+
+/*
+ * Create symlink "from" pointing at "to".  A temporary humfs_file is
+ * used to create the metadata entry for "from"; the symlink itself is
+ * made in the data directory.  If the host symlink fails the metadata
+ * entry is removed again.  The temporary file is closed through the
+ * metadata layer whether or not the metadata create succeeded.
+ * Returns -ENOMEM if no humfs_file could be obtained.
+ */
+static int humfs_make_symlink(const char *from, const char *to, int uid, 
+                             int gid, struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, from, NULL };
+       struct humfs_file *tmp_file;
+       int ret;
+
+       tmp_file = (*mount->meta->init_file)();
+       if(tmp_file == NULL)
+               return -ENOMEM;
+
+       ret = (*mount->meta->create_file)(tmp_file, from, S_IRWXUGO, uid,
+                                         gid, NULL, mount);
+       if(!ret){
+               ret = host_make_symlink(data_path, to);
+               if(ret)
+                       (*mount->meta->remove_file)(from, mount);
+       }
+
+       (*mount->meta->close_file)(tmp_file);
+       return ret;
+}
+
+/*
+ * Hard link: record the link in the metadata, then link the backing
+ * files.  If the host link fails, the metadata entry for "from" is
+ * removed.  uid and gid are not used here.
+ */
+static int humfs_link_file(const char *to, const char *from, int uid, int gid, 
+                          struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *from_elems[3] = { mount->data, from, NULL };
+       const char *to_elems[3] = { mount->data, to, NULL };
+       int ret = (*mount->meta->create_link)(to, from, mount);
+
+       if(!ret){
+               ret = host_link_file(to_elems, from_elems);
+               if(ret)
+                       (*mount->meta->remove_file)(from, mount);
+       }
+
+       return ret;
+}
+
+/*
+ * Unlink "path": remove its metadata entry, then unlink the backing
+ * data file.  Returns the metadata error if that step fails (the data
+ * file is then left untouched), otherwise the result of
+ * host_unlink_file().
+ *
+ * The metadata entry is removed exactly once; a second, unchecked
+ * remove_file call that used to follow the first has been dropped.
+ */
+static int humfs_unlink_file(const char *path, struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, path, NULL };
+       int err;
+
+       err = (*mount->meta->remove_file)(path, mount);
+       if (err)
+               return err;
+
+       return(host_unlink_file(data_path));
+}
+
+/*
+ * externfs "invisible" hook: forwards to the metadata layer's invisible
+ * callback and then takes the data file handle out of the reclaimable
+ * set.
+ */
+static void humfs_invisible(struct externfs_inode *ext)
+{
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+
+       (*file->mount->meta->invisible)(file);
+       not_reclaimable(&file->data);
+}
+
+/*
+ * Create a directory: metadata entry first (carrying mode/uid/gid),
+ * then the backing directory, always with S_IRWXUGO on the host.  The
+ * metadata entry is removed again if the host mkdir fails.
+ */
+static int humfs_make_dir(const char *path, int mode, int uid, int gid, 
+                         struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *dir_elems[3] = { mount->data, path, NULL };
+       int ret = (*mount->meta->create_dir)(path, mode, uid, gid, mount);
+
+       if(ret)
+               return ret;
+
+       ret = host_make_dir(dir_elems, S_IRWXUGO);
+       if(ret)
+               (*mount->meta->remove_dir)(path, mount);
+
+       return ret;
+}
+
+/*
+ * Remove a directory: the backing directory goes first; only if that
+ * succeeds is the metadata entry removed (its result is ignored).
+ * uid and gid are not used here.
+ */
+static int humfs_remove_dir(const char *path, int uid, int gid, 
+                           struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *dir_elems[3] = { mount->data, path, NULL };
+       int ret = host_remove_dir(dir_elems);
+
+       if(!ret)
+               (*mount->meta->remove_dir)(path, mount);
+
+       return ret;
+}
+
+/*
+ * Read the target of the symlink stored in the data directory for
+ * "file".  uid and gid are not used here.
+ */
+static int humfs_read_link(char *file, int uid, int gid, char *buf, int size, 
+                          struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *link_elems[3] = { mount->data, file, NULL };
+
+       return host_read_link(link_elems, buf, size);
+}
+
+/* Map an inode to the struct humfs embedding its externfs_data. */
+struct humfs *inode_humfs_info(struct inode *inode)
+{
+       struct externfs_data *ed = inode_externfs_info(inode);
+
+       return container_of(ed, struct humfs, ext);
+}
+
+/*
+ * Rename: the metadata is renamed first, then the backing file.  If the
+ * host rename fails the metadata rename is reversed (the result of the
+ * reversal is ignored).
+ */
+static int humfs_rename_file(char *from, char *to, struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *old_elems[3] = { mount->data, from, NULL };
+       const char *new_elems[3] = { mount->data, to, NULL };
+       int ret = (*mount->meta->rename_file)(from, to, mount);
+
+       if(ret)
+               return ret;
+
+       ret = host_rename_file(old_elems, new_elems);
+       if(ret)
+               (*mount->meta->rename_file)(to, from, mount);
+
+       return ret;
+}
+
+/*
+ * statfs for humfs.  All fields are first filled in from the host
+ * filesystem holding the data directory; the block totals are then
+ * overridden from the mount's own total/used accounting, expressed in
+ * units of the host block size.  Free and available blocks are reported
+ * as the same value.
+ */
+static int humfs_stat_fs(long *bsize_out, long long *blocks_out, 
+                        long long *bfree_out, long long *bavail_out, 
+                        long long *files_out, long long *ffree_out, 
+                        void *fsid_out, int fsid_size, long *namelen_out, 
+                        long *spare_out, struct externfs_data *ed)
+{
+       struct humfs *mount = container_of(ed, struct humfs, ext);
+       const char *data_path[3] = { mount->data, NULL };
+       long long unused_blocks;
+       int ret;
+
+       /* XXX Needs to maintain this info as metadata */
+       ret = host_stat_fs(data_path, bsize_out, blocks_out, bfree_out,
+                          bavail_out, files_out, ffree_out, fsid_out,
+                          fsid_size, namelen_out, spare_out);
+       if(ret)
+               return ret;
+
+       unused_blocks = (mount->total - mount->used) / *bsize_out;
+
+       *blocks_out = mount->total / *bsize_out;
+       *bfree_out = unused_blocks;
+       *bavail_out = unused_blocks;
+       return 0;
+}
+
+/* Truncate the backing data file of "ext" to "size"; "ed" is unused. */
+int humfs_truncate_file(struct externfs_inode *ext, __u64 size, 
+                       struct externfs_data *ed)
+{
+       struct humfs_file *file = container_of(ext, struct humfs_file, ext);
+       struct file_handle *data = &file->data;
+
+       return truncate_file(data, size);
+}
+
+/*
+ * Join "dir" and "file" into a newly allocated string, inserting a '/'
+ * between them unless "dir" already ends in one.
+ *
+ * An empty "dir" yields a copy of "file" with no separator; the
+ * previous code examined dir[-1] in that case.
+ *
+ * Returns NULL if the allocation fails.  The caller owns the result and
+ * releases it with kfree().
+ */
+char *humfs_path(char *dir, char *file)
+{
+       size_t dir_len = strlen(dir), file_len = strlen(file);
+       int need_slash;
+       char *new;
+
+       need_slash = (dir_len > 0) && (dir[dir_len - 1] != '/');
+
+       new = kmalloc(dir_len + need_slash + file_len + 1, GFP_KERNEL);
+       if(new == NULL)
+               return(NULL);
+
+       memcpy(new, dir, dir_len);
+       if(need_slash)
+               new[dir_len] = '/';
+       /* file_len + 1 copies the terminating NUL as well */
+       memcpy(new + dir_len + need_slash, file, file_len + 1);
+
+       return(new);
+}
+
+/* Registered metadata back ends; the list is protected by meta_sem. */
+DECLARE_MUTEX(meta_sem);
+struct list_head metas = LIST_HEAD_INIT(metas);
+
+/*
+ * Look up a registered metadata back end by name.
+ * Returns the matching ops structure, or NULL if none is registered
+ * under that name.
+ */
+static struct humfs_meta_ops *find_meta(const char *name)
+{
+       struct humfs_meta_ops *found = NULL, *cur;
+       struct list_head *pos;
+
+       down(&meta_sem);
+       list_for_each(pos, &metas){
+               cur = list_entry(pos, struct humfs_meta_ops, list);
+               if(strcmp(cur->name, name) == 0){
+                       found = cur;
+                       break;
+               }
+       }
+       up(&meta_sem);
+
+       return found;
+}
+
+/* Add a metadata back end to the "metas" list, under meta_sem. */
+void register_meta(struct humfs_meta_ops *ops)
+{
+       down(&meta_sem);
+       list_add(&ops->list, &metas);
+       up(&meta_sem);
+}
+/* Remove a metadata back end from the "metas" list, under meta_sem. */
+void unregister_meta(struct humfs_meta_ops *ops)
+{
+       down(&meta_sem);
+       list_del(&ops->list);
+       up(&meta_sem);
+}
+/*
+ * Parse <root>/superblock and create the mount structure.
+ *
+ * The superblock is a text file of newline-terminated lines of the form
+ *     version <n>     - must equal HUMFS_VERSION
+ *     used <bytes>
+ *     total <bytes>
+ *     metadata <name> - name of a registered metadata back end
+ * Any other line is rejected.  "used" and "total" must both be non-zero
+ * with used <= total.  If no metadata line is present, the "shadow_fs"
+ * back end is used.
+ *
+ * The mount itself is allocated by the back end's init_mount(); its
+ * struct humfs is then overwritten with total, used and meta set and
+ * every other member zeroed.
+ *
+ * Returns the new mount or an ERR_PTR() value.  The superblock file
+ * handle is closed and freed on every path; previously it was leaked
+ * both on success and on failure.
+ */
+static struct humfs *read_superblock(char *root)
+{
+       struct humfs *mount;
+       struct humfs_meta_ops *meta = NULL;
+       struct file_handle *fh;
+       const char *path[] = { root, "superblock", NULL };
+       u64 used, total;
+       char meta_buf[33], line[HOSTFS_BUFSIZE], *newline;
+       unsigned long long pos;
+       int version, i, n, err;
+
+       fh = kmalloc(sizeof(*fh), GFP_KERNEL);
+       if(fh == NULL)
+               return(ERR_PTR(-ENOMEM));
+
+       err = host_open_file(path, 1, 0, fh);
+       if(err){
+               printk("Failed to open %s/%s, errno = %d\n", path[0],
+                      path[1], err);
+               mount = ERR_PTR(err);
+               goto out_free;
+       }
+
+       /* Every parse failure below reports -EINVAL unless it says
+        * otherwise.
+        */
+       mount = ERR_PTR(-EINVAL);
+
+       used = 0;
+       total = 0;
+       pos = 0;
+       i = 0;
+       while(1){
+               /* line[0..i) holds the unparsed remainder of the
+                * previous read; append new data after it.
+                */
+               n = read_file(fh, pos, &line[i], sizeof(line) - i - 1);
+               if((n == 0) && (i == 0))
+                       break;
+               if(n < 0){
+                       mount = ERR_PTR(n);
+                       goto out_close;
+               }
+
+               pos += n;
+               if(n > 0)
+                       line[n + i] = '\0';
+
+               newline = strchr(line, '\n');
+               if(newline == NULL){
+                       printk("read_superblock - line too long : '%s'\n", 
+                              line);
+                       goto out_close;
+               }
+               newline++;
+
+               if(sscanf(line, "version %d\n", &version) == 1){
+                       if(version != HUMFS_VERSION){
+                               printk("humfs version mismatch - want version "
+                                      "%d, got version %d.\n", HUMFS_VERSION,
+                                      version);
+                               goto out_close;
+                       }
+               }
+               else if(sscanf(line, "used %Lu\n", &used) == 1) ;
+               else if(sscanf(line, "total %Lu\n", &total) == 1) ;
+               else if(sscanf(line, "metadata %32s\n", meta_buf) == 1){
+                       meta = find_meta(meta_buf);
+                       if(meta == NULL){
+                               printk("read_superblock - meta api \"%s\" not "
+                                      "registered\n", meta_buf);
+                               goto out_close;
+                       }
+               }
+               
+               else {
+                       printk("read_superblock - bogus line : '%s'\n", line);
+                       goto out_close;
+               }
+
+               /* Drop the line just parsed; keep whatever follows it */
+               i = newline - line;
+               memmove(line, newline, sizeof(line) - i);
+               i = strlen(line);
+       }
+
+       if(used == 0){
+               printk("read_superblock - used not specified or set to "
+                      "zero\n");
+               goto out_close;
+       }
+       if(total == 0){
+               printk("read_superblock - total not specified or set to "
+                      "zero\n");
+               goto out_close;
+       }
+       if(used > total){
+               printk("read_superblock - used is greater than total\n");
+               goto out_close;
+       }
+
+       if(meta == NULL){
+               meta = find_meta("shadow_fs");
+       }
+
+       if(meta == NULL){
+               printk("read_superblock - valid meta api was not specified\n");
+               goto out_close;
+       }
+
+       mount = (*meta->init_mount)(root);
+       if(IS_ERR(mount))
+               goto out_close;
+
+       *mount = ((struct humfs) { .total       = total,
+                                  .used        = used,
+                                  .meta        = meta });
+
+ out_close:
+       close_file(fh);
+ out_free:
+       kfree(fh);
+       return(mount);
+}
+
+/*
+ * humfs operations for mounts that do not map file pages: identical to
+ * humfs_mmap_file_ops except that map_file_page is NULL.  access_file
+ * is not implemented by humfs.
+ */
+struct externfs_file_ops humfs_no_mmap_file_ops = {
+       /* Inspection */
+       .stat_file              = humfs_stat_file,
+       .file_type              = humfs_file_type,
+       .access_file            = NULL,
+       .statfs                 = humfs_stat_fs,
+       .read_link              = humfs_read_link,
+
+       /* Regular file I/O */
+       .open_file              = humfs_open_file,
+       .read_file              = humfs_read_file,
+       .write_file             = humfs_write_file,
+       .truncate_file          = humfs_truncate_file,
+       .close_file             = humfs_close_file,
+       .map_file_page          = NULL,
+       .invisible              = humfs_invisible,
+
+       /* Directory iteration */
+       .open_dir               = humfs_open_dir,
+       .read_dir               = humfs_read_dir,
+       .close_dir              = humfs_close_dir,
+
+       /* Namespace changes */
+       .create_file            = humfs_create_file,
+       .set_attr               = humfs_set_attr,
+       .make_symlink           = humfs_make_symlink,
+       .link_file              = humfs_link_file,
+       .unlink_file            = humfs_unlink_file,
+       .rename_file            = humfs_rename_file,
+       .make_dir               = humfs_make_dir,
+       .remove_dir             = humfs_remove_dir,
+       .make_node              = humfs_make_node
+};
+
+/*
+ * externfs operations used for humfs mounts with page mapping enabled
+ * (mount_fs() picks this table when do_mmap is nonzero).  Same handlers
+ * as humfs_no_mmap_file_ops, plus humfs_map_file_page.
+ */
+struct externfs_file_ops humfs_mmap_file_ops = {
+       /* attributes */
+       .stat_file              = humfs_stat_file,
+       .file_type              = humfs_file_type,
+       .access_file            = NULL,
+       .set_attr               = humfs_set_attr,
+       .invisible              = humfs_invisible,
+       .statfs                 = humfs_stat_fs,
+
+       /* open files and directories */
+       .open_file              = humfs_open_file,
+       .read_file              = humfs_read_file,
+       .write_file             = humfs_write_file,
+       .truncate_file          = humfs_truncate_file,
+       .map_file_page          = humfs_map_file_page,
+       .close_file             = humfs_close_file,
+       .open_dir               = humfs_open_dir,
+       .read_dir               = humfs_read_dir,
+       .close_dir              = humfs_close_dir,
+
+       /* names, links and nodes */
+       .create_file            = humfs_create_file,
+       .make_symlink           = humfs_make_symlink,
+       .read_link              = humfs_read_link,
+       .link_file              = humfs_link_file,
+       .unlink_file            = humfs_unlink_file,
+       .rename_file            = humfs_rename_file,
+       .make_dir               = humfs_make_dir,
+       .remove_dir             = humfs_remove_dir,
+       .make_node              = humfs_make_node,
+};
+
+/*
+ * Mount entry point handed to externfs through humfs_mount_ops.
+ *
+ * mount_arg has the form "<host directory>[,<flag>]..." and is modified
+ * in place: every comma is overwritten with '\0'.  The only flag acted
+ * on here is "mmap"; any other flag is ignored.
+ *
+ * Returns the externfs_data embedded in the new humfs mount, or NULL on
+ * any failure.
+ */
+static struct externfs_data *mount_fs(char *mount_arg)
+{
+       char *root, *data, *flag, *next;
+       struct humfs *mount;
+       struct externfs_file_ops *file_ops;
+       int do_mmap = 0;
+
+       if(mount_arg == NULL){
+               printk("humfs - no host directory specified\n");
+               return(NULL);
+       }
+
+       /* Terminate each flag before comparing it, so that a flag is
+        * recognized wherever it sits in the list and not only when it
+        * happens to be the last one.
+        */
+       flag = strchr(mount_arg, ',');
+       while(flag != NULL){
+               *flag++ = '\0';
+
+               next = strchr(flag, ',');
+               if(next != NULL)
+                       *next = '\0';
+
+               if(!strcmp(flag, "mmap"))
+                       do_mmap = 1;
+
+               flag = next;
+       }
+
+       root = host_root_filename(mount_arg);
+       if(root == NULL)
+               goto err;
+
+       mount = read_superblock(root);
+       if(IS_ERR(mount))
+               goto err_free_root;
+
+       data = humfs_path(root, "data/");
+       if(data == NULL)
+               goto err_free_mount;
+
+       if(CHOOSE_MODE(do_mmap, 0)){
+               printk("humfs doesn't support mmap in tt mode\n");
+               do_mmap = 0;
+       }
+
+       mount->data = data;
+       mount->mmap = do_mmap;
+
+       file_ops = do_mmap ? &humfs_mmap_file_ops : &humfs_no_mmap_file_ops;
+       init_externfs(&mount->ext, file_ops);
+
+       /* NOTE(review): root is not released on this path - confirm whether
+        * anything keeps a reference to it after read_superblock().
+        */
+       return(&mount->ext);
+
+ err_free_mount:
+       /* NOTE(review): the mount came from meta->init_mount(); confirm
+        * whether meta->free_mount() should be used here instead.
+        */
+       kfree(mount);
+ err_free_root:
+       kfree(root);
+ err:
+       return(NULL);
+}
+
+/*
+ * Mount-level operations registered with externfs under the name "humfs"
+ * by init_humfs().
+ */
+struct externfs_mount_ops humfs_mount_ops = {
+       .init_file              = humfs_init_file,
+       .mount                  = mount_fs,
+};
+
+/* Init hook: register the "humfs" mount operations with externfs. */
+static int __init init_humfs(void)
+{
+       int err = register_externfs("humfs", &humfs_mount_ops);
+
+       return(err);
+}
+
+/* Exit hook: undo the registration done by init_humfs(). */
+static void __exit exit_humfs(void)
+{
+       unregister_externfs("humfs");
+}
+
+/* Hook the registration functions into the initcall/exitcall lists. */
+__initcall(init_humfs);
+__exitcall(exit_humfs);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/hostfs/meta_fs.c b/fs/hostfs/meta_fs.c
new file mode 100644 (file)
index 0000000..7464149
--- /dev/null
@@ -0,0 +1,520 @@
+/* 
+ * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include "hostfs.h"
+#include "metadata.h"
+#include "kern_util.h"
+
+/*
+ * Each macro expands to TWO comma-separated expressions - the mount root
+ * and the name of a metadata tree - and is meant to be used as the first
+ * two elements of a path-component array initializer.
+ */
+#define METADATA_FILE_PATH(meta) (meta)->root, "file_metadata"
+#define METADATA_DIR_PATH(meta) (meta)->root, "dir_metadata"
+
+/*
+ * Per-mount state of the "shadow_fs" metadata backend.  Callers only see
+ * the embedded struct humfs; this file recovers the meta_fs from it with
+ * container_of().
+ */
+struct meta_fs {
+       struct humfs humfs;     /* generic humfs mount state */
+       char *root;             /* copy of the root path, see metafs_init_mount() */
+};
+
+/*
+ * Per-file state of the "shadow_fs" metadata backend.  Callers only see
+ * the embedded struct humfs_file; this file recovers the meta_file from
+ * it with container_of().
+ */
+struct meta_file {
+       struct humfs_file humfs;        /* generic humfs per-file state */
+       struct file_handle fh;          /* handle on the file's metadata file */
+};
+
+/*
+ * Fill path_out with the path components of the metadata file that
+ * shadows "path".
+ *
+ * The type of <root>/data/<path> on the host selects the layout:
+ *   directory:     { root, "dir_metadata", path, "metadata", NULL }
+ *   anything else: { root, "file_metadata", path, NULL }
+ *
+ * path_out must have room for five entries.  The entries point at
+ * meta->root, at path and at string literals; nothing is copied.
+ *
+ * Returns 0 on success, or -ENOMEM if the data path cannot be assembled.
+ */
+static int meta_file_path(const char *path, struct meta_fs *meta,
+                         const char *path_out[])
+{
+       const char *data_path[] = { meta->root, "data", path, NULL };
+       char data_tmp[HOSTFS_BUFSIZE];
+       char *data_file = get_path(data_path, data_tmp, sizeof(data_tmp));
+       int is_dir;
+
+       if(data_file == NULL)
+               return(-ENOMEM);
+
+       is_dir = (os_file_type(data_file) == OS_TYPE_DIR);
+
+       /* The assembled name was only needed for the type check; release
+        * it the way the other get_path() users in this file do.
+        */
+       free_path(data_file, data_tmp);
+
+       path_out[0] = meta->root;
+       path_out[2] = path;
+       if(is_dir){
+               path_out[1] = "dir_metadata";
+               path_out[3] = "metadata";
+               path_out[4] = NULL;
+       }
+       else {
+               path_out[1] = "file_metadata";
+               path_out[3] = NULL;
+       }
+
+       return(0);
+}
+
+/*
+ * Open, read-write, the metadata file that shadows "path" and store the
+ * result in *fh.
+ *
+ * Returns 0 on success, -ENOMEM if the file name cannot be assembled, or
+ * the error reported by meta_file_path() or open_filehandle().
+ */
+static int open_meta_file(const char *path, struct humfs *humfs,
+                         struct file_handle *fh)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+       const char *meta_path[5];
+       char meta_tmp[HOSTFS_BUFSIZE];
+       char *meta_file;
+       int err;
+
+       err = meta_file_path(path, meta, meta_path);
+       if(err)
+               goto out;
+
+       /* err is 0 at this point, so it has to be set explicitly or a
+        * get_path() failure would be reported as success with *fh never
+        * opened.
+        */
+       err = -ENOMEM;
+       meta_file = get_path(meta_path, meta_tmp, sizeof(meta_tmp));
+       if(meta_file == NULL)
+               goto out;
+
+       err = open_filehandle(meta_file, of_rdwr(OPENFLAGS()), 0, fh);
+
+       /* As in metafs_create_file() and metafs_make_node(), the name is
+        * released once the open has been attempted.
+        */
+       free_path(meta_file, meta_tmp);
+
+ out:
+       return(err);
+}
+
+/*
+ * Name callback handed to is_reclaimable(): builds the metadata prefix
+ * for the mount that owns the inode and passes it, together with the
+ * inode, to inode_name_prefix().  Returns that result, or NULL if the
+ * prefix cannot be built.
+ */
+static char *meta_fs_name(struct inode *inode)
+{
+       struct meta_fs *fs = container_of(inode_humfs_info(inode),
+                                         struct meta_fs, humfs);
+       const char *components[5];
+       char buf[HOSTFS_BUFSIZE];
+       char *prefix, *result = NULL;
+
+       if(meta_file_path("", fs, components) != 0)
+               goto out;
+
+       prefix = get_path(components, buf, sizeof(buf));
+       if(prefix == NULL)
+               goto out;
+
+       result = inode_name_prefix(inode, prefix);
+       free_path(prefix, buf);
+
+ out:
+       return(result);
+}
+
+/* .invisible hook: mark the file's metadata handle as not reclaimable. */
+static void metafs_invisible(struct humfs_file *hf)
+{
+       struct file_handle *fh;
+
+       fh = &container_of(hf, struct meta_file, humfs)->fh;
+       not_reclaimable(fh);
+}
+
+/*
+ * .init_file hook: allocate a meta_file and return its embedded
+ * humfs_file, or ERR_PTR(-ENOMEM) if the allocation fails.  The memory
+ * is not initialized here.
+ */
+static struct humfs_file *metafs_init_file(void)
+{
+       struct meta_file *file = kmalloc(sizeof(*file), GFP_KERNEL);
+
+       if(file != NULL)
+               return(&file->humfs);
+
+       return(ERR_PTR(-ENOMEM));
+}
+
+/*
+ * .open_file hook: open the metadata file shadowing "path" into the
+ * meta_file's handle and, on success, register the handle as reclaimable
+ * with meta_fs_name() as its name callback.  Returns 0 or the error from
+ * open_meta_file().
+ */
+static int metafs_open_file(struct humfs_file *hf, const char *path,
+                           struct inode *inode, struct humfs *humfs)
+{
+       struct file_handle *fh = &container_of(hf, struct meta_file,
+                                              humfs)->fh;
+       int err = open_meta_file(path, humfs, fh);
+
+       if(err == 0)
+               is_reclaimable(fh, meta_fs_name, inode);
+
+       return(err);
+}
+
+/*
+ * .close_file hook: close the metadata handle and free the meta_file
+ * that metafs_init_file() allocated.
+ */
+static void metafs_close_file(struct humfs_file *hf)
+{
+       struct meta_file *mf;
+
+       mf = container_of(hf, struct meta_file, humfs);
+       close_file(&mf->fh);
+       kfree(mf);
+}
+
+/*
+ * .create_file hook: create <root>/file_metadata/<path>, leave it open
+ * in the meta_file's handle and write the "mode uid gid" record into it.
+ * If the write fails the handle is closed and the file removed again.
+ *
+ * Returns 0 on success, -ENOMEM if the name cannot be assembled, or the
+ * error from open_filehandle() or write_file().
+ */
+static int metafs_create_file(struct humfs_file *hf, const char *path,
+                             int mode, int uid, int gid, struct inode *inode,
+                             struct humfs *humfs)
+{
+       struct meta_fs *fs = container_of(humfs, struct meta_fs, humfs);
+       struct file_handle *fh = &container_of(hf, struct meta_file,
+                                              humfs)->fh;
+       const char *components[] = { METADATA_FILE_PATH(fs), path, NULL };
+       char name_buf[HOSTFS_BUFSIZE];
+       char record[sizeof("mmmm uuuuuuuuuu gggggggggg")];
+       char *name;
+       int err;
+
+       name = get_path(components, name_buf, sizeof(name_buf));
+       if(name == NULL)
+               return(-ENOMEM);
+
+       err = open_filehandle(name, of_write(of_create(OPENFLAGS())), 0644,
+                             fh);
+       if(err)
+               goto out_free;
+
+       if(inode != NULL)
+               is_reclaimable(fh, meta_fs_name, inode);
+
+       sprintf(record, "%d %d %d\n", mode & S_IRWXUGO, uid, gid);
+       err = write_file(fh, 0, record, strlen(record));
+       if(err < 0){
+               /* Don't leave a metadata file without a valid record */
+               close_file(fh);
+               os_remove_file(name);
+       }
+       else err = 0;
+
+ out_free:
+       free_path(name, name_buf);
+       return(err);
+}
+
+/*
+ * .create_link hook: hand the file_metadata paths of "to" and "from" to
+ * host_link_file() and return its result.
+ */
+static int metafs_create_link(const char *to, const char *from,
+                             struct humfs *humfs)
+{
+       struct meta_fs *fs = container_of(humfs, struct meta_fs, humfs);
+       const char *link_to[] = { METADATA_FILE_PATH(fs), to, NULL };
+       const char *link_from[] = { METADATA_FILE_PATH(fs), from, NULL };
+       int err;
+
+       err = host_link_file(link_to, link_from);
+       return(err);
+}
+
+/*
+ * .remove_file hook: remove <root>/file_metadata/<path>.
+ *
+ * Returns the result of os_remove_file(), or -ENOMEM if the name cannot
+ * be assembled.
+ */
+static int metafs_remove_file(const char *path, struct humfs *humfs)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+       char tmp[HOSTFS_BUFSIZE];
+       const char *metadata_path[] = { METADATA_FILE_PATH(meta), path, NULL };
+       char *file;
+       int err;
+
+       file = get_path(metadata_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               /* Nothing to release - like the other functions in this
+                * file, don't hand a NULL name to free_path().
+                */
+               return(-ENOMEM);
+
+       err = os_remove_file(file);
+
+       free_path(file, tmp);
+       return(err);
+}
+
+/*
+ * .create_dir hook: create the two shadow directories for "path",
+ *      <root>/dir_metadata/<path> and <root>/file_metadata/<path>,
+ * and write the "mode uid gid" record to
+ *      <root>/dir_metadata/<path>/metadata.
+ *
+ * Returns 0 on success or a negative error.  On failure everything that
+ * was created by this call is removed again (best effort).
+ */
+static int metafs_create_directory(const char *path, int mode, int uid,
+                                  int gid, struct humfs *humfs)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+       char tmp[HOSTFS_BUFSIZE];
+       const char *dir_path[] = { METADATA_DIR_PATH(meta), path, NULL, NULL };
+       const char *file_path[] = { METADATA_FILE_PATH(meta), path, NULL,
+                                   NULL };
+       char *file, dir_meta[sizeof("mmmm uuuuuuuuuu gggggggggg\n")];
+       /* Slot just before the terminating NULL.  Computed from the array
+        * size so that it is independent of the number of elements in
+        * METADATA_DIR_PATH().
+        */
+       int meta_slot = sizeof(dir_path) / sizeof(dir_path[0]) - 2;
+       int err, fd;
+
+       err = host_make_dir(dir_path, 0755);
+       if(err)
+               goto out;
+
+       err = host_make_dir(file_path, 0755);
+       if(err)
+               goto out_rm_dir;
+
+       dir_path[meta_slot] = "metadata";
+
+       err = -ENOMEM;
+       file = get_path(dir_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out_rm_file_dir;
+
+       fd = os_open_file(file, of_create(of_rdwr(OPENFLAGS())), 0644);
+       if(fd < 0){
+               err = fd;
+               goto out_free;
+       }
+
+       sprintf(dir_meta, "%d %d %d\n", mode & S_IRWXUGO, uid, gid);
+       err = os_write_file(fd, dir_meta, strlen(dir_meta));
+       os_close_file(fd);
+       if(err < 0)
+               goto out_rm_meta;
+
+       /* Success must not run into the removal code below */
+       free_path(file, tmp);
+       return(0);
+
+ out_rm_meta:
+       os_remove_file(file);
+ out_free:
+       free_path(file, tmp);
+ out_rm_file_dir:
+       host_remove_dir(file_path);
+ out_rm_dir:
+       /* Make dir_path name the directory again, not the metadata file */
+       dir_path[meta_slot] = NULL;
+       host_remove_dir(dir_path);
+ out:
+       return(err);
+}
+
+/*
+ * .remove_dir hook: remove, in this order,
+ *      <root>/dir_metadata/<path>/metadata     (file)
+ *      <root>/dir_metadata/<path>              (directory)
+ *      <root>/file_metadata/<path>             (directory)
+ *
+ * Stops at the first failure and returns its error; returns 0 only if
+ * all three removals succeeded.
+ */
+static int metafs_remove_directory(const char *path, struct humfs *humfs)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+       char tmp[HOSTFS_BUFSIZE], *file;
+       const char *dir_path[] = { METADATA_DIR_PATH(meta), path, "metadata",
+                                  NULL };
+       const char *file_path[] = { METADATA_FILE_PATH(meta), path, NULL };
+       char *slash;
+       int err;
+
+       err = -ENOMEM;
+       file = get_path(dir_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       err = os_remove_file(file);
+       if(err)
+               goto out_free;
+
+       /* Chop off "/metadata" to get the name of the directory itself */
+       slash = strrchr(file, '/');
+       if(slash == NULL){
+               printk("remove_shadow_directory failed to find last slash\n");
+               err = -EINVAL;
+               goto out_free;
+       }
+       *slash = '\0';
+       err = os_remove_dir(file);
+       free_path(file, tmp);
+       if(err)
+               goto out;
+
+       err = -ENOMEM;
+       file = get_path(file_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       err = os_remove_dir(file);
+
+ out_free:
+       free_path(file, tmp);
+ out:
+       return(err);
+}
+
+/*
+ * .make_node hook: create <root>/file_metadata/<path> and write the
+ * six-field record "mode uid gid type major minor" into it.
+ *
+ * Returns 0 on success, -EINVAL if the record does not fit the fixed
+ * record size, -ENOMEM if the name cannot be assembled, or the error
+ * from open_filehandle() or write_file().
+ */
+static int metafs_make_node(const char *path, int mode, int uid, int gid,
+                           int type, int maj, int min, struct humfs *humfs)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+       struct file_handle fh;
+       char tmp[HOSTFS_BUFSIZE];
+       const char *metadata_path[] = { METADATA_FILE_PATH(meta), path, NULL };
+       int err, len;
+       char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")], *file;
+
+       /* The buffer only allows for three-digit device numbers, so the
+        * formatting has to be bounded.  A record that doesn't fit is
+        * rejected: metafs_ownerships() reads with a buffer of the same
+        * size and could not parse it back.
+        */
+       len = snprintf(buf, sizeof(buf), "%d %d %d %c %d %d\n",
+                      mode & S_IRWXUGO, uid, gid, type, maj, min);
+       if((len < 0) || ((size_t) len >= sizeof(buf)))
+               return(-EINVAL);
+
+       err = -ENOMEM;
+       file = get_path(metadata_path, tmp, sizeof(tmp));
+       if(file == NULL)
+               goto out;
+
+       err = open_filehandle(file,
+                             of_create(of_rdwr(OPENFLAGS())), 0644, &fh);
+       if(err)
+               goto out_free;
+
+       err = write_file(&fh, 0, buf, strlen(buf));
+       if(err > 0)
+               err = 0;
+
+       close_file(&fh);
+
+ out_free:
+       free_path(file, tmp);
+ out:
+       return(err);
+}
+
+/*
+ * .ownerships hook: read the metadata record that shadows "path".
+ *
+ * The record is either "mode uid gid" or "mode uid gid type major minor".
+ * For the short form *type_out is set to 0 and *maj_out / *min_out to -1.
+ * Any of the output pointers may be NULL, in which case that field is
+ * not reported.
+ *
+ * Returns 0 on success, -EINVAL if the record has neither 3 nor 6
+ * fields, or the error from open_meta_file() or os_read_file().  The
+ * outputs are only written on success.
+ */
+static int metafs_ownerships(const char *path, int *mode_out, int *uid_out,
+                            int *gid_out, char *type_out, int *maj_out,
+                            int *min_out, struct humfs *humfs)
+{
+       struct file_handle fh;
+       char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")];
+       int err, n, mode, uid, gid, maj, min;
+       char type;
+
+       err = open_meta_file(path, humfs, &fh);
+       if(err)
+               goto out;
+
+       err = os_read_file(fh.fd, buf, sizeof(buf) - 1);
+       if(err < 0)
+               goto out_close;
+
+       buf[err] = '\0';
+       err = 0;
+
+       n = sscanf(buf, "%d %d %d %c %d %d", &mode, &uid, &gid, &type, &maj,
+                  &min);
+       if(n == 3){
+               maj = -1;
+               min = -1;
+               type = 0;
+       }
+       else if(n != 6){
+               /* Nothing usable was parsed - don't hand uninitialized
+                * values back to the caller.
+                */
+               err = -EINVAL;
+               goto out_close;
+       }
+
+       if(mode_out != NULL)
+               *mode_out = mode;
+       if(uid_out != NULL)
+               *uid_out = uid;
+       if(gid_out != NULL)
+               *gid_out = gid;
+       if(type_out != NULL)
+               *type_out = type;
+       if(maj_out != NULL)
+               *maj_out = maj;
+       if(min_out != NULL)
+               *min_out = min;
+
+ out_close:
+       close_file(&fh);
+ out:
+       return(err);
+}
+
+/*
+ * .change_ownerships hook: rewrite the mode/uid/gid fields of the
+ * metadata record that shadows "path".  A value of -1 for mode, uid or
+ * gid keeps the stored value.  For six-field records the type, major
+ * and minor fields are written back unchanged.
+ *
+ * Returns the result of truncate_file() on success, -EINVAL if the
+ * stored record has neither 3 nor 6 fields or the new record does not
+ * fit the fixed record size, or the error from open_meta_file(),
+ * read_file() or write_file().
+ */
+static int metafs_change_ownerships(const char *path, int mode, int uid,
+                                   int gid, struct humfs *humfs)
+{
+       struct file_handle fh;
+       char type;
+       char buf[sizeof("mmmm uuuuuuuuuu gggggggggg x nnn mmm\n")];
+       int err, len, old_mode, old_uid, old_gid, n, maj, min;
+
+       err = open_meta_file(path, humfs, &fh);
+       if(err)
+               goto out;
+
+       err = read_file(&fh, 0, buf, sizeof(buf) - 1);
+       if(err < 0)
+               goto out_close;
+
+       buf[err] = '\0';
+
+       n = sscanf(buf, "%d %d %d %c %d %d\n", &old_mode, &old_uid, &old_gid,
+                  &type, &maj, &min);
+       if((n != 3) && (n != 6)){
+               err = -EINVAL;
+               goto out_close;
+       }
+
+       if(mode == -1)
+               mode = old_mode;
+       if(uid == -1)
+               uid = old_uid;
+       if(gid == -1)
+               gid = old_gid;
+
+       /* The new ids may need more digits than the old ones, so the
+        * formatting has to be bounded by the buffer size.
+        */
+       if(n == 3)
+               len = snprintf(buf, sizeof(buf), "%d %d %d\n",
+                              mode & S_IRWXUGO, uid, gid);
+       else
+               len = snprintf(buf, sizeof(buf), "%d %d %d %c %d %d\n",
+                              mode & S_IRWXUGO, uid, gid, type, maj, min);
+       if((len < 0) || ((size_t) len >= sizeof(buf))){
+               err = -EINVAL;
+               goto out_close;
+       }
+
+       /* Don't let truncate_file() below hide a failed write */
+       err = write_file(&fh, 0, buf, strlen(buf));
+       if(err < 0)
+               goto out_close;
+
+       /* Drop whatever is left of a longer old record */
+       err = truncate_file(&fh, strlen(buf));
+
+ out_close:
+       close_file(&fh);
+ out:
+       return(err);
+}
+
+/*
+ * .rename_file hook: work out the metadata paths of "from" and "to" and
+ * pass them to host_rename_file().  Returns its result, or the error
+ * from meta_file_path() if either path cannot be built.
+ */
+static int metafs_rename_file(const char *from, const char *to,
+                             struct humfs *humfs)
+{
+       struct meta_fs *fs = container_of(humfs, struct meta_fs, humfs);
+       const char *src[5], *dst[5];
+       int err;
+
+       err = meta_file_path(from, fs, src);
+       if(err == 0)
+               err = meta_file_path(to, fs, dst);
+       if(err == 0)
+               err = host_rename_file(src, dst);
+
+       return(err);
+}
+
+/*
+ * .init_mount hook: allocate a meta_fs, give it its own copy of the root
+ * path and return the embedded struct humfs.  Returns ERR_PTR(-ENOMEM)
+ * if either allocation fails.  The humfs member is not initialized here.
+ */
+static struct humfs *metafs_init_mount(char *root)
+{
+       struct meta_fs *fs = kmalloc(sizeof(*fs), GFP_KERNEL);
+
+       if(fs == NULL)
+               return(ERR_PTR(-ENOMEM));
+
+       fs->root = uml_strdup(root);
+       if(fs->root == NULL){
+               kfree(fs);
+               return(ERR_PTR(-ENOMEM));
+       }
+
+       return(&fs->humfs);
+}
+
+/*
+ * .free_mount hook: release what metafs_init_mount() allocated - the
+ * copy of the root path and the meta_fs structure itself.
+ */
+static void metafs_free_mount(struct humfs *humfs)
+{
+       struct meta_fs *meta = container_of(humfs, struct meta_fs, humfs);
+
+       /* NOTE(review): assumes memory returned by uml_strdup() can be
+        * released with kfree() - confirm against its definition.
+        */
+       kfree(meta->root);
+       kfree(meta);
+}
+
+/*
+ * Operations table of the "shadow_fs" metadata backend, registered by
+ * init_meta_fs().  Entries follow the member order of
+ * struct humfs_meta_ops.
+ */
+struct humfs_meta_ops hum_fs_meta_fs_ops = {
+       .list                   = LIST_HEAD_INIT(hum_fs_meta_fs_ops.list),
+       .name                   = "shadow_fs",
+
+       /* per-file state */
+       .init_file              = metafs_init_file,
+       .open_file              = metafs_open_file,
+       .create_file            = metafs_create_file,
+       .close_file             = metafs_close_file,
+
+       /* operations by path */
+       .ownerships             = metafs_ownerships,
+       .make_node              = metafs_make_node,
+       .create_link            = metafs_create_link,
+       .remove_file            = metafs_remove_file,
+       .create_dir             = metafs_create_directory,
+       .remove_dir             = metafs_remove_directory,
+       .change_ownerships      = metafs_change_ownerships,
+       .rename_file            = metafs_rename_file,
+       .invisible              = metafs_invisible,
+
+       /* per-mount state */
+       .init_mount             = metafs_init_mount,
+       .free_mount             = metafs_free_mount,
+};
+
+/* Init hook: register the "shadow_fs" metadata backend.  Always returns 0. */
+static int __init init_meta_fs(void)
+{
+       register_meta(&hum_fs_meta_fs_ops);
+       return(0);
+}
+
+/* Exit hook: undo the registration done by init_meta_fs(). */
+static void __exit exit_meta_fs(void)
+{
+       unregister_meta(&hum_fs_meta_fs_ops);
+}
+
+/* Hook the registration functions into the initcall/exitcall lists. */
+__initcall(init_meta_fs);
+__exitcall(exit_meta_fs);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/hostfs/metadata.h b/fs/hostfs/metadata.h
new file mode 100644 (file)
index 0000000..924fb5b
--- /dev/null
@@ -0,0 +1,79 @@
+/* 
+ * Copyright (C) 2004 Piotr Neuman (sikkh@wp.pl) and 
+ * Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_FS_METADATA
+#define __UM_FS_METADATA
+
+#include "linux/fs.h"
+#include "linux/list.h"
+#include "os.h"
+#include "hostfs.h"
+
+/*
+ * Per-mount humfs state.  Metadata backends embed this structure in
+ * their own mount structure (see the init_mount hook) and recover the
+ * latter with container_of().
+ */
+struct humfs {
+       struct externfs_data ext;       /* what mount_fs() hands back to externfs */
+       __u64 used;                     /* from the superblock; nonzero and <= total */
+       __u64 total;                    /* from the superblock; nonzero */
+       char *data;                     /* humfs_path(root, "data/"), set by mount_fs() */
+       int mmap;                       /* nonzero if the mmap file ops are in use */
+       int direct;                     /* NOTE(review): no user visible here - confirm meaning */
+       struct humfs_meta_ops *meta;    /* metadata backend chosen for this mount */
+};
+
+/*
+ * Per-file humfs state.  Metadata backends embed this structure in their
+ * own per-file structure (see the init_file hook) and recover the latter
+ * with container_of().
+ */
+struct humfs_file {
+       struct humfs *mount;            /* presumably the owning mount - TODO confirm */
+       struct file_handle data;        /* presumably the handle on the data file - TODO confirm */
+       struct externfs_inode ext;      /* externfs per-inode state */
+};
+
+/*
+ * Interface between humfs and a metadata backend.  A backend fills in
+ * one of these and announces it with register_meta().  Paths are given
+ * as passed in by humfs; the humfs argument identifies the mount.
+ * The hooks returning int return 0 or a negative error in the
+ * "shadow_fs" backend.
+ */
+struct humfs_meta_ops {
+       struct list_head list;          /* linkage for the list of backends */
+       char *name;                     /* backend name, e.g. "shadow_fs" */
+       /* Allocate per-file state; returns an ERR_PTR() on failure */
+       struct humfs_file *(*init_file)(void);
+       int (*open_file)(struct humfs_file *hf, const char *path, 
+                        struct inode *inode, struct humfs *humfs);
+       int (*create_file)(struct humfs_file *hf, const char *path, int mode, 
+                          int uid, int gid, struct inode *inode, 
+                          struct humfs *humfs);
+       void (*close_file)(struct humfs_file *humfs);
+       /* Look up ownership information; the *_out pointers may be NULL
+        * in the "shadow_fs" backend.
+        */
+       int (*ownerships)(const char *path, int *mode_out, int *uid_out, 
+                         int *gid_out, char *type_out, int *maj_out, 
+                         int *min_out, struct humfs *humfs);
+       int (*make_node)(const char *path, int mode, int uid, int gid,
+                        int type, int major, int minor, struct humfs *humfs);
+       int (*create_link)(const char *to, const char *from, 
+                          struct humfs *humfs);
+       int (*remove_file)(const char *path, struct humfs *humfs);
+       int (*create_dir)(const char *path, int mode, int uid, int gid, 
+                         struct humfs *humfs);
+       int (*remove_dir)(const char *path, struct humfs *humfs);
+       /* In the "shadow_fs" backend, -1 for mode, uid or gid keeps the
+        * stored value.
+        */
+       int (*change_ownerships)(const char *path, int mode, int uid, int gid,
+                                struct humfs *humfs);
+       int (*rename_file)(const char *from, const char *to, 
+                          struct humfs *humfs);
+       void (*invisible)(struct humfs_file *hf);
+       /* Allocate per-mount state; returns an ERR_PTR() on failure */
+       struct humfs *(*init_mount)(char *root);
+       void (*free_mount)(struct humfs *humfs);
+};
+
+/* Add / remove a metadata backend; called from the backend's init/exit hooks */
+extern void register_meta(struct humfs_meta_ops *ops);
+extern void unregister_meta(struct humfs_meta_ops *ops);
+
+/* Helpers provided by humfs for the metadata backends */
+extern char *humfs_path(char *dir, char *file);
+extern char *humfs_name(struct inode *inode, char *prefix);
+extern struct humfs *inode_humfs_info(struct inode *inode);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index 471010b..a93f58c 100644 (file)
@@ -118,7 +118,7 @@ static struct inode *alloc_inode(struct super_block *sb)
                inode->i_sb = sb;
                // inode->i_dqh = dqhget(sb->s_dqh);
 
-               /* essential because of inode slab reuse */
+               /* important because of inode slab reuse */
                inode->i_xid = 0;
                inode->i_blkbits = sb->s_blocksize_bits;
                inode->i_flags = 0;
index efba306..9c483a6 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/vserver/xid.h>
+#include <linux/quotaops.h>
 
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
index bccfccc..6fb8c25 100644 (file)
@@ -28,9 +28,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
-#include <linux/proc_fs.h>
-#include <linux/vserver/inode.h>
-#include <linux/vserver/debug.h>
+#include <linux/vs_base.h>
 
 #include <asm/namei.h>
 #include <asm/uaccess.h>
@@ -172,6 +170,10 @@ int generic_permission(struct inode *inode, int mask,
 {
        umode_t                 mode = inode->i_mode;
 
+       /* Prevent vservers from escaping chroot() barriers */
+       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
+               return -EACCES;
+
        if (mask & MAY_WRITE) {
                /*
                 * Nobody gets write access to a read-only fs.
@@ -228,38 +230,12 @@ int generic_permission(struct inode *inode, int mask,
        return -EACCES;
 }
 
-static inline int xid_permission(struct inode *inode, int mask, struct nameidata *nd)
-{
-       if (inode->i_xid == 0)
-               return 0;
-
-#ifdef CONFIG_VSERVER_FILESHARING
-       /* MEF: PlanetLab FS module assumes that any file that can be
-        * named (e.g., via a cross mount) is not hidden from another
-        * context or the admin context.
-        */
-       if (vx_check(inode->i_xid,VX_STATIC|VX_DYNAMIC))
-               return 0;
-#endif
-       if (vx_check(inode->i_xid,VX_ADMIN|VX_WATCH|VX_IDENT))
-               return 0;
-
-       vxwprintk(1, "xid=%d denied access to %p[#%d,%lu] Â»%s«.",
-               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
-               vxd_path(nd->dentry, nd->mnt));
-       return -EACCES;
-}
-
 int permission(struct inode * inode,int mask, struct nameidata *nd)
 {
        int retval;
        int submask;
        umode_t mode = inode->i_mode;
 
-       /* Prevent vservers from escaping chroot() barriers */
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
-               return -EACCES;
-
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
 
@@ -267,9 +243,6 @@ int permission(struct inode * inode,int mask, struct nameidata *nd)
                (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
                return -EROFS;
 
-       if ((retval = xid_permission(inode, mask, nd)))
-               return retval;
-
        if (inode->i_op && inode->i_op->permission)
                retval = inode->i_op->permission(inode, submask, nd);
        else
@@ -672,46 +645,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
 {
        struct vfsmount *mnt = nd->mnt;
        struct dentry *dentry = __d_lookup(nd->dentry, name);
-       struct inode *inode;
 
        if (!dentry)
                goto need_lookup;
        if (dentry->d_op && dentry->d_op->d_revalidate)
                goto need_revalidate;
-       inode = dentry->d_inode;
-       if (!inode)
-               goto done;
-       if (inode->i_sb->s_magic == PROC_SUPER_MAGIC) {
-               struct proc_dir_entry *de = PDE(inode);
-
-               if (de && !vx_hide_check(0, de->vx_flags))
-                       goto hidden;
-       }
-#ifdef CONFIG_VSERVER_FILESHARING
-       /* MEF: PlanetLab FS module assumes that any file that can be
-        * named (e.g., via a cross mount) is not hidden from another
-        * context or the admin context.
-        */
-       if (vx_check(inode->i_xid,VX_STATIC|VX_DYNAMIC|VX_ADMIN)) {
-               /* do nothing */
-       }
-       else /* do the following check */
-#endif
-       if (!vx_check(inode->i_xid, 
-                     VX_WATCH|
-                     VX_HOSTID|
-                     VX_IDENT))
-               goto hidden;
 done:
        path->mnt = mnt;
        path->dentry = dentry;
        return 0;
-hidden:
-       vxwprintk(1, "xid=%d did lookup hidden %p[#%d,%lu] Â»%s«.",
-               vx_current_xid(), inode, inode->i_xid, inode->i_ino,
-               vxd_path(dentry, mnt));
-       dput(dentry);
-       return -ENOENT;
 
 need_lookup:
        if (atomic)
index da22d93..ed977eb 100644 (file)
@@ -22,8 +22,8 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
+#include <linux/vs_base.h>
 #include <linux/vserver/namespace.h>
-#include <linux/vserver/xid.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -164,7 +164,6 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
                mnt->mnt_mountpoint = mnt->mnt_root;
                mnt->mnt_parent = mnt;
                mnt->mnt_namespace = old->mnt_namespace;
-               mnt->mnt_xid = old->mnt_xid;
 
                /* stick the duplicate mount on the same expiry list
                 * as the original if that was on one */
@@ -245,11 +244,6 @@ static int show_vfsmnt(struct seq_file *m, void *v)
        unsigned long s_flags = mnt->mnt_sb->s_flags;
        int mnt_flags = mnt->mnt_flags;
 
-       if (vx_flags(VXF_HIDE_MOUNT, 0))
-               return 0;
-       if (!vx_check_vfsmount(current->vx_info, mnt))
-               return 0;
-
        if (vx_flags(VXF_HIDE_MOUNT, 0))
                return 0;
        if (!vx_check_vfsmount(current->vx_info, mnt))
@@ -270,8 +264,6 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                                seq_puts(m, p->unset_str);
                }
        }
-       if (mnt->mnt_flags & MNT_XID)
-               seq_printf(m, ",xid=%d", mnt->mnt_xid);
        if (mnt->mnt_sb->s_op->show_options)
                err = mnt->mnt_sb->s_op->show_options(m, mnt);
        seq_puts(m, " 0 0\n");
@@ -357,10 +349,8 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static inline void __umount_list(struct list_head *kill)
+static inline void __umount_tree(struct vfsmount *mnt, struct list_head *kill)
 {
-       struct vfsmount *mnt;
-
        while (!list_empty(kill)) {
                mnt = list_entry(kill->next, struct vfsmount, mnt_list);
                list_del_init(&mnt->mnt_list);
@@ -387,7 +377,7 @@ void umount_tree(struct vfsmount *mnt)
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
        }
-       __umount_list(&kill);
+       __umount_tree(mnt, &kill);
 }
 
 void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
@@ -401,7 +391,7 @@ void umount_unused(struct vfsmount *mnt, struct fs_struct *fs)
                list_del(&p->mnt_list);
                list_add(&p->mnt_list, &kill);
        }
-       __umount_list(&kill);
+       __umount_tree(mnt, &kill);
 }
 
 static int do_umount(struct vfsmount *mnt, int flags)
@@ -660,7 +650,7 @@ out_unlock:
 /*
  * do loopback mount.
  */
-static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, unsigned long flags, int mnt_flags)
+static int do_loopback(struct nameidata *nd, char *old_name, unsigned long flags, int mnt_flags)
 {
        struct nameidata old_nd;
        struct vfsmount *mnt = NULL;
@@ -691,10 +681,6 @@ static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, unsigned
                list_del_init(&mnt->mnt_fslink);
                spin_unlock(&vfsmount_lock);
 
-               if (flags & MS_XID) {
-                       mnt->mnt_xid = xid;
-                       mnt->mnt_flags |= MNT_XID;
-               }
                err = graft_tree(mnt, nd);
                if (err) {
                        spin_lock(&vfsmount_lock);
@@ -717,7 +703,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, xid_t xid, unsigned
  */
 
 static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
-                     void *data, xid_t xid)
+                     void *data)
 {
        int err;
        struct super_block * sb = nd->mnt->mnt_sb;
@@ -735,11 +721,8 @@ static int do_remount(struct nameidata *nd, int flags, int mnt_flags,
                mnt_flags |= MNT_NODEV;
        down_write(&sb->s_umount);
        err = do_remount_sb(sb, flags, data, 0);
-       if (!err) {
+       if (!err)
                nd->mnt->mnt_flags=mnt_flags;
-               if (flags & MS_XID)
-                       nd->mnt->mnt_xid = xid;
-       }
        up_write(&sb->s_umount);
        if (!err)
                security_sb_post_remount(nd->mnt, flags, data);
@@ -1065,7 +1048,6 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
        struct nameidata nd;
        int retval = 0;
        int mnt_flags = 0;
-       xid_t xid = 0;
 
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
@@ -1081,14 +1063,6 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
 
-       retval = vx_parse_xid(data_page, &xid, 1);
-       if (retval) {
-               mnt_flags |= MNT_XID;
-               /* bind and re-mounts get xid flag */
-               if (flags & (MS_BIND|MS_REMOUNT))
-                       flags |= MS_XID;
-       }
-
        /* Separate the per-mountpoint flags */
        if (flags & MS_RDONLY)
                mnt_flags |= MNT_RDONLY;
@@ -1118,10 +1092,9 @@ long do_mount(char * dev_name, char * dir_name, char *type_page,
 
        if (flags & MS_REMOUNT)
                retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-                                   data_page, xid);
+                                   data_page);
        else if (flags & MS_BIND)
-               retval = do_loopback(&nd, dev_name, xid, flags, mnt_flags);
-
+               retval = do_loopback(&nd, dev_name, flags, mnt_flags);
        else if (flags & MS_MOVE)
                retval = do_move_mount(&nd, dev_name);
        else
index 0547efd..34a3c1f 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
-#include <linux/vserver/xid.h>
 
 #include "delegation.h"
 
@@ -760,7 +759,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
        inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
        if (!inode)
                goto out_unlock;
-       vx_propagate_xid(nd, inode);
 no_entry:
        error = 0;
        d_add(dentry, inode);
index 38318ce..60b3074 100644 (file)
@@ -723,7 +723,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 out:
        return inode;
-/*     FIXME
+/*
 fail_dlim:
        make_bad_inode(inode);
        iput(inode);
index f09f648..39b0d45 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -27,9 +27,6 @@
 #include <linux/vs_dlimit.h>
 #include <linux/vserver/xid.h>
 #include <linux/syscalls.h>
-#include <linux/vs_limit.h>
-#include <linux/vs_dlimit.h>
-#include <linux/vserver/xid.h>
 
 #include <asm/unistd.h>
 
index 9c67690..97fbb86 100644 (file)
@@ -215,10 +215,6 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
        const struct posix_acl_entry *pa, *pe, *mask_obj;
        int found = 0;
 
-       /* Prevent vservers from escaping chroot() barriers */
-       if (IS_BARRIER(inode) && !vx_check(0, VX_ADMIN))
-               return -EACCES;
-
        FOREACH_ACL_ENTRY(pa, acl, pe) {
                 switch(pa->e_tag) {
                         case ACL_USER_OBJ:
index b868698..abdd91d 100644 (file)
@@ -11,5 +11,4 @@ proc-y       += inode.o root.o base.o generic.o array.o \
                kmsg.o proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)      += kcore.o
-proc-$(CONFIG_CRASH_DUMP)      += vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
index fe7f823..a29937c 100644 (file)
@@ -73,6 +73,7 @@
 #include <linux/highmem.h>
 #include <linux/file.h>
 #include <linux/times.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
 #include <linux/vs_cvirt.h>
@@ -145,8 +146,8 @@ static inline const char * get_task_state(struct task_struct *tsk)
                                            TASK_INTERRUPTIBLE |
                                            TASK_UNINTERRUPTIBLE |
                                            TASK_STOPPED |
-                                          TASK_TRACED |
-                                          TASK_ONHOLD)) |
+                                           TASK_TRACED |
+                                           TASK_ONHOLD)) |
                        (tsk->exit_state & (EXIT_ZOMBIE |
                                            EXIT_DEAD));
        const char **p = &task_state_array[0];
@@ -162,12 +163,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
 {
        struct group_info *group_info;
        int g;
-       pid_t pid, ptgid, tppid, tgid;
+       pid_t pid, ppid, tppid, tgid;
 
        read_lock(&tasklist_lock);
        tgid = vx_map_tgid(p->tgid);
        pid = vx_map_pid(p->pid);
-       ptgid = vx_map_pid(p->group_leader->real_parent->tgid);
+       ppid = vx_map_pid(p->real_parent->pid);
        tppid = vx_map_pid(p->parent->pid);
        buffer += sprintf(buffer,
                "State:\t%s\n"
@@ -180,8 +181,8 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
-               tgid, pid, (pid > 1) ? ptgid : 0,
-               pid_alive(p) && p->ptrace ? tppid : 0,
+               tgid, pid, (pid > 1) ? ppid : 0,
+               p->pid && p->ptrace ? tppid : 0,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
        read_unlock(&tasklist_lock);
@@ -417,11 +418,10 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        stime += task->signal->stime;
                }
        }
-       pid = vx_info_map_pid(task->vx_info, pid_alive(task) ? task->pid : 0);
-       ppid = (!(pid > 1)) ? 0 : vx_info_map_tgid(task->vx_info,
-               task->group_leader->real_parent->tgid);
-       pgid = vx_info_map_pid(task->vx_info, pgid);
-
+       if (task_vx_flags(task, VXF_VIRT_UPTIME, 0)) {
+               bias_uptime = task->vx_info->cvirt.bias_uptime.tv_sec * NSEC_PER_SEC
+                       + task->vx_info->cvirt.bias_uptime.tv_nsec;
+       }
        read_unlock(&tasklist_lock);
 
        if (!whole || num_threads<2) {
@@ -453,21 +453,9 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        /* convert timespec -> nsec*/
        start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
                                + task->start_time.tv_nsec;
-
        /* convert nsec -> ticks */
        start_time = nsec_to_clock_t(start_time - bias_uptime);
 
-       /* fixup start time for virt uptime */
-       if (vx_flags(VXF_VIRT_UPTIME, 0)) {
-               unsigned long long bias =
-                       current->vx_info->cvirt.bias_clock;
-
-               if (start_time > bias)
-                       start_time -= bias;
-               else
-                       start_time = 0;
-       }
-
        res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
@@ -551,13 +539,13 @@ int proc_pid_delay(struct task_struct *task, char * buffer)
        int res;
 
        res  = sprintf(buffer,"%u %llu %llu %u %llu %u %llu\n",
-                      (unsigned int) get_delay(task,runs),
-                      (uint64_t) get_delay(task,runcpu_total),
-                      (uint64_t) get_delay(task,waitcpu_total),
-                      (unsigned int) get_delay(task,num_iowaits),
-                      (uint64_t) get_delay(task,iowait_total),
-                      (unsigned int) get_delay(task,num_memwaits),
-                      (uint64_t) get_delay(task,mem_iowait_total)
+                      get_delay(task,runs),
+                      (unsigned long long)get_delay(task,runcpu_total),
+                      (unsigned long long)get_delay(task,waitcpu_total),
+                      get_delay(task,num_iowaits),
+                      (unsigned long long)get_delay(task,iowait_total),
+                      get_delay(task,num_memwaits),
+                      (unsigned long long)get_delay(task,mem_iowait_total)
                );
        return res;
 }
index 65f144a..0a5916c 100644 (file)
@@ -101,6 +101,10 @@ enum pid_directory_inos {
 #endif
        PROC_TID_VX_INFO,
        PROC_TID_IP_INFO,
+#ifdef CONFIG_DELAY_ACCT
+        PROC_TID_DELAY_ACCT,
+        PROC_TGID_DELAY_ACCT,
+#endif
        PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
 };
 
@@ -131,6 +135,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SECURITY
        E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
+#ifdef CONFIG_DELAY_ACCT
+       E(PROC_TGID_DELAY_ACCT,"delay",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_KALLSYMS
        E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
 #endif
@@ -139,6 +146,9 @@ static struct pid_entry tgid_base_stuff[] = {
 #endif
        E(PROC_TGID_VX_INFO,   "vinfo",   S_IFREG|S_IRUGO),
        E(PROC_TGID_IP_INFO,   "ninfo",   S_IFREG|S_IRUGO),
+#ifdef CONFIG_SCHEDSTATS
+       E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
+#endif
        {0,0,NULL,0}
 };
 static struct pid_entry tid_base_stuff[] = {
@@ -158,6 +168,9 @@ static struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SECURITY
        E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
 #endif
+#ifdef CONFIG_DELAY_ACCT
+       E(PROC_TGID_DELAY_ACCT,"delay",   S_IFREG|S_IRUGO),
+#endif
 #ifdef CONFIG_KALLSYMS
        E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
 #endif
@@ -166,6 +179,9 @@ static struct pid_entry tid_base_stuff[] = {
 #endif
        E(PROC_TID_VX_INFO,    "vinfo",   S_IFREG|S_IRUGO),
        E(PROC_TID_IP_INFO,    "ninfo",   S_IFREG|S_IRUGO),
+#ifdef CONFIG_SCHEDSTATS
+       E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
+#endif
        {0,0,NULL,0}
 };
 
@@ -202,6 +218,9 @@ int proc_tid_stat(struct task_struct*,char*);
 int proc_tgid_stat(struct task_struct*,char*);
 int proc_pid_status(struct task_struct*,char*);
 int proc_pid_statm(struct task_struct*,char*);
+#ifdef CONFIG_DELAY_ACCT
+int proc_pid_delay(struct task_struct*,char*);
+#endif
 
 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
 {
@@ -1249,9 +1268,6 @@ static struct file_operations proc_tgid_attr_operations;
 static struct inode_operations proc_tgid_attr_inode_operations;
 #endif
 
-extern int proc_pid_vx_info(struct task_struct *, char *);
-extern int proc_pid_nx_info(struct task_struct *, char *);
-
 /* SMP-safe */
 static struct dentry *proc_pident_lookup(struct inode *dir, 
                                         struct dentry *dentry,
@@ -1412,7 +1428,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
                case PROC_TID_DELAY_ACCT:
                case PROC_TGID_DELAY_ACCT:
                        inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_schedstat;
+                       ei->op.proc_read = proc_pid_delay;
                        break;
 #endif
 #ifdef CONFIG_SCHEDSTATS
@@ -1514,14 +1530,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
                              int buflen)
 {
        char tmp[30];
-       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
+       sprintf(tmp, "%d", vx_map_pid(current->tgid));
        return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        char tmp[30];
-       sprintf(tmp, "%d", vx_map_tgid(current->tgid));
+       sprintf(tmp, "%d", vx_map_pid(current->tgid));
        return vfs_follow_link(nd,tmp);
 }      
 
index 2e85926..97e6b98 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/vs_base.h>
 #include <linux/vserver/inode.h>
 #include <linux/bitops.h>
-#include <linux/vserver/inode.h>
 #include <asm/uaccess.h>
 
 static ssize_t proc_file_read(struct file *file, char __user *buf,
@@ -63,7 +62,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
                return -ENOMEM;
 
        while ((nbytes > 0) && !eof) {
-               count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
+               count = min_t(ssize_t, PROC_BLOCK_SIZE, nbytes);
 
                start = NULL;
                if (dp->get_info) {
@@ -389,8 +388,7 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
 
                                error = -EINVAL;
                                inode = proc_get_inode(dir->i_sb, ino, de);
-                               /* generic proc entries belong to the host */
-                               inode->i_xid = 0;
+                               inode->i_xid = vx_current_xid();
                                break;
                        }
                }
index 379804c..ee1b56b 100644 (file)
@@ -114,7 +114,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 /*
  * determine size of ELF note
  */
-int notesize(struct memelfnote *en)
+static int notesize(struct memelfnote *en)
 {
        int sz;
 
@@ -129,7 +129,7 @@ int notesize(struct memelfnote *en)
 /*
  * store a note in the header buffer
  */
-char *storenote(struct memelfnote *men, char *bufp)
+static char *storenote(struct memelfnote *men, char *bufp)
 {
        struct elf_note en;
 
@@ -156,7 +156,7 @@ char *storenote(struct memelfnote *men, char *bufp)
  * store an ELF coredump header in the supplied buffer
  * nphdr is the number of elf_phdr to insert
  */
-void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff, struct kcore_list *clist)
+static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 {
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;   /* NT_PRPSINFO */
@@ -208,7 +208,7 @@ void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff, struct kcore_list *
        nhdr->p_align   = 0;
 
        /* setup ELF PT_LOAD program header for every area */
-       for (m=clist; m; m=m->next) {
+       for (m=kclist; m; m=m->next) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);
@@ -305,7 +305,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
                        return -ENOMEM;
                }
                memset(elf_buf, 0, elf_buflen);
-               elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen, kclist);
+               elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
index 1c4f019..dbe1fa7 100644 (file)
@@ -44,7 +44,6 @@
 #include <linux/jiffies.h>
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
-#include <linux/crash_dump.h>
 #include <linux/vs_base.h>
 #include <linux/vs_cvirt.h>
 
@@ -54,8 +53,6 @@
 #include <asm/tlb.h>
 #include <asm/div64.h>
 
-#include <linux/vs_cvirt.h>
-
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 /*
@@ -658,13 +655,11 @@ void __init proc_misc_init(void)
                                (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
        }
 #endif
-       crash_create_proc_entry();
 #ifdef CONFIG_MAGIC_SYSRQ
        entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
        if (entry)
                entry->proc_fops = &proc_sysrq_trigger_operations;
 #endif
-       crash_enable_by_proc();
 #ifdef CONFIG_PPC32
        {
                extern struct file_operations ppc_htab_operations;
index def2511..36a4ce1 100644 (file)
@@ -23,6 +23,9 @@ char *task_mem(struct mm_struct *mm, char *buffer)
                "StaBrk:\t%08lx kB\n"
                "Brk:\t%08lx kB\n"
                "StaStk:\t%08lx kB\n"
+#if __i386__
+               "ExecLim:\t%08lx\n"
+#endif
                ,
                (mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
                mm->locked_vm << (PAGE_SHIFT-10),
@@ -30,12 +33,11 @@ char *task_mem(struct mm_struct *mm, char *buffer)
                data << (PAGE_SHIFT-10),
                mm->stack_vm << (PAGE_SHIFT-10), text, lib,
                (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10,
-               mm->start_brk, mm->brk, mm->start_stack);
+               mm->start_brk, mm->brk, mm->start_stack
 #if __i386__
-       if (!nx_enabled)
-               buffer += sprintf(buffer,
-                               "ExecLim:\t%08lx\n", mm->context.exec_limit);
+               , mm->context.exec_limit
 #endif
+               );
        return buffer;
 }
 
@@ -57,9 +59,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
 
 static int show_map(struct seq_file *m, void *v)
 {
-#ifdef __i386__
-       struct task_struct *task = m->private;
-#endif
        struct vm_area_struct *map = v;
        struct file *file = map->vm_file;
        int flags = map->vm_flags;
@@ -78,13 +77,7 @@ static int show_map(struct seq_file *m, void *v)
                        map->vm_end,
                        flags & VM_READ ? 'r' : '-',
                        flags & VM_WRITE ? 'w' : '-',
-                       (flags & VM_EXEC
-#ifdef __i386__
-                               || (!nx_enabled &&
-                               (map->vm_start < task->mm->context.exec_limit))
-#endif
-                       )
-                               ? 'x' : '-',
+                       flags & VM_EXEC ? 'x' : '-',
                        flags & VM_MAYSHARE ? 's' : 'p',
                        map->vm_pgoff << PAGE_SHIFT,
                        MAJOR(dev), MINOR(dev), ino, &len);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
deleted file mode 100644 (file)
index 0c057dc..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- *     fs/proc/vmcore.c Interface for accessing the crash
- *                              dump from the system's previous life.
- *     Heavily borrowed from fs/proc/kcore.c
- *     Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *     Copyright (C) IBM Corporation, 2004. All rights reserved
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/proc_fs.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/vmalloc.h>
-#include <linux/proc_fs.h>
-#include <linux/highmem.h>
-#include <linux/bootmem.h>
-#include <linux/init.h>
-#include <linux/crash_dump.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-/* This is to re-use the kcore header creation code */
-static struct kcore_list vmcore_mem;
-
-static int open_vmcore(struct inode * inode, struct file * filp)
-{
-       return 0;
-}
-
-static ssize_t read_vmcore(struct file *,char __user *,size_t, loff_t *);
-
-#define BACKUP_START CRASH_BACKUP_BASE
-#define BACKUP_END CRASH_BACKUP_BASE + CRASH_BACKUP_SIZE
-#define REG_SIZE sizeof(elf_gregset_t)
-
-struct file_operations proc_vmcore_operations = {
-       .read           = read_vmcore,
-       .open           = open_vmcore,
-};
-
-struct proc_dir_entry *proc_vmcore;
-
-struct memelfnote
-{
-       const char *name;
-       int type;
-       unsigned int datasz;
-       void *data;
-};
-
-static size_t get_vmcore_size(int *nphdr, size_t *elf_buflen)
-{
-       size_t size;
-
-       /* We need 1 PT_LOAD segment headers
-        * In addition, we need one PT_NOTE header
-        */
-       *nphdr = 2;
-       size = (size_t)(saved_max_pfn << PAGE_SHIFT);
-
-       *elf_buflen =   sizeof(struct elfhdr) +
-                       (*nphdr + 2)*sizeof(struct elf_phdr) +
-                       3 * sizeof(struct memelfnote) +
-                       sizeof(struct elf_prstatus) +
-                       sizeof(struct elf_prpsinfo) +
-                       sizeof(struct task_struct);
-       *elf_buflen = PAGE_ALIGN(*elf_buflen);
-       return size + *elf_buflen;
-}
-
-/*
- * Reads a page from the oldmem device from given offset.
- */
-static ssize_t read_from_oldmem(char *buf, size_t count,
-                            loff_t *ppos, int userbuf)
-{
-       unsigned long pfn;
-       size_t read = 0;
-
-       pfn = (unsigned long)(*ppos / PAGE_SIZE);
-
-       if (pfn > saved_max_pfn) {
-               read = -EINVAL;
-               goto done;
-       }
-
-       count = (count > PAGE_SIZE) ? PAGE_SIZE : count;
-
-       if (copy_oldmem_page(pfn, buf, count, userbuf)) {
-               read = -EFAULT;
-               goto done;
-       }
-
-       *ppos += count;
-done:
-       return read;
-}
-
-/*
- * store an ELF crash dump header in the supplied buffer
- * nphdr is the number of elf_phdr to insert
- */
-static void elf_vmcore_store_hdr(char *bufp, int nphdr, int dataoff)
-{
-       struct elf_prstatus prstatus;   /* NT_PRSTATUS */
-       struct memelfnote notes[1];
-       char reg_buf[REG_SIZE];
-       loff_t reg_ppos;
-       char *buf = bufp;
-
-       vmcore_mem.addr = (unsigned long)__va(0);
-       vmcore_mem.size = saved_max_pfn << PAGE_SHIFT;
-       vmcore_mem.next = NULL;
-
-       /* Re-use the kcore code */
-       elf_kcore_store_hdr(bufp, nphdr, dataoff, &vmcore_mem);
-       buf += sizeof(struct elfhdr) + 2*sizeof(struct elf_phdr);
-
-       /* set up the process status */
-       notes[0].name = "CORE";
-       notes[0].type = NT_PRSTATUS;
-       notes[0].datasz = sizeof(struct elf_prstatus);
-       notes[0].data = &prstatus;
-
-       memset(&prstatus, 0, sizeof(struct elf_prstatus));
-
-       /* 1 - Get the registers from the reserved memory area */
-       reg_ppos = BACKUP_END + CRASH_RELOCATE_SIZE;
-       read_from_oldmem(reg_buf, REG_SIZE, &reg_ppos, 0);
-       elf_core_copy_regs(&prstatus.pr_reg, (struct pt_regs *)reg_buf);
-       buf = storenote(&notes[0], buf);
-}
-
-/*
- * read from the ELF header and then the crash dump
- */
-static ssize_t read_vmcore(
-struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
-{
-       ssize_t acc = 0;
-       size_t size, tsz;
-       size_t elf_buflen;
-       int nphdr;
-       unsigned long start;
-
-       tsz =  get_vmcore_size(&nphdr, &elf_buflen);
-       proc_vmcore->size = size = tsz + elf_buflen;
-       if (buflen == 0 || *fpos >= size) {
-               goto done;
-       }
-
-       /* trim buflen to not go beyond EOF */
-       if (buflen > size - *fpos)
-               buflen = size - *fpos;
-
-       /* construct an ELF core header if we'll need some of it */
-       if (*fpos < elf_buflen) {
-               char * elf_buf;
-
-               tsz = elf_buflen - *fpos;
-               if (buflen < tsz)
-                       tsz = buflen;
-               elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
-               if (!elf_buf) {
-                       acc = -ENOMEM;
-                       goto done;
-               }
-               memset(elf_buf, 0, elf_buflen);
-               elf_vmcore_store_hdr(elf_buf, nphdr, elf_buflen);
-               if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
-                       kfree(elf_buf);
-                       acc = -EFAULT;
-                       goto done;
-               }
-               kfree(elf_buf);
-               buflen -= tsz;
-               *fpos += tsz;
-               buffer += tsz;
-               acc += tsz;
-
-               /* leave now if filled buffer already */
-               if (buflen == 0) {
-                       goto done;
-               }
-       }
-
-       start = *fpos - elf_buflen;
-       if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
-               tsz = buflen;
-
-       while (buflen) {
-               unsigned long p;
-               loff_t pdup;
-
-               if ((start < 0) || (start >= size))
-                       if (clear_user(buffer, tsz)) {
-                               acc = -EFAULT;
-                               goto done;
-                       }
-
-               /* tsz contains actual len of dump to be read.
-                * buflen is the total len that was requested.
-                * This may contain part of ELF header. start
-                * is the fpos for the oldmem region
-                * If the file position corresponds to the second
-                * kernel's memory, we just return zeroes
-                */
-               p = start;
-               if ((p >= BACKUP_START) && (p < BACKUP_END)) {
-                       if (clear_user(buffer, tsz)) {
-                               acc = -EFAULT;
-                               goto done;
-                       }
-
-                       goto read_done;
-               } else if (p < CRASH_RELOCATE_SIZE)
-                       p += BACKUP_END;
-
-               pdup = p;
-               if (read_from_oldmem(buffer, tsz, &pdup, 1)) {
-                       acc = -EINVAL;
-                       goto done;
-               }
-
-read_done:
-               buflen -= tsz;
-               *fpos += tsz;
-               buffer += tsz;
-               acc += tsz;
-               start += tsz;
-               tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
-       }
-
-done:
-       return acc;
-}
index fff495a..545500e 100644 (file)
@@ -16,8 +16,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 08 Mar 2004
  *        Created.
@@ -53,7 +52,7 @@ int rcfs_empty(struct dentry *dentry)
            if (!rcfs_is_magic(child) && rcfs_positive(child))
                goto out;
        ret = 1;
-out:
+      out:
        spin_unlock(&dcache_lock);
        return ret;
 }
@@ -67,7 +66,7 @@ rcfs_create(struct inode *dir, struct dentry *dentry, int mode,
        return rcfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
 
-EXPORT_SYMBOL_GPL(rcfs_create);
+EXPORT_SYMBOL(rcfs_create);
 
 /* Symlinks permitted ?? */
 int rcfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
@@ -90,7 +89,7 @@ int rcfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
        return error;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_symlink);
+EXPORT_SYMBOL(rcfs_symlink);
 
 int rcfs_create_coredir(struct inode *dir, struct dentry *dentry)
 {
@@ -100,7 +99,8 @@ int rcfs_create_coredir(struct inode *dir, struct dentry *dentry)
 
        ripar = RCFS_I(dir);
        ridir = RCFS_I(dentry->d_inode);
-       /* Inform resource controllers - do Core operations */
+
+       // Inform RC's - do Core operations 
        if (ckrm_is_core_valid(ripar->core)) {
                sz = strlen(ripar->name) + strlen(dentry->d_name.name) + 2;
                ridir->name = kmalloc(sz, GFP_KERNEL);
@@ -120,7 +120,7 @@ int rcfs_create_coredir(struct inode *dir, struct dentry *dentry)
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_create_coredir);
+EXPORT_SYMBOL(rcfs_create_coredir);
 
 int rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
@@ -133,7 +133,7 @@ int rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                                       d_alias);
        if ((!strcmp(pd->d_name.name, "/") &&
             !strcmp(dentry->d_name.name, "ce"))) {
-               /* Call CE's mkdir if it has registered, else fail. */
+               // Call CE's mkdir if it has registered, else fail.
                if (rcfs_eng_callbacks.mkdir) {
                        return (*rcfs_eng_callbacks.mkdir) (dir, dentry, mode);
                } else {
@@ -141,27 +141,37 @@ int rcfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                }
        }
 #endif
+
        if (_rcfs_mknod(dir, dentry, mode | S_IFDIR, 0)) {
                printk(KERN_ERR "rcfs_mkdir: error in _rcfs_mknod\n");
                return retval;
        }
+
        dir->i_nlink++;
-       /* Inherit parent's ops since _rcfs_mknod assigns noperm ops. */
+
+       // Inherit parent's ops since _rcfs_mknod assigns noperm ops
        dentry->d_inode->i_op = dir->i_op;
        dentry->d_inode->i_fop = dir->i_fop;
+
        retval = rcfs_create_coredir(dir, dentry);
        if (retval) {
                simple_rmdir(dir, dentry);
                return retval;
+               // goto mkdir_err;
        }
-       /* create the default set of magic files */
+       // create the default set of magic files 
        clstype = (RCFS_I(dentry->d_inode))->core->classtype;
        rcfs_create_magic(dentry, &(((struct rcfs_magf *)clstype->mfdesc)[1]),
                          clstype->mfcount - 3);
+
+       return retval;
+
+//mkdir_err:
+       dir->i_nlink--;
        return retval;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_mkdir);
+EXPORT_SYMBOL(rcfs_mkdir);
 
 int rcfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -183,11 +193,12 @@ int rcfs_rmdir(struct inode *dir, struct dentry *dentry)
                return -EPERM;
        }
 #endif
+
        if (!rcfs_empty(dentry)) {
                printk(KERN_ERR "rcfs_rmdir: directory not empty\n");
                return -ENOTEMPTY;
        }
-       /* Core class removal  */
+       // Core class removal 
 
        if (ri->core == NULL) {
                printk(KERN_ERR "rcfs_rmdir: core==NULL\n");
@@ -201,12 +212,12 @@ int rcfs_rmdir(struct inode *dir, struct dentry *dentry)
        }
        ri->core = NULL;        // just to be safe 
 
-       /* Clear magic files only after core successfully removed */
+       // Clear magic files only after core successfully removed 
        rcfs_clear_magic(dentry);
 
        return simple_rmdir(dir, dentry);
 
-out:
+      out:
        return -EBUSY;
 }
 
@@ -219,9 +230,9 @@ int rcfs_unlink(struct inode *dir, struct dentry *dentry)
        return -ENOENT;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_unlink);
+EXPORT_SYMBOL(rcfs_unlink);
 
-/* rename is allowed on directories only */
+// rename is allowed on directories only
 int
 rcfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            struct inode *new_dir, struct dentry *new_dentry)
@@ -232,7 +243,7 @@ rcfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                return -EINVAL;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_rename);
+EXPORT_SYMBOL(rcfs_rename);
 
 struct inode_operations rcfs_dir_inode_operations = {
        .create = rcfs_create,
index c9ca8a6..23ef014 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) Shailabh Nagar,  IBM Corp. 2004
  *               Vivek Kashyap,   IBM Corp. 2004
  *           
+ * 
  * Resource class filesystem (rcfs) forming the 
  * user interface to Class-based Kernel Resource Management (CKRM).
  *
 
 #include <linux/rcfs.h>
 
-/*
- * Address of variable used as flag to indicate a magic file, 
- * value unimportant
- */ 
+// Address of variable used as flag to indicate a magic file, 
+// ; value unimportant 
 int RCFS_IS_MAGIC;
 
 struct inode *rcfs_get_inode(struct super_block *sb, int mode, dev_t dev)
@@ -64,19 +63,18 @@ struct inode *rcfs_get_inode(struct super_block *sb, int mode, dev_t dev)
                        init_special_inode(inode, mode, dev);
                        break;
                case S_IFREG:
-                       /* Treat as default assignment */
+                       // Treat as default assignment */
                        inode->i_op = &rcfs_file_inode_operations;
-                       /* inode->i_fop = &rcfs_file_operations; */
+                       // inode->i_fop = &rcfs_file_operations;
                        break;
                case S_IFDIR:
-                       /* inode->i_op = &rcfs_dir_inode_operations; */
+                       // inode->i_op = &rcfs_dir_inode_operations;
                        inode->i_op = &rcfs_rootdir_inode_operations;
                        inode->i_fop = &simple_dir_operations;
 
-                       /*
-                        * directory inodes start off with i_nlink == 2 
-                        *  (for "." entry)
-                        */
+                       // directory inodes start off with i_nlink == 2 
+                       //  (for "." entry)
+
                        inode->i_nlink++;
                        break;
                case S_IFLNK:
@@ -94,6 +92,7 @@ int _rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 
        if (dentry->d_inode)
                return -EEXIST;
+
        inode = rcfs_get_inode(dir->i_sb, mode, dev);
        if (inode) {
                if (dir->i_mode & S_ISGID) {
@@ -105,21 +104,22 @@ int _rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
                dget(dentry);
                error = 0;
        }
+
        return error;
 }
 
-EXPORT_SYMBOL_GPL(_rcfs_mknod);
+EXPORT_SYMBOL(_rcfs_mknod);
 
 int rcfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
-       /* User can only create directories, not files */
+       // User can only create directories, not files
        if ((mode & S_IFMT) != S_IFDIR)
                return -EINVAL;
 
        return dir->i_op->mkdir(dir, dentry, mode);
 }
 
-EXPORT_SYMBOL_GPL(rcfs_mknod);
+EXPORT_SYMBOL(rcfs_mknod);
 
 struct dentry *rcfs_create_internal(struct dentry *parent,
                                    struct rcfs_magf *magf, int magic)
@@ -127,7 +127,7 @@ struct dentry *rcfs_create_internal(struct dentry *parent,
        struct qstr qstr;
        struct dentry *mfdentry;
 
-       /* Get new dentry for name */
+       // Get new dentry for name  
        qstr.name = magf->name;
        qstr.len = strlen(magf->name);
        qstr.hash = full_name_hash(magf->name, qstr.len);
@@ -144,13 +144,12 @@ struct dentry *rcfs_create_internal(struct dentry *parent,
                else {
                        err = _rcfs_mknod(parent->d_inode, mfdentry,
                                          magf->mode, 0);
-                       /*
-                        * _rcfs_mknod doesn't increment parent's link count, 
-                        * i_op->mkdir does.
-                        */
+                       // _rcfs_mknod doesn't increment parent's link count, 
+                       // i_op->mkdir does.
                        parent->d_inode->i_nlink++;
                }
                up(&parent->d_inode->i_sem);
+
                if (err) {
                        dput(mfdentry);
                        return mfdentry;
@@ -159,7 +158,7 @@ struct dentry *rcfs_create_internal(struct dentry *parent,
        return mfdentry;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_create_internal);
+EXPORT_SYMBOL(rcfs_create_internal);
 
 int rcfs_delete_internal(struct dentry *mfdentry)
 {
@@ -167,7 +166,9 @@ int rcfs_delete_internal(struct dentry *mfdentry)
 
        if (!mfdentry || !mfdentry->d_parent)
                return -EINVAL;
+
        parent = mfdentry->d_parent;
+
        if (!mfdentry->d_inode) {
                return 0;
        }
@@ -177,12 +178,13 @@ int rcfs_delete_internal(struct dentry *mfdentry)
        else
                simple_unlink(parent->d_inode, mfdentry);
        up(&mfdentry->d_inode->i_sem);
+
        d_delete(mfdentry);
 
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_delete_internal);
+EXPORT_SYMBOL(rcfs_delete_internal);
 
 struct inode_operations rcfs_file_inode_operations = {
        .getattr = simple_getattr,
index 8a811cb..1cada33 100644 (file)
@@ -9,19 +9,21 @@
  * File operations for common magic files in rcfs, 
  * the user interface for CKRM. 
  * 
+ * 
  * Latest version, more details at http://ckrm.sf.net
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
+ *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 23 Apr 2004
  *        Created from code kept earlier in fs/rcfs/magic_*.c
+ *
  */
 
 #include <linux/module.h>
 
 #include <linux/rcfs.h>
 
-/*
+/******************************************************
  * Macros
  *
  * generic macros to assist in writing magic fileops
  *
- */
+ *****************************************************/
 
 #define MAGIC_SHOW(FUNC)                                               \
 static int                                                             \
@@ -207,14 +209,16 @@ struct file_operations FUNC ## _fileops = {                            \
 };                                                                     \
 EXPORT_SYMBOL(FUNC ## _fileops);
 
-/*
- * Shared function used by Members / Reclassify
- */
+/******************************************************************************
+ * Shared function used by Target / Reclassify
+ *
+ *
+ *****************************************************************************/
 
-#define MEMBERS_MAX_INPUT_SIZE 100
+#define TARGET_MAX_INPUT_SIZE 100
 
 static ssize_t
-members_reclassify_write(struct file *file, const char __user * buf,
+target_reclassify_write(struct file *file, const char __user * buf,
                        size_t count, loff_t * ppos, int manual)
 {
        struct rcfs_inode_info *ri = RCFS_I(file->f_dentry->d_inode);
@@ -222,51 +226,77 @@ members_reclassify_write(struct file *file, const char __user * buf,
        int rc = -EINVAL;
        ckrm_classtype_t *clstype;
 
-       if ((ssize_t) count < 0 || (ssize_t) count > MEMBERS_MAX_INPUT_SIZE)
+       if ((ssize_t) count < 0 || (ssize_t) count > TARGET_MAX_INPUT_SIZE)
                return -EINVAL;
+
        if (!access_ok(VERIFY_READ, buf, count))
                return -EFAULT;
+
        down(&(ri->vfs_inode.i_sem));
-       optbuf = kmalloc(MEMBERS_MAX_INPUT_SIZE, GFP_KERNEL);
+
+       optbuf = kmalloc(TARGET_MAX_INPUT_SIZE, GFP_KERNEL);
        __copy_from_user(optbuf, buf, count);
        if (optbuf[count - 1] == '\n')
                optbuf[count - 1] = '\0';
+
        clstype = ri->core->classtype;
        if (clstype->forced_reclassify)
                rc = (*clstype->forced_reclassify) (manual ? ri->core: NULL, optbuf);
+
        up(&(ri->vfs_inode.i_sem));
        kfree(optbuf);
        return (!rc ? count : rc);
 
 }
 
-/*
+/******************************************************************************
+ * Target
+ *
+ * pseudo file for manually reclassifying members to a class
+ *
+ *****************************************************************************/
+
+static ssize_t
+target_write(struct file *file, const char __user * buf,
+            size_t count, loff_t * ppos)
+{
+       return target_reclassify_write(file,buf,count,ppos,1);
+}
+
+struct file_operations target_fileops = {
+       .write = target_write,
+};
+
+EXPORT_SYMBOL(target_fileops);
+
+/******************************************************************************
  * Reclassify
  *
  * pseudo file for reclassification of an object through CE
- */
+ *
+ *****************************************************************************/
 
 static ssize_t
 reclassify_write(struct file *file, const char __user * buf,
                 size_t count, loff_t * ppos)
 {
-       return members_reclassify_write(file,buf,count,ppos,0);
+       return target_reclassify_write(file,buf,count,ppos,0);
 }
 
 struct file_operations reclassify_fileops = {
        .write = reclassify_write,
 };
 
-EXPORT_SYMBOL_GPL(reclassify_fileops);
+EXPORT_SYMBOL(reclassify_fileops);
 
-/*
+/******************************************************************************
  * Config
  *
  * Set/get configuration parameters of a class. 
- */
+ *
+ *****************************************************************************/
 
-/*
- * Currently there are no per-class config parameters defined.
+/* Currently there are no per-class config parameters defined.
  * Use existing code as a template
  */
 
@@ -289,32 +319,27 @@ MAGIC_CLOSE(config);
 
 MAGIC_RDWR_FILEOPS(config);
 
-/*
+/******************************************************************************
  * Members
  *
  * List members of a class
- */
+ *
+ *****************************************************************************/
 
 MAGIC_SHOW(members);
 MAGIC_OPEN(members);
 MAGIC_CLOSE(members);
 
-static ssize_t
-members_write(struct file *file, const char __user * buf,
-            size_t count, loff_t * ppos)
-{
-       return members_reclassify_write(file,buf,count,ppos,1);
-}
-
-MAGIC_RDWR_FILEOPS(members);
+MAGIC_RD_FILEOPS(members);
 
-/*
+/******************************************************************************
  * Stats
  *
  * Get/reset class statistics
  * No standard set of stats defined. Each resource controller chooses
  * its own set of statistics to maintain and export.
- */
+ *
+ *****************************************************************************/
 
 #define stats_max_input_size  50
 
@@ -336,22 +361,21 @@ MAGIC_CLOSE(stats);
 
 MAGIC_RDWR_FILEOPS(stats);
 
-/*
+/******************************************************************************
  * Shares
  *
  * Set/get shares of a taskclass.
  * Share types and semantics are defined by rcfs and ckrm core 
- */
+ * 
+ *****************************************************************************/
 
 #define SHARES_MAX_INPUT_SIZE  300
 
-/*
- * The enums for the share types should match the indices expected by
- * array parameter to ckrm_set_resshare
- *
- * Note only the first NUM_SHAREVAL enums correspond to share types,
- * the remaining ones are for token matching purposes
- */
+/* The enums for the share types should match the indices expected by
+   array parameter to ckrm_set_resshare */
+
+/* Note only the first NUM_SHAREVAL enums correspond to share types,
+   the remaining ones are for token matching purposes */
 
 enum share_token_t {
        MY_GUAR, MY_LIM, TOT_GUAR, MAX_LIM, SHARE_RES_TYPE, SHARE_ERR
@@ -375,12 +399,15 @@ shares_parse(char *options, char **resstr, struct ckrm_shares *shares)
 
        if (!options)
                return 1;
+
        while ((p = strsep(&options, ",")) != NULL) {
+
                substring_t args[MAX_OPT_ARGS];
                int token;
 
                if (!*p)
                        continue;
+
                token = match_token(p, shares_tokens, args);
                switch (token) {
                case SHARE_RES_TYPE:
@@ -409,6 +436,7 @@ shares_parse(char *options, char **resstr, struct ckrm_shares *shares)
                default:
                        return 0;
                }
+
        }
        return 1;
 }
@@ -433,31 +461,40 @@ shares_write(struct file *file, const char __user * buf,
                CKRM_SHARE_UNCHANGED,
                CKRM_SHARE_UNCHANGED
        };
+
        if ((ssize_t) count < 0 || (ssize_t) count > SHARES_MAX_INPUT_SIZE)
                return -EINVAL;
+
        if (!access_ok(VERIFY_READ, buf, count))
                return -EFAULT;
+
        ri = RCFS_I(file->f_dentry->d_parent->d_inode);
+
        if (!ri || !ckrm_is_core_valid((ckrm_core_class_t *) (ri->core))) {
                printk(KERN_ERR "shares_write: Error accessing core class\n");
                return -EFAULT;
        }
+
        down(&inode->i_sem);
+
        core = ri->core;
        optbuf = kmalloc(SHARES_MAX_INPUT_SIZE, GFP_KERNEL);
        if (!optbuf) {
                up(&inode->i_sem);
                return -ENOMEM;
        }
+
        __copy_from_user(optbuf, buf, count);
        if (optbuf[count - 1] == '\n')
                optbuf[count - 1] = '\0';
+
        done = shares_parse(optbuf, &resname, &newshares);
        if (!done) {
                printk(KERN_ERR "Error parsing shares\n");
                rc = -EINVAL;
                goto write_out;
        }
+
        if (core->classtype->set_shares) {
                rc = (*core->classtype->set_shares) (core, resname, &newshares);
                if (rc) {
@@ -466,14 +503,17 @@ shares_write(struct file *file, const char __user * buf,
                        goto write_out;
                }
        }
-       printk(KERN_ERR "Set %s shares to %d %d %d %d\n",
+
+       printk(KERN_DEBUG "Set %s shares to %d %d %d %d\n",
               resname,
               newshares.my_guarantee,
               newshares.my_limit,
               newshares.total_guarantee, newshares.max_limit);
+
        rc = count;
 
-write_out:
+      write_out:
+
        up(&inode->i_sem);
        kfree(optbuf);
        kfree(resname);
@@ -488,6 +528,7 @@ MAGIC_RDWR_FILEOPS(shares);
 
 /*
  * magic file creation/deletion
+ *
  */
 
 int rcfs_clear_magic(struct dentry *parent)
@@ -495,16 +536,20 @@ int rcfs_clear_magic(struct dentry *parent)
        struct dentry *mftmp, *mfdentry;
 
        list_for_each_entry_safe(mfdentry, mftmp, &parent->d_subdirs, d_child) {
+
                if (!rcfs_is_magic(mfdentry))
                        continue;
+
                if (rcfs_delete_internal(mfdentry))
                        printk(KERN_ERR
                               "rcfs_clear_magic: error deleting one\n");
        }
+
        return 0;
+
 }
 
-EXPORT_SYMBOL_GPL(rcfs_clear_magic);
+EXPORT_SYMBOL(rcfs_clear_magic);
 
 int rcfs_create_magic(struct dentry *parent, struct rcfs_magf magf[], int count)
 {
@@ -527,4 +572,4 @@ int rcfs_create_magic(struct dentry *parent, struct rcfs_magf magf[], int count)
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_create_magic);
+EXPORT_SYMBOL(rcfs_create_magic);
index 54e199a..d827db6 100644 (file)
@@ -16,8 +16,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 08 April 2004
  *        Created.
@@ -55,7 +54,7 @@ int rcfs_register_engine(rbce_eng_callback_t * rcbs)
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_register_engine);
+EXPORT_SYMBOL(rcfs_register_engine);
 
 int rcfs_unregister_engine(rbce_eng_callback_t * rcbs)
 {
@@ -71,8 +70,7 @@ int rcfs_unregister_engine(rbce_eng_callback_t * rcbs)
 
 EXPORT_SYMBOL(rcfs_unregister_engine);
 
-/*
- * rcfs_mkroot
+/* rcfs_mkroot
  * Create and return a "root" dentry under /rcfs. 
  * Also create associated magic files 
  *
@@ -100,6 +98,7 @@ int rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde)
                printk(KERN_ERR "Could not create %s\n", rootdesc->name);
                return -ENOMEM;
        }
+
        rootri = RCFS_I(dentry->d_inode);
        sz = strlen(rootdesc->name) + strlen(RCFS_ROOT) + 2;
        rootri->name = kmalloc(sz, GFP_KERNEL);
@@ -110,18 +109,19 @@ int rcfs_mkroot(struct rcfs_magf *mfdesc, int mfcount, struct dentry **rootde)
                return -ENOMEM;
        }
        snprintf(rootri->name, sz, "%s/%s", RCFS_ROOT, rootdesc->name);
+
        if (rootdesc->i_fop)
                dentry->d_inode->i_fop = rootdesc->i_fop;
        if (rootdesc->i_op)
                dentry->d_inode->i_op = rootdesc->i_op;
 
-       /* set output parameters */
+       // set output parameters
        *rootde = dentry;
 
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_mkroot);
+EXPORT_SYMBOL(rcfs_mkroot);
 
 int rcfs_rmroot(struct dentry *rootde)
 {
@@ -138,7 +138,7 @@ int rcfs_rmroot(struct dentry *rootde)
        return 0;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_rmroot);
+EXPORT_SYMBOL(rcfs_rmroot);
 
 int rcfs_register_classtype(ckrm_classtype_t * clstype)
 {
@@ -146,10 +146,7 @@ int rcfs_register_classtype(ckrm_classtype_t * clstype)
        struct rcfs_inode_info *rootri;
        struct rcfs_magf *mfdesc;
 
-       if (genmfdesc[clstype->mfidx] == NULL) {
-               return -ENOMEM;
-       }
-
+       // Initialize mfdesc, mfcount 
        clstype->mfdesc = (void *)genmfdesc[clstype->mfidx]->rootmf;
        clstype->mfcount = genmfdesc[clstype->mfidx]->rootmflen;
 
@@ -162,12 +159,13 @@ int rcfs_register_classtype(ckrm_classtype_t * clstype)
                         (struct dentry **)&(clstype->rootde));
        if (rc)
                return rc;
+
        rootri = RCFS_I(((struct dentry *)(clstype->rootde))->d_inode);
        rootri->core = clstype->default_class;
        clstype->default_class->name = rootri->name;
        ckrm_core_grab(clstype->default_class);
 
-       /* Create magic files under root */
+       // Create magic files under root 
        if ((rc = rcfs_create_magic(clstype->rootde, &mfdesc[1],
                                    clstype->mfcount - 1))) {
                kfree(rootri->name);
@@ -175,10 +173,11 @@ int rcfs_register_classtype(ckrm_classtype_t * clstype)
                rcfs_delete_internal(clstype->rootde);
                return rc;
        }
+
        return rc;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_register_classtype);
+EXPORT_SYMBOL(rcfs_register_classtype);
 
 int rcfs_deregister_classtype(ckrm_classtype_t * clstype)
 {
@@ -192,27 +191,29 @@ int rcfs_deregister_classtype(ckrm_classtype_t * clstype)
        return rc;
 }
 
-EXPORT_SYMBOL_GPL(rcfs_deregister_classtype);
+EXPORT_SYMBOL(rcfs_deregister_classtype);
+
+// Common root and magic file entries.
+// root name, root permissions, magic file names and magic file permissions 
+// are needed by all entities (classtypes and classification engines) existing 
+// under the rcfs mount point
+
+// The common sets of these attributes are listed here as a table. Individual 
+// classtypes and classification engines can simply specify the index into the 
+// table to initialize their magf entries. 
+//
 
 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
 extern struct rcfs_mfdesc tc_mfdesc;
 #endif
 
-#ifdef CONFIG_CKRM_TYPE_SOCKETCLASS
+#ifdef CONFIG_CKRM_TYPE_TASKCLASS
 extern struct rcfs_mfdesc sock_mfdesc;
 #endif
 
-/* Common root and magic file entries.
- * root name, root permissions, magic file names and magic file permissions 
- * are needed by all entities (classtypes and classification engines) existing 
- * under the rcfs mount point
- *
- * The common sets of these attributes are listed here as a table. Individual 
- * classtypes and classification engines can simple specify the index into the 
- * table to initialize their magf entries. 
- */
+// extern struct rcfs_magf rbce_mfdesc;
 
-struct rcfs_mfdesc *genmfdesc[CKRM_MAX_CLASSTYPES] = {
+struct rcfs_mfdesc *genmfdesc[] = {
 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
        &tc_mfdesc,
 #else
@@ -223,5 +224,4 @@ struct rcfs_mfdesc *genmfdesc[CKRM_MAX_CLASSTYPES] = {
 #else
        NULL,
 #endif
-
 };
index 89495dd..f1c0899 100644 (file)
@@ -49,7 +49,7 @@ int sock_mknod_noperm(struct inode *, struct dentry *, int, dev_t);
 void sock_set_directory(void);
 
 extern struct file_operations config_fileops,
-    members_fileops, shares_fileops, stats_fileops;
+    members_fileops, shares_fileops, stats_fileops, target_fileops;
 
 struct inode_operations my_iops = {
        .create = rcfs_create,
@@ -107,6 +107,12 @@ struct rcfs_magf sock_rootdesc[] = {
         .i_op = &my_iops,
         .i_fop = &members_fileops,
         },
+       {
+        .name = "target",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &target_fileops,
+        },
        {
         .name = "reclassify",
         .mode = RCFS_DEFAULT_FILE_MODE,
@@ -140,6 +146,12 @@ struct rcfs_magf sock_magf[] = {
         .i_op = &my_iops,
         .i_fop = &stats_fileops,
         },
+       {
+        .name = "target",
+        .mode = RCFS_DEFAULT_FILE_MODE,
+        .i_op = &my_iops,
+        .i_fop = &target_fileops,
+        },
 };
 
 struct rcfs_magf sub_magf[] = {
index 8403f28..4ed49fd 100644 (file)
@@ -16,8 +16,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 08 Mar 2004
  *        Created.
@@ -40,6 +39,7 @@
 #include <asm/uaccess.h>
 
 #include <linux/rcfs.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_ce.h>
 
@@ -50,7 +50,7 @@ struct rcfs_inode_info *RCFS_I(struct inode *inode)
        return container_of(inode, struct rcfs_inode_info, vfs_inode);
 }
 
-EXPORT_SYMBOL_GPL(RCFS_I);
+EXPORT_SYMBOL(RCFS_I);
 
 static struct inode *rcfs_alloc_inode(struct super_block *sb)
 {
@@ -143,7 +143,7 @@ static int rcfs_fill_super(struct super_block *sb, void *data, int silent)
        }
        sb->s_root = root;
 
-       /* Link inode and core class */
+       // Link inode and core class 
        rootri = RCFS_I(inode);
        rootri->name = kmalloc(strlen(RCFS_ROOT) + 1, GFP_KERNEL);
        if (!rootri->name) {
@@ -159,7 +159,7 @@ static int rcfs_fill_super(struct super_block *sb, void *data, int silent)
        rcfs_rootde = root;
        rcfs_rootri = rootri;
 
-       /* register metatypes */
+       // register metatypes
        for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) {
                clstype = ckrm_classtypes[i];
                if (clstype == NULL)
@@ -167,28 +167,25 @@ static int rcfs_fill_super(struct super_block *sb, void *data, int silent)
                printk(KERN_DEBUG "A non null classtype\n");
 
                if ((rc = rcfs_register_classtype(clstype)))
-                       continue;       /* could return with an error too */
+                       continue;       // could return with an error too 
        }
 
-       /*
-        * do post-mount initializations needed by CE
-        * this is distinct from CE registration done on rcfs module load
-        */
+       // do post-mount initializations needed by CE
+       // this is distinct from CE registration done on rcfs module load
        if (rcfs_engine_regd) {
                if (rcfs_eng_callbacks.mnt)
                        if ((rc = (*rcfs_eng_callbacks.mnt) ())) {
                                printk(KERN_ERR "Error in CE mnt %d\n", rc);
                        }
        }
-       /*
-        * Following comment handled by code above; keep nonetheless if it
-        * can be done better
-        *
-        * register CE's with rcfs 
-        * check if CE loaded
-        * call rcfs_register_engine for each classtype
-        * AND rcfs_mkroot (preferably subsume latter in former)
-        */
+       // Following comment handled by code above; keep nonetheless if it
+       // can be done better
+       //
+       // register CE's with rcfs 
+       // check if CE loaded
+       // call rcfs_register_engine for each classtype
+       // AND rcfs_mkroot (preferably subsume latter in former) 
+
        return 0;
 }
 
@@ -211,6 +208,7 @@ void rcfs_kill_sb(struct super_block *sb)
        rcfs_mounted--;
 
        for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) {
+
                clstype = ckrm_classtypes[i];
                if (clstype == NULL || clstype->rootde == NULL)
                        continue;
@@ -218,39 +216,38 @@ void rcfs_kill_sb(struct super_block *sb)
                if ((rc = rcfs_deregister_classtype(clstype))) {
                        printk(KERN_ERR "Error removing classtype %s\n",
                               clstype->name);
+                       // return ;   // can also choose to stop here
                }
        }
 
-       /*
-        * do pre-umount shutdown needed by CE
-        * this is distinct from CE deregistration done on rcfs module unload
-        */
+       // do pre-umount shutdown needed by CE
+       // this is distinct from CE deregistration done on rcfs module unload
        if (rcfs_engine_regd) {
                if (rcfs_eng_callbacks.umnt)
                        if ((rc = (*rcfs_eng_callbacks.umnt) ())) {
                                printk(KERN_ERR "Error in CE umnt %d\n", rc);
-                               /* TODO: return ; until error handling improves */
+                               // return ; until error handling improves
                        }
        }
-       /*
-        * Following comment handled by code above; keep nonetheless if it 
-        * can be done better
-        *
-        * deregister CE with rcfs
-        * Check if loaded
-        * if ce is in  one directory /rcfs/ce, 
-        *       rcfs_deregister_engine for all classtypes within above 
-        *             codebase 
-        *       followed by
-        *       rcfs_rmroot here
-        * if ce in multiple (per-classtype) directories
-        *       call rbce_deregister_engine within ckrm_deregister_classtype
-        *
-        * following will automatically clear rcfs root entry including its 
-        *  rcfs_inode_info
-        */
+       // Following comment handled by code above; keep nonetheless if it 
+       // can be done better
+       //
+       // deregister CE with rcfs
+       // Check if loaded
+       // if ce is in  one directory /rcfs/ce, 
+       //       rcfs_deregister_engine for all classtypes within above 
+       //             codebase 
+       //       followed by
+       //       rcfs_rmroot here
+       // if ce in multiple (per-classtype) directories
+       //       call rbce_deregister_engine within ckrm_deregister_classtype
+
+       // following will automatically clear rcfs root entry including its 
+       //  rcfs_inode_info
 
        generic_shutdown_super(sb);
+
+       // printk(KERN_ERR "Removed all entries\n");
 }
 
 static struct file_system_type rcfs_fs_type = {
@@ -275,20 +272,21 @@ static int __init init_rcfs_fs(void)
        ret = register_filesystem(&rcfs_fs_type);
        if (ret)
                goto init_register_err;
+
        ret = rcfs_init_inodecache();
        if (ret)
                goto init_cache_err;
+
        rcfs_fn = my_rcfs_fn;
-       /*
-        * Due to tight coupling of this module with ckrm
-        * do not allow this module to be removed.
-        */
+
+       // Due to tight coupling of this module with ckrm
+       // do not allow this module to be removed.
        try_module_get(THIS_MODULE);
        return ret;
 
-init_cache_err:
+      init_cache_err:
        unregister_filesystem(&rcfs_fs_type);
-init_register_err:
+      init_register_err:
        return ret;
 }
 
@@ -299,6 +297,6 @@ static void __exit exit_rcfs_fs(void)
 }
 
 module_init(init_rcfs_fs)
-module_exit(exit_rcfs_fs)
+    module_exit(exit_rcfs_fs)
 
-MODULE_LICENSE("GPL");
+    MODULE_LICENSE("GPL");
index a7a38d9..9ef6d4d 100644 (file)
@@ -43,7 +43,8 @@
 
 #define TC_FILE_MODE (S_IFREG | S_IRUGO | S_IWUSR)
 
-struct rcfs_magf tc_rootdesc[] = {
+#define NR_TCROOTMF  7
+struct rcfs_magf tc_rootdesc[NR_TCROOTMF] = {
        /* First entry must be root */
        {
 //              .name    = should not be set, copy from classtype name
@@ -52,6 +53,12 @@ struct rcfs_magf tc_rootdesc[] = {
         .i_fop = &simple_dir_operations,
         },
        /* Rest are root's magic files */
+       {
+        .name = "target",
+        .mode = TC_FILE_MODE,
+        .i_fop = &target_fileops,
+        .i_op = &rcfs_file_inode_operations,
+        },
        {
         .name = "members",
         .mode = TC_FILE_MODE,
@@ -89,5 +96,5 @@ struct rcfs_magf tc_rootdesc[] = {
 
 struct rcfs_mfdesc tc_mfdesc = {
        .rootmf = tc_rootdesc,
-       .rootmflen = (sizeof(tc_rootdesc) / sizeof(struct rcfs_magf)),
+       .rootmflen = NR_TCROOTMF,
 };
index 9424c05..f17713e 100644 (file)
@@ -588,7 +588,7 @@ error_exit:
 
 /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
 void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
-                             size_t num_pages /* amount of pages */) {
+                             int num_pages /* amount of pages */) {
     int i; // loop counter
 
     for (i=0; i < num_pages ; i++) {
@@ -619,7 +619,7 @@ int reiserfs_copy_from_user_to_file_region(
     int offset; // offset in page
 
     for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
-       size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
+       int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
        struct page *page=prepared_pages[i]; // Current page we process.
 
        fault_in_pages_readable( buf, count);
@@ -718,8 +718,8 @@ int reiserfs_submit_file_region_for_write(
                                struct reiserfs_transaction_handle *th,
                                struct inode *inode,
                                loff_t pos, /* Writing position offset */
-                               size_t num_pages, /* Number of pages to write */
-                               size_t write_bytes, /* number of bytes to write */
+                               int num_pages, /* Number of pages to write */
+                               int write_bytes, /* number of bytes to write */
                                struct page **prepared_pages /* list of pages */
                                )
 {
@@ -854,9 +854,9 @@ int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal w
 int reiserfs_prepare_file_region_for_write(
                                struct inode *inode /* Inode of the file */,
                                loff_t pos, /* position in the file */
-                               size_t num_pages, /* number of pages to
+                               int num_pages, /* number of pages to
                                                  prepare */
-                               size_t write_bytes, /* Amount of bytes to be
+                               int write_bytes, /* Amount of bytes to be
                                                    overwritten from
                                                    @pos */
                                struct page **prepared_pages /* pointer to array
@@ -1252,9 +1252,10 @@ ssize_t reiserfs_file_write( struct file *file, /* the file we are going to writ
     while ( count > 0) {
        /* This is the main loop in which we running until some error occures
           or until we write all of the data. */
-       size_t num_pages;/* amount of pages we are going to write this iteration */
-       size_t write_bytes; /* amount of bytes to write during this iteration */
-       size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
+       int num_pages;/* amount of pages we are going to write this iteration */
+       int write_bytes; /* amount of bytes to write during this iteration */
+       int blocks_to_allocate; /* how much blocks we need to allocate for
+                                  this iteration */
         
         /*  (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/
        num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
@@ -1268,7 +1269,7 @@ ssize_t reiserfs_file_write( struct file *file, /* the file we are going to writ
            /* If we were asked to write more data than we want to or if there
               is not that much space, then we shorten amount of data to write
               for this iteration. */
-           num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
+           num_pages = min_t(int, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
            /* Also we should not forget to set size in bytes accordingly */
            write_bytes = (num_pages << PAGE_CACHE_SHIFT) - 
                            (pos & (PAGE_CACHE_SIZE-1));
@@ -1294,7 +1295,7 @@ ssize_t reiserfs_file_write( struct file *file, /* the file we are going to writ
            // But overwriting files on absolutelly full volumes would not
            // be very efficient. Well, people are not supposed to fill
            // 100% of disk space anyway.
-           write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
+           write_bytes = min_t(int, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
            num_pages = 1;
            // No blocks were claimed before, so do it now.
            reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
index de207dc..30e19a1 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/reiserfs_xattr.h>
 #include <linux/smp_lock.h>
 #include <linux/quotaops.h>
-#include <linux/vserver/xid.h>
 
 #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; }
 #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--;
@@ -351,7 +350,6 @@ static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dent
            reiserfs_write_unlock(dir->i_sb);
            return ERR_PTR(-EACCES);
         }
-       vx_propagate_xid(nd, inode);
 
        /* Propogate the priv_object flag so we know we're in the priv tree */
        if (is_reiserfs_priv_object (dir))
index 035abec..47d461a 100644 (file)
@@ -39,8 +39,6 @@
 #include <linux/devpts_fs.h>
 #include <linux/proc_fs.h>
 #include <linux/kobject.h>
-#include <linux/devpts_fs.h>
-#include <linux/proc_fs.h>
 #include <asm/uaccess.h>
 
 
index 07a29a2..57b6991 100644 (file)
@@ -11,6 +11,8 @@
 
 #include "sysfs.h"
 
+/* Random magic number */
+#define SYSFS_MAGIC 0x62656572
 
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
@@ -34,7 +36,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-       sb->s_magic = SYSFS_SUPER_MAGIC;
+       sb->s_magic = SYSFS_MAGIC;
        sb->s_op = &sysfs_ops;
        sysfs_sb = sb;
 
index 4c9a2fb..17debc1 100644 (file)
@@ -1013,7 +1013,7 @@ xfs_ioc_fsgeometry(
 #define LINUX_XFLAG_NODUMP     0x00000040 /* do not dump file */
 #define LINUX_XFLAG_NOATIME    0x00000080 /* do not update atime */
 #define LINUX_XFLAG_BARRIER    0x00004000 /* chroot() barrier */
-#define LINUX_XFLAG_IUNLINK    0x00008000 /* immutable unlink */
+#define LINUX_XFLAG_IUNLINK    0x00008000 /* Immutable unlink */
 
 STATIC unsigned int
 xfs_merge_ioc_xflags(
@@ -1056,8 +1056,6 @@ xfs_di2lxflags(
                flags |= LINUX_XFLAG_IMMUTABLE;
        if (di_flags & XFS_DIFLAG_IUNLINK)
                flags |= LINUX_XFLAG_IUNLINK;
-       if (di_flags & XFS_DIFLAG_BARRIER)
-               flags |= LINUX_XFLAG_BARRIER;
        if (di_flags & XFS_DIFLAG_APPEND)
                flags |= LINUX_XFLAG_APPEND;
        if (di_flags & XFS_DIFLAG_SYNC)
index 425dafd..1a46def 100644 (file)
@@ -460,7 +460,7 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_PROJINHERIT_BIT  9  /* create with parents projid */
 #define XFS_DIFLAG_NOSYMLINKS_BIT  10  /* disallow symlink creation */
 #define XFS_DIFLAG_BARRIER_BIT  12     /* chroot() barrier */
-#define XFS_DIFLAG_IUNLINK_BIT  13     /* immutable unlink */
+#define XFS_DIFLAG_IUNLINK_BIT  13     /* inode has iunlink */
 
 #define XFS_DIFLAG_REALTIME      (1 << XFS_DIFLAG_REALTIME_BIT)
 #define XFS_DIFLAG_PREALLOC      (1 << XFS_DIFLAG_PREALLOC_BIT)
@@ -476,7 +476,6 @@ xfs_dinode_t *xfs_buf_to_dinode(struct xfs_buf *bp);
 #define XFS_DIFLAG_BARRIER      (1 << XFS_DIFLAG_BARRIER_BIT)
 #define XFS_DIFLAG_IUNLINK      (1 << XFS_DIFLAG_IUNLINK_BIT)
 
-
 #define XFS_DIFLAG_ANY \
        (XFS_DIFLAG_REALTIME | XFS_DIFLAG_PREALLOC | XFS_DIFLAG_NEWRTBM | \
         XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND | XFS_DIFLAG_SYNC | \
index 94a596a..8290ea7 100644 (file)
@@ -80,7 +80,7 @@ struct fsxattr {
 #define XFS_XFLAG_PROJINHERIT  0x00000200      /* create with parents projid */
 #define XFS_XFLAG_NOSYMLINKS   0x00000400      /* disallow symlink creation */
 #define XFS_XFLAG_BARRIER      0x00004000      /* chroot() barrier */
-#define XFS_XFLAG_IUNLINK      0x00008000      /* immutable unlink */
+#define XFS_XFLAG_IUNLINK      0x00008000      /* Immutable unlink */
 #define XFS_XFLAG_HASATTR      0x80000000      /* no DIFLAG for this   */
 
 /*
index c4e7d98..99cef06 100644 (file)
@@ -73,7 +73,7 @@
        }
 
 #define SECURITY_INIT                                                  \
-       .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) {\
+       .security_initcall.init : {                                     \
                VMLINUX_SYMBOL(__security_initcall_start) = .;          \
                *(.security_initcall.init)                              \
                VMLINUX_SYMBOL(__security_initcall_end) = .;            \
index 9513dd8..c689554 100644 (file)
@@ -86,7 +86,6 @@
 #define                        APIC_LVT_REMOTE_IRR             (1<<14)
 #define                        APIC_INPUT_POLARITY             (1<<13)
 #define                        APIC_SEND_PENDING               (1<<12)
-#define                        APIC_MODE_MASK                  0x700
 #define                        GET_APIC_DELIVERY_MODE(x)       (((x)>>8)&0x7)
 #define                        SET_APIC_DELIVERY_MODE(x,y)     (((x)&~0x700)|((y)<<8))
 #define                                APIC_MODE_FIXED         0x0
diff --git a/include/asm-i386/crash_dump.h b/include/asm-i386/crash_dump.h
deleted file mode 100644 (file)
index a13e4b6..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/* asm-i386/crash_dump.h */
-#include <linux/bootmem.h>
-#include <linux/irq.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_CRASH_DUMP
-extern unsigned int dump_enabled;
-extern unsigned int crashed;
-
-extern void __crash_relocate_mem(unsigned long, unsigned long);
-extern unsigned long __init find_max_low_pfn(void);
-extern void __init find_max_pfn(void);
-
-extern struct pt_regs crash_smp_regs[NR_CPUS];
-extern long crash_smp_current_task[NR_CPUS];
-extern void crash_dump_save_this_cpu(struct pt_regs *, int);
-extern void __crash_dump_stop_cpus(void);
-extern void crash_get_current_regs(struct pt_regs *regs);
-
-#define CRASH_BACKUP_BASE ((unsigned long)CONFIG_BACKUP_BASE * 0x100000)
-#define CRASH_BACKUP_SIZE ((unsigned long)CONFIG_BACKUP_SIZE * 0x100000)
-#define CRASH_RELOCATE_SIZE 0xa0000
-
-static inline void crash_relocate_mem(void)
-{
-       if (crashed)
-               __crash_relocate_mem(CRASH_BACKUP_BASE + CRASH_BACKUP_SIZE,
-                                       CRASH_RELOCATE_SIZE);
-}
-
-static inline void set_saved_max_pfn(void)
-{
-       find_max_pfn();
-       saved_max_pfn = find_max_low_pfn();
-}
-
-static inline void crash_reserve_bootmem(void)
-{
-       if (!dump_enabled) {
-               reserve_bootmem(CRASH_BACKUP_BASE,
-                       CRASH_BACKUP_SIZE + CRASH_RELOCATE_SIZE + PAGE_SIZE);
-       }
-}
-
-static inline void crash_dump_stop_cpus(void)
-{
-       int cpu;
-
-       if (!crashed)
-               return;
-
-       cpu = smp_processor_id();
-
-       crash_smp_current_task[cpu] = (long)current;
-       crash_get_current_regs(&crash_smp_regs[cpu]);
-
-       /* This also captures the register states of the other cpus */
-       __crash_dump_stop_cpus();
-#if defined(CONFIG_X86_IO_APIC)
-       disable_IO_APIC();
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC)
-       disconnect_bsp_APIC();
-#endif
-}
-
-static inline void crash_dump_save_registers(void)
-{
-       void *addr;
-
-       addr = __va(CRASH_BACKUP_BASE + CRASH_BACKUP_SIZE + CRASH_RELOCATE_SIZE);
-       memcpy(addr, crash_smp_regs, (sizeof(struct pt_regs)*NR_CPUS));
-       addr += sizeof(struct pt_regs)*NR_CPUS;
-       memcpy(addr, crash_smp_current_task, (sizeof(long)*NR_CPUS));
-}
-#else
-#define crash_relocate_mem() do { } while(0)
-#define set_saved_max_pfn() do { } while(0)
-#define crash_reserve_bootmem() do { } while(0)
-#define crash_dump_stop_cpus() do { } while(0)
-#define crash_dump_save_registers() do { } while(0)
-#endif
index a2525ba..454b23f 100644 (file)
@@ -61,7 +61,6 @@ void *kmap(struct page *page);
 void kunmap(struct page *page);
 void *kmap_atomic(struct page *page, enum km_type type);
 void kunmap_atomic(void *kvaddr, enum km_type type);
-char *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 struct page *kmap_atomic_to_page(void *ptr);
 
 #define flush_cache_kmaps()    do { } while (0)
diff --git a/include/asm-i386/kexec.h b/include/asm-i386/kexec.h
deleted file mode 100644 (file)
index eb8fd98..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _I386_KEXEC_H
-#define _I386_KEXEC_H
-
-#include <asm/fixmap.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- *
- * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
- * calculation for the amount of memory directly mappable into the
- * kernel memory space.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
-/* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-
-#define KEXEC_CONTROL_CODE_SIZE        4096
-
-#endif /* _I386_KEXEC_H */
index 27e18a2..0bcc6f1 100644 (file)
@@ -48,7 +48,7 @@
 #define INVALIDATE_TLB_VECTOR  0xfd
 #define RESCHEDULE_VECTOR      0xfc
 #define CALL_FUNCTION_VECTOR   0xfb
-#define CRASH_DUMP_VECTOR      0xfa
+#define DUMP_VECTOR            0xfa
 
 #define THERMAL_APIC_VECTOR    0xf0
 /*
index 209cda1..01e616e 100644 (file)
@@ -4,21 +4,7 @@
 #include <linux/config.h>
 
 #ifdef __KERNEL__
-
-#if defined(CONFIG_X86_HZ) && defined(CONFIG_KERNEL_HZ)
-#error MEF: fix up CONFIG to only use one of these
-#endif
-
-#ifdef CONFIG_X86_HZ
-# define HZ            CONFIG_X86_HZ
-#else
-# ifdef CONFIG_KERNEL_HZ
-#  define HZ           CONFIG_KERNEL_HZ
-# else
-#  define HZ           1000            /* Internal kernel timer frequency */
-# endif
-#endif
-
+# define HZ            (CONFIG_X86_HZ)
 # define USER_HZ       100             /* .. some user interfaces are in "ticks" */
 # define CLOCKS_PER_SEC                (USER_HZ)       /* like times() */
 #endif
index 5fa792b..047cd23 100644 (file)
@@ -42,7 +42,6 @@ extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
 extern void smp_invalidate_rcv(void);          /* Process an NMI */
 extern void (*mtrr_hook) (void);
 extern void zap_low_mappings (void);
-extern void stop_this_cpu(void *);
 
 #define MAX_APICID 256
 extern u8 x86_cpu_to_apicid[];
index 9fe32c4..80c2db1 100644 (file)
 #define __NR_get_mempolicy     (__NR_Linux + 261)
 #define __NR_set_mempolicy     (__NR_Linux + 262)
 #define __NR_vserver           (__NR_Linux + 273)
-
+#define __NR_Linux_syscalls    274
 
 #define HPUX_GATEWAY_ADDR       0xC0000004
 #define LINUX_GATEWAY_ADDR      0x100
diff --git a/include/asm-ppc/kexec.h b/include/asm-ppc/kexec.h
deleted file mode 100644 (file)
index 3531b6d..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _PPC_KEXEC_H
-#define _PPC_KEXEC_H
-
-#ifdef CONFIG_KEXEC
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- *
- * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
- * calculation for the amount of memory directly mappable into the
- * kernel memory space.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
-/* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-
-#define KEXEC_CONTROL_CODE_SIZE        4096
-
-
-#ifndef __ASSEMBLY__
-
-struct kimage;
-
-extern void machine_kexec_simple(struct kimage *image);
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* CONFIG_KEXEC */
-
-#endif /* _PPC_KEXEC_H */
index 61d385d..87a5501 100644 (file)
@@ -4,7 +4,6 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
-#include <linux/kexec.h>
 
 #include <asm/setup.h>
 
@@ -107,30 +106,6 @@ struct machdep_calls {
        /* functions for dealing with other cpus */
        struct smp_ops_t *smp_ops;
 #endif /* CONFIG_SMP */
-
-#ifdef CONFIG_KEXEC
-       /* Called to shutdown machine specific hardware not already controlled
-        * by other drivers.
-        * XXX Should we move this one out of kexec scope?
-        */
-       void (*machine_shutdown)(void);
-
-       /* Called to do what every setup is needed on image and the
-        * reboot code buffer. Returns 0 on success.
-        * Provide your own (maybe dummy) implementation if your platform
-        * claims to support kexec.
-        */
-       int (*machine_kexec_prepare)(struct kimage *image);
-
-       /* Called to handle any machine specific cleanup on image */
-       void (*machine_kexec_cleanup)(struct kimage *image);
-
-       /* Called to perform the _real_ kexec.
-        * Do NOT allocate memory or fail here. We are past the point of
-        * no return.
-        */
-       void (*machine_kexec)(struct kimage *image);
-#endif /* CONFIG_KEXEC */
 };
 
 extern struct machdep_calls ppc_md;
index 87eff2f..d224b21 100644 (file)
@@ -86,8 +86,7 @@ static inline void tlb_finish_mmu(struct mmu_gather *mp, unsigned long start, un
 
        if (rss < freed)
                freed = rss;
-       // mm->rss = rss - freed;
-       vx_rsspages_sub(mm, freed);
+       mm->rss = rss - freed;
 
        tlb_flush_mmu(mp);
 
diff --git a/include/asm-um/cpumask.h b/include/asm-um/cpumask.h
deleted file mode 100644 (file)
index 90f0d00..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_UM_CPUMASK_H
-#define _ASM_UM_CPUMASK_H
-
-#include <asm-generic/cpumask.h>
-
-#endif /* _ASM_UM_CPUMASK_H */
diff --git a/include/asm-um/diskdump.h b/include/asm-um/diskdump.h
deleted file mode 100644 (file)
index 67fba6c..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_UM_DISKDUMP_H_
-#define _ASM_UM_DISKDUMP_H_
-
-#include <asm-generic/diskdump.h>
-
-#endif /* _ASM_UM_DISKDUMP_H_ */
diff --git a/include/asm-um/init.h b/include/asm-um/init.h
deleted file mode 100644 (file)
index 1e271ca..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _UM_INIT_H
-#define _UM_INIT_H
-
-#ifdef notdef
-#define __init
-#define __initdata
-#define __initfunc(__arginit) __arginit
-#define __cacheline_aligned 
-#endif
-
-#endif
index de389a4..bccf537 100644 (file)
@@ -15,8 +15,9 @@
 #define SIGIO_WRITE_IRQ        11
 #define TELNETD_IRQ            12
 #define XTERM_IRQ              13
-
-#define LAST_IRQ XTERM_IRQ
+#define HUMFS_IRQ              14
+#define LAST_IRQ HUMFS_IRQ
 #define NR_IRQS (LAST_IRQ + 1)
 
 #endif
diff --git a/include/asm-um/module.h b/include/asm-um/module.h
new file mode 100644 (file)
index 0000000..dae3ddf
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef __UM_MODULE_H
+#define __UM_MODULE_H
+
+/* UML is simple */
+struct mod_arch_specific
+{
+};
+
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Ehdr Elf32_Ehdr
+
+#endif
index e56bed3..73973ae 100644 (file)
@@ -49,10 +49,6 @@ static inline void pte_free(struct page *pte)
 
 #define check_pgt_cache()      do { } while (0)
 
-#define arch_add_exec_range(mm, limit)         do { ; } while (0)
-#define arch_flush_exec_range(mm)              do { ; } while (0)
-#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
-
 #endif
 
 /*
index ec58334..e82fda7 100644 (file)
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2004 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
 
@@ -107,6 +107,7 @@ static inline void mm_copy_segments(struct mm_struct *from_mm,
  */
 extern unsigned long task_size;
 
+#undef TASK_SIZE
 #define TASK_SIZE      (task_size)
 
 /* This decides where the kernel will search for a free chunk of vm
diff --git a/include/asm-um/smplock.h b/include/asm-um/smplock.h
deleted file mode 100644 (file)
index aacda39..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_SMPLOCK_H
-#define __UM_SMPLOCK_H
-
-#include "asm/arch/smplock.h"
-
-#endif
diff --git a/include/asm-um/spinlock.h b/include/asm-um/spinlock.h
deleted file mode 100644 (file)
index f18c828..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __UM_SPINLOCK_H
-#define __UM_SPINLOCK_H
-
-#include "asm/arch/spinlock.h"
-
-#endif
diff --git a/include/asm-x86_64/kexec.h b/include/asm-x86_64/kexec.h
deleted file mode 100644 (file)
index b0531c5..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef _X86_64_KEXEC_H
-#define _X86_64_KEXEC_H
-
-#include <asm/page.h>
-#include <asm/proto.h>
-
-/*
- * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
- * I.e. Maximum page that is mapped directly into kernel memory,
- * and kmap is not required.
- *
- * So far x86_64 is limited to 40 physical address bits.
- */
-
-/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
-/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
-/* Maximum address we can use for the control pages */
-#define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
-
-/* Allocate one page for the pdp and the second for the code */
-#define KEXEC_CONTROL_CODE_SIZE  (4096UL + 4096UL)
-
-#endif /* _X86_64_KEXEC_H */
index 8f80f15..671e757 100644 (file)
@@ -54,9 +54,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                        out_of_line_bug();
                if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
                        /* We were in lazy tlb mode and leave_mm disabled 
-                        * tlb flush IPI delivery. We must flush our tlb.
+                        * tlb flush IPI delivery. We must reload CR3
+                        * to make sure to use no freed page tables.
                         */
-                       local_flush_tlb();
+                       asm volatile("movq %0,%%cr3" :: "r" (__pa(next->pgd)) : "memory");
                        load_LDT_nolock(&next->context, cpu);
                }
        }
index e161183..cfecee5 100644 (file)
@@ -559,7 +559,7 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify)
 #define __NR_mq_getsetattr     245
 __SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
 #define __NR_kexec_load        246
-__SYSCALL(__NR_kexec_load, sys_kexec_load)
+__SYSCALL(__NR_kexec_load, sys_ni_syscall)
 #define __NR_waitid            247
 __SYSCALL(__NR_waitid, sys_waitid)
 #define __NR_syscall_max __NR_waitid
index aeabe07..376a550 100644 (file)
@@ -21,7 +21,6 @@ extern unsigned long min_low_pfn;
  * highest page
  */
 extern unsigned long max_pfn;
-extern unsigned long saved_max_pfn;
 
 /*
  * node_bootmem_map is a map pointer - the bits represent all physical 
similarity index 70%
rename from include/linux/ckrm_events.h
rename to include/linux/ckrm.h
index 14cfbe3..a29bf28 100644 (file)
@@ -1,6 +1,4 @@
-/*
- * ckrm_events.h - Class-based Kernel Resource Management (CKRM)
- *                 event handling
+/* ckrm.h - Class-based Kernel Resource Management (CKRM)
  *
  * Copyright (C) Hubertus Franke, IBM Corp. 2003,2004
  *           (C) Shailabh Nagar,  IBM Corp. 2003
@@ -21,8 +19,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 28 Aug 2003
  *        Created.
  *        New Event callback structure
  */
 
-#ifndef _LINUX_CKRM_EVENTS_H
-#define _LINUX_CKRM_EVENTS_H
+#ifndef _LINUX_CKRM_H
+#define _LINUX_CKRM_H
 
 #ifdef CONFIG_CKRM
 
-/*
- * Data structure and function to get the list of registered 
- * resource controllers.
- */
+// Data structure and function to get the list of registered 
+// resource controllers.
+
+// #include <linux/sched.h>
 
-/*
- * CKRM defines a set of events at particular points in the kernel
+/* CKRM defines a set of events at particular points in the kernel
  * at which callbacks registered by various class types are called
  */
 
 enum ckrm_event {
-       /*
-        * we distinguish these events types:
+       /* we distinguish various event types
         *
         * (a) CKRM_LATCHABLE_EVENTS
         *      events can be latched for event callbacks by classtypes
@@ -89,7 +84,6 @@ enum ckrm_event {
        CKRM_EVENT_RECLASSIFY = CKRM_NONLATCHABLE_EVENTS,
 
        /* events (c) */
-
        CKRM_NOTCLASSIFY_EVENTS,
 
        CKRM_EVENT_MANUAL = CKRM_NOTCLASSIFY_EVENTS,
@@ -101,13 +95,7 @@ enum ckrm_event {
 #ifdef __KERNEL__
 #ifdef CONFIG_CKRM
 
-/*
- * CKRM event callback specification for the classtypes or resource controllers 
- *   typically an array is specified using CKRM_EVENT_SPEC terminated with 
- *   CKRM_EVENT_SPEC_LAST and then that array is registered using
- *   ckrm_register_event_set.
- *   Individual registration of event_cb is also possible
- */
+extern void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg);
 
 typedef void (*ckrm_event_cb) (void *arg);
 
@@ -116,21 +104,6 @@ struct ckrm_hook_cb {
        struct ckrm_hook_cb *next;
 };
 
-struct ckrm_event_spec {
-       enum ckrm_event ev;
-       struct ckrm_hook_cb cb;
-};
-
-#define CKRM_EVENT_SPEC(EV,FCT) { CKRM_EVENT_##EV, \
-                                       { (ckrm_event_cb)FCT, NULL } }
-
-int ckrm_register_event_set(struct ckrm_event_spec especs[]);
-int ckrm_unregister_event_set(struct ckrm_event_spec especs[]);
-int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb);
-int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb);
-
-extern void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg);
-
 #define CKRM_DEF_CB(EV,fct)                                    \
 static inline void ckrm_cb_##fct(void)                         \
 {                                                              \
@@ -143,7 +116,7 @@ static inline void ckrm_cb_##fct(argtp arg)                         \
          ckrm_invoke_event_cb_chain(CKRM_EVENT_##EV,(void*)arg);       \
 }
 
-#else /* !CONFIG_CKRM */
+#else                          // !CONFIG_CKRM
 
 #define CKRM_DEF_CB(EV,fct)                    \
 static inline void ckrm_cb_##fct(void)  { }
@@ -151,14 +124,14 @@ static inline void ckrm_cb_##fct(void)  { }
 #define CKRM_DEF_CB_ARG(EV,fct,argtp)          \
 static inline void ckrm_cb_##fct(argtp arg) { }
 
-#endif /* CONFIG_CKRM */
+#endif                         // CONFIG_CKRM
 
-/*
+/*-----------------------------------------------------------------
  *   define the CKRM event functions 
  *               EVENT          FCT           ARG         
- */
+ *-----------------------------------------------------------------*/
 
-/* forward declarations for function arguments */
+// forward declarations of the types we refer to
 struct task_struct;
 struct sock;
 struct user_struct;
@@ -175,18 +148,19 @@ CKRM_DEF_CB_ARG(USERDEL, userdel, struct user_struct *);
 CKRM_DEF_CB_ARG(LISTEN_START, listen_start, struct sock *);
 CKRM_DEF_CB_ARG(LISTEN_STOP, listen_stop, struct sock *);
 
-/* some other functions required */
+// some other functions required
 #ifdef CONFIG_CKRM
 extern void ckrm_init(void);
-extern void ckrm_cb_newtask(struct task_struct *);
-extern void ckrm_cb_exit(struct task_struct *);
+void ckrm_cb_newtask(struct task_struct *);
+void ckrm_cb_exit(struct task_struct *);
 #else
-#define ckrm_init(           do { } while (0)
+#define ckrm_init(x)           do { } while (0)
 #define ckrm_cb_newtask(x)     do { } while (0)
 #define ckrm_cb_exit(x)                do { } while (0)
 #endif
 
 extern int get_exe_path_name(struct task_struct *, char *, int);
 
-#endif /* __KERNEL__ */
-#endif /* _LINUX_CKRM_EVENTS_H */
+#endif                         // __KERNEL__
+
+#endif                         // _LINUX_CKRM_H
index ac53ef3..f4e91e9 100644 (file)
@@ -1,5 +1,4 @@
-/*
- *  ckrm_ce.h - Header file to be used by Classification Engine of CKRM
+/* ckrm_ce.h - Header file to be used by Classification Engine of CKRM
  *
  * Copyright (C) Hubertus Franke, IBM Corp. 2003
  *           (C) Shailabh Nagar,  IBM Corp. 2003
 
 #ifdef CONFIG_CKRM
 
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>                // getting the event names
 
-/*
- * Action parameters identifying the cause of a task<->class notify callback 
+/* Action parameters identifying the cause of a task<->class notify callback 
  * these can perculate up to user daemon consuming records send by the 
  * classification engine
  */
@@ -52,35 +50,39 @@ typedef struct ckrm_eng_callback {
        int always_callback;    /* set if CE should always be called back 
                                   regardless of numclasses */
 
+
+
+
        /* callbacks which are called without holding locks */
 
        unsigned long c_interest;       /* set of classification events of 
-                                        * interest to CE 
-                                        */
+                                          interest to CE 
+                                       */
 
        /* generic classify */
-       ce_classify_fct_t classify;
-
+       ce_classify_fct_t classify;     
        /* class added */
        void (*class_add) (const char *name, void *core, int classtype);
-
        /* class deleted */
        void (*class_delete) (const char *name, void *core, int classtype);
 
+
        /* callbacks which are called while holding task_lock(tsk) */
+
        unsigned long n_interest;       /* set of notification events of 
-                                        *  interest to CE 
-                                        */
+                                          interest to CE 
+                                       */
        /* notify on class switch */
        ce_notify_fct_t notify; 
+
 } ckrm_eng_callback_t;
 
 struct inode;
 struct dentry;
 
 typedef struct rbce_eng_callback {
-       int (*mkdir) (struct inode *, struct dentry *, int);    /* mkdir */
-       int (*rmdir) (struct inode *, struct dentry *);         /* rmdir */
+       int (*mkdir) (struct inode *, struct dentry *, int);    // mkdir
+       int (*rmdir) (struct inode *, struct dentry *); // rmdir
        int (*mnt) (void);
        int (*umnt) (void);
 } rbce_eng_callback_t;
@@ -88,7 +90,7 @@ typedef struct rbce_eng_callback {
 extern int ckrm_register_engine(const char *name, ckrm_eng_callback_t *);
 extern int ckrm_unregister_engine(const char *name);
 
-extern void *ckrm_classobj(const char *, int *classtype);
+extern void *ckrm_classobj(char *, int *classtype);
 extern int get_exe_path_name(struct task_struct *t, char *filename,
                             int max_size);
 
@@ -98,11 +100,13 @@ extern int rcfs_unregister_engine(rbce_eng_callback_t *);
 extern int ckrm_reclassify(int pid);
 
 #ifndef _LINUX_CKRM_RC_H
-
+// ckrm kernel has inlined functions for this which are exported
 extern void ckrm_core_grab(void *);
 extern void ckrm_core_drop(void *);
 #endif
 
-#endif /* CONFIG_CKRM */
-#endif /* __KERNEL__ */
-#endif /* _LINUX_CKRM_CE_H */
+#endif                         // CONFIG_CKRM
+
+#endif                         // __KERNEL__
+
+#endif                         // _LINUX_CKRM_CE_H
index 3712aef..4efebb9 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) Jiantao Kong, IBM Corp. 2003
  *           (C) Shailabh Nagar, IBM Corp. 2003
  *           (C) Chandra Seetharaman, IBM Corp. 2004
- *
- *
- * Memory control functions of the CKRM kernel API
+ * 
+ * 
+ * Memory control functions of the CKRM kernel API 
  *
  * Latest version, more details at http://ckrm.sf.net
- *
+ * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  *
  */
 
+/* Changes
+ *
+ * 28 Aug 2003
+ *        Created.
+ */
+
 #ifndef _LINUX_CKRM_MEM_H
 #define _LINUX_CKRM_MEM_H
 
 
 #include <linux/list.h>
 #include <linux/ckrm_rc.h>
-#include <linux/kref.h>
 
-struct ckrm_zone {
-       struct list_head active_list;
-       struct list_head inactive_list;
-
-       unsigned long nr_active;        // # of pages in the active list
-       unsigned long nr_inactive;      // # of pages in the inactive list
-       unsigned long active_over;
-       unsigned long inactive_over;
-
-       unsigned long shrink_active;
-       unsigned long shrink_inactive;
-       long shrink_weight;
-       unsigned long shrink_flag;
-
-       struct list_head victim_list;   // list of ckrm_zones chosen for shrinking
-       struct zone *zone;
-       struct ckrm_mem_res *memcls;
-};
-
-struct ckrm_mem_res {
-       unsigned long flags;
-       struct ckrm_core_class *core;   // the core i am part of...
-       struct ckrm_core_class *parent; // parent of the core i am part of....
+typedef struct ckrm_mem_res {
+       unsigned long reclaim_flags; 
+       unsigned long flags; 
+       struct ckrm_core_class *core; // the core i am part of...
+       struct ckrm_core_class *parent; // parent of the core i am part of....
        struct ckrm_shares shares;
-       struct list_head mcls_list;     // list of all 1-level classes
-       struct list_head shrink_list;   // list of classes need to be shrunk
-       struct kref nr_users;           // # of references to this class/data structure
-       atomic_t pg_total;              // # of pages used by this class
-       int pg_guar;                    // # of pages this class is guaranteed
-       int pg_limit;                   // max # of pages this class can get
-       int pg_borrowed;                // # of pages this class borrowed from its parent
-       int pg_lent;                    // # of pages this class lent to its children
-       int pg_unused;                  // # of pages left to this class (after giving the
-                                       // guarantees to children. need to borrow from parent if
-                                       // more than this is needed.
-       int impl_guar;                  // implicit guarantee for class with don't care guar
-       int nr_dontcare;                // # of children with don't care guarantee
-       struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+       struct list_head mcls_list; // list of all 1-level classes
+       struct list_head shrink_list; // list of classes that need to be shrunk
+       atomic_t nr_users; // # of references to this class/data structure
+       atomic_t pg_total;  // # of pages used by this class
+       int pg_guar; // # of pages this class is guaranteed
+       int pg_limit; // max # of pages this class can get
+       int pg_borrowed; // # of pages this class borrowed from its parent
+       int pg_lent; // # of pages this class lent to its children
+       int pg_unused; // # of pages left to this class (after giving the
+                               // guarantees to children. need to borrow from parent if
+                               // more than this is needed.
+       int nr_active[MAX_NR_ZONES];
+       int nr_inactive[MAX_NR_ZONES];
+       int tmp_cnt;
        int shrink_count;
        unsigned long last_shrink;
        int over_limit_failures;
-       int shrink_pages;               // # of pages to free in this class
-       int hier;                       // hiearchy, root = 0
-};
+       int hier; // hierarchy, root = 0
+} ckrm_mem_res_t;
 
 extern atomic_t ckrm_mem_real_count;
 extern unsigned int ckrm_tot_lru_pages;
-extern int ckrm_nr_mem_classes;
 extern struct list_head ckrm_shrink_list;
-extern struct list_head ckrm_memclass_list;
 extern spinlock_t ckrm_mem_lock;
 extern struct ckrm_res_ctlr mem_rcbs;
-extern struct ckrm_mem_res *ckrm_mem_root_class;
-
-#define page_ckrmzone(page)    ((page)->ckrm_zone)
 
-#define CLS_SHRINK_BIT (1)
+#define page_class(page)       ((ckrm_mem_res_t*)((page)->memclass))
+
+// used to fill reclaim_flags, used only when memory is low in the system
+#define CLS_CLEAR              (0)      // class under its guarantee
+#define CLS_OVER_GUAR  (1 << 0) // class is over its guarantee
+#define CLS_PARENT_OVER        (1 << 1) // parent is over 110% mark over limit
+#define CLS_OVER_25            (1 << 2) // class over 25% mark between guar(0) & limit(100)
+#define CLS_OVER_50            (1 << 3) // class over 50% mark between guar(0) & limit(100)
+#define CLS_OVER_75            (1 << 4) // class over 75% mark between guar(0) & limit(100)
+#define CLS_OVER_100   (1 << 5) // class over its limit
+#define CLS_OVER_110   (1 << 6) // class over 110% mark over limit
+#define CLS_FLAGS_ALL  ( CLS_OVER_GUAR | CLS_PARENT_OVER | CLS_OVER_25 | \
+                                       CLS_OVER_50 | CLS_OVER_75 | CLS_OVER_100 | CLS_OVER_110 )
+#define CLS_SHRINK_BIT (31)      // used to both lock and set the bit
+#define CLS_SHRINK             (1UL << CLS_SHRINK_BIT) // shrink the given class; 1UL because bit 31 overflows a signed int and reclaim_flags is unsigned long
 
 // used in flags. set when a class is more than 90% of its maxlimit
-#define MEM_AT_LIMIT   1
+#define MEM_AT_LIMIT 1
 
+extern void ckrm_set_aggressive(ckrm_mem_res_t *);
+extern unsigned int ckrm_setup_reclamation(void);
+extern void ckrm_teardown_reclamation(void);
+extern void ckrm_get_reclaim_bits(unsigned int *, unsigned int *);
 extern void ckrm_init_mm_to_task(struct mm_struct *, struct task_struct *);
-extern void ckrm_mem_evaluate_mm(struct mm_struct *, struct ckrm_mem_res *);
-extern void ckrm_at_limit(struct ckrm_mem_res *);
-extern int ckrm_memclass_valid(struct ckrm_mem_res *);
-extern int ckrm_mem_get_shrink_to(void);
-extern void check_memclass(struct ckrm_mem_res *, char *);
-extern void memclass_release(struct kref *);
+extern void ckrm_mem_evaluate_mm(struct mm_struct *);
+extern void ckrm_at_limit(ckrm_mem_res_t *);
+extern int ckrm_memclass_valid(ckrm_mem_res_t *);
+#define ckrm_get_reclaim_flags(cls)    ((cls)->reclaim_flags)
 
 #else
 
 #define ckrm_init_mm_to_current(a)                     do {} while (0)
 #define ckrm_mem_evaluate_mm(a)                                do {} while (0)
+#define ckrm_get_reclaim_flags(a)                      (0)
+#define ckrm_setup_reclamation()                       (0)
+#define ckrm_teardown_reclamation()                    do {} while (0)
+#define ckrm_get_reclaim_bits(a, b)                    do { *(a) = 0; *(b)= 0; } while (0)
 #define ckrm_init_mm_to_task(a,b)                      do {} while (0)
 
 #endif // CONFIG_CKRM_RES_MEM
 
 #endif //_LINUX_CKRM_MEM_H
+
index 1166956..221f936 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) Jiantao Kong, IBM Corp. 2003
  *           (C) Shailabh Nagar, IBM Corp. 2003
  *           (C) Chandra Seetharaman, IBM Corp. 2004
- *
- *
- * Memory control functions of the CKRM kernel API
+ * 
+ * 
+ * Memory control functions of the CKRM kernel API 
  *
  * Latest version, more details at http://ckrm.sf.net
- *
+ * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  *
  */
 
+/* Changes
+ *
+ * 28 Aug 2003
+ *        Created.
+ */
+
+
 #ifndef _LINUX_CKRM_MEM_INLINE_H_
 #define _LINUX_CKRM_MEM_INLINE_H_
 
 
 #ifdef CONFIG_CKRM_RES_MEM
 
-#define INACTIVE       0
-#define ACTIVE         1
+#define GET_MEM_CLASS(tsk) \
+       ckrm_get_res_class((tsk)->taskclass, mem_rcbs.resid, ckrm_mem_res_t) /* tsk may be any pointer expression */
 
-static inline struct ckrm_mem_res *
-ckrm_get_mem_class(struct task_struct *tsk)
-{
-       return ckrm_get_res_class(tsk->taskclass, mem_rcbs.resid,
-               struct ckrm_mem_res);
-}
+#define ckrm_set_shrink(cls) \
+       set_bit(CLS_SHRINK_BIT, (unsigned long *)&(cls)->reclaim_flags)
+#define ckrm_test_set_shrink(cls) \
+       test_and_set_bit(CLS_SHRINK_BIT, (unsigned long *)&(cls)->reclaim_flags)
+#define ckrm_clear_shrink(cls) \
+       clear_bit(CLS_SHRINK_BIT, (unsigned long *)&(cls)->reclaim_flags)
 
 #define ckrm_shrink_list_empty()       list_empty(&ckrm_shrink_list)
 
-static inline void
-ckrm_set_shrink(struct ckrm_zone *cz)
-{
-       set_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
-}
-
-static inline int
-ckrm_test_set_shrink(struct ckrm_zone *cz)
-{
-       return test_and_set_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
-}
-
-static inline void 
-ckrm_clear_shrink(struct ckrm_zone *cz)
-{
-       clear_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
-}
-
 /*
- * Currently, a shared page that is shared by multiple classes is charged
- * to a class with max available guarantee. Simply replace this function
- * for other policies.
+ * Currently, the class of an address is assigned to the class with max
+ * available guarantee. Simply replace this function for other policies.
  */
 static inline int
-ckrm_mem_share_compare(struct ckrm_mem_res *a, struct ckrm_mem_res *b)
+ckrm_mem_share_compare(ckrm_mem_res_t *a, ckrm_mem_res_t *b)
 {
-       if (a == NULL)
-               return -(b != NULL);
+       if (a == NULL) 
+               return -(b != NULL) ;
        if (b == NULL)
                return 0;
-       if (a->pg_guar == b->pg_guar)
-               return 0;
        if (a->pg_guar == CKRM_SHARE_DONTCARE)
                return 1;
        if (b->pg_guar == CKRM_SHARE_DONTCARE)
@@ -78,20 +64,41 @@ ckrm_mem_share_compare(struct ckrm_mem_res *a, struct ckrm_mem_res *b)
 }
 
 static inline void
-incr_use_count(struct ckrm_mem_res *cls, int borrow)
+mem_class_get(ckrm_mem_res_t *cls)
+{
+       if (cls)
+               atomic_inc(&((cls)->nr_users));
+}
+
+static inline void
+mem_class_put(ckrm_mem_res_t *cls)
+{
+       const char *name;
+       
+       if (cls && atomic_dec_and_test(&(cls->nr_users)) ) {
+               if (cls->core == NULL) {
+                       name = "unknown";
+               } else {
+                       name = cls->core->name;
+               }
+               printk(KERN_DEBUG "freeing memclass %p of <core:%s>\n", cls, name);
+
+               // BUG_ON(ckrm_memclass_valid(cls));
+               // kfree(cls);
+       }       
+}
+
+static inline void
+incr_use_count(ckrm_mem_res_t *cls, int borrow)
 {
-       extern int ckrm_mem_shrink_at;
-       if (unlikely(!cls))
-               return;
-       BUG_ON(!ckrm_memclass_valid(cls));
        atomic_inc(&cls->pg_total);
 
-       if (borrow)
+       if (borrow) 
                cls->pg_lent++;
        if ((cls->pg_guar == CKRM_SHARE_DONTCARE) ||
-                       (atomic_read(&cls->pg_total) > cls->pg_unused)) {
-               struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
-                               mem_rcbs.resid, struct ckrm_mem_res);
+                               (atomic_read(&cls->pg_total) > cls->pg_unused)) {
+               ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent,
+                               mem_rcbs.resid, ckrm_mem_res_t);
                if (parcls) {
                        incr_use_count(parcls, 1);
                        cls->pg_borrowed++;
@@ -99,27 +106,23 @@ incr_use_count(struct ckrm_mem_res *cls, int borrow)
        } else {
                atomic_inc(&ckrm_mem_real_count);
        }
-       if (unlikely((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
-                       (atomic_read(&cls->pg_total) >=
-                       ((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
-                       ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT))) {
+       if ((cls->pg_limit != CKRM_SHARE_DONTCARE) && 
+                       (atomic_read(&cls->pg_total) >= cls->pg_limit) &&
+                       ((cls->flags & MEM_AT_LIMIT) != MEM_AT_LIMIT)) {
                ckrm_at_limit(cls);
        }
        return;
 }
 
 static inline void
-decr_use_count(struct ckrm_mem_res *cls, int borrowed)
+decr_use_count(ckrm_mem_res_t *cls, int borrowed)
 {
-       if (unlikely(!cls))
-               return;
-       BUG_ON(!ckrm_memclass_valid(cls));
        atomic_dec(&cls->pg_total);
        if (borrowed)
                cls->pg_lent--;
        if (cls->pg_borrowed > 0) {
-               struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
-                               mem_rcbs.resid, struct ckrm_mem_res);
+               ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent,
+                               mem_rcbs.resid, ckrm_mem_res_t);
                if (parcls) {
                        decr_use_count(parcls, 1);
                        cls->pg_borrowed--;
@@ -130,25 +133,21 @@ decr_use_count(struct ckrm_mem_res *cls, int borrowed)
 }
 
 static inline void
-ckrm_set_page_class(struct page *page, struct ckrm_mem_res *cls)
+ckrm_set_page_class(struct page *page, ckrm_mem_res_t *cls)
 {
-       if (unlikely(cls == NULL)) {
-               cls = ckrm_mem_root_class;
-       }
-       if (likely(cls != NULL)) {
-               struct ckrm_zone *czone = &cls->ckrm_zone[page_zonenum(page)];
-               if (unlikely(page->ckrm_zone)) {
-                       kref_put(&cls->nr_users, memclass_release);
+       if (mem_rcbs.resid != -1 && cls != NULL) {
+               if (unlikely(page->memclass)) {
+                       mem_class_put(page->memclass);
                }
-               page->ckrm_zone = czone;
-               kref_get(&cls->nr_users);
+               page->memclass = cls;
+               mem_class_get(cls);
        } else {
-               page->ckrm_zone = NULL;
+               page->memclass = NULL;
        }
 }
 
 static inline void
-ckrm_set_pages_class(struct page *pages, int numpages, struct ckrm_mem_res *cls)
+ckrm_set_pages_class(struct page *pages, int numpages, ckrm_mem_res_t *cls)
 {
        int i;
        for (i = 0; i < numpages; pages++, i++) {
@@ -159,244 +158,154 @@ ckrm_set_pages_class(struct page *pages, int numpages, struct ckrm_mem_res *cls)
 static inline void
 ckrm_clear_page_class(struct page *page)
 {
-       if (likely(page->ckrm_zone != NULL)) {
-               if (CkrmAccount(page)) {
-                       decr_use_count(page->ckrm_zone->memcls, 0);
-                       ClearCkrmAccount(page);
-               }
-               kref_put(&page->ckrm_zone->memcls->nr_users, memclass_release);
-               page->ckrm_zone = NULL;
+       if (page->memclass != NULL) {
+               mem_class_put(page->memclass);
+               page->memclass = NULL;
        }
 }
 
 static inline void
-ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls)
+ckrm_clear_pages_class(struct page *pages, int numpages)
 {
-       struct ckrm_zone *old_czone = page->ckrm_zone, *new_czone;
-       struct ckrm_mem_res *oldcls;
-
-       if (unlikely(!old_czone || !newcls)) {
-               BUG_ON(CkrmAccount(page));
-               return;
+       int i;
+       for (i = 0; i < numpages; pages++, i++) {
+               ckrm_clear_page_class(pages);
        }
-       BUG_ON(!CkrmAccount(page));
-
-       oldcls = old_czone->memcls;
-       if (oldcls == NULL || (oldcls == newcls))
-               return;
+}
 
-       kref_put(&oldcls->nr_users, memclass_release);
-       decr_use_count(oldcls, 0);
+static inline void
+ckrm_change_page_class(struct page *page, ckrm_mem_res_t *newcls)
+{
+       ckrm_mem_res_t *oldcls = page_class(page);
 
-       page->ckrm_zone = new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+       if (!newcls || oldcls == newcls)
+               return;
 
-       kref_get(&newcls->nr_users);
-       incr_use_count(newcls, 0);
+       ckrm_clear_page_class(page);
+       ckrm_set_page_class(page, newcls);
+       if (test_bit(PG_ckrm_account, &page->flags)) {
+               decr_use_count(oldcls, 0);
+               incr_use_count(newcls, 0);
+               if (PageActive(page)) {
+                       oldcls->nr_active[page_zonenum(page)]--;
+                       newcls->nr_active[page_zonenum(page)]++;
+               } else {
+                       oldcls->nr_inactive[page_zonenum(page)]--;
+                       newcls->nr_inactive[page_zonenum(page)]++;
+               }
+       }
+}
 
-       list_del(&page->lru);
-       if (PageActive(page)) {
-               old_czone->nr_active--;
-               new_czone->nr_active++;
-               list_add(&page->lru, &new_czone->active_list);
-       } else {
-               old_czone->nr_inactive--;
-               new_czone->nr_inactive++;
-               list_add(&page->lru, &new_czone->inactive_list);
+static inline void
+ckrm_change_pages_class(struct page *pages, int numpages, 
+                                       ckrm_mem_res_t *cls)
+{
+       int i;
+       for (i = 0; i < numpages; pages++, i++) {
+               ckrm_change_page_class(pages, cls);
        }
 }
 
 static inline void
 ckrm_mem_inc_active(struct page *page)
 {
-       struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
-
-       if (cls == NULL)
+       ckrm_mem_res_t *cls = page_class(page), *curcls;
+       if (unlikely(!cls)) {
                return;
-       BUG_ON(CkrmAccount(page));
-       BUG_ON(page->ckrm_zone != NULL);
-
-       ckrm_set_page_class(page, cls);
+       }
+       BUG_ON(test_bit(PG_ckrm_account, &page->flags));
+       if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) {
+               cls = curcls;
+               ckrm_change_page_class(page, cls);
+       }
+       cls->nr_active[page_zonenum(page)]++;
        incr_use_count(cls, 0);
-       SetCkrmAccount(page);
-       BUG_ON(page->ckrm_zone == NULL);
-       page->ckrm_zone->nr_active++;
-       list_add(&page->lru, &page->ckrm_zone->active_list);
+       set_bit(PG_ckrm_account, &page->flags);
 }
 
 static inline void
 ckrm_mem_dec_active(struct page *page)
 {
-       if (page->ckrm_zone == NULL)
+       ckrm_mem_res_t *cls = page_class(page);
+       if (unlikely(!cls)) {
                return;
-       BUG_ON(page->ckrm_zone->memcls == NULL);
-       BUG_ON(!CkrmAccount(page));
-
-       list_del(&page->lru);
-       page->ckrm_zone->nr_active--;
-       ckrm_clear_page_class(page);
+       }
+       BUG_ON(!test_bit(PG_ckrm_account, &page->flags));
+       cls->nr_active[page_zonenum(page)]--;
+       decr_use_count(cls, 0);
+       clear_bit(PG_ckrm_account, &page->flags);
 }
 
-
 static inline void
 ckrm_mem_inc_inactive(struct page *page)
 {
-       struct ckrm_mem_res *cls = ckrm_get_mem_class(current) ?: ckrm_mem_root_class;
-
-       if (cls == NULL)
+       ckrm_mem_res_t *cls = page_class(page), *curcls;
+       if (unlikely(!cls)) {
                return;
-       BUG_ON(CkrmAccount(page));
-       BUG_ON(page->ckrm_zone != NULL);
-
-       ckrm_set_page_class(page, cls);
+       }
+       BUG_ON(test_bit(PG_ckrm_account, &page->flags));
+       if (unlikely(cls != (curcls = GET_MEM_CLASS(current)))) {
+               cls = curcls;
+               ckrm_change_page_class(page, cls);
+       }
+       cls->nr_inactive[page_zonenum(page)]++;
        incr_use_count(cls, 0);
-       SetCkrmAccount(page);
-       BUG_ON(page->ckrm_zone == NULL);
-       page->ckrm_zone->nr_inactive++;
-       list_add(&page->lru, &page->ckrm_zone->inactive_list);
+       set_bit(PG_ckrm_account, &page->flags);
 }
 
 static inline void
 ckrm_mem_dec_inactive(struct page *page)
 {
-       if (page->ckrm_zone == NULL)
+       ckrm_mem_res_t *cls = page_class(page);
+       if (unlikely(!cls)) {
                return;
-       BUG_ON(page->ckrm_zone->memcls == NULL);
-       BUG_ON(!CkrmAccount(page));
-
-       page->ckrm_zone->nr_inactive--;
-       list_del(&page->lru);
-       ckrm_clear_page_class(page);
+       }
+       BUG_ON(!test_bit(PG_ckrm_account, &page->flags));
+       cls->nr_inactive[page_zonenum(page)]--;
+       decr_use_count(cls, 0);
+       clear_bit(PG_ckrm_account, &page->flags);
 }
 
 static inline int
-ckrm_class_limit_ok(struct ckrm_mem_res *cls)
+ckrm_kick_page(struct page *page, unsigned int bits)
 {
-       int ret;
-       extern int ckrm_mem_fail_over;
-
-       if ((mem_rcbs.resid == -1) || !cls) {
-               return 1;
-       }
-       if (cls->pg_limit == CKRM_SHARE_DONTCARE) {
-               struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
-                                       mem_rcbs.resid, struct ckrm_mem_res);
-               ret = (parcls ? ckrm_class_limit_ok(parcls) : 0);
+       if (page_class(page) == NULL) {
+               return bits;
        } else {
-               ret = (atomic_read(&cls->pg_total) <=
-                       ((ckrm_mem_fail_over * cls->pg_limit) / 100));
-       }
-
-       if (ret == 0) {
-               // if we are failing... just nudge the back end
-               ckrm_at_limit(cls);
+               return (page_class(page)->reclaim_flags & bits);
        }
-       return ret;
-}
-
-// task/mm initializations/cleanup
-
-static inline void
-ckrm_task_mm_init(struct task_struct *tsk)
-{
-       INIT_LIST_HEAD(&tsk->mm_peers);
 }
 
-static inline void
-ckrm_task_change_mm(struct task_struct *tsk, struct mm_struct *oldmm, struct mm_struct *newmm)
+static inline int 
+ckrm_class_limit_ok(ckrm_mem_res_t *cls)
 {
-       if (oldmm) {
-               spin_lock(&oldmm->peertask_lock);
-               list_del(&tsk->mm_peers);
-               ckrm_mem_evaluate_mm(oldmm, NULL);
-               spin_unlock(&oldmm->peertask_lock);
+       if ((mem_rcbs.resid == -1) || !cls) {
+               return 1;
        }
-       spin_lock(&newmm->peertask_lock);
-       list_add_tail(&tsk->mm_peers, &newmm->tasklist);
-       ckrm_mem_evaluate_mm(newmm, NULL);
-       spin_unlock(&newmm->peertask_lock);
-}
-
-static inline void
-ckrm_task_clear_mm(struct task_struct *tsk, struct mm_struct *mm)
-{
-       spin_lock(&mm->peertask_lock);
-       list_del_init(&tsk->mm_peers);
-       ckrm_mem_evaluate_mm(mm, NULL);
-       spin_unlock(&mm->peertask_lock);
-}
-
-static inline void
-ckrm_mm_init(struct mm_struct *mm)
-{
-       INIT_LIST_HEAD(&mm->tasklist);
-       mm->peertask_lock = SPIN_LOCK_UNLOCKED;
-}
-
-static inline void
-ckrm_mm_setclass(struct mm_struct *mm, struct ckrm_mem_res *cls)
-{
-       mm->memclass = cls;
-       kref_get(&cls->nr_users);
-}
-
-static inline void
-ckrm_mm_clearclass(struct mm_struct *mm)
-{
-       if (mm->memclass) {
-               kref_put(&mm->memclass->nr_users, memclass_release);
-               mm->memclass = NULL;
+       if (cls->pg_limit == CKRM_SHARE_DONTCARE) {
+               ckrm_mem_res_t *parcls = ckrm_get_res_class(cls->parent,
+                                               mem_rcbs.resid, ckrm_mem_res_t);
+               return (!parcls ?: ckrm_class_limit_ok(parcls));
+       } else {
+               return (atomic_read(&cls->pg_total) <= (11 * cls->pg_limit) / 10);
        }
 }
 
-static inline void
-ckrm_zone_inc_active(struct ckrm_zone *czone, int cnt)
-{
-       czone->nr_active += cnt;
-}
-
-static inline void
-ckrm_zone_inc_inactive(struct ckrm_zone *czone, int cnt)
-{
-       czone->nr_inactive += cnt;
-}
-
-static inline void
-ckrm_zone_dec_active(struct ckrm_zone *czone, int cnt)
-{
-       czone->nr_active -= cnt;
-}
-
-static inline void
-ckrm_zone_dec_inactive(struct ckrm_zone *czone, int cnt)
-{
-       czone->nr_inactive -= cnt;
-}
-
 #else // !CONFIG_CKRM_RES_MEM
 
-#define ckrm_set_page_class(a,b)       do{}while(0)
-#define ckrm_set_pages_class(a,b,c)    do{}while(0)
-#define ckrm_clear_page_class(a)       do{}while(0)
-#define ckrm_clear_pages_class(a,b)    do{}while(0)
-#define ckrm_change_page_class(a,b)    do{}while(0)
+#define ckrm_set_page_class(a,b)               do{}while(0)
+#define ckrm_set_pages_class(a,b,c)            do{}while(0)
+#define ckrm_clear_page_class(a)               do{}while(0)
+#define ckrm_clear_pages_class(a,b)            do{}while(0)
+#define ckrm_change_page_class(a,b)            do{}while(0)
 #define ckrm_change_pages_class(a,b,c) do{}while(0)
-#define ckrm_mem_inc_active(a)         do{}while(0)
-#define ckrm_mem_dec_active(a)         do{}while(0)
-#define ckrm_mem_inc_inactive(a)       do{}while(0)
-#define ckrm_mem_dec_inactive(a)       do{}while(0)
-#define ckrm_shrink_list_empty()       (1)
-#define ckrm_kick_page(a,b)            (0)
-#define ckrm_class_limit_ok(a)         (1)
-#define ckrm_task_mm_init(a)           do{}while(0)
-#define ckrm_task_clear_mm(a, b)       do{}while(0)
-#define ckrm_task_change_mm(a, b, c)   do{}while(0)
-#define ckrm_mm_init(a)                        do{}while(0)
-#define ckrm_mm_setclass(a, b)         do{}while(0)
-#define ckrm_mm_clearclass(a)          do{}while(0)
-#define ckrm_zone_inc_active(a, b)     do{}while(0)
-#define ckrm_zone_inc_inactive(a, b)   do{}while(0)
-#define ckrm_zone_dec_active(a, b)     do{}while(0)
-#define ckrm_zone_dec_inactive(a, b)   do{}while(0)
+#define ckrm_mem_inc_active(a)                 do{}while(0)
+#define ckrm_mem_dec_active(a)                 do{}while(0)
+#define ckrm_mem_inc_inactive(a)               do{}while(0)
+#define ckrm_mem_dec_inactive(a)               do{}while(0)
+#define ckrm_shrink_list_empty()               (1)
+#define ckrm_kick_page(a,b)                            (0)
+#define ckrm_class_limit_ok(a)                 (1)
 
 #endif // CONFIG_CKRM_RES_MEM
 
index 06e2d2a..1bf2d07 100644 (file)
@@ -1,5 +1,4 @@
-/*
- *  ckrm_rc.h - Header file to be used by Resource controllers of CKRM
+/* ckrm_rc.h - Header file to be used by Resource controllers of CKRM
  *
  * Copyright (C) Hubertus Franke, IBM Corp. 2003
  *           (C) Shailabh Nagar,  IBM Corp. 2003
@@ -9,7 +8,7 @@
  * Provides data structures, macros and kernel API of CKRM for 
  * resource controllers.
  *
- * More details at http://ckrm.sf.net
+ * Latest version, more details at http://ckrm.sf.net
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -18,8 +17,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 12 Nov 2003
  *        Created.
 #ifdef CONFIG_CKRM
 
 #include <linux/list.h>
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_ce.h>
 #include <linux/seq_file.h>
 
-#define CKRM_MAX_CLASSTYPES         32 /* maximum number of class types */
-#define CKRM_MAX_CLASSTYPE_NAME     32         /* maximum classtype name length */
+/* maximum number of class types */
+#define CKRM_MAX_CLASSTYPES         32
+/* maximum classtype name length */
+#define CKRM_MAX_CLASSTYPE_NAME     32
 
-#define CKRM_MAX_RES_CTLRS           8 /* maximum resource controllers per classtype */
-#define CKRM_MAX_RES_NAME          128 /* maximum resource controller name length */
+/* maximum resource controllers per classtype */
+#define CKRM_MAX_RES_CTLRS           8
+/* maximum resource controller name length */
+#define CKRM_MAX_RES_NAME          128
 
 struct ckrm_core_class;
 struct ckrm_classtype;
 
-/*
+/*****************************************************************************
  * Share specifications
- */
+ *****************************************************************************/
 
 typedef struct ckrm_shares {
        int my_guarantee;
        int my_limit;
        int total_guarantee;
        int max_limit;
-       int unused_guarantee;   /* not used as parameters */
-       int cur_max_limit;      /* not used as parameters */
+       int unused_guarantee;   // not used as parameters
+       int cur_max_limit;      // not used as parameters
 } ckrm_shares_t;
 
 #define CKRM_SHARE_UNCHANGED     (-1)  
@@ -64,17 +66,17 @@ typedef struct ckrm_shares {
 #define CKRM_SHARE_DFLT_TOTAL_GUARANTEE (100) 
 #define CKRM_SHARE_DFLT_MAX_LIMIT     (100)  
 
-/*
+/******************************************************************************
  * RESOURCE CONTROLLERS
- */
+ *****************************************************************************/
 
 /* resource controller callback structure */
 
 typedef struct ckrm_res_ctlr {
        char res_name[CKRM_MAX_RES_NAME];
-       int res_hdepth;         /* maximum hierarchy */
-       int resid;              /* (for now) same as the enum resid */
-       struct ckrm_classtype *classtype;    /* classtype owning this res ctlr */
+       int res_hdepth;         // maximum hierarchy
+       int resid;              // (for now) same as the enum resid
+       struct ckrm_classtype *classtype;    // classtype owning this res ctlr
 
        /* allocate/free new resource class object for resource controller */
        void *(*res_alloc) (struct ckrm_core_class * this,
@@ -92,15 +94,16 @@ typedef struct ckrm_res_ctlr {
        int (*set_config) (void *, const char *cfgstr);
 
        void (*change_resclass) (void *, void *, void *);
+
 } ckrm_res_ctlr_t;
 
-/*
+/******************************************************************************
  * CKRM_CLASSTYPE
  *
  * A <struct ckrm_classtype> object describes a dimension for CKRM to classify 
  * along. Need to provide methods to create and manipulate class objects in
  * this dimension
- */
+ *****************************************************************************/
 
 /* list of predefined class types, we always recognize */
 #define CKRM_CLASSTYPE_TASK_CLASS    0
@@ -110,42 +113,41 @@ typedef struct ckrm_res_ctlr {
 #define CKRM_MAX_TYPENAME_LEN       32
 
 typedef struct ckrm_classtype {
-       /* TODO: Review for cache alignment */
+       /* Hubertus:   Rearrange slots later for cache friendliness */
 
        /* resource controllers */
-
-       spinlock_t res_ctlrs_lock;  /* protect res ctlr related data */
-       int max_res_ctlrs;          /* max number of res ctlrs allowed */
-       int max_resid;              /* max resid used */
-       int resid_reserved;         /* max number of reserved controllers */
-       long bit_res_ctlrs;         /* bitmap of resource ID used */
+       spinlock_t res_ctlrs_lock;  // protect res ctlr related data
+       int max_res_ctlrs;          // max number of res ctlrs allowed 
+       int max_resid;              // max resid used                      
+       int resid_reserved;         // max number of reserved controllers  
+       long bit_res_ctlrs;         // bitmap of resource ID used              
        atomic_t nr_resusers[CKRM_MAX_RES_CTLRS];
        ckrm_res_ctlr_t *res_ctlrs[CKRM_MAX_RES_CTLRS];
 
+
        /* state about my classes */
 
        struct ckrm_core_class *default_class;  
-       struct list_head classes;  /* link all classes of this classtype */
+       struct list_head classes;  // link all classes of this classtype
        int num_classes;         
 
        /* state about my ce interaction */
-       atomic_t ce_regd;               /* if CE registered */
-       int ce_cb_active;               /* if Callbacks active */
-       atomic_t ce_nr_users;           /* number of active transient calls */
-       struct ckrm_eng_callback ce_callbacks;  /* callback engine */
-
-       /* Begin classtype-rcfs private data. No rcfs/fs specific types used.  */
-
-       int mfidx;              /* Index into genmfdesc array used to initialize */
-       void *mfdesc;           /* Array of descriptors of root and magic files */
-       int mfcount;            /* length of above array */
-       void *rootde;           /* root dentry created by rcfs */
-       /* End rcfs private data */
-
-       char name[CKRM_MAX_TYPENAME_LEN]; /* currently same as mfdesc[0]->name  */
-                                         /* but could be different */
-       int typeID;                       /* unique TypeID */
-       int maxdepth;                     /* maximum depth supported */
+       atomic_t ce_regd;               // if CE registered
+       int ce_cb_active;       // if Callbacks active
+       atomic_t ce_nr_users;   // number of active transient calls 
+       struct ckrm_eng_callback ce_callbacks;  // callback engine
+
+       // Begin classtype-rcfs private data. No rcfs/fs specific types used. 
+       int mfidx;              // Index into genmfdesc array used to initialize
+       void *mfdesc;           // Array of descriptors of root and magic files
+       int mfcount;            // length of above array 
+       void *rootde;           // root dentry created by rcfs
+       // End rcfs private data 
+
+       char name[CKRM_MAX_TYPENAME_LEN]; // currently same as mfdesc[0]->name 
+                                         // but could be different
+       int typeID;             // unique TypeID
+       int maxdepth;           // maximum depth supported
 
        /* functions to be called on any class type by external API's */
 
@@ -169,15 +171,15 @@ typedef struct ckrm_classtype {
 
        /* class initialization for new RC */
        void (*add_resctrl) (struct ckrm_core_class *, int resid);      
+
 } ckrm_classtype_t;
 
-/*
+/******************************************************************************
  * CKRM CORE CLASS
  *      common part to any class structure (i.e. instance of a classtype)
- */
+ ******************************************************************************/
 
-/*
- * basic definition of a hierarchy that is to be used by the the CORE classes
+/* basic definition of a hierarchy that is to be used by the CORE classes
  * and can be used by the resource class objects
  */
 
@@ -191,20 +193,21 @@ typedef struct ckrm_hnode {
 
 typedef struct ckrm_core_class {
        struct ckrm_classtype *classtype;       
-       void *res_class[CKRM_MAX_RES_CTLRS];    /* resource classes */
-       spinlock_t class_lock;                  /* protects list,array above */
+       void *res_class[CKRM_MAX_RES_CTLRS];    // resource classes 
+       spinlock_t class_lock;                  // protects list,array above 
 
-       struct list_head objlist;               /* generic object list */
-       struct list_head clslist;               /* peer classtype classes */
-       struct dentry *dentry;                  /* dentry of inode in the RCFS */
+       
+       struct list_head objlist;               // generic object list 
+       struct list_head clslist;               // peer classtype classes
+       struct dentry *dentry;                  // dentry of inode in the RCFS
        int magic;
 
-       struct ckrm_hnode hnode;                /* hierarchy */
-       rwlock_t hnode_rwlock;                  /* protects hnode above. */
+       struct ckrm_hnode hnode;                // hierarchy
+       rwlock_t hnode_rwlock;                  // protects hnode above.
        atomic_t refcnt;
        const char *name;
-       int delayed;                            /* core deletion delayed  */
-                                               /* because of race conditions */
+       int delayed;                            // core deletion delayed 
+                                               // because of race conditions
 } ckrm_core_class_t;
 
 /* type coerce between derived class types and ckrm core class type */
@@ -216,9 +219,9 @@ typedef struct ckrm_core_class {
 /* what type is a class of ISA */
 #define class_isa(clsptr)          (class_core(clsptr)->classtype)
 
-/*
+/******************************************************************************
  * OTHER
- */
+ ******************************************************************************/
 
 #define ckrm_get_res_class(rescls, resid, type) \
        ((type*) (((resid != -1) && ((rescls) != NULL) \
@@ -235,12 +238,28 @@ extern int ckrm_init_core_class(struct ckrm_classtype *clstype,
                                struct ckrm_core_class *parent,
                                const char *name);
 extern int ckrm_release_core_class(struct ckrm_core_class *);  
-
-/* TODO: can disappear after cls del debugging */
-
+// Hubertus .. can disappear after cls del debugging
 extern struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *type,
                                                 const char *resname);
 
+#if 0
+
+// Hubertus ... need to straighten out all these I don't think we will even 
+// call this or are we 
+
+/* interface to the RCFS filesystem */
+extern struct ckrm_core_class *ckrm_alloc_core_class(struct ckrm_core_class *,
+                                                    const char *, int);
+
+// Reclassify the given pid to the given core class by force
+extern void ckrm_forced_reclassify_pid(int, struct ckrm_core_class *);
+
+// Reclassify the given net_struct  to the given core class by force
+extern void ckrm_forced_reclassify_laq(struct ckrm_net_struct *,
+                                      struct ckrm_core_class *);
+
+#endif
+
 extern void ckrm_lock_hier(struct ckrm_core_class *);
 extern void ckrm_unlock_hier(struct ckrm_core_class *);
 extern struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *,
@@ -271,6 +290,12 @@ extern int ckrm_class_set_shares(struct ckrm_core_class *core,
 extern int ckrm_class_reset_stats(struct ckrm_core_class *core,
                                  const char *resname, const char *unused);
 
+#if 0
+extern void ckrm_ns_hold(struct ckrm_net_struct *);
+extern void ckrm_ns_put(struct ckrm_net_struct *);
+extern void *ckrm_set_rootcore_byname(char *, void *);
+#endif
+
 static inline void ckrm_core_grab(struct ckrm_core_class *core)
 {
        if (core)
@@ -279,7 +304,7 @@ static inline void ckrm_core_grab(struct ckrm_core_class *core)
 
 static inline void ckrm_core_drop(struct ckrm_core_class *core)
 {
-       /* only make definition available in this context */
+       // only make definition available in this context
        extern void ckrm_free_core_class(struct ckrm_core_class *core);
        if (core && (atomic_dec_and_test(&core->refcnt)))
                ckrm_free_core_class(core);
@@ -290,14 +315,11 @@ static inline unsigned int ckrm_is_core_valid(ckrm_core_class_t * core)
        return (core && (core->magic == CKRM_CORE_MAGIC));
 }
 
-/*
- * iterate through all associate resource controllers:
- * requires following arguments (ckrm_core_class *cls, 
- *                               ckrm_res_ctrl   *ctlr,
- *                               void            *robj,
- *                               int              bmap)
- */
-
+// iterate through all associate resource controllers:
+// requires following arguments (ckrm_core_class *cls, 
+//                               ckrm_res_ctrl   *ctlr,
+//                               void            *robj,
+//                               int              bmap)
 #define forall_class_resobjs(cls,rcbs,robj,bmap)                       \
        for ( bmap=((cls->classtype)->bit_res_ctlrs) ;                  \
             ({ int rid; ((rid=ffs(bmap)-1) >= 0) &&                    \
@@ -307,15 +329,36 @@ static inline unsigned int ckrm_is_core_valid(ckrm_core_class_t * core)
            )
 
 extern struct ckrm_classtype *ckrm_classtypes[];       
-
-/*
+/* should provide a different interface */
+
+/*-----------------------------------------------------------------------------
+ * CKRM event callback specification for the classtypes or resource controllers 
+ *   typically an array is specified using CKRM_EVENT_SPEC terminated with 
+ *   CKRM_EVENT_SPEC_LAST and then that array is registered using
+ *   ckrm_register_event_set.
+ *   Individual registration of event_cb is also possible
+ *-----------------------------------------------------------------------------*/
+
+struct ckrm_event_spec {
+       enum ckrm_event ev;
+       struct ckrm_hook_cb cb;
+};
+#define CKRM_EVENT_SPEC(EV,FCT) { CKRM_EVENT_##EV, \
+                                       { (ckrm_event_cb)FCT, NULL } }
+
+int ckrm_register_event_set(struct ckrm_event_spec especs[]);
+int ckrm_unregister_event_set(struct ckrm_event_spec especs[]);
+int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb);
+int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb);
+
+/******************************************************************************
  * CE Invocation interface
- */
+ ******************************************************************************/
 
 #define ce_protect(ctype)      (atomic_inc(&((ctype)->ce_nr_users)))
 #define ce_release(ctype)      (atomic_dec(&((ctype)->ce_nr_users)))
 
-/* CE Classification callbacks with */
+// CE Classification callbacks with 
 
 #define CE_CLASSIFY_NORET(ctype, event, objs_to_classify...)           \
 do {                                                                   \
@@ -341,15 +384,17 @@ do {                                                                      \
                                                cls,objs_to_classify);  \
 } while (0)
 
-/*
+/***************
  * RCFS related 
- */
+ ***************/
 
 /* vars needed by other modules/core */
 
 extern int rcfs_mounted;
 extern int rcfs_engine_regd;
 
-#endif /* CONFIG_CKRM */
-#endif /* __KERNEL__ */
-#endif /* _LINUX_CKRM_RC_H */
+#endif                         // CONFIG_CKRM
+
+#endif                         // __KERNEL__
+
+#endif                         // _LINUX_CKRM_RC_H
index 5949af1..5650dd3 100644 (file)
@@ -1,32 +1,3 @@
-/* ckrm_tc.h - Header file to be used by task class users
- *
- * Copyright (C) Hubertus Franke, IBM Corp. 2003, 2004
- * 
- * Provides data structures, macros and kernel API for the 
- * classtype, taskclass.
- *
- * Latest version, more details at http://ckrm.sf.net
- * 
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-/* Changes
- *
- * 12 Apr 2004
- *        Created.
- */
-
-#ifndef _LINUX_CKRM_TC_H_
-#define _LINUX_CKRM_TC_H_
-
-#ifdef CONFIG_CKRM_TYPE_TASKCLASS
 #include <linux/ckrm_rc.h>
 
 #define TASK_CLASS_TYPE_NAME "taskclass"
@@ -39,12 +10,4 @@ typedef struct ckrm_task_class {
 // which has the mfdesc entry that taskclass wants to use
 #define TC_MF_IDX  0
 
-extern int ckrm_forced_reclassify_pid(int, struct ckrm_task_class *);
-
-#else // CONFIG_CKRM_TYPE_TASKCLASS
-
-#define ckrm_forced_reclassify_pid(a, b) (0)
-
-#endif
-
-#endif // _LINUX_CKRM_TC_H_
+extern int ckrm_forced_reclassify_pid(int pid, struct ckrm_task_class *cls);
index f614539..d0b4530 100644 (file)
  *
  */
 
+/* Changes
+ *
+ * 31 Mar 2004
+ *    Created.
+ */
+
 #ifndef _LINUX_CKRM_TSK_H
 #define _LINUX_CKRM_TSK_H
 
-#ifdef CONFIG_CKRM_TYPE_TASKCLASS
 #include <linux/ckrm_rc.h>
 
-typedef int (*get_ref_t) (struct ckrm_core_class *, int);
-typedef void (*put_ref_t) (struct ckrm_core_class *);
+typedef int (*get_ref_t) (void *, int);
+typedef void (*put_ref_t) (void *);
 
-extern int numtasks_get_ref(struct ckrm_core_class *, int);
-extern void numtasks_put_ref(struct ckrm_core_class *);
+extern int numtasks_get_ref(void *, int);
+extern void numtasks_put_ref(void *);
 extern void ckrm_numtasks_register(get_ref_t, put_ref_t);
 
-#else /* CONFIG_CKRM_TYPE_TASKCLASS */
-
-#define numtasks_get_ref(core_class, ref) (1)
-#define numtasks_put_ref(core_class)  do {} while (0)
-
-#endif /* CONFIG_CKRM_TYPE_TASKCLASS */
-#endif /* _LINUX_CKRM_RES_H */
+#endif                         // _LINUX_CKRM_RES_H
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
deleted file mode 100644 (file)
index 11c65e9..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <linux/kexec.h>
-#include <linux/smp_lock.h>
-#include <linux/device.h>
-#include <linux/proc_fs.h>
-#ifdef CONFIG_CRASH_DUMP
-#include <asm/crash_dump.h>
-#endif
-
-extern unsigned long saved_max_pfn;
-extern struct memelfnote memelfnote;
-extern int notesize(struct memelfnote *);
-extern char *storenote(struct memelfnote *, char *);
-extern void elf_kcore_store_hdr(char *, int, int, struct kcore_list *);
-
-#ifdef CONFIG_CRASH_DUMP
-extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, int);
-extern void __crash_machine_kexec(void);
-extern int crash_dump_on;
-static inline void crash_machine_kexec(void)
-{
-        __crash_machine_kexec();
-}
-#else
-#define crash_machine_kexec()  do { } while(0)
-#endif
-
-
-#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_PROC_FS)
-extern void crash_enable_by_proc(void);
-extern void crash_create_proc_entry(void);
-#else
-#define crash_enable_by_proc() do { } while(0)
-#define crash_create_proc_entry() do { } while(0)
-#endif
index 3cbf85e..6a2190d 100644 (file)
@@ -43,7 +43,6 @@
 #include <linux/types.h>           
 #include <linux/ckrm.h>
 #include <linux/ckrm_ce.h>
-#include <linux/taskdelays.h>
 
 #define CRBCE_UKCC_NAME   "crbce_ukcc"
 #define CRBCE_UKCC_PATH   "/mnt/relayfs"
@@ -101,7 +100,7 @@ struct crbce_hdr {
 struct crbce_hdr_ts {
        int type;
        pid_t pid;
-       uint32_t timestamp;  /* in msecs */
+       uint32_t jiffies;
        uint64_t cls;
 };
 
index 907c3c6..5f82699 100644 (file)
@@ -30,6 +30,7 @@ static inline void devpts_pty_kill(int number) { }
 
 #endif
 
-#define DEVPTS_SUPER_MAGIC     0x1cd1
+#define DEVPTS_SUPER_MAGIC 0x1cd1
+
 
 #endif /* _LINUX_DEVPTS_FS_H */
index df06c88..00c690f 100644 (file)
@@ -20,7 +20,7 @@
 #ifndef _DUMP_H
 #define _DUMP_H
 
-#if defined(CONFIG_CRASH_DUMP)
+#if defined(CONFIG_CRASH_DUMP) || defined (CONFIG_CRASH_DUMP_MODULE)
 
 #include <linux/list.h>
 #include <linux/notifier.h>
index 12788c8..c2bd10f 100644 (file)
@@ -197,8 +197,8 @@ struct ext2_group_desc
 #define EXT2_RESERVED_FL               0x80000000 /* reserved for ext2 lib */
 
 #ifdef CONFIG_VSERVER_LEGACY
-#define EXT2_FL_USER_VISIBLE           0x0C03DFFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE                0x0C0380FF /* User modifiable flags */
+#define EXT2_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
 #else
 #define EXT2_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT2_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
@@ -320,7 +320,7 @@ struct ext2_inode {
 #define EXT2_MOUNT_NO_UID32            0x0200  /* Disable 32-bit UIDs */
 #define EXT2_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
 #define EXT2_MOUNT_POSIX_ACL           0x8000  /* POSIX Access Control Lists */
-#define EXT2_MOUNT_TAG_XID             (1<<24) /* Enable Context Tags */
+#define EXT2_MOUNT_TAG_XID             (1<<16) /* Enable Context Tags */
 
 #define clear_opt(o, opt)              o &= ~EXT2_MOUNT_##opt
 #define set_opt(o, opt)                        o |= EXT2_MOUNT_##opt
index 818516b..d11f5d1 100644 (file)
@@ -190,15 +190,12 @@ struct ext3_group_desc
 #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
 
 #ifdef CONFIG_VSERVER_LEGACY
-#define EXT3_FL_USER_VISIBLE           0x0C03DFFF /* User visible flags */
-#define EXT3_FL_USER_MODIFIABLE                0x0C0380FF /* User modifiable flags */
+#define EXT3_FL_USER_VISIBLE           0x0803DFFF /* User visible flags */
+#define EXT3_FL_USER_MODIFIABLE                0x080380FF /* User modifiable flags */
 #else
 #define EXT3_FL_USER_VISIBLE           0x0003DFFF /* User visible flags */
 #define EXT3_FL_USER_MODIFIABLE                0x000380FF /* User modifiable flags */
 #endif
-#ifdef CONFIG_VSERVER_LEGACY
-#define EXT3_IOC_SETXID                        FIOC_SETXIDJ
-#endif
 
 /*
  * Inode dynamic state flags
@@ -369,7 +366,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL           0x08000 /* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION         0x10000 /* Preallocation */
 #define EXT3_MOUNT_BARRIER             0x20000 /* Use block barriers */
-#define EXT3_MOUNT_TAG_XID             (1<<24) /* Enable Context Tags */
+#define EXT3_MOUNT_TAG_XID             0x40000 /* Enable Context Tags */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
index 93a6a10..667bf73 100644 (file)
@@ -126,7 +126,6 @@ extern int dir_notify_enable;
 #define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
 #define MS_ONE_SECOND  (1<<17) /* fs has 1 sec a/m/ctime resolution */
 #define MS_TAGXID      (1<<24) /* tag inodes with context information */
-#define MS_XID         (1<<25) /* use specific xid for this mount */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -153,8 +152,8 @@ extern int dir_notify_enable;
 #define S_DIRSYNC      64      /* Directory modifications are synchronous */
 #define S_NOCMTIME     128     /* Do not update file c/mtime */
 #define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
-#define S_BARRIER      1024    /* Barrier for chroot() */
-#define S_IUNLINK      2048    /* Immutable unlink */
+#define S_BARRIER      512     /* Barrier for chroot() */
+#define S_IUNLINK      1024    /* Immutable unlink */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
index e7fb254..fbcabf7 100644 (file)
@@ -80,14 +80,13 @@ static inline void arch_free_page(struct page *page, int order) { }
 
 extern struct page *
 FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
-
-static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
-                                               unsigned int order)
+static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, 
+                                            unsigned int order)
 {
        if (unlikely(order >= MAX_ORDER))
                return NULL;
 
-       return __alloc_pages(gfp_mask, order,
+       return __alloc_pages(gfp_mask, order, 
                NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
 }
 
diff --git a/include/linux/ghash.h b/include/linux/ghash.h
new file mode 100644 (file)
index 0000000..a247988
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * include/linux/ghash.h -- generic hashing with fuzzy retrieval
+ *
+ * (C) 1997 Thomas Schoebel-Theuer
+ *
+ * The algorithms implemented here seem to be a completely new invention,
+ * and I'll publish the fundamentals in a paper.
+ */
+
+#ifndef _GHASH_H
+#define _GHASH_H
+/* HASHSIZE _must_ be a power of two!!! */
+
+
+#define DEF_HASH_FUZZY_STRUCTS(NAME,HASHSIZE,TYPE) \
+\
+struct NAME##_table {\
+       TYPE * hashtable[HASHSIZE];\
+       TYPE * sorted_list;\
+       int nr_entries;\
+};\
+\
+struct NAME##_ptrs {\
+       TYPE * next_hash;\
+       TYPE * prev_hash;\
+       TYPE * next_sorted;\
+       TYPE * prev_sorted;\
+};
+
+#define DEF_HASH_FUZZY(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\
+\
+LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
+{\
+       int ix = HASHFN(elem->KEY);\
+       TYPE ** base = &tbl->hashtable[ix];\
+       TYPE * ptr = *base;\
+       TYPE * prev = NULL;\
+\
+       tbl->nr_entries++;\
+       while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\
+               base = &ptr->PTRS.next_hash;\
+               prev = ptr;\
+               ptr = *base;\
+       }\
+       elem->PTRS.next_hash = ptr;\
+       elem->PTRS.prev_hash = prev;\
+       if(ptr) {\
+               ptr->PTRS.prev_hash = elem;\
+       }\
+       *base = elem;\
+\
+       ptr = prev;\
+       if(!ptr) {\
+               ptr = tbl->sorted_list;\
+               prev = NULL;\
+       } else {\
+               prev = ptr->PTRS.prev_sorted;\
+       }\
+       while(ptr) {\
+               TYPE * next = ptr->PTRS.next_hash;\
+               if(next && KEYCMP(next->KEY, elem->KEY)) {\
+                       prev = ptr;\
+                       ptr = next;\
+               } else if(KEYCMP(ptr->KEY, elem->KEY)) {\
+                       prev = ptr;\
+                       ptr = ptr->PTRS.next_sorted;\
+               } else\
+                       break;\
+       }\
+       elem->PTRS.next_sorted = ptr;\
+       elem->PTRS.prev_sorted = prev;\
+       if(ptr) {\
+               ptr->PTRS.prev_sorted = elem;\
+       }\
+       if(prev) {\
+               prev->PTRS.next_sorted = elem;\
+       } else {\
+               tbl->sorted_list = elem;\
+       }\
+}\
+\
+LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
+{\
+       TYPE * next = elem->PTRS.next_hash;\
+       TYPE * prev = elem->PTRS.prev_hash;\
+\
+       tbl->nr_entries--;\
+       if(next)\
+               next->PTRS.prev_hash = prev;\
+       if(prev)\
+               prev->PTRS.next_hash = next;\
+       else {\
+               int ix = HASHFN(elem->KEY);\
+               tbl->hashtable[ix] = next;\
+       }\
+\
+       next = elem->PTRS.next_sorted;\
+       prev = elem->PTRS.prev_sorted;\
+       if(next)\
+               next->PTRS.prev_sorted = prev;\
+       if(prev)\
+               prev->PTRS.next_sorted = next;\
+       else\
+               tbl->sorted_list = next;\
+}\
+\
+LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\
+{\
+       int ix = HASHFN(pos); /* bucket index via the HASHFN macro parameter, as in insert/remove */\
+       TYPE * ptr = tbl->hashtable[ix];\
+       while(ptr && KEYCMP(ptr->KEY, pos)) /* walk the bucket chain while KEYCMP is non-zero */\
+               ptr = ptr->PTRS.next_hash;\
+       if(ptr && !KEYEQ(ptr->KEY, pos)) /* exact lookup: stopping on a non-equal key is a miss */\
+               ptr = NULL;\
+       return ptr; /* matching element, or NULL when no entry has key pos */\
+}\
+\
+LINKAGE TYPE * find_##NAME##_hash_fuzzy(struct NAME##_table * tbl, KEYTYPE pos)\
+{\
+       int ix;\
+       int offset;\
+       TYPE * ptr;\
+       TYPE * next;\
+\
+       ptr = tbl->sorted_list;\
+       if(!ptr || KEYCMP(pos, ptr->KEY))\
+               return NULL;\
+       ix = HASHFN(pos);\
+       offset = HASHSIZE;\
+       do {\
+               offset >>= 1;\
+               next = tbl->hashtable[(ix+offset) & ((HASHSIZE)-1)];\
+               if(next && (KEYCMP(next->KEY, pos) || KEYEQ(next->KEY, pos))\
+                  && KEYCMP(ptr->KEY, next->KEY))\
+                       ptr = next;\
+       } while(offset);\
+\
+       for(;;) {\
+               next = ptr->PTRS.next_hash;\
+               if(next) {\
+                       if(KEYCMP(next->KEY, pos)) {\
+                               ptr = next;\
+                               continue;\
+                       }\
+               }\
+               next = ptr->PTRS.next_sorted;\
+               if(next && KEYCMP(next->KEY, pos)) {\
+                       ptr = next;\
+                       continue;\
+               }\
+               return ptr;\
+       }\
+       return NULL;\
+}
+
+/* LINKAGE - empty or "static", depending on whether you want the definitions to
+ *     be public or not
+ * NAME - an identifier token pasted into the generated struct and function
+ *     names to make this hash table type distinct from any others
+ * HASHSIZE - number of buckets
+ * TYPE - type of data contained in the buckets - must be a structure, one 
+ *     field is of type NAME_ptrs, another is the hash key
+ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that
+ *     field
+ * KEYTYPE - type of the key field within TYPE
+ * KEY - name of the key field within TYPE
+ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the
+ *     prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal, 
+ *     non-zero for not equal
+ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE),
+ *     it returns a number in the range 0 ... HASHSIZE - 1
+ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call
+ * DEF_HASH.
+ */
+
+#define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \
+\
+struct NAME##_table {\
+       TYPE * hashtable[HASHSIZE];\
+       int nr_entries;\
+};\
+\
+struct NAME##_ptrs {\
+       TYPE * next_hash;\
+       TYPE * prev_hash;\
+};
+
+#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\
+\
+LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
+{\
+       int ix = HASHFN(elem->KEY);\
+       TYPE ** base = &tbl->hashtable[ix];\
+       TYPE * ptr = *base;\
+       TYPE * prev = NULL;\
+\
+       tbl->nr_entries++;\
+       while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\
+               base = &ptr->PTRS.next_hash;\
+               prev = ptr;\
+               ptr = *base;\
+       }\
+       elem->PTRS.next_hash = ptr;\
+       elem->PTRS.prev_hash = prev;\
+       if(ptr) {\
+               ptr->PTRS.prev_hash = elem;\
+       }\
+       *base = elem;\
+}\
+\
+LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
+{\
+       TYPE * next = elem->PTRS.next_hash;\
+       TYPE * prev = elem->PTRS.prev_hash;\
+\
+       tbl->nr_entries--;\
+       if(next)\
+               next->PTRS.prev_hash = prev;\
+       if(prev)\
+               prev->PTRS.next_hash = next;\
+       else {\
+               int ix = HASHFN(elem->KEY);\
+               tbl->hashtable[ix] = next;\
+       }\
+}\
+\
+LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\
+{\
+       int ix = HASHFN(pos);\
+       TYPE * ptr = tbl->hashtable[ix];\
+       while(ptr && KEYCMP(ptr->KEY, pos))\
+               ptr = ptr->PTRS.next_hash;\
+       return ptr;\
+}
+
+#endif
index d3950fc..7153aef 100644 (file)
@@ -30,7 +30,6 @@ static inline void *kmap(struct page *page)
 
 #define kmap_atomic(page, idx)         page_address(page)
 #define kunmap_atomic(addr, idx)       do { } while (0)
-#define kmap_atomic_pfn(pfn, idx) ((char *)page_address(pfn_to_page(pfn)))
 #define kmap_atomic_to_page(ptr)       virt_to_page(ptr)
 
 #endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
deleted file mode 100644 (file)
index 523c45a..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef LINUX_KEXEC_H
-#define LINUX_KEXEC_H
-
-#ifdef CONFIG_KEXEC
-#include <linux/types.h>
-#include <linux/list.h>
-#include <asm/kexec.h>
-
-/*
- * This structure is used to hold the arguments that are used when loading
- * kernel binaries.
- */
-
-typedef unsigned long kimage_entry_t;
-#define IND_DESTINATION  0x1
-#define IND_INDIRECTION  0x2
-#define IND_DONE         0x4
-#define IND_SOURCE       0x8
-
-#define KEXEC_SEGMENT_MAX 8
-struct kexec_segment {
-       void *buf;
-       size_t bufsz;
-       void *mem;
-       size_t memsz;
-};
-
-struct kimage {
-       kimage_entry_t head;
-       kimage_entry_t *entry;
-       kimage_entry_t *last_entry;
-
-       unsigned long destination;
-
-       unsigned long start;
-       struct page *control_code_page;
-
-       unsigned long nr_segments;
-       struct kexec_segment segment[KEXEC_SEGMENT_MAX];
-
-       struct list_head control_pages;
-       struct list_head dest_pages;
-       struct list_head unuseable_pages;
-};
-
-
-/* kexec interface functions */
-extern void machine_kexec(struct kimage *image);
-extern int machine_kexec_prepare(struct kimage *image);
-extern void machine_kexec_cleanup(struct kimage *image);
-extern asmlinkage long sys_kexec(unsigned long entry, long nr_segments,
-       struct kexec_segment *segments);
-extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
-extern struct kimage *kexec_image;
-extern struct kimage *kexec_crash_image;
-#endif
-#endif /* LINUX_KEXEC_H */
index d025bcb..98f4ae8 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
 #include <linux/fs.h>
-#include <linux/ckrm_mem.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -239,7 +238,7 @@ struct page {
                                           not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
 #ifdef CONFIG_CKRM_RES_MEM
-       struct ckrm_zone *ckrm_zone;
+       void *memclass;
 #endif // CONFIG_CKRM_RES_MEM
 };
 
@@ -619,6 +618,9 @@ int clear_page_dirty_for_io(struct page *page);
  */
 typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
 
+asmlinkage long do_mprotect(struct mm_struct *mm, unsigned long start, 
+                       size_t len, unsigned long prot);
+
 /*
  * Add an aging callback.  The int is the number of 'seeks' it takes
  * to recreate one of the objects that these functions age.
@@ -695,9 +697,10 @@ static inline unsigned long get_unmapped_area(struct file * file, unsigned long
        return get_unmapped_area_prot(file, addr, len, pgoff, flags, 0);        
 }
 
-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
-       unsigned long len, unsigned long prot,
-       unsigned long flag, unsigned long pgoff);
+extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, 
+                                  unsigned long addr, unsigned long len,
+                                  unsigned long prot, unsigned long flag,
+                                  unsigned long pgoff);
 
 static inline unsigned long do_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
@@ -707,7 +710,8 @@ static inline unsigned long do_mmap(struct file *file, unsigned long addr,
        if ((offset + PAGE_ALIGN(len)) < offset)
                goto out;
        if (!(offset & ~PAGE_MASK))
-               ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
+               ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, 
+                                   offset >> PAGE_SHIFT);
 out:
        return ret;
 }
index 0402eb0..5edb739 100644 (file)
@@ -3,9 +3,7 @@
 static inline void
 add_page_to_active_list(struct zone *zone, struct page *page)
 {
-#ifndef CONFIG_CKRM_RES_MEM
        list_add(&page->lru, &zone->active_list);
-#endif
        zone->nr_active++;
        ckrm_mem_inc_active(page);
 }
@@ -13,9 +11,7 @@ add_page_to_active_list(struct zone *zone, struct page *page)
 static inline void
 add_page_to_inactive_list(struct zone *zone, struct page *page)
 {
-#ifndef CONFIG_CKRM_RES_MEM
        list_add(&page->lru, &zone->inactive_list);
-#endif
        zone->nr_inactive++;
        ckrm_mem_inc_inactive(page);
 }
@@ -23,9 +19,7 @@ add_page_to_inactive_list(struct zone *zone, struct page *page)
 static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
-#ifndef CONFIG_CKRM_RES_MEM
        list_del(&page->lru);
-#endif
        zone->nr_active--;
        ckrm_mem_dec_active(page);
 }
@@ -33,9 +27,7 @@ del_page_from_active_list(struct zone *zone, struct page *page)
 static inline void
 del_page_from_inactive_list(struct zone *zone, struct page *page)
 {
-#ifndef CONFIG_CKRM_RES_MEM
        list_del(&page->lru);
-#endif
        zone->nr_inactive--;
        ckrm_mem_dec_inactive(page);
 }
@@ -43,9 +35,7 @@ del_page_from_inactive_list(struct zone *zone, struct page *page)
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-#ifndef CONFIG_CKRM_RES_MEM
        list_del(&page->lru);
-#endif
        if (PageActive(page)) {
                ClearPageActive(page);
                zone->nr_active--;
index 08dd6a0..f13406a 100644 (file)
@@ -138,10 +138,8 @@ struct zone {
 
        /* Fields commonly accessed by the page reclaim scanner */
        spinlock_t              lru_lock;       
-#ifndef CONFIG_CKRM_RES_MEM
        struct list_head        active_list;
        struct list_head        inactive_list;
-#endif
        unsigned long           nr_scan_active;
        unsigned long           nr_scan_inactive;
        unsigned long           nr_active;
index 03c6f6d..8821af0 100644 (file)
@@ -22,7 +22,6 @@
 #define MNT_RDONLY     8
 #define MNT_NOATIME    16
 #define MNT_NODIRATIME 32
-#define MNT_XID                256
 
 struct vfsmount
 {
@@ -40,7 +39,6 @@ struct vfsmount
        struct list_head mnt_list;
        struct list_head mnt_fslink;    /* link in fs-specific expiry list */
        struct namespace *mnt_namespace; /* containing namespace */
-       xid_t mnt_xid;                  /* xid tagging used for vfsmount */
 };
 
 #define        MNT_IS_RDONLY(m)        ((m) && ((m)->mnt_flags & MNT_RDONLY))
index c99f570..990fff9 100644 (file)
 #define PG_swapcache           16      /* Swap page: swp_entry_t in private */
 #define PG_mappedtodisk                17      /* Has blocks allocated on-disk */
 #define PG_reclaim             18      /* To be reclaimed asap */
-
 #ifdef CONFIG_CKRM_RES_MEM
-#define PG_ckrm_account                19      /* This page is accounted by CKRM */
+#define PG_ckrm_account        19      /* This page is accounted by CKRM */
 #endif
 
-
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
  * allowed.
@@ -302,12 +300,6 @@ extern unsigned long __read_page_state(unsigned offset);
 #define PageSwapCache(page)    0
 #endif
 
-#ifdef CONFIG_CKRM_RES_MEM
-#define CkrmAccount(page)      test_bit(PG_ckrm_account, &(page)->flags)
-#define SetCkrmAccount(page)   set_bit(PG_ckrm_account, &(page)->flags)
-#define ClearCkrmAccount(page) clear_bit(PG_ckrm_account, &(page)->flags)
-#endif
-
 struct page;   /* forward declaration */
 
 int test_clear_page_dirty(struct page *page);
diff --git a/include/linux/proc_mm.h b/include/linux/proc_mm.h
new file mode 100644 (file)
index 0000000..254f8b4
--- /dev/null
@@ -0,0 +1,48 @@
+/* 
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __PROC_MM_H
+#define __PROC_MM_H
+
+#include "linux/sched.h"
+
+#define MM_MMAP 54
+#define MM_MUNMAP 55
+#define MM_MPROTECT 56
+#define MM_COPY_SEGMENTS 57
+
+struct mm_mmap {
+       unsigned long addr;
+       unsigned long len;
+       unsigned long prot;
+       unsigned long flags;
+       unsigned long fd;
+       unsigned long offset;
+};
+
+struct mm_munmap {
+       unsigned long addr;
+       unsigned long len;      
+};
+
+struct mm_mprotect {
+       unsigned long addr;
+       unsigned long len;
+        unsigned int prot;
+};
+
+struct proc_mm_op { /* one operation request: an opcode plus its per-operation arguments */
+       int op; /* NOTE(review): presumably one of the MM_* codes defined above, selecting the member of u -- confirm against the consumer */
+       union {
+               struct mm_mmap mmap; /* arguments when the request is an mmap */
+               struct mm_munmap munmap; /* arguments when the request is a munmap */
+               struct mm_mprotect mprotect; /* arguments when the request is an mprotect */
+               int copy_segments; /* NOTE(review): meaning of this int is not visible here (possibly an fd for MM_COPY_SEGMENTS) -- confirm */
+       } u;
+};
+
+extern struct mm_struct *proc_mm_get_mm(int fd);
+
+#endif
index e7846e1..bb29813 100644 (file)
@@ -3,14 +3,15 @@
 
 #include <linux/kernel.h>
 #include <linux/fs.h>
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_ce.h>
 
-/*
- * The following declarations cannot be included in any of ckrm*.h files 
- * without jumping hoops. Remove later when rearrangements done
- */
+/* The following declarations cannot be included in any of ckrm*.h files 
+   without jumping hoops. Remove later when rearrangements done */
+
+// Hubertus .. taken out 
+//extern ckrm_res_callback_t ckrm_res_ctlrs[CKRM_MAX_RES_CTLRS];
 
 #define RCFS_MAGIC     0x4feedbac
 #define RCFS_MAGF_NAMELEN 20
@@ -35,12 +36,10 @@ struct rcfs_magf {
 };
 
 struct rcfs_mfdesc {
-       struct rcfs_magf *rootmf;       /* Root directory and its magic files */
-       int rootmflen;                  /* length of above array */
-       /*
-        * Can have a different magf describing magic files 
-        * for non-root entries too.
-        */
+       struct rcfs_magf *rootmf;       // Root directory and its magic files
+       int rootmflen;                  // length of above array
+       // Can have a different magf describing magic files 
+       // for non-root entries too
 };
 
 extern struct rcfs_mfdesc *genmfdesc[];
@@ -75,7 +74,7 @@ extern struct file_operations members_fileops;
 extern struct file_operations reclassify_fileops;
 extern struct file_operations rcfs_file_operations;
 
-/* Callbacks into rcfs from ckrm */
+// Callbacks into rcfs from ckrm 
 
 typedef struct rcfs_functions {
        int (*mkroot) (struct rcfs_magf *, int, struct dentry **);
@@ -89,8 +88,9 @@ int rcfs_deregister_classtype(ckrm_classtype_t *);
 int rcfs_mkroot(struct rcfs_magf *, int, struct dentry **);
 int rcfs_rmroot(struct dentry *);
 
-#define RCFS_ROOT "/rcfs"      /* TODO:  Should use the mount point */
+#define RCFS_ROOT "/rcfs"      // Hubertus .. we should use the mount point 
+                               // instead of hardcoded
 extern struct dentry *rcfs_rootde;
 extern rbce_eng_callback_t rcfs_eng_callbacks;
 
-#endif /* _LINUX_RCFS_H */
+#endif                         /* _LINUX_RCFS_H */
index 5460e94..d60fafc 100644 (file)
@@ -51,8 +51,6 @@ extern void machine_restart(char *cmd);
 extern void machine_halt(void);
 extern void machine_power_off(void);
 
-extern void machine_shutdown(void);
-
 #endif
 
 #endif /* _LINUX_REBOOT_H */
index 9446bc5..553a799 100644 (file)
@@ -458,7 +458,6 @@ enum reiserfs_mount_options {
     REISERFS_BARRIER_NONE,
     REISERFS_BARRIER_FLUSH,
     REISERFS_TAGXID,
-
     /* Actions on error */
     REISERFS_ERROR_PANIC,
     REISERFS_ERROR_RO,
index 9cb07d1..96b615c 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
-#include <linux/vs_base.h>
 
 struct exec_domain;
 extern int exec_shield;
@@ -950,28 +949,15 @@ static inline int sas_ss_flags(unsigned long sp)
 #ifdef CONFIG_SECURITY
 /* code is in security.c */
 extern int capable(int cap);
-extern int vx_capable(int cap, int ccap);
 #else
 static inline int capable(int cap)
 {
-       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
-               return 0;
        if (cap_raised(current->cap_effective, cap)) {
                current->flags |= PF_SUPERPRIV;
                return 1;
        }
        return 0;
 }
-
-static inline int vx_capable(int cap, int ccap)
-{
-       if (cap_raised(current->cap_effective, cap) &&
-               vx_ccaps(ccap)) {
-               current->flags |= PF_SUPERPRIV;
-               return 1;
-       }
-       return 0;
-}
 #endif
 
 
index cf93d31..a7f776e 100644 (file)
@@ -134,8 +134,8 @@ enum
        KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */
        KERN_HZ_TIMER=65,       /* int: hz timer on or off */
        KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */
-       KERN_VSHELPER=67,       /* string: path to vshelper policy agent */
-       KERN_SETUID_DUMPABLE=68, /* int: behaviour of dumps for setuid core */
+       KERN_SETUID_DUMPABLE=67, /* int: behaviour of dumps for setuid core */
+       KERN_VSHELPER=68,       /* string: path to vshelper policy agent */
        KERN_DUMP=69,           /* dir: dump parameters */
 };
 
index acb39e2..d12ee2b 100644 (file)
@@ -11,8 +11,6 @@
 
 #include <asm/atomic.h>
 
-#define SYSFS_SUPER_MAGIC      0x62656572
-
 struct kobject;
 struct module;
 
index b4e4618..e5682d8 100644 (file)
@@ -1,28 +1,3 @@
-/* taskdelays.h - for delay accounting
- *
- * Copyright (C) Hubertus Franke, IBM Corp. 2003, 2004
- * 
- * Has the data structure for delay counting.
- *
- * Latest version, more details at http://ckrm.sf.net
- * 
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * 
- *
- */
-
-/* Changes
- *
- * 24 Aug 2003
- *    Created.
- */
-
 #ifndef _LINUX_TASKDELAYS_H
 #define _LINUX_TASKDELAYS_H
 
index a1d34b6..ed23add 100644 (file)
@@ -1,7 +1,6 @@
 #ifndef _VX_VS_BASE_H
 #define _VX_VS_BASE_H
 
-
 #include "vserver/context.h"
 
 
@@ -18,7 +17,7 @@
  * check current context for ADMIN/WATCH and
  * optionally agains supplied argument
  */
-static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
 {
        if (mode & VX_ARG_MASK) {
                if ((mode & VX_IDENT) &&
@@ -35,8 +34,7 @@ static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
                        return 1;
        }
        return (((mode & VX_ADMIN) && (cid == 0)) ||
-               ((mode & VX_WATCH) && (cid == 1)) ||
-               ((mode & VX_HOSTID) && (id == 0)));
+               ((mode & VX_WATCH) && (cid == 1)));
 }
 
 
@@ -53,8 +51,6 @@ static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
 
 #define vx_mask_mask(v,f,m)    (((v) & ~(m)) | ((v) & (f) & (m)))
 
-#define vx_check_bit(v,n)      ((v) & (1LL << (n)))
-
 
 /* context flags */
 
@@ -82,13 +78,6 @@ static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
 #define vx_ccaps(c)    vx_info_ccaps(current->vx_info,(c))
 
 
-#define __vx_mcaps(v)  ((v) ? (v)->vx_ccaps >> 32UL : ~0 )
-
-#define vx_info_mcaps(v,c)     (__vx_mcaps(v) & (c))
-
-#define vx_mcaps(c)    vx_info_mcaps(current->vx_info,(c))
-
-
 #define vx_current_bcaps() \
        (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
        current->vx_info->vx_bcaps : cap_bset)
@@ -99,6 +88,4 @@ static inline int __vx_check(xid_t cid, xid_t id, unsigned int mode)
        (current->vx_info->vx_initpid == (n)))
 
 
-#else
-#warning duplicate inclusion
 #endif
index cc41014..9d119cd 100644 (file)
@@ -3,9 +3,16 @@
 
 
 #include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
 #include "vserver/debug.h"
 
 
+extern int proc_pid_vx_info(struct task_struct *, char *);
+
+
 #define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
 
 static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
@@ -13,28 +20,25 @@ static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
 {
        if (!vxi)
                return NULL;
-
        vxlprintk(VXD_CBIT(xid, 2), "get_vx_info(%p[#%d.%d])",
                vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
-       vxh_get_vx_info(vxi);
-
        atomic_inc(&vxi->vx_usecnt);
        return vxi;
 }
 
+
+extern void free_vx_info(struct vx_info *);
+
 #define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
 
 static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
 {
        if (!vxi)
                return;
-
        vxlprintk(VXD_CBIT(xid, 2), "put_vx_info(%p[#%d.%d])",
                vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
-       vxh_put_vx_info(vxi);
-
        if (atomic_dec_and_test(&vxi->vx_usecnt))
                free_vx_info(vxi);
 }
@@ -54,7 +58,6 @@ static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
                vxi?atomic_read(&vxi->vx_usecnt):0,
                vxi?atomic_read(&vxi->vx_refcnt):0,
                _file, _line);
-       vxh_set_vx_info(vxi, vxp);
 
        atomic_inc(&vxi->vx_refcnt);
        vxo = xchg(vxp, __get_vx_info(vxi, _file, _line));
@@ -77,7 +80,6 @@ static inline void __clr_vx_info(struct vx_info **vxp,
                vxo?atomic_read(&vxo->vx_usecnt):0,
                vxo?atomic_read(&vxo->vx_refcnt):0,
                _file, _line);
-       vxh_clr_vx_info(vxo, vxp);
 
        if (atomic_dec_and_test(&vxo->vx_refcnt))
                unhash_vx_info(vxo);
@@ -85,7 +87,7 @@ static inline void __clr_vx_info(struct vx_info **vxp,
 }
 
 
-#define task_get_vx_info(p)    __task_get_vx_info(p,__FILE__,__LINE__)
+#define task_get_vx_info(i)    __task_get_vx_info(i,__FILE__,__LINE__)
 
 static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
        const char *_file, int _line)
index 64b38c2..616706e 100644 (file)
@@ -1,9 +1,9 @@
 #ifndef _VX_VS_CVIRT_H
 #define _VX_VS_CVIRT_H
 
-
 #include "vserver/cvirt.h"
 #include "vserver/debug.h"
+#include "vs_base.h"
 
 
 /* utsname virtualization */
@@ -23,7 +23,6 @@ static inline struct new_utsname *vx_new_utsname(void)
 
 #define vx_info_map_pid(v,p) \
        __vx_info_map_pid((v), (p), __FUNC__, __FILE__, __LINE__)
-#define vx_info_map_tgid(v,p)  vx_info_map_pid(v,p)
 #define vx_map_pid(p)  vx_info_map_pid(current->vx_info, p)
 #define vx_map_tgid(p) vx_map_pid(p)
 
@@ -71,9 +70,11 @@ static inline void vx_activate_task(struct task_struct *p)
 {
        struct vx_info *vxi;
 
+       // if ((vxi = task_get_vx_info(p))) {
        if ((vxi = p->vx_info)) {
                vx_update_load(vxi);
                atomic_inc(&vxi->cvirt.nr_running);
+               // put_vx_info(vxi);
        }
 }
 
@@ -81,28 +82,14 @@ static inline void vx_deactivate_task(struct task_struct *p)
 {
        struct vx_info *vxi;
 
+       // if ((vxi = task_get_vx_info(p))) {
        if ((vxi = p->vx_info)) {
                vx_update_load(vxi);
                atomic_dec(&vxi->cvirt.nr_running);
+               // put_vx_info(vxi);
        }
 }
 
-static inline void vx_uninterruptible_inc(struct task_struct *p)
-{
-       struct vx_info *vxi;
-
-       if ((vxi = p->vx_info))
-               atomic_inc(&vxi->cvirt.nr_uninterruptible);
-}
-
-static inline void vx_uninterruptible_dec(struct task_struct *p)
-{
-       struct vx_info *vxi;
-
-       if ((vxi = p->vx_info))
-               atomic_dec(&vxi->cvirt.nr_uninterruptible);
-}
-
 
 #else
 #warning duplicate inclusion
index b927687..805c257 100644 (file)
@@ -1,7 +1,11 @@
 #ifndef _VX_VS_DLIMIT_H
 #define _VX_VS_DLIMIT_H
 
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
 
+#include "vserver/context.h"
 #include "vserver/dlimit.h"
 #include "vserver/debug.h"
 
@@ -108,7 +112,7 @@ static inline int __dl_alloc_inode(struct super_block *sb,
                dli->dl_inodes_used++;
 #if 0
        else
-               vxwprintk("DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d",
+               printk("VSW: DLIMIT hit (%p,#%d), inode %d>=%d @ %s:%d\n",
                        sb, xid,
                        dli->dl_inodes_used, dli->dl_inodes_total,
                        file, line);
index 561df5a..3098797 100644 (file)
@@ -1,7 +1,11 @@
 #ifndef _VX_VS_LIMIT_H
 #define _VX_VS_LIMIT_H
 
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
 
+#include "vserver/context.h"
 #include "vserver/limit.h"
 #include "vserver/debug.h"
 
@@ -31,7 +35,7 @@ static inline void __vx_acc_cres(struct vx_info *vxi,
        __vx_acc_cres((v), (r), (d), (p), __FILE__, __LINE__)
 
 #define vx_acc_cres_cond(x,d,p,r) \
-       __vx_acc_cres(((x) == vx_current_xid()) ? current->vx_info : 0,\
+       __vx_acc_cres(((x) == current->xid) ? current->vx_info : 0,\
        (r), (d), (p), __FILE__, __LINE__)
 
 #define vx_nproc_inc(p) \
index 2509432..43cde1a 100644 (file)
@@ -1,7 +1,11 @@
 #ifndef _VX_VS_MEMORY_H
 #define _VX_VS_MEMORY_H
 
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
 
+#include "vserver/context.h"
 #include "vserver/limit.h"
 #include "vserver/debug.h"
 
index 9461b86..4bbf923 100644 (file)
@@ -1,11 +1,17 @@
 #ifndef _NX_VS_NETWORK_H
 #define _NX_VS_NETWORK_H
 
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
 
 #include "vserver/network.h"
 #include "vserver/debug.h"
 
 
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
+
 #define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
 
 static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
@@ -20,6 +26,10 @@ static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
        return nxi;
 }
 
+
+#define free_nx_info(i) \
+       call_rcu(&i->nx_rcu, rcu_free_nx_info);
+
 #define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
 
 static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
diff --git a/include/linux/vs_sched.h b/include/linux/vs_sched.h
deleted file mode 100644 (file)
index 0eb1ee6..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef _VX_VS_SCHED_H
-#define _VX_VS_SCHED_H
-
-
-#include "vserver/sched.h"
-
-
-#define VAVAVOOM_RATIO          50
-
-#define MAX_PRIO_BIAS           20
-#define MIN_PRIO_BIAS          -20
-
-
-static inline int vx_tokens_avail(struct vx_info *vxi)
-{
-       return atomic_read(&vxi->sched.tokens);
-}
-
-static inline void vx_consume_token(struct vx_info *vxi)
-{
-       atomic_dec(&vxi->sched.tokens);
-}
-
-static inline int vx_need_resched(struct task_struct *p)
-{
-#ifdef CONFIG_VSERVER_HARDCPU
-       struct vx_info *vxi = p->vx_info;
-#endif
-       int slice = --p->time_slice;
-
-#ifdef CONFIG_VSERVER_HARDCPU
-       if (vxi) {
-               int tokens;
-
-               if ((tokens = vx_tokens_avail(vxi)) > 0)
-                       vx_consume_token(vxi);
-               /* for tokens > 0, one token was consumed */
-               if (tokens < 2)
-                       return 1;
-       }
-#endif
-       return (slice == 0);
-}
-
-
-static inline void vx_onhold_inc(struct vx_info *vxi)
-{
-       int onhold = atomic_read(&vxi->cvirt.nr_onhold);
-
-       atomic_inc(&vxi->cvirt.nr_onhold);
-       if (!onhold)
-               vxi->cvirt.onhold_last = jiffies;
-}
-
-static inline void __vx_onhold_update(struct vx_info *vxi)
-{
-       int cpu = smp_processor_id();
-       uint32_t now = jiffies;
-       uint32_t delta = now - vxi->cvirt.onhold_last;
-
-       vxi->cvirt.onhold_last = now;
-       vxi->sched.cpu[cpu].hold_ticks += delta;
-}
-
-static inline void vx_onhold_dec(struct vx_info *vxi)
-{
-       if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
-               __vx_onhold_update(vxi);
-}
-
-#else
-#warning duplicate inclusion
-#endif
index d5505c5..ed3451a 100644 (file)
@@ -2,6 +2,14 @@
 #define _VX_VS_SOCKET_H
 
 
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/network.h"
 #include "vserver/debug.h"
 
 
diff --git a/include/linux/vserver.h b/include/linux/vserver.h
new file mode 100644 (file)
index 0000000..6c31e76
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _LINUX_VSERVER_H
+#define _LINUX_VSERVER_H
+
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
+
+extern long vs_reboot(unsigned int, void *);
+
+#endif
index 1fe76e7..4061e7b 100644 (file)
@@ -3,75 +3,22 @@
 
 #include <linux/types.h>
 
-
 #define MAX_S_CONTEXT  65535   /* Arbitrary limit */
 #define MIN_D_CONTEXT  49152   /* dynamic contexts start here */
 
 #define VX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
 
-/* context flags */
-
-#define VXF_INFO_LOCK          0x00000001
-#define VXF_INFO_SCHED         0x00000002
-#define VXF_INFO_NPROC         0x00000004
-#define VXF_INFO_PRIVATE       0x00000008
-
-#define VXF_INFO_INIT          0x00000010
-#define VXF_INFO_HIDE          0x00000020
-#define VXF_INFO_ULIMIT                0x00000040
-#define VXF_INFO_NSPACE                0x00000080
-
-#define VXF_SCHED_HARD         0x00000100
-#define VXF_SCHED_PRIO         0x00000200
-#define VXF_SCHED_PAUSE                0x00000400
-
-#define VXF_VIRT_MEM           0x00010000
-#define VXF_VIRT_UPTIME                0x00020000
-#define VXF_VIRT_CPU           0x00040000
-#define VXF_VIRT_LOAD          0x00080000
-
-#define VXF_HIDE_MOUNT         0x01000000
-#define VXF_HIDE_NETIF         0x02000000
-
-#define VXF_STATE_SETUP                (1ULL<<32)
-#define VXF_STATE_INIT         (1ULL<<33)
-
-#define VXF_FORK_RSS           (1ULL<<48)
-#define VXF_PROLIFIC           (1ULL<<49)
-
-#define VXF_IGNEG_NICE         (1ULL<<52)
-
-#define VXF_ONE_TIME           (0x0003ULL<<32)
-
-
-/* context caps */
-
-#define        VXC_CAP_MASK            0x00000000
-
-#define VXC_SET_UTSNAME                0x00000001
-#define VXC_SET_RLIMIT         0x00000002
-
-#define VXC_RAW_ICMP           0x00000100
-
-#define VXC_SECURE_MOUNT       0x00010000
-#define VXC_SECURE_REMOUNT     0x00020000
-
-
-/* vshelper sync commands */
-
-#define        VS_CONTEXT_CREATED      1
-#define        VS_CONTEXT_DESTROY      2
-
-
 #ifdef __KERNEL__
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
 
-#include "limit_def.h"
-#include "sched_def.h"
-#include "cvirt_def.h"
+#define _VX_INFO_DEF_
+#include "cvirt.h"
+#include "limit.h"
+#include "sched.h"
+#undef _VX_INFO_DEF_
 
 struct vx_info {
        struct hlist_node vx_hlist;             /* linked list of contexts */
@@ -101,7 +48,6 @@ struct vx_info {
        char vx_name[65];                       /* vserver name */
 };
 
-
 /* status flags */
 
 #define VXS_HASHED     0x0001
@@ -115,8 +61,7 @@ struct vx_info {
 
 #define VX_ADMIN       0x0001
 #define VX_WATCH       0x0002
-#define VX_HIDE                0x0004
-#define VX_HOSTID      0x0008
+#define VX_DUMMY       0x0008
 
 #define VX_IDENT       0x0010
 #define VX_EQUIV       0x0020
@@ -133,10 +78,9 @@ struct vx_info {
 
 struct rcu_head;
 
+// extern void rcu_free_vx_info(struct rcu_head *);
 extern void unhash_vx_info(struct vx_info *);
 
-extern void free_vx_info(struct vx_info *);
-
 extern struct vx_info *locate_vx_info(int);
 extern struct vx_info *locate_or_create_vx_info(int);
 
@@ -145,11 +89,111 @@ extern int vx_info_is_hashed(xid_t);
 
 extern int vx_migrate_task(struct task_struct *, struct vx_info *);
 
-// extern int proc_pid_vx_info(struct task_struct *, char *);
+#endif /* __KERNEL__ */
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
+#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_xid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
+#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
+
+struct vcmd_vx_info_v0 {
+       uint32_t xid;
+       uint32_t initpid;
+       /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_vx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_ctx_create                VC_CMD(VPROC, 1, 0)
+#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 0)
+
+#ifdef __KERNEL__
+extern int vc_ctx_create(uint32_t, void __user *);
+extern int vc_ctx_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
+#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
+
+struct vcmd_ctx_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_cflags(uint32_t, void __user *);
+extern int vc_set_cflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VXF_INFO_LOCK          0x00000001
+#define VXF_INFO_SCHED         0x00000002
+#define VXF_INFO_NPROC         0x00000004
+#define VXF_INFO_PRIVATE       0x00000008
+
+#define VXF_INFO_INIT          0x00000010
+#define VXF_INFO_HIDE          0x00000020
+#define VXF_INFO_ULIMIT                0x00000040
+#define VXF_INFO_NSPACE                0x00000080
+
+#define VXF_SCHED_HARD         0x00000100
+#define VXF_SCHED_PRIO         0x00000200
+#define VXF_SCHED_PAUSE                0x00000400
+
+#define VXF_VIRT_MEM           0x00010000
+#define VXF_VIRT_UPTIME                0x00020000
+#define VXF_VIRT_CPU           0x00040000
+#define VXF_VIRT_LOAD          0x00080000
+
+#define VXF_HIDE_MOUNT         0x01000000
+#define VXF_HIDE_NETIF         0x02000000
+
+#define VXF_STATE_SETUP                (1ULL<<32)
+#define VXF_STATE_INIT         (1ULL<<33)
+
+#define VXF_FORK_RSS           (1ULL<<48)
+#define VXF_PROLIFIC           (1ULL<<49)
+
+#define VXF_IGNEG_NICE         (1ULL<<52)
 
-extern long vs_context_state(unsigned int);
+#define VXF_ONE_TIME           (0x0003ULL<<32)
+
+#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 0)
+#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 0)
+
+struct vcmd_ctx_caps_v0 {
+       uint64_t bcaps;
+       uint64_t ccaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ccaps(uint32_t, void __user *);
+extern int vc_set_ccaps(uint32_t, void __user *);
 
 #endif /* __KERNEL__ */
-#else  /* _VX_CONTEXT_H */
-#warning duplicate inclusion
+
+#define VXC_SET_UTSNAME                0x00000001
+#define VXC_SET_RLIMIT         0x00000002
+
+#define VXC_RAW_ICMP           0x00000100
+
+#define VXC_SECURE_MOUNT       0x00010000
+#define VXC_SECURE_REMOUNT     0x00020000
+
+
 #endif /* _VX_CONTEXT_H */
diff --git a/include/linux/vserver/context_cmd.h b/include/linux/vserver/context_cmd.h
deleted file mode 100644 (file)
index 637a0d8..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef _VX_CONTEXT_CMD_H
-#define _VX_CONTEXT_CMD_H
-
-
-/* vinfo commands */
-
-#define VCMD_task_xid          VC_CMD(VINFO, 1, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_xid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_vx_info           VC_CMD(VINFO, 5, 0)
-
-struct vcmd_vx_info_v0 {
-       uint32_t xid;
-       uint32_t initpid;
-       /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_vx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-
-/* context commands */
-
-#define VCMD_ctx_create                VC_CMD(VPROC, 1, 0)
-#define VCMD_ctx_migrate       VC_CMD(PROCMIG, 1, 0)
-
-#ifdef __KERNEL__
-extern int vc_ctx_create(uint32_t, void __user *);
-extern int vc_ctx_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-
-/* flag commands */
-
-#define VCMD_get_cflags                VC_CMD(FLAGS, 1, 0)
-#define VCMD_set_cflags                VC_CMD(FLAGS, 2, 0)
-
-struct vcmd_ctx_flags_v0 {
-       uint64_t flagword;
-       uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_cflags(uint32_t, void __user *);
-extern int vc_set_cflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-
-/* context caps commands */
-
-#define VCMD_get_ccaps         VC_CMD(FLAGS, 3, 0)
-#define VCMD_set_ccaps         VC_CMD(FLAGS, 4, 0)
-
-struct vcmd_ctx_caps_v0 {
-       uint64_t bcaps;
-       uint64_t ccaps;
-       uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ccaps(uint32_t, void __user *);
-extern int vc_set_ccaps(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-#endif /* _VX_CONTEXT_CMD_H */
index 31c47a7..4a4bd17 100644 (file)
+/* _VX_CVIRT_H defined below */
+
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/utsname.h>
+#include <linux/rwsem.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/kernel_stat.h>
+#include <asm/atomic.h>
+
+/* context sub struct */
+
+struct _vx_cvirt {
+       int max_threads;                /* maximum allowed threads */
+       atomic_t nr_threads;            /* number of current threads */
+       atomic_t nr_running;            /* number of running threads */
+
+       atomic_t nr_onhold;             /* processes on hold */
+       uint32_t onhold_last;           /* jiffies when put on hold */
+
+       struct timespec bias_idle;
+       struct timespec bias_uptime;    /* context creation point */
+
+       struct new_utsname utsname;
+
+       spinlock_t load_lock;           /* lock for the load averages */
+       uint32_t load_last;             /* last time load was calculated */
+       uint32_t load[3];               /* load averages 1,5,15 */
+
+       struct cpu_usage_stat cpustat[NR_CPUS];
+};
+
+struct sock_acc {
+       atomic_t count;
+       atomic_t total;
+};
+
+struct _vx_cacct {
+       unsigned long total_forks;
+
+       struct sock_acc sock[5][3];
+};
+
+
+static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
+{
+       return atomic_read(&cacct->sock[type][pos].count);
+}
+
+
+static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
+{
+       return atomic_read(&cacct->sock[type][pos].total);
+}
+
+
+extern uint64_t vx_idle_jiffies(void);
+
+static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
+{
+       uint64_t idle_jiffies = vx_idle_jiffies();
+
+       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
+       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
+       atomic_set(&cvirt->nr_threads, 0);
+       atomic_set(&cvirt->nr_running, 0);
+       atomic_set(&cvirt->nr_onhold, 0);
+
+       down_read(&uts_sem);
+       cvirt->utsname = system_utsname;
+       up_read(&uts_sem);
+
+       spin_lock_init(&cvirt->load_lock);
+       cvirt->load_last = jiffies;
+       cvirt->load[0] = 0;
+       cvirt->load[1] = 0;
+       cvirt->load[2] = 0;
+}
+
+static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+       int value;
+
+       if ((value = atomic_read(&cvirt->nr_threads)))
+               printk("!!! cvirt: %p[nr_threads] = %d on exit.\n",
+                       cvirt, value);
+       if ((value = atomic_read(&cvirt->nr_running)))
+               printk("!!! cvirt: %p[nr_running] = %d on exit.\n",
+                       cvirt, value);
+#endif
+       return;
+}
+
+static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
+{
+       int i,j;
+
+       for (i=0; i<5; i++) {
+               for (j=0; j<3; j++) {
+                       atomic_set(&cacct->sock[i][j].count, 0);
+                       atomic_set(&cacct->sock[i][j].total, 0);
+               }
+       }
+}
+
+static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
+{
+       return;
+}
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+
+static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
+{
+       int length = 0;
+       int a, b, c;
+
+       length += sprintf(buffer + length,
+               "BiasUptime:\t%lu.%02lu\n",
+                       (unsigned long)cvirt->bias_uptime.tv_sec,
+                       (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
+       length += sprintf(buffer + length,
+               "SysName:\t%.*s\n"
+               "NodeName:\t%.*s\n"
+               "Release:\t%.*s\n"
+               "Version:\t%.*s\n"
+               "Machine:\t%.*s\n"
+               "DomainName:\t%.*s\n"
+               ,__NEW_UTS_LEN, cvirt->utsname.sysname
+               ,__NEW_UTS_LEN, cvirt->utsname.nodename
+               ,__NEW_UTS_LEN, cvirt->utsname.release
+               ,__NEW_UTS_LEN, cvirt->utsname.version
+               ,__NEW_UTS_LEN, cvirt->utsname.machine
+               ,__NEW_UTS_LEN, cvirt->utsname.domainname
+               );
+
+       a = cvirt->load[0] + (FIXED_1/200);
+       b = cvirt->load[1] + (FIXED_1/200);
+       c = cvirt->load[2] + (FIXED_1/200);
+       length += sprintf(buffer + length,
+               "nr_threads:\t%d\n"
+               "nr_running:\t%d\n"
+               "nr_onhold:\t%d\n"
+               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
+               ,atomic_read(&cvirt->nr_threads)
+               ,atomic_read(&cvirt->nr_running)
+               ,atomic_read(&cvirt->nr_onhold)
+               ,LOAD_INT(a), LOAD_FRAC(a)
+               ,LOAD_INT(b), LOAD_FRAC(b)
+               ,LOAD_INT(c), LOAD_FRAC(c)
+               );
+       return length;
+}
+
+static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
+{
+       int i,j, length = 0;
+       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
+
+       for (i=0; i<5; i++) {
+               length += sprintf(buffer + length,
+                       "%s:", type[i]);
+               for (j=0; j<3; j++) {
+                       length += sprintf(buffer + length,
+                               "\t%12lu/%-12lu"
+                               ,vx_sock_count(cacct, i, j)
+                               ,vx_sock_total(cacct, i, j)
+                               );
+               }
+               buffer[length++] = '\n';
+       }
+       length += sprintf(buffer + length,
+               "forks:\t%lu\n", cacct->total_forks);
+       return length;
+}
+
+#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_CVIRT_H
 #define _VX_CVIRT_H
 
+#include "switch.h"
+
+/*  cvirt vserver commands */
+
+
 #ifdef __KERNEL__
 
 struct timespec;
 
 void vx_vsi_uptime(struct timespec *, struct timespec *);
 
-
 struct vx_info;
 
 void vx_update_load(struct vx_info *);
 
+
 #endif /* __KERNEL__ */
-#else  /* _VX_CVIRT_H */
-#warning duplicate inclusion
+
 #endif /* _VX_CVIRT_H */
+#endif
diff --git a/include/linux/vserver/cvirt_cmd.h b/include/linux/vserver/cvirt_cmd.h
deleted file mode 100644 (file)
index 368f527..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _VX_CVIRT_CMD_H
-#define _VX_CVIRT_CMD_H
-
-/*  cvirt vserver commands */
-
-
-#endif /* _VX_CVIRT_CMD_H */
diff --git a/include/linux/vserver/cvirt_def.h b/include/linux/vserver/cvirt_def.h
deleted file mode 100644 (file)
index bf4bd84..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef _VX_CVIRT_DEF_H
-#define _VX_CVIRT_DEF_H
-
-#include <linux/jiffies.h>
-#include <linux/utsname.h>
-#include <linux/spinlock.h>
-#include <linux/time.h>
-#include <asm/atomic.h>
-
-
-struct _vx_usage_stat {
-       uint64_t user;
-       uint64_t nice;
-       uint64_t system;
-       uint64_t softirq;
-       uint64_t irq;
-       uint64_t idle;
-       uint64_t iowait;
-};
-
-/* context sub struct */
-
-struct _vx_cvirt {
-       int max_threads;                /* maximum allowed threads */
-       atomic_t nr_threads;            /* number of current threads */
-       atomic_t nr_running;            /* number of running threads */
-       atomic_t nr_uninterruptible;    /* number of uninterruptible threads */
-
-       atomic_t nr_onhold;             /* processes on hold */
-       uint32_t onhold_last;           /* jiffies when put on hold */
-
-       struct timespec bias_idle;
-       struct timespec bias_uptime;    /* context creation point */
-       uint64_t bias_clock;            /* offset in clock_t */
-
-       struct new_utsname utsname;
-
-       spinlock_t load_lock;           /* lock for the load averages */
-       atomic_t load_updates;          /* nr of load updates done so far */
-       uint32_t load_last;             /* last time load was cacled */
-       uint32_t load[3];               /* load averages 1,5,15 */
-
-       struct _vx_usage_stat cpustat[NR_CPUS];
-};
-
-struct _vx_sock_acc {
-       atomic_t count;
-       atomic_t total;
-};
-
-/* context sub struct */
-
-struct _vx_cacct {
-       unsigned long total_forks;
-
-       struct _vx_sock_acc sock[5][3];
-};
-
-#endif /* _VX_CVIRT_DEF_H */
index f6b27cf..9529c3c 100644 (file)
@@ -10,6 +10,8 @@
 #define VXD_QUAD(v)    VXD_QPOS(v,0), VXD_QPOS(v,1),           \
                        VXD_QPOS(v,2), VXD_QPOS(v,3)
 
+// #define     VXD_HERE        __FILE__, __LINE__
+
 #define __FUNC__       __func__
 
 
@@ -25,12 +27,11 @@ extern unsigned int vx_debug_cvirt;
 
 
 #define VX_LOGLEVEL    "vxD: "
-#define VX_WARNLEVEL   KERN_WARNING "vxW: "
 
 #define vxdprintk(c,f,x...)                                    \
        do {                                                    \
                if (c)                                          \
-                       printk(VX_LOGLEVEL f "\n" , ##x);       \
+                       printk(VX_LOGLEVEL f "\n", x);          \
        } while (0)
 
 #define vxlprintk(c,f,x...)                                    \
@@ -45,19 +46,7 @@ extern unsigned int vx_debug_cvirt;
                        printk(VX_LOGLEVEL f " %s@%s:%d\n", x); \
        } while (0)
 
-
-#define vxwprintk(c,f,x...)                                    \
-       do {                                                    \
-               if (c)                                          \
-                       printk(VX_WARNLEVEL f "\n" , ##x);      \
-       } while (0)
-
-
-#define vxd_path(d,m)                                          \
-       ({ static char _buffer[PATH_MAX];                       \
-          d_path((d), (m), _buffer, sizeof(_buffer)); })
-
-#else  /* CONFIG_VSERVER_DEBUG */
+#else
 
 #define vx_debug_switch 0
 #define vx_debug_xid   0
@@ -70,199 +59,9 @@ extern unsigned int vx_debug_cvirt;
 #define vxdprintk(x...) do { } while (0)
 #define vxlprintk(x...) do { } while (0)
 #define vxfprintk(x...) do { } while (0)
-#define vxwprintk(x...) do { } while (0)
-
-#define vxd_path       "<none>"
-
-#endif /* CONFIG_VSERVER_DEBUG */
-
-
-/* history stuff */
-
-#ifdef CONFIG_VSERVER_HISTORY
-
-
-extern unsigned volatile int vxh_active;
-
-struct _vxhe_vxi {
-       struct vx_info *ptr;
-       unsigned xid;
-       unsigned usecnt;
-       unsigned refcnt;
-};
-
-struct _vxhe_set_clr {
-       void *data;
-};
-
-struct _vxhe_loc_lookup {
-       unsigned arg;
-};
-
-enum {
-       VXH_UNUSED=0,
-       VXH_THROW_OOPS=1,
-
-       VXH_GET_VX_INFO,
-       VXH_PUT_VX_INFO,
-       VXH_SET_VX_INFO,
-       VXH_CLR_VX_INFO,
-       VXH_ALLOC_VX_INFO,
-       VXH_DEALLOC_VX_INFO,
-       VXH_HASH_VX_INFO,
-       VXH_UNHASH_VX_INFO,
-       VXH_LOC_VX_INFO,
-       VXH_LOOKUP_VX_INFO,
-};
-
-struct _vx_hist_entry {
-       void *loc;
-       unsigned short seq;
-       unsigned short type;
-       struct _vxhe_vxi vxi;
-       union {
-               struct _vxhe_set_clr sc;
-               struct _vxhe_loc_lookup ll;
-       };
-};
-
-struct _vx_hist_entry *vxh_advance(void *loc);
-
-#define        VXH_HERE()              \
-       ({ __label__ here;      \
-               here:;          \
-               &&here; })
-
-
-
-static inline void __vxh_copy_vxi(struct _vx_hist_entry *entry, struct vx_info *vxi)
-{
-       entry->vxi.ptr = vxi;
-       if (vxi) {
-               entry->vxi.usecnt = atomic_read(&vxi->vx_usecnt);
-               entry->vxi.refcnt = atomic_read(&vxi->vx_refcnt);
-               entry->vxi.xid = vxi->vx_id;
-       }
-}
-
-static inline void vxh_throw_oops(void)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       entry->type = VXH_THROW_OOPS;
-
-       /* prevent further acquisition */
-       vxh_active = 0;
-}
-
-static inline void vxh_get_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_GET_VX_INFO;
-}
-
-static inline void vxh_put_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_PUT_VX_INFO;
-}
-
-static inline void vxh_set_vx_info(struct vx_info *vxi, void *data)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->sc.data = data;
-       entry->type = VXH_SET_VX_INFO;
-}
-
-static inline void vxh_clr_vx_info(struct vx_info *vxi, void *data)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->sc.data = data;
-       entry->type = VXH_CLR_VX_INFO;
-}
-
-static inline void vxh_alloc_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_ALLOC_VX_INFO;
-}
-
-static inline void vxh_dealloc_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_DEALLOC_VX_INFO;
-}
-
-static inline void vxh_hash_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_HASH_VX_INFO;
-}
-
-static inline void vxh_unhash_vx_info(struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->type = VXH_UNHASH_VX_INFO;
-}
-
-static inline void vxh_loc_vx_info(unsigned arg, struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->ll.arg = arg;
-       entry->type = VXH_LOC_VX_INFO;
-}
-
-static inline void vxh_lookup_vx_info(unsigned arg, struct vx_info *vxi)
-{
-       struct _vx_hist_entry *entry = vxh_advance(VXH_HERE());
-
-       __vxh_copy_vxi(entry, vxi);
-       entry->ll.arg = arg;
-       entry->type = VXH_LOOKUP_VX_INFO;
-}
-
-extern void vxh_dump_history(void);
-
-#else  /* CONFIG_VSERVER_HISTORY */
-
-#define        vxh_throw_oops()        do { } while (0)
-
-#define vxh_get_vx_info(v)     do { } while (0)
-#define vxh_put_vx_info(v)     do { } while (0)
-
-#define vxh_set_vx_info(v,d)   do { } while (0)
-#define vxh_clr_vx_info(v,d)   do { } while (0)
-
-#define vxh_alloc_vx_info(v)   do { } while (0)
-#define vxh_dealloc_vx_info(v) do { } while (0)
-
-#define vxh_hash_vx_info(v)    do { } while (0)
-#define vxh_unhash_vx_info(v)  do { } while (0)
-
-#define vxh_loc_vx_info(a,v)   do { } while (0)
-#define vxh_lookup_vx_info(a,v) do { } while (0)
 
-#define vxh_dump_history()     do { } while (0)
+#endif
 
 
-#endif /* CONFIG_VSERVER_HISTORY */
 
 #endif /* _VX_DEBUG_H */
diff --git a/include/linux/vserver/debug_cmd.h b/include/linux/vserver/debug_cmd.h
deleted file mode 100644 (file)
index c0cbd08..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _VX_DEBUG_CMD_H
-#define _VX_DEBUG_CMD_H
-
-
-/* debug commands */
-
-#define VCMD_dump_history      VC_CMD(DEBUG, 1, 0)
-
-#ifdef __KERNEL__
-
-extern int vc_dump_history(uint32_t);
-
-#endif /* __KERNEL__ */
-#endif /* _VX_DEBUG_CMD_H */
index 0c6587e..14a68fd 100644 (file)
@@ -79,7 +79,7 @@ extern int vc_get_dlimit(uint32_t, void __user *);
 
 typedef uint64_t dlsize_t;
 
+
 #endif /* __KERNEL__ */
-#else  /* _VX_DLIMIT_H */
-#warning duplicate inclusion
+
 #endif /* _VX_DLIMIT_H */
index d9587f2..dac07ea 100644 (file)
@@ -71,6 +71,4 @@ extern int vc_iattr_ioctl(struct dentry *de,
 #define FIOC_GETIATTR   _IOR('x', 7, long)
 #define FIOC_SETIATTR   _IOR('x', 8, long)
 
-#else  /* _VX_INODE_H */
-#warning duplicate inclusion
 #endif /* _VX_INODE_H */
index d5b8a3f..877bfb8 100644 (file)
@@ -2,6 +2,7 @@
 #define _VX_LEGACY_H
 
 #include "switch.h"
+#include "network.h"
 
 /*  compatibiliy vserver commands */
 
index 0ed0e2b..d9e1eea 100644 (file)
+/* _VX_LIMIT_H defined below */
+
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <asm/atomic.h>
+#include <asm/resource.h>
+
+/* context sub struct */
+
+#define NUM_LIMITS     20
+
+#define VLIMIT_NSOCK   16
+
+
+extern const char *vlimit_name[NUM_LIMITS];
+
+struct _vx_limit {
+       atomic_t ticks;
+
+       unsigned long rlim[NUM_LIMITS];         /* Context limit */
+       unsigned long rmax[NUM_LIMITS];         /* Context maximum */
+       atomic_t rcur[NUM_LIMITS];              /* Current value */
+       atomic_t lhit[NUM_LIMITS];              /* Limit hits */
+};
+
+/*
+ * vx_info_init_limit - reset the per-context limit accounting.
+ *
+ * For each of the NUM_LIMITS slots the limit is set to RLIM_INFINITY and
+ * the recorded maximum, the current value and the limit-hit counter are
+ * zeroed.  limit->ticks is not touched here.
+ */
+static inline void vx_info_init_limit(struct _vx_limit *limit)
+{
+       int lim;
+
+       for (lim=0; lim<NUM_LIMITS; lim++) {
+               limit->rlim[lim] = RLIM_INFINITY;
+               limit->rmax[lim] = 0;
+               atomic_set(&limit->rcur[lim], 0);
+               atomic_set(&limit->lhit[lim], 0);
+       }
+}
+
+/*
+ * vx_info_exit_limit - debug check of the limit accounting on teardown.
+ *
+ * With CONFIG_VSERVER_DEBUG set, every slot whose current value is still
+ * non-zero is reported through printk(); without it the body is empty.
+ */
+static inline void vx_info_exit_limit(struct _vx_limit *limit)
+{
+#ifdef CONFIG_VSERVER_DEBUG
+       long value;
+       unsigned int lim;
+
+       for (lim=0; lim<NUM_LIMITS; lim++) {
+               /* kept signed so the type matches %ld and an underflowed
+                  counter is reported as a negative number */
+               value = atomic_read(&limit->rcur[lim]);
+               if (value)
+                       printk("!!! limit: %p[%s,%u] = %ld on exit.\n",
+                               limit, vlimit_name[lim], lim, value);
+       }
+#endif
+}
+
+/*
+ * vx_limit_fixup - bring the recorded maxima up to date.
+ *
+ * For every slot: raise rmax to the current value if that is larger, then
+ * clamp rmax so it never exceeds the configured limit rlim.
+ */
+static inline void vx_limit_fixup(struct _vx_limit *limit)
+{
+       unsigned int lim;
+
+       for (lim=0; lim<NUM_LIMITS; lim++) {
+               int value = atomic_read(&limit->rcur[lim]);
+
+               /* a negative counter must not be widened to a huge
+                  unsigned value and stored as the maximum */
+               if (value > 0 && (unsigned long)value > limit->rmax[lim])
+                       limit->rmax[lim] = value;
+               if (limit->rmax[lim] > limit->rlim[lim])
+                       limit->rmax[lim] = limit->rlim[lim];
+       }
+}
+
+/*
+ * Format of one output row (appended to a row label): current value,
+ * recorded maximum, limit and limit-hit count, tab separated.
+ */
+#define VX_LIMIT_FMT   ":\t%10d\t%10ld\t%10ld\t%6d\n"
+
+/*
+ * Expands to the four arguments matching one VX_LIMIT_FMT row for slot r,
+ * each with a leading comma.  Relies on a variable named 'limit' being in
+ * scope at the point of use.
+ */
+#define VX_LIMIT_ARG(r)                                \
+               ,atomic_read(&limit->rcur[r])   \
+               ,limit->rmax[r]                 \
+               ,limit->rlim[r]                 \
+               ,atomic_read(&limit->lhit[r])
+
+/*
+ * vx_info_proc_limit - format the limit table of a context as text.
+ *
+ * Runs vx_limit_fixup() first, then writes six labelled rows (PROC, VM,
+ * VML, RSS, FILES, SOCK) into buffer and returns sprintf()'s result.
+ *
+ * NOTE(review): rmax/rlim are unsigned long but are formatted with %ld.
+ * NOTE(review): sprintf() is unbounded; the caller must supply a buffer
+ * large enough for all six rows -- size requirement not visible here.
+ */
+static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
+{
+       vx_limit_fixup(limit);
+       return sprintf(buffer,
+               "PROC"  VX_LIMIT_FMT
+               "VM"    VX_LIMIT_FMT
+               "VML"   VX_LIMIT_FMT
+               "RSS"   VX_LIMIT_FMT
+               "FILES" VX_LIMIT_FMT
+               "SOCK"  VX_LIMIT_FMT
+               VX_LIMIT_ARG(RLIMIT_NPROC)
+               VX_LIMIT_ARG(RLIMIT_AS)
+               VX_LIMIT_ARG(RLIMIT_MEMLOCK)
+               VX_LIMIT_ARG(RLIMIT_RSS)
+               VX_LIMIT_ARG(RLIMIT_NOFILE)
+               VX_LIMIT_ARG(VLIMIT_NSOCK)
+               );
+}
+
+#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_LIMIT_H
 #define _VX_LIMIT_H
 
+#include "switch.h"
+
+#define VXD_RLIMIT(r,l)                (VXD_CBIT(limit, (l)) && ((r) == (l)))
+
+/*  rlimit vserver commands */
+
+#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
+#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
+#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
+
+struct vcmd_ctx_rlimit_v0 {
+       uint32_t id;
+       uint64_t minimum;
+       uint64_t softlimit;
+       uint64_t maximum;
+};
+
+struct vcmd_ctx_rlimit_mask_v0 {
+       uint32_t minimum;
+       uint32_t softlimit;
+       uint32_t maximum;
+};
+
+#define CRLIM_UNSET            (0ULL)
+#define CRLIM_INFINITY         (~0ULL)
+#define CRLIM_KEEP             (~1ULL)
+
 #ifdef __KERNEL__
 
+#include <linux/compiler.h>
+
+extern int vc_get_rlimit(uint32_t, void __user *);
+extern int vc_set_rlimit(uint32_t, void __user *);
+extern int vc_get_rlimit_mask(uint32_t, void __user *);
+
 struct sysinfo;
 
 void vx_vsi_meminfo(struct sysinfo *);
 void vx_vsi_swapinfo(struct sysinfo *);
 
-#define VXD_RLIMIT(r,l)                (VXD_CBIT(limit, (l)) && ((r) == (l)))
 
-#define NUM_LIMITS     20
+#endif /* __KERNEL__ */
 
-#define VLIMIT_NSOCK   16
+#endif /* _VX_LIMIT_H */
+#endif
 
-extern const char *vlimit_name[NUM_LIMITS];
 
-#endif /* __KERNEL__ */
-#endif /* _VX_LIMIT_H */
diff --git a/include/linux/vserver/limit_cmd.h b/include/linux/vserver/limit_cmd.h
deleted file mode 100644 (file)
index a994d02..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _VX_LIMIT_CMD_H
-#define _VX_LIMIT_CMD_H
-
-/*  rlimit vserver commands */
-
-#define VCMD_get_rlimit                VC_CMD(RLIMIT, 1, 0)
-#define VCMD_set_rlimit                VC_CMD(RLIMIT, 2, 0)
-#define VCMD_get_rlimit_mask   VC_CMD(RLIMIT, 3, 0)
-
-struct vcmd_ctx_rlimit_v0 {
-       uint32_t id;
-       uint64_t minimum;
-       uint64_t softlimit;
-       uint64_t maximum;
-};
-
-struct vcmd_ctx_rlimit_mask_v0 {
-       uint32_t minimum;
-       uint32_t softlimit;
-       uint32_t maximum;
-};
-
-#define CRLIM_UNSET            (0ULL)
-#define CRLIM_INFINITY         (~0ULL)
-#define CRLIM_KEEP             (~1ULL)
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-
-extern int vc_get_rlimit(uint32_t, void __user *);
-extern int vc_set_rlimit(uint32_t, void __user *);
-extern int vc_get_rlimit_mask(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-#endif /* _VX_LIMIT_CMD_H */
diff --git a/include/linux/vserver/limit_def.h b/include/linux/vserver/limit_def.h
deleted file mode 100644 (file)
index bab1def..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _VX_LIMIT_DEF_H
-#define _VX_LIMIT_DEF_H
-
-#include <asm/atomic.h>
-#include <asm/resource.h>
-
-#include "limit.h"
-
-/* context sub struct */
-
-struct _vx_limit {
-       atomic_t ticks;
-
-       unsigned long rlim[NUM_LIMITS];         /* Context limit */
-       unsigned long rmax[NUM_LIMITS];         /* Context maximum */
-       atomic_t rcur[NUM_LIMITS];              /* Current value */
-       atomic_t lhit[NUM_LIMITS];              /* Limit hits */
-};
-
-
-#endif /* _VX_LIMIT_DEF_H */
index 72a51f6..08167a1 100644 (file)
@@ -55,6 +55,4 @@ extern int vc_cleanup_namespace(uint32_t, void __user *);
 extern int vc_set_namespace(uint32_t, void __user *);
 
 #endif /* __KERNEL__ */
-#else  /* _VX_NAMESPACE_H */
-#warning duplicate inclusion
 #endif /* _VX_NAMESPACE_H */
index b1ccb9a..e77866b 100644 (file)
@@ -1,21 +1,19 @@
 #ifndef _VX_NETWORK_H
 #define _VX_NETWORK_H
 
-#include <linux/types.h>
-
-
 #define MAX_N_CONTEXT  65535   /* Arbitrary limit */
 
 #define NX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
 
 #define NB_IPV4ROOT    16
 
-
 #ifdef __KERNEL__
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/utsname.h>
 #include <linux/rcupdate.h>
+#include <asm/resource.h>
 #include <asm/atomic.h>
 
 
@@ -45,10 +43,9 @@ struct nx_info {
 
 struct rcu_head;
 
+extern void rcu_free_nx_info(struct rcu_head *);
 extern void unhash_nx_info(struct nx_info *);
 
-extern void free_nx_info(struct nx_info *);
-
 extern struct nx_info *locate_nx_info(int);
 extern struct nx_info *locate_or_create_nx_info(int);
 
@@ -67,7 +64,89 @@ struct sock;
 
 int nx_addr_conflict(struct nx_info *, uint32_t, struct sock *);
 
+
 #endif /* __KERNEL__ */
-#else  /* _VX_NETWORK_H */
-#warning duplicate inclusion
+
+#include "switch.h"
+
+/* vinfo commands */
+
+#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
+
+#ifdef __KERNEL__
+extern int vc_task_nid(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
+
+struct vcmd_nx_info_v0 {
+       uint32_t nid;
+       /* more to come */
+};
+
+#ifdef __KERNEL__
+extern int vc_nx_info(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_net_create                VC_CMD(VNET, 1, 0)
+#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
+
+#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
+#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
+
+struct vcmd_net_nx_v0 {
+       uint16_t type;
+       uint16_t count;
+       uint32_t ip[4];
+       uint32_t mask[4];
+       /* more to come */
+};
+
+//     IPN_TYPE_IPV4
+
+
+#ifdef __KERNEL__
+extern int vc_net_create(uint32_t, void __user *);
+extern int vc_net_migrate(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
+#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
+
+struct vcmd_net_flags_v0 {
+       uint64_t flagword;
+       uint64_t mask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_nflags(uint32_t, void __user *);
+extern int vc_set_nflags(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPF_STATE_SETUP                (1ULL<<32)
+
+
+#define IPF_ONE_TIME           (0x0001ULL<<32)
+
+#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
+#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
+
+struct vcmd_net_caps_v0 {
+       uint64_t ncaps;
+       uint64_t cmask;
+};
+
+#ifdef __KERNEL__
+extern int vc_get_ncaps(uint32_t, void __user *);
+extern int vc_set_ncaps(uint32_t, void __user *);
+
+#endif /* __KERNEL__ */
+
+#define IPC_WOSSNAME           0x00000001
+
+
 #endif /* _VX_NETWORK_H */
diff --git a/include/linux/vserver/network_cmd.h b/include/linux/vserver/network_cmd.h
deleted file mode 100644 (file)
index 4403f54..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-#ifndef _VX_NETWORK_CMD_H
-#define _VX_NETWORK_CMD_H
-
-
-/* vinfo commands */
-
-#define VCMD_task_nid          VC_CMD(VINFO, 2, 0)
-
-#ifdef __KERNEL__
-extern int vc_task_nid(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_nx_info           VC_CMD(VINFO, 6, 0)
-
-struct vcmd_nx_info_v0 {
-       uint32_t nid;
-       /* more to come */
-};
-
-#ifdef __KERNEL__
-extern int vc_nx_info(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_net_create                VC_CMD(VNET, 1, 0)
-#define VCMD_net_migrate       VC_CMD(NETMIG, 1, 0)
-
-#define VCMD_net_add           VC_CMD(NETALT, 1, 0)
-#define VCMD_net_remove                VC_CMD(NETALT, 2, 0)
-
-struct vcmd_net_nx_v0 {
-       uint16_t type;
-       uint16_t count;
-       uint32_t ip[4];
-       uint32_t mask[4];
-       /* more to come */
-};
-
-//     IPN_TYPE_IPV4
-
-
-#ifdef __KERNEL__
-extern int vc_net_create(uint32_t, void __user *);
-extern int vc_net_migrate(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define VCMD_get_nflags                VC_CMD(FLAGS, 5, 0)
-#define VCMD_set_nflags                VC_CMD(FLAGS, 6, 0)
-
-struct vcmd_net_flags_v0 {
-       uint64_t flagword;
-       uint64_t mask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_nflags(uint32_t, void __user *);
-extern int vc_set_nflags(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-
-#define IPF_STATE_SETUP                (1ULL<<32)
-
-
-#define IPF_ONE_TIME           (0x0001ULL<<32)
-
-#define VCMD_get_ncaps         VC_CMD(FLAGS, 7, 0)
-#define VCMD_set_ncaps         VC_CMD(FLAGS, 8, 0)
-
-struct vcmd_net_caps_v0 {
-       uint64_t ncaps;
-       uint64_t cmask;
-};
-
-#ifdef __KERNEL__
-extern int vc_get_ncaps(uint32_t, void __user *);
-extern int vc_set_ncaps(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-#endif /* _VX_CONTEXT_CMD_H */
index e527b44..f5982bb 100644 (file)
+/* _VX_SCHED_H defined below */
+
+#if    defined(__KERNEL__) && defined(_VX_INFO_DEF_)
+
+#include <linux/spinlock.h>
+#include <linux/jiffies.h>
+#include <linux/cpumask.h>
+#include <asm/atomic.h>
+#include <asm/param.h>
+
+struct _vx_ticks {
+       uint64_t user_ticks;            /* token tick events */
+       uint64_t sys_ticks;             /* token tick events */
+       uint64_t hold_ticks;            /* token ticks paused */
+       uint64_t unused[5];             /* cacheline ? */
+};
+
+/* context sub struct */
+
+struct _vx_sched {
+       atomic_t tokens;                /* number of CPU tokens */
+       spinlock_t tokens_lock;         /* lock for token bucket */
+
+       int fill_rate;                  /* Fill rate: add X tokens... */
+       int interval;                   /* Divisor:   per Y jiffies   */
+       int tokens_min;                 /* Limit:     minimum for unhold */
+       int tokens_max;                 /* Limit:     no more than N tokens */
+       uint32_t jiffies;               /* last time accounted */
+
+       int priority_bias;              /* bias offset for priority */
+       cpumask_t cpus_allowed;         /* cpu mask for context */
+
+       struct _vx_ticks cpu[NR_CPUS];
+};
+
+/*
+ * vx_info_init_sched - set up the scheduler state of a new context.
+ *
+ * Fills in the hard-coded token bucket defaults (fill rate 1 per interval
+ * of 4, minimum HZ/16, maximum HZ/2, initial fill HZ/4), stamps the
+ * current jiffies, allows all CPUs, clears the priority bias and zeroes
+ * the user/sys/hold tick counters of every CPU visited by for_each_cpu().
+ */
+static inline void vx_info_init_sched(struct _vx_sched *sched)
+{
+       int i;
+
+       /* scheduling; hard code starting values as constants */
+       sched->fill_rate        = 1;
+       sched->interval         = 4;
+       sched->tokens_min       = HZ >> 4;
+       sched->tokens_max       = HZ >> 1;
+       sched->jiffies          = jiffies;
+       /* run-time initialisation goes through spin_lock_init() rather
+          than assigning the static SPIN_LOCK_UNLOCKED initializer */
+       spin_lock_init(&sched->tokens_lock);
+
+       atomic_set(&sched->tokens, HZ >> 2);
+       sched->cpus_allowed     = CPU_MASK_ALL;
+       sched->priority_bias    = 0;
+
+       for_each_cpu(i) {
+               sched->cpu[i].user_ticks        = 0;
+               sched->cpu[i].sys_ticks         = 0;
+               sched->cpu[i].hold_ticks        = 0;
+       }
+}
+
+static inline void vx_info_exit_sched(struct _vx_sched *sched)
+{
+       return;
+}
+
+/*
+ * vx_info_proc_sched - format the scheduler state of a context as text.
+ *
+ * Writes the token bucket parameters followed by one "cpu N: user sys hold"
+ * line per online CPU into buffer and returns the accumulated sprintf()
+ * lengths.
+ *
+ * NOTE(review): output is unbounded (sprintf) and grows with the number of
+ * online CPUs; the required buffer size is not visible here.
+ */
+static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
+{
+       int length = 0;
+       int i;
+
+       /* fixed part: token bucket settings */
+       length += sprintf(buffer,
+               "Token:\t\t%8d\n"
+               "FillRate:\t%8d\n"
+               "Interval:\t%8d\n"
+               "TokensMin:\t%8d\n"
+               "TokensMax:\t%8d\n"
+               "PrioBias:\t%8d\n"
+               ,atomic_read(&sched->tokens)
+               ,sched->fill_rate
+               ,sched->interval
+               ,sched->tokens_min
+               ,sched->tokens_max
+               ,sched->priority_bias
+               );
+
+       /* per-cpu part: appended after what has been written so far */
+       for_each_online_cpu(i) {
+               length += sprintf(buffer + length,
+                       "cpu %d: %lld %lld %lld\n"
+                       ,i
+                       ,(long long)sched->cpu[i].user_ticks
+                       ,(long long)sched->cpu[i].sys_ticks
+                       ,(long long)sched->cpu[i].hold_ticks
+                       );
+       }
+
+       return length;
+}
+
+
+#else  /* _VX_INFO_DEF_ */
 #ifndef _VX_SCHED_H
 #define _VX_SCHED_H
 
+#include "switch.h"
+
+/*  sched vserver commands */
+
+#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
+#define VCMD_set_sched         VC_CMD(SCHED, 1, 3)
+
+struct vcmd_set_sched_v2 {
+       int32_t fill_rate;
+       int32_t interval;
+       int32_t tokens;
+       int32_t tokens_min;
+       int32_t tokens_max;
+       uint64_t cpu_mask;
+};
+
+struct vcmd_set_sched_v3 {
+       uint32_t set_mask;
+       int32_t fill_rate;
+       int32_t interval;
+       int32_t tokens;
+       int32_t tokens_min;
+       int32_t tokens_max;
+       int32_t priority_bias;
+};
+
+
+#define VXSM_FILL_RATE         0x0001
+#define VXSM_INTERVAL          0x0002
+#define VXSM_TOKENS            0x0010
+#define VXSM_TOKENS_MIN                0x0020
+#define VXSM_TOKENS_MAX                0x0040
+#define VXSM_PRIO_BIAS         0x0100
+
+#define SCHED_KEEP             (-2)
+
 #ifdef __KERNEL__
 
-struct timespec;
+extern int vc_set_sched_v1(uint32_t, void __user *);
+extern int vc_set_sched_v2(uint32_t, void __user *);
+extern int vc_set_sched(uint32_t, void __user *);
 
-void vx_vsi_uptime(struct timespec *, struct timespec *);
 
+#define VAVAVOOM_RATIO         50
 
-struct vx_info;
+#define MAX_PRIO_BIAS          20
+/* lower bound of the priority bias; parenthesized like SCHED_KEEP */
+#define MIN_PRIO_BIAS          (-20)
 
-void vx_update_load(struct vx_info *);
+#include "context.h"
 
 
-struct task_struct;
+/* scheduling stuff */
 
 int effective_vavavoom(struct task_struct *, int);
 
 int vx_tokens_recalc(struct vx_info *);
 
+/* new stuff ;) */
+
+/* Return the current value of the context's token counter. */
+static inline int vx_tokens_avail(struct vx_info *vxi)
+{
+       return atomic_read(&vxi->sched.tokens);
+}
+
+/* Atomically take one token from the context's token counter. */
+static inline void vx_consume_token(struct vx_info *vxi)
+{
+       atomic_dec(&vxi->sched.tokens);
+}
+
+/*
+ * vx_need_resched - account one tick to task p.
+ *
+ * Always decrements p->time_slice.  With CONFIG_VSERVER_HARDCPU and a
+ * context attached to the task, one token is consumed if any are
+ * available, and 1 is returned when fewer than two tokens were available
+ * before that (i.e. the bucket was empty or has just been emptied).
+ * Otherwise the result is non-zero exactly when the time slice reached 0.
+ */
+static inline int vx_need_resched(struct task_struct *p)
+{
+#ifdef CONFIG_VSERVER_HARDCPU
+       struct vx_info *vxi = p->vx_info;
+#endif
+       /* the slice is charged even if the token check below returns early */
+       int slice = --p->time_slice;
+
+#ifdef CONFIG_VSERVER_HARDCPU
+       if (vxi) {
+               int tokens;
+
+               /* 'tokens' holds the count seen before consumption */
+               if ((tokens = vx_tokens_avail(vxi)) > 0)
+                       vx_consume_token(vxi);
+               /* for tokens > 0, one token was consumed */
+               if (tokens < 2)
+                       return 1;
+       }
+#endif
+       return (slice == 0);
+}
+
+
+/*
+ * vx_onhold_inc - count one more on-hold entry for the context.
+ *
+ * Increments cvirt.nr_onhold; if the counter was zero beforehand, the
+ * current jiffies value is recorded in cvirt.onhold_last.
+ *
+ * NOTE(review): the read and the increment are two separate atomic
+ * operations, so the "was zero" test is only reliable if callers are
+ * serialized by other means -- confirm against the call sites.
+ */
+static inline void vx_onhold_inc(struct vx_info *vxi)
+{
+       int onhold = atomic_read(&vxi->cvirt.nr_onhold);
+
+       atomic_inc(&vxi->cvirt.nr_onhold);
+       if (!onhold)
+               vxi->cvirt.onhold_last = jiffies;
+}
+
+/*
+ * __vx_onhold_update - charge the elapsed hold time to the current CPU.
+ *
+ * Adds the jiffies elapsed since cvirt.onhold_last to the hold_ticks of
+ * the CPU this runs on and restarts the interval at the current jiffies.
+ * The difference is computed in 32-bit unsigned arithmetic.
+ */
+static inline void __vx_onhold_update(struct vx_info *vxi)
+{
+       int cpu = smp_processor_id();
+       uint32_t now = jiffies;
+       uint32_t delta = now - vxi->cvirt.onhold_last;
+
+       vxi->cvirt.onhold_last = now;
+       vxi->sched.cpu[cpu].hold_ticks += delta;
+}
+
+/*
+ * vx_onhold_dec - count one on-hold entry less for the context.
+ *
+ * When the decrement brings cvirt.nr_onhold to zero, the elapsed hold
+ * time is accounted via __vx_onhold_update().
+ */
+static inline void vx_onhold_dec(struct vx_info *vxi)
+{
+       if (atomic_dec_and_test(&vxi->cvirt.nr_onhold))
+               __vx_onhold_update(vxi);
+}
+
 #endif /* __KERNEL__ */
-#else  /* _VX_SCHED_H */
-#warning duplicate inclusion
+
 #endif /* _VX_SCHED_H */
+#endif
diff --git a/include/linux/vserver/sched_cmd.h b/include/linux/vserver/sched_cmd.h
deleted file mode 100644 (file)
index 2a6f55b..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _VX_SCHED_CMD_H
-#define _VX_SCHED_CMD_H
-
-/*  sched vserver commands */
-
-#define VCMD_set_sched_v2      VC_CMD(SCHED, 1, 2)
-#define VCMD_set_sched         VC_CMD(SCHED, 1, 3)
-
-struct vcmd_set_sched_v2 {
-       int32_t fill_rate;
-       int32_t interval;
-       int32_t tokens;
-       int32_t tokens_min;
-       int32_t tokens_max;
-       uint64_t cpu_mask;
-};
-
-struct vcmd_set_sched_v3 {
-       uint32_t set_mask;
-       int32_t fill_rate;
-       int32_t interval;
-       int32_t tokens;
-       int32_t tokens_min;
-       int32_t tokens_max;
-       int32_t priority_bias;
-};
-
-
-#define VXSM_FILL_RATE         0x0001
-#define VXSM_INTERVAL          0x0002
-#define VXSM_TOKENS            0x0010
-#define VXSM_TOKENS_MIN                0x0020
-#define VXSM_TOKENS_MAX                0x0040
-#define VXSM_PRIO_BIAS         0x0100
-
-#define SCHED_KEEP             (-2)
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-
-extern int vc_set_sched_v1(uint32_t, void __user *);
-extern int vc_set_sched_v2(uint32_t, void __user *);
-extern int vc_set_sched(uint32_t, void __user *);
-
-#endif /* __KERNEL__ */
-#endif /* _VX_SCHED_CMD_H */
diff --git a/include/linux/vserver/sched_def.h b/include/linux/vserver/sched_def.h
deleted file mode 100644 (file)
index e85c09f..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _VX_SCHED_DEF_H
-#define _VX_SCHED_DEF_H
-
-#include <linux/spinlock.h>
-#include <linux/jiffies.h>
-#include <linux/cpumask.h>
-#include <asm/atomic.h>
-#include <asm/param.h>
-
-
-struct _vx_ticks {
-       uint64_t user_ticks;            /* token tick events */
-       uint64_t sys_ticks;             /* token tick events */
-       uint64_t hold_ticks;            /* token ticks paused */
-       uint64_t unused[5];             /* cacheline ? */
-};
-
-/* context sub struct */
-
-struct _vx_sched {
-       atomic_t tokens;                /* number of CPU tokens */
-       spinlock_t tokens_lock;         /* lock for token bucket */
-
-       int fill_rate;                  /* Fill rate: add X tokens... */
-       int interval;                   /* Divisor:   per Y jiffies   */
-       int tokens_min;                 /* Limit:     minimum for unhold */
-       int tokens_max;                 /* Limit:     no more than N tokens */
-       uint32_t jiffies;               /* last time accounted */
-
-       int priority_bias;              /* bias offset for priority */
-       cpumask_t cpus_allowed;         /* cpu mask for context */
-
-       struct _vx_ticks cpu[NR_CPUS];
-};
-
-#endif /* _VX_SCHED_DEF_H */
index 81f5c23..5855b05 100644 (file)
@@ -39,7 +39,7 @@
   SPECIAL|EVENT  |       |       |       |FLAGS  |       | |       |       |
         |     48|     49|     50|     51|     52|     53| |     54|     55|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
-  SPECIAL|DEBUG  |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
+  SPECIAL|       |       |       |       |RLIMIT |SYSCALL| |       |COMPAT |
         |     56|     57|     58|     59|     60|TEST 61| |     62|     63|
   -------+-------+-------+-------+-------+-------+-------+ +-------+-------+
 
@@ -69,7 +69,6 @@
 #define VC_CAT_EVENT           48
 
 #define VC_CAT_FLAGS           52
-#define VC_CAT_DEBUG           56
 #define VC_CAT_RLIMIT          60
 
 #define VC_CAT_SYSTEST         61
@@ -77,7 +76,7 @@
 
 /*  interface version */
 
-#define VCI_VERSION            0x00010025
+#define VCI_VERSION            0x00010022
 
 
 /*  query version */
index 91e28de..23908fd 100644 (file)
@@ -120,7 +120,4 @@ static inline gid_t vx_map_gid(gid_t gid)
 #define FIOC_SETXIDJ   _IOW('x', 3, long)
 #endif
 
-int vx_parse_xid(char *string, xid_t *xid, int remove);
-void vx_propagate_xid(struct nameidata *nd, struct inode *inode);
-
 #endif /* _VX_XID_H */
index 9ed04d9..c5d47b2 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/route.h>
 #include <linux/ip.h>
 #include <linux/cache.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
 
@@ -145,59 +146,6 @@ static inline char rt_tos2priority(u8 tos)
        return ip_tos2prio[IPTOS_TOS(tos)>>1];
 }
 
-#define IPI_LOOPBACK   0x0100007f
-
-static inline int ip_find_src(struct nx_info *nxi, struct rtable **rp, struct flowi *fl)
-{
-       int err;
-       int i, n = nxi->nbipv4;
-       u32 ipv4root = nxi->ipv4[0];
-
-       if (ipv4root == 0)
-               return 0;
-
-       if (fl->fl4_src == 0) {
-               if (n > 1) {
-                       u32 foundsrc;
-
-                       err = __ip_route_output_key(rp, fl);
-                       if (err) {
-                               fl->fl4_src = ipv4root;
-                               err = __ip_route_output_key(rp, fl);
-                       }
-                       if (err)
-                               return err;
-
-                       foundsrc = (*rp)->rt_src;
-                       ip_rt_put(*rp);
-
-                       for (i=0; i<n; i++){
-                               u32 mask = nxi->mask[i];
-                               u32 ipv4 = nxi->ipv4[i];
-                               u32 net4 = ipv4 & mask;
-
-                               if (foundsrc == ipv4) {
-                                       fl->fl4_src = ipv4;
-                                       break;
-                               }
-                               if (!fl->fl4_src && (foundsrc & mask) == net4)
-                                       fl->fl4_src = ipv4;
-                       }
-               }
-               if (fl->fl4_src == 0)
-                       fl->fl4_src = (fl->fl4_dst == IPI_LOOPBACK)
-                               ? IPI_LOOPBACK : ipv4root;
-       } else {
-               for (i=0; i<n; i++) {
-                       if (nxi->ipv4[i] == fl->fl4_src)
-                               break;
-               }
-               if (i == n)
-                       return -EPERM;
-       }
-       return 0;
-}
-
 static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                   u32 src, u32 tos, int oif, u8 protocol,
                                   u16 sport, u16 dport, struct sock *sk)
@@ -212,23 +160,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst,
                                         .dport = dport } } };
 
        int err;
-       struct nx_info *nx_info = current->nx_info;
-
-       if (sk)
-               nx_info = sk->sk_nx_info;
-       vxdprintk(VXD_CBIT(net, 4),
-               "ip_route_connect(%p) %p,%p;%lx",
-               sk, nx_info, sk->sk_socket,
-               (sk->sk_socket?sk->sk_socket->flags:0));
-
-       if (nx_info) {
-               err = ip_find_src(nx_info, rp, &fl);
-               if (err)
-                       return err;
-               if (fl.fl4_dst == IPI_LOOPBACK && !vx_check(0, VX_ADMIN))
-                       fl.fl4_dst = nx_info->ipv4[0];
-       }
-       if (!fl.fl4_dst || !fl.fl4_src) {
+       if (!dst || !src) {
                err = __ip_route_output_key(rp, &fl);
                if (err)
                        return err;
index 5091195..7442c13 100644 (file)
@@ -90,6 +90,8 @@ config SYSVIPC
          you want to run the DOS emulator dosemu under Linux (read the
          DOSEMU-HOWTO, available from <http://www.tldp.org/docs.html#howto>),
          you'll need to say Y here.
+         DOSEMU-HOWTO, available from <http://www.tldp.org/docs.html#howto>),
+         you'll need to say Y here.
 
          You can find documentation about IPC with "info ipc" and also in
          section 6.4 of the Linux Programmer's Guide, available from
@@ -153,19 +155,19 @@ config CKRM
          one of the resource controllers below. Say N if you are unsure. 
 
 config RCFS_FS
-       tristate "Resource Class File System (User API)"
-       depends on CKRM
-       help
+       tristate "Resource Class File System (User API)"
+       depends on CKRM
+       help
          RCFS is the filesystem API for CKRM. This separate configuration 
          option is provided only for debugging and will eventually disappear 
          since rcfs will be automounted whenever CKRM is configured. 
 
-         Say N if unsure, Y if you've enabled CKRM, M to debug rcfs 
+          Say N if unsure, Y if you've enabled CKRM, M to debug rcfs 
          initialization.
 
 config CKRM_TYPE_TASKCLASS
        bool "Class Manager for Task Groups"
-       depends on CKRM && RCFS_FS
+       depends on CKRM
        help
          TASKCLASS provides the extensions for CKRM to track task classes
          This is the base to enable task class based resource control for
@@ -173,51 +175,16 @@ config CKRM_TYPE_TASKCLASS
        
          Say N if unsure 
 
-config CKRM_RES_NULL
-       tristate "Null Tasks Resource Manager"
-       depends on CKRM_TYPE_TASKCLASS
-       default m
-
-config CKRM_RES_MEM
-       bool "Class based physical memory controller"
-       default y
-       depends on CKRM
-       help
-         Provide the basic support for collecting physical memory usage
-         information among classes. Say Y if you want to know the memory
-         usage of each class.
-
-config CKRM_TYPE_SOCKETCLASS
-       bool "Class Manager for socket groups"
-       depends on CKRM && RCFS_FS
-       help
-         Provides a Null Resource Controller for CKRM that is purely for
-         demonstration purposes.
-       
-         Say N if unsure, Y to use the feature.
-
-
 config CKRM_RES_NUMTASKS
        tristate "Number of Tasks Resource Manager"
        depends on CKRM_TYPE_TASKCLASS
        default m
        help
-         Provides a Resource Controller for CKRM that allows limiting number of
+         Provides a Resource Controller for CKRM that allows limiting the number of
          tasks a task class can have.
        
          Say N if unsure, Y to use the feature.
 
-config CKRM_RES_NUMTASKS_FORKRATE
-       tristate "Number of Tasks Resource Manager for Fork Rate"
-       depends on CKRM_RES_NUMTASKS
-       default y
-       help
-         Provides a Resource Controller for CKRM that allows limiting the rate
-         of tasks a task class can fork per hour.
-       
-         Say N if unsure, Y to use the feature.
-
-
 config CKRM_CPU_SCHEDULE
        bool "CKRM CPU scheduler"
        depends on CKRM_TYPE_TASKCLASS
@@ -240,6 +207,26 @@ config CKRM_RES_BLKIO
        
          Say N if unsure, Y to use the feature.
 
+config CKRM_RES_MEM
+       bool "Class based physical memory controller"
+       default y
+       depends on CKRM
+       help
+         Provide the basic support for collecting physical memory usage information
+         among classes. Say Y if you want to know the memory usage of each class.
+
+config CKRM_MEM_LRUORDER_CHANGE
+       bool "Change the LRU ordering of scanned pages"
+       default n
+       depends on CKRM_RES_MEM
+       help
+         While trying to free pages, by default (n), scanned pages are left where
+         they are found if they belong to a relatively under-used class. In this case
+         the LRU ordering of the memory subsystem is left intact. If this option is
+         chosen, the scanned pages are moved to the tail of the list (active or
+         inactive). Changing this to yes reduces the checking overhead but violates
+         the approximate LRU order that is maintained by the paging subsystem.
+
 config CKRM_CPU_SCHEDULE_AT_BOOT
        bool "Turn on at boot time"
        depends on CKRM_CPU_SCHEDULE
@@ -274,48 +261,29 @@ config CKRM_RES_LISTENAQ
  
          If unsure, say N.
 
-choice 
-       prompt "Classification Engine"
-       depends on CKRM && RCFS_FS            
-       optional
-       help
-        Select a classification engine (CE) that assists in 
-        automatic classification of kernel objects managed by CKRM when
-        they are created. Without a CE, a user must manually 
-        classify objects into classes. Processes inherit their parent's 
-        classification.
-
-        Only one engine can be built into the kernel though all can be
-        built as modules (only one will load). 
-
-        If unsure, say N.
-
 config CKRM_RBCE
-       tristate "Vanilla RBCE"
+       tristate "Vanilla Rule-based Classification Engine (RBCE)"
+       depends on CKRM && RCFS_FS
+       default m
        help
-         Vanilla Rule-based Classification Engine (RBCE). Rules for
-         classifying kernel objects are created/deleted/modified through
-         a RCFS directory using a filesystem interface.
-       
-         Any CE is optional. If unsure, say N.
+         Provides an optional module to support creation of rules for automatic
+         classification of kernel objects. Rules are created/deleted/modified 
+          through an rcfs interface. RBCE is not required for CKRM. 
+         If unsure, say N.
 
 config CKRM_CRBCE
-       tristate "Enhanced RBCE"
-       depends on DELAY_ACCT && RELAYFS_FS 
+       tristate "Enhanced Rule-based Classification Engine (RBCE)"
+       depends on CKRM && RCFS_FS && RELAYFS_FS && DELAY_ACCT
+       default m
        help
-         Enhanced Rule-based Classification Engine (CRBCE). Like the Vanilla
-         RBCE, rules for classifying kernel objects are created, deleted and
-         modified through a RCFS directory using a filesystem interface 
-         (requires CKRM_RCFS configured).
-         In addition, CRBCE provides per-process delay data 
-         (requires DELAY_ACCT configured) and makes information on significant
-         kernel events available to userspace tools through relayfs 
-         (requires RELAYFS_FS configured). 
+         Provides an optional module to support creation of rules for automatic
+         classification of kernel objects, just like RBCE above. In addition,
+         CRBCE provides per-process delay data (requires DELAY_ACCT to be
+         enabled) and makes information on significant kernel events available
+         to userspace tools through relayfs (requires RELAYFS_FS configured). 
        
-         Any CE is optional. If unsure, say N.
-
-endchoice
+         If unsure, say N.
 
 endmenu
 
@@ -462,6 +430,7 @@ config DELAY_ACCT
          I/O delays are recorded for memory and regular I/O.
          Information is accessible through /proc/<pid>/delay.
 
+
 config KALLSYMS
         bool "Load all symbols for debugging/kksymoops" if EMBEDDED
         default y
index 4efd7b8..f4b5adf 100644 (file)
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
-#include <linux/ckrm_events.h>
-#include <linux/ckrm_sched.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
 #include <asm/setup.h>
 
+#include <linux/ckrm.h>
+#ifdef CONFIG_CKRM_CPU_SCHEDULE
+int __init init_ckrm_sched_res(void);
+#else
+#define init_ckrm_sched_res() ((void)0)
+#endif
+//#include <linux/ckrm_sched.h>
+
 /*
  * This is one of the first .c files built. Error out early
  * if we have compiler trouble..
index 62aead4..7964406 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -99,7 +99,7 @@ static int newque (key_t key, int msgflg)
 
        msq->q_perm.mode = (msgflg & S_IRWXUGO);
        msq->q_perm.key = key;
-       msq->q_perm.xid = vx_current_xid();
+       msq->q_perm.xid = current->xid;
 
        msq->q_perm.security = NULL;
        retval = security_msg_queue_alloc(msq);
index d33f2ad..3960ddb 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -178,7 +178,7 @@ static int newary (key_t key, int nsems, int semflg)
 
        sma->sem_perm.mode = (semflg & S_IRWXUGO);
        sma->sem_perm.key = key;
-       sma->sem_perm.xid = vx_current_xid();
+       sma->sem_perm.xid = current->xid;
 
        sma->sem_perm.security = NULL;
        retval = security_sem_alloc(sma);
index fa14c36..d7bb539 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -195,7 +195,7 @@ static int newseg (key_t key, int shmflg, size_t size)
                return -ENOMEM;
 
        shp->shm_perm.key = key;
-       shp->shm_perm.xid = vx_current_xid();
+       shp->shm_perm.xid = current->xid;
        shp->shm_flags = (shmflg & S_IRWXUGO);
        shp->mlock_user = NULL;
 
index 2096afd..3d32576 100644 (file)
@@ -14,9 +14,6 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 subdir-y  += vserver
 obj-y    += vserver/vserver.o
 
-subdir-y       += vserver
-obj-y    += vserver/vserver.o
-
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
@@ -26,7 +23,6 @@ obj-$(CONFIG_MODULE_SIG) += module-verify-sig.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
-obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
@@ -37,7 +33,6 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
-obj-$(CONFIG_CRASH_DUMP) += crash.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
index 649a9ce..a4bf68d 100644 (file)
@@ -12,8 +12,6 @@
 #include <linux/security.h>
 #include <linux/vs_cvirt.h>
 #include <linux/syscalls.h>
-#include <linux/vs_cvirt.h>
-
 #include <asm/uaccess.h>
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
index 0c3c980..4956dcb 100644 (file)
@@ -3,13 +3,11 @@
 #
 
 ifeq ($(CONFIG_CKRM),y)
-    obj-y = ckrm_events.o ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/
+    obj-y = ckrm.o ckrmutils.o ckrm_numtasks_stub.o rbce/
 endif  
-
-obj-$(CONFIG_CKRM_TYPE_TASKCLASS) += ckrm_tc.o
-obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
-obj-$(CONFIG_CKRM_RES_NUMTASKS) += ckrm_numtasks.o
-obj-$(CONFIG_CKRM_RES_LISTENAQ) += ckrm_listenaq.o
-obj-$(CONFIG_CKRM_CPU_SCHEDULE) += ckrm_cpu_class.o ckrm_cpu_monitor.o
-obj-$(CONFIG_CKRM_RES_MEM) += ckrm_mem.o
-obj-$(CONFIG_CKRM_RES_NULL) += ckrm_null_class.o
+    obj-$(CONFIG_CKRM_TYPE_TASKCLASS)  += ckrm_tc.o
+    obj-$(CONFIG_CKRM_RES_NUMTASKS)    += ckrm_numtasks.o
+    obj-$(CONFIG_CKRM_TYPE_SOCKETCLASS) += ckrm_sockc.o
+    obj-$(CONFIG_CKRM_RES_LISTENAQ)    += ckrm_listenaq.o
+    obj-$(CONFIG_CKRM_CPU_SCHEDULE)     += ckrm_cpu_class.o ckrm_cpu_monitor.o
+    obj-$(CONFIG_CKRM_RES_MEM)                 += ckrm_mem.o
index 278aec9..f1cfb26 100644 (file)
@@ -19,8 +19,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  *
  * 28 Aug 2003
  *        Created.
 #include <net/sock.h>
 #include <linux/ip.h>
 
-rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED;   /* protects classlists */
+rwlock_t ckrm_class_lock = RW_LOCK_UNLOCKED;   // protect classlists 
 
 struct rcfs_functions rcfs_fn;
-EXPORT_SYMBOL_GPL(rcfs_fn);
+EXPORT_SYMBOL(rcfs_fn);
 
-int rcfs_engine_regd;          /* rcfs state needed by another module */
-EXPORT_SYMBOL_GPL(rcfs_engine_regd);
+// rcfs state needed by another module
+int rcfs_engine_regd;
+EXPORT_SYMBOL(rcfs_engine_regd);
 
 int rcfs_mounted;
-EXPORT_SYMBOL_GPL(rcfs_mounted);
+EXPORT_SYMBOL(rcfs_mounted);
+
+/**************************************************************************
+ *                   Helper Functions                                     *
+ **************************************************************************/
 
 /*
- * Helper Functions
+ * Return TRUE if the given core class pointer is valid.
  */
 
 /*
@@ -78,10 +82,7 @@ inline unsigned int is_res_regd(struct ckrm_classtype *clstype, int resid)
            );
 }
 
-/*
- * Return TRUE if the given core class pointer is valid.
- */
-static struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
+struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
                                          const char *resname)
 {
        int resid = -1;
@@ -100,9 +101,10 @@ static struct ckrm_res_ctlr *ckrm_resctlr_lookup(struct ckrm_classtype *clstype,
        return NULL;
 }
 
+EXPORT_SYMBOL(ckrm_resctlr_lookup);
 
 /* given a classname return the class handle and its classtype*/
-void *ckrm_classobj(const char *classname, int *classTypeID)
+void *ckrm_classobj(char *classname, int *classTypeID)
 {
        int i;
 
@@ -131,12 +133,12 @@ void *ckrm_classobj(const char *classname, int *classTypeID)
        return NULL;
 }
 
-EXPORT_SYMBOL_GPL(is_res_regd);
-EXPORT_SYMBOL_GPL(ckrm_classobj);
+EXPORT_SYMBOL(is_res_regd);
+EXPORT_SYMBOL(ckrm_classobj);
 
-/*
- * Internal Functions/macros
- */
+/**************************************************************************
+ *                   Internal Functions/macros                            *
+ **************************************************************************/
 
 static inline void set_callbacks_active(struct ckrm_classtype *ctype)
 {
@@ -157,9 +159,9 @@ int ckrm_validate_and_grab_core(struct ckrm_core_class *core)
        return rc;
 }
 
-/*
- * Interfaces for classification engine
- */
+/****************************************************************************
+ *           Interfaces for classification engine                           *
+ ****************************************************************************/
 
 /*
  * Registering a callback structure by the classification engine.
@@ -182,12 +184,12 @@ int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
                return (-EBUSY);
        }
 
-       /*
-        * One of the following must be set: 
-        * classify, class_delete (due to object reference) or 
-        * notify (case where notification supported but not classification)
-        * The function pointer must be set the momement the mask is non-null
+       /* One of the following must be set: 
+          classify, class_delete (due to object reference) or 
+          notify (case where notification supported but not classification)
+          The function pointer must be set the moment the mask is non-null
         */
+
        if (!(((ecbs->classify) && (ecbs->class_delete)) || (ecbs->notify)) ||
            (ecbs->c_interest && ecbs->classify == NULL) ||
            (ecbs->n_interest && ecbs->notify == NULL)) {
@@ -202,6 +204,7 @@ int ckrm_register_engine(const char *typename, ckrm_eng_callback_t * ecbs)
                struct ckrm_core_class *core;
 
                read_lock(&ckrm_class_lock);
+
                list_for_each_entry(core, &ctype->classes, clslist) {
                        (*ctype->ce_callbacks.class_add) (core->name, core,
                                                          ctype->typeID);
@@ -225,19 +228,23 @@ int ckrm_unregister_engine(const char *typename)
                return (-ENOENT);
 
        ctype->ce_cb_active = 0;
+
        if (atomic_read(&ctype->ce_nr_users) > 1) {
-               /* Somebody is currently using the engine, cannot deregister. */
+               // Somebody is currently using the engine, cannot deregister.
                return (-EAGAIN);
        }
+
        atomic_set(&ctype->ce_regd, 0);
        memset(&ctype->ce_callbacks, 0, sizeof(ckrm_eng_callback_t));
        return 0;
 }
 
-/*
- * Interfaces to manipulate class (core or resource) hierarchies
- */
+/****************************************************************************
+ *           Interfaces to manipulate class (core or resource) hierarchies 
+ ****************************************************************************/
 
+/* 
+ */
 static void
 ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child)
 {
@@ -248,6 +255,7 @@ ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child)
                       child);
                return;
        }
+
        class_lock(child);
        INIT_LIST_HEAD(&cnode->children);
        INIT_LIST_HEAD(&cnode->siblings);
@@ -272,6 +280,8 @@ ckrm_add_child(struct ckrm_core_class *parent, struct ckrm_core_class *child)
        return;
 }
 
+/* 
+ */
 static int ckrm_remove_child(struct ckrm_core_class *child)
 {
        struct ckrm_hnode *cnode, *pnode;
@@ -345,6 +355,7 @@ struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *parent,
        if (list_empty(&parent->hnode.children)) {
                return NULL;
        }
+
        if (child) {
                if (!ckrm_is_core_valid(child)) {
                        printk(KERN_ERR
@@ -357,7 +368,7 @@ struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *parent,
                cnode = parent->hnode.children.next;
        }
 
-       if (cnode == &parent->hnode.children) { /* back at the anchor */
+       if (cnode == &parent->hnode.children) { // back at the anchor
                return NULL;
        }
 
@@ -373,9 +384,9 @@ struct ckrm_core_class *ckrm_get_next_child(struct ckrm_core_class *parent,
        return next_childcore;
 }
 
-EXPORT_SYMBOL_GPL(ckrm_lock_hier);
-EXPORT_SYMBOL_GPL(ckrm_unlock_hier);
-EXPORT_SYMBOL_GPL(ckrm_get_next_child);
+EXPORT_SYMBOL(ckrm_lock_hier);
+EXPORT_SYMBOL(ckrm_unlock_hier);
+EXPORT_SYMBOL(ckrm_get_next_child);
 
 static void
 ckrm_alloc_res_class(struct ckrm_core_class *core,
@@ -383,12 +394,15 @@ ckrm_alloc_res_class(struct ckrm_core_class *core,
 {
 
        struct ckrm_classtype *clstype;
+
        /* 
         * Allocate a resource class only if the resource controller has
         * registered with core and the engine requests for the class.
         */
+
        if (!ckrm_is_core_valid(core))
                return;
+
        clstype = core->classtype;
        core->res_class[resid] = NULL;
 
@@ -422,15 +436,32 @@ ckrm_init_core_class(struct ckrm_classtype *clstype,
                     struct ckrm_core_class *dcore,
                     struct ckrm_core_class *parent, const char *name)
 {
-       /* TODO:  Should replace name with dentry or add dentry? */
+       // Hubertus   ... should replace name with dentry or add dentry ?
        int i;
 
-       /* TODO:  How is this used in initialization? */
+       // Hubertus .. how is this used in initialization 
+
        CLS_DEBUG("name %s => %p\n", name ? name : "default", dcore);
+
        if ((dcore != clstype->default_class) && (!ckrm_is_core_valid(parent))){
                printk(KERN_DEBUG "error not a valid parent %p\n", parent);
                return -EINVAL;
        }
+#if 0  
+// Hubertus .. dynamic allocation still breaks when RCs registers. 
+// See def in ckrm_rc.h
+       dcore->res_class = NULL;
+       if (clstype->max_resid > 0) {
+               dcore->res_class =
+                   (void **)kmalloc(clstype->max_resid * sizeof(void *),
+                                    GFP_KERNEL);
+               if (dcore->res_class == NULL) {
+                       printk(KERN_DEBUG "error no mem\n");
+                       return -ENOMEM;
+               }
+       }
+#endif
+
        dcore->classtype = clstype;
        dcore->magic = CKRM_CORE_MAGIC;
        dcore->name = name;
@@ -453,7 +484,7 @@ ckrm_init_core_class(struct ckrm_classtype *clstype,
        for (i = 0; i < clstype->max_resid; i++)
                ckrm_alloc_res_class(dcore, parent, i);
 
-       /* fix for race condition seen in stress with numtasks */
+       // fix for race condition seen in stress with numtasks
        if (parent)
                ckrm_core_grab(parent);
 
@@ -506,23 +537,25 @@ void ckrm_free_core_class(struct ckrm_core_class *core)
        if (ckrm_remove_child(core) == 0) {
                printk(KERN_DEBUG "Core class removal failed. Chilren present\n");
        }
+
        for (i = 0; i < clstype->max_resid; i++) {
                ckrm_free_res_class(core, i);
        }
 
        write_lock(&ckrm_class_lock);
-       /* Clear the magic, so we would know if this core is reused. */
+
+       // Clear the magic, so we would know if this core is reused.
        core->magic = 0;
-#if 0                          /* Dynamic not yet enabled */
+#if 0                          // Dynamic not yet enabled
        core->res_class = NULL;
 #endif
-       /* Remove this core class from its linked list. */
+       // Remove this core class from its linked list.
        list_del(&core->clslist);
        clstype->num_classes--;
        set_callbacks_active(clstype);
        write_unlock(&ckrm_class_lock);
 
-       /* fix for race condition seen in stress with numtasks */
+       // fix for race condition seen in stress with numtasks
        if (parent)
                ckrm_core_drop(parent);
 
@@ -549,9 +582,9 @@ int ckrm_release_core_class(struct ckrm_core_class *core)
        return 0;
 }
 
-/*
- * Interfaces for the resource controller
- */
+/****************************************************************************
+ *           Interfaces for the resource controller                         *
+ ****************************************************************************/
 /*
  * Registering a callback structure by the resource controller.
  *
@@ -569,8 +602,10 @@ ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype,
        resid = rcbs->resid;
 
        spin_lock(&clstype->res_ctlrs_lock);
+
        printk(KERN_WARNING "resid is %d name is %s %s\n",
               resid, rcbs->res_name, clstype->res_ctlrs[resid]->res_name);
+
        if (resid >= 0) {
                if ((resid < CKRM_MAX_RES_CTLRS)
                    && (clstype->res_ctlrs[resid] == NULL)) {
@@ -587,6 +622,7 @@ ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype,
                spin_unlock(&clstype->res_ctlrs_lock);
                return ret;
        }
+
        for (i = clstype->resid_reserved; i < clstype->max_res_ctlrs; i++) {
                if (clstype->res_ctlrs[i] == NULL) {
                        clstype->res_ctlrs[i] = rcbs;
@@ -600,6 +636,7 @@ ckrm_register_res_ctlr_intern(struct ckrm_classtype *clstype,
                        return i;
                }
        }
+
        spin_unlock(&clstype->res_ctlrs_lock);
        return (-ENOMEM);
 }
@@ -625,7 +662,7 @@ ckrm_register_res_ctlr(struct ckrm_classtype *clstype, ckrm_res_ctlr_t * rcbs)
                        ckrm_alloc_res_class(core, core->hnode.parent, resid);
 
                        if (clstype->add_resctrl) { 
-                               /* FIXME: this should be mandatory */
+                               // FIXME: this should be mandatory
                                (*clstype->add_resctrl) (core, resid);
                        }
                }
@@ -648,10 +685,10 @@ int ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs)
        if ((clstype == NULL) || (resid < 0)) {
                return -EINVAL;
        }
-       /* TODO: probably need to also call deregistration function */
+       // FIXME: probably need to also call deregistration function
 
        read_lock(&ckrm_class_lock);
-       /* free up this resource from all the classes */
+       // free up this resource from all the classes
        list_for_each_entry(core, &clstype->classes, clslist) {
                ckrm_free_res_class(core, resid);
        }
@@ -671,14 +708,16 @@ int ckrm_unregister_res_ctlr(struct ckrm_res_ctlr *rcbs)
        return 0;
 }
 
-/*
- * Class Type Registration
- */
+/*******************************************************************
+ *   Class Type Registration
+ *******************************************************************/
+
+/* Hubertus ... we got to do some locking here */
 
-/* TODO: What locking is needed here?*/
 
 struct ckrm_classtype *ckrm_classtypes[CKRM_MAX_CLASSTYPES];
-EXPORT_SYMBOL_GPL(ckrm_classtypes);    
+// really should build a better interface for this
+EXPORT_SYMBOL(ckrm_classtypes);        
 
 int ckrm_register_classtype(struct ckrm_classtype *clstype)
 {
@@ -702,11 +741,12 @@ int ckrm_register_classtype(struct ckrm_classtype *clstype)
        clstype->typeID = tid;
        ckrm_classtypes[tid] = clstype;
 
-       /* TODO: Need to call the callbacks of the RCFS client */
+       /* Hubertus .. we need to call the callbacks of the RCFS client */
        if (rcfs_fn.register_classtype) {
                (*rcfs_fn.register_classtype) (clstype);
-               /* No error return for now. */
+               // No error return for now ;
        }
+
        return tid;
 }
 
@@ -739,12 +779,79 @@ struct ckrm_classtype *ckrm_find_classtype_by_name(const char *name)
        return NULL;
 }
 
-/*
+/*******************************************************************
+ *   Event callback invocation
+ *******************************************************************/
+
+struct ckrm_hook_cb *ckrm_event_callbacks[CKRM_NONLATCHABLE_EVENTS];
+
+/* Registration / Deregistration / Invocation functions */
+
+int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb)
+{      /* Append cb to the callback chain of event ev; returns 0, or 1 if ev is rejected. */
+       struct ckrm_hook_cb **cbptr;    /* address of the link currently examined */
+
+       if ((ev < CKRM_LATCHABLE_EVENTS) || (ev >= CKRM_NONLATCHABLE_EVENTS))
+               return 1;       /* ev is outside the range accepted here */
+       cbptr = &ckrm_event_callbacks[ev];
+       while (*cbptr != NULL)  /* advance to the NULL link at the tail of the chain */
+               cbptr = &((*cbptr)->next);
+       *cbptr = cb;    /* cb->next is not touched here; NOTE(review): presumably the caller must have it NULL */
+       return 0;
+}
+
+/*
+ * Remove cb from the callback chain of event ev.
+ *
+ * Returns 0 if cb was found and unlinked, 1 if cb is not on the chain,
+ * and -1 if ev is outside the range that accepts callbacks.
+ */
+int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb)
+{
+       struct ckrm_hook_cb **cbptr;
+
+       if ((ev < CKRM_LATCHABLE_EVENTS) || (ev >= CKRM_NONLATCHABLE_EVENTS))
+               return -1;
+       cbptr = &ckrm_event_callbacks[ev];
+       while ((*cbptr != NULL) && (*cbptr != cb))
+               cbptr = &((*cbptr)->next);
+       if (*cbptr == NULL)
+               return 1;
+       /*
+        * *cbptr is the link that points at cb. Redirect it to cb's
+        * successor; assigning to (*cbptr)->next would only rewrite
+        * cb->next with its own value and leave cb on the chain.
+        */
+       *cbptr = cb->next;
+       return 0;
+}
+
+int ckrm_register_event_set(struct ckrm_event_spec especs[])
+{      /* Register the callback of each entry in especs, stopping at the entry whose ev is -1. */
+       struct ckrm_event_spec *espec = especs;
+
+       for (espec = especs; espec->ev != -1; espec++)
+               ckrm_register_event_cb(espec->ev, &espec->cb);  /* per-entry result is ignored */
+       return 0;       /* always 0, even if an entry was rejected */
+}
+
+int ckrm_unregister_event_set(struct ckrm_event_spec especs[])
+{      /* Unregister the callback of each entry in especs, stopping at the entry whose ev is -1. */
+       struct ckrm_event_spec *espec = especs;
+
+       for (espec = especs; espec->ev != -1; espec++)
+               ckrm_unregister_event_cb(espec->ev, &espec->cb);        /* per-entry result is ignored */
+       return 0;       /* always 0, even if an entry was not found */
+}
+
+#define ECC_PRINTK(fmt, args...) \
+// printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
+
+void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg)
+{      /* Call every callback registered for event ev, in chain order, passing arg to each. */
+       struct ckrm_hook_cb *cb, *anchor;
+
+       ECC_PRINTK("%d %x\n", current, ev, arg);        /* expands to nothing: the printk in ECC_PRINTK is commented out */
+       if ((anchor = ckrm_event_callbacks[ev]) != NULL) {      /* NOTE(review): ev is not range-checked here - confirm callers */
+               for (cb = anchor; cb; cb = cb->next)
+                       (*cb->fct) (arg);
+       }
+}
+
+/*******************************************************************
  *   Generic Functions that can be used as default functions 
  *   in almost all classtypes
  *     (a) function iterator over all resource classes of a class
  *     (b) function invoker on a named resource
- */
+ *******************************************************************/
 
 int ckrm_class_show_shares(struct ckrm_core_class *core, struct seq_file *seq)
 {
@@ -826,7 +933,7 @@ int ckrm_class_set_shares(struct ckrm_core_class *core, const char *resname,
        struct ckrm_res_ctlr *rcbs;
        int rc;
 
-       /* Check for legal values */
+       // Check for legal values
        if (!legalshare(shares->my_guarantee) || !legalshare(shares->my_limit)
            || !legalshare(shares->total_guarantee)
            || !legalshare(shares->max_limit))
@@ -852,9 +959,9 @@ int ckrm_class_reset_stats(struct ckrm_core_class *core, const char *resname,
        return rc;
 }
 
-/*
- * Initialization
- */
+/*******************************************************************
+ *   Initialization 
+ *******************************************************************/
 
 void ckrm_cb_newtask(struct task_struct *tsk)
 {
@@ -873,9 +980,6 @@ void __init ckrm_init(void)
 {
        printk(KERN_DEBUG "CKRM Initialization\n");
 
-       // prepare init_task and then rely on inheritance of properties
-       ckrm_cb_newtask(&init_task);
-
        // register/initialize the Metatypes
 
 #ifdef CONFIG_CKRM_TYPE_TASKCLASS
@@ -890,38 +994,40 @@ void __init ckrm_init(void)
                ckrm_meta_init_sockclass();
        }
 #endif
-       printk("CKRM Initialization done\n");
+       // prepare init_task and then rely on inheritance of properties
+       ckrm_cb_newtask(&init_task);
+       printk(KERN_DEBUG "CKRM Initialization done\n");
 }
 
-EXPORT_SYMBOL_GPL(ckrm_register_engine);
-EXPORT_SYMBOL_GPL(ckrm_unregister_engine);
+EXPORT_SYMBOL(ckrm_register_engine);
+EXPORT_SYMBOL(ckrm_unregister_engine);
 
-EXPORT_SYMBOL_GPL(ckrm_register_res_ctlr);
-EXPORT_SYMBOL_GPL(ckrm_unregister_res_ctlr);
+EXPORT_SYMBOL(ckrm_register_res_ctlr);
+EXPORT_SYMBOL(ckrm_unregister_res_ctlr);
 
-EXPORT_SYMBOL_GPL(ckrm_init_core_class);
-EXPORT_SYMBOL_GPL(ckrm_free_core_class);
-EXPORT_SYMBOL_GPL(ckrm_release_core_class);
+EXPORT_SYMBOL(ckrm_init_core_class);
+EXPORT_SYMBOL(ckrm_free_core_class);
+EXPORT_SYMBOL(ckrm_release_core_class);
 
-EXPORT_SYMBOL_GPL(ckrm_register_classtype);
-EXPORT_SYMBOL_GPL(ckrm_unregister_classtype);
-EXPORT_SYMBOL_GPL(ckrm_find_classtype_by_name);
+EXPORT_SYMBOL(ckrm_register_classtype);
+EXPORT_SYMBOL(ckrm_unregister_classtype);
+EXPORT_SYMBOL(ckrm_find_classtype_by_name);
 
-EXPORT_SYMBOL_GPL(ckrm_core_grab);
-EXPORT_SYMBOL_GPL(ckrm_core_drop);
-EXPORT_SYMBOL_GPL(ckrm_is_core_valid);
-EXPORT_SYMBOL_GPL(ckrm_validate_and_grab_core);
+EXPORT_SYMBOL(ckrm_core_grab);
+EXPORT_SYMBOL(ckrm_core_drop);
+EXPORT_SYMBOL(ckrm_is_core_valid);
+EXPORT_SYMBOL(ckrm_validate_and_grab_core);
 
-EXPORT_SYMBOL_GPL(ckrm_register_event_set);
-EXPORT_SYMBOL_GPL(ckrm_unregister_event_set);
-EXPORT_SYMBOL_GPL(ckrm_register_event_cb);
-EXPORT_SYMBOL_GPL(ckrm_unregister_event_cb);
+EXPORT_SYMBOL(ckrm_register_event_set);
+EXPORT_SYMBOL(ckrm_unregister_event_set);
+EXPORT_SYMBOL(ckrm_register_event_cb);
+EXPORT_SYMBOL(ckrm_unregister_event_cb);
 
-EXPORT_SYMBOL_GPL(ckrm_class_show_stats);
-EXPORT_SYMBOL_GPL(ckrm_class_show_config);
-EXPORT_SYMBOL_GPL(ckrm_class_show_shares);
+EXPORT_SYMBOL(ckrm_class_show_stats);
+EXPORT_SYMBOL(ckrm_class_show_config);
+EXPORT_SYMBOL(ckrm_class_show_shares);
 
-EXPORT_SYMBOL_GPL(ckrm_class_set_config);
-EXPORT_SYMBOL_GPL(ckrm_class_set_shares);
+EXPORT_SYMBOL(ckrm_class_set_config);
+EXPORT_SYMBOL(ckrm_class_set_shares);
 
-EXPORT_SYMBOL_GPL(ckrm_class_reset_stats);
+EXPORT_SYMBOL(ckrm_class_reset_stats);
index 929c22d..f947f07 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <asm/errno.h>
 #include <linux/sched.h>
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_tc.h>
 #include <linux/ckrm_sched.h>
@@ -145,8 +145,6 @@ static void ckrm_free_cpu_class(void *my_res)
        struct ckrm_cpu_class *cls = my_res, *parres, *childres;
        ckrm_core_class_t *child = NULL;
        int maxlimit;
-       ckrm_lrq_t* queue;
-       int i;
 
        if (!cls) 
                return;
@@ -154,15 +152,6 @@ static void ckrm_free_cpu_class(void *my_res)
        /*the default class can't be freed*/
        if (cls == get_default_cpu_class()) 
                return;
-#if 1
-#warning "ACB: Remove freed class from any classqueues [PL #4233]"
-       for (i = 0 ; i < NR_CPUS ; i++) {
-         queue = &cls->local_queues[i];
-         if (cls_in_classqueue(&queue->classqueue_linkobj))
-           classqueue_dequeue(queue->classqueue,
-                              &queue->classqueue_linkobj);
-       }
-#endif
 
        // Assuming there will be no children when this function is called
        parres = ckrm_get_cpu_class(cls->parent);
index 5f59b37..4a6ea7a 100644 (file)
@@ -22,7 +22,7 @@
 #include <asm/errno.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_tc.h>
 #include <asm/div64.h>
@@ -841,9 +841,8 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online)
                total_pressure += lrq->lrq_load;
        }
 
-#define FIX_SHARES 
-#ifdef FIX_SHARES
-#warning "ACB: fix share initialization problem [PL #4227]"
+#if 1
+#warning "ACB taking out suspicious early return"
 #else
        if (! total_pressure)
                return;
@@ -860,10 +859,6 @@ static void adjust_lrq_weight(struct ckrm_cpu_class *clsptr, int cpu_online)
                        /*give idle class a high share to boost interactiveness */
                        lw = cpu_class_weight(clsptr); 
                else {
-#ifdef FIX_SHARES
-                       if (! total_pressure)
-                               return;
-#endif                 
                        lw = lrq->lrq_load * class_weight;
                        do_div(lw,total_pressure);
                        if (!lw)
@@ -965,8 +960,6 @@ static int thread_exit = 0;
 static int ckrm_cpu_monitord(void *nothing)
 {
        daemonize("ckrm_cpu_ctrld");
-       current->flags |= PF_NOFREEZE;
-
        for (;;) {
                /*sleep for sometime before next try*/
                set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/ckrm/ckrm_events.c b/kernel/ckrm/ckrm_events.c
deleted file mode 100644 (file)
index aad5e25..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* ckrm_events.c - Class-based Kernel Resource Management (CKRM)
- *               - event handling routines
- *
- * Copyright (C) Hubertus Franke, IBM Corp. 2003, 2004
- *           (C) Chandra Seetharaman,  IBM Corp. 2003
- * 
- * 
- * Provides API for event registration and handling for different
- * classtypes.
- *
- * Latest version, more details at http://ckrm.sf.net
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-/* Changes
- *
- * 29 Sep 2004
- *        Separated from ckrm.c
- *  
- */
-
-#include <linux/config.h>
-#include <linux/stddef.h>
-#include <linux/ckrm_events.h>
-
-/*******************************************************************
- *   Event callback invocation
- *******************************************************************/
-
-struct ckrm_hook_cb *ckrm_event_callbacks[CKRM_NONLATCHABLE_EVENTS];
-
-/* Registration / Deregistration / Invocation functions */
-
-int ckrm_register_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb)
-{
-       struct ckrm_hook_cb **cbptr;
-
-       if ((ev < CKRM_LATCHABLE_EVENTS) || (ev >= CKRM_NONLATCHABLE_EVENTS))
-               return 1;
-       cbptr = &ckrm_event_callbacks[ev];
-       while (*cbptr != NULL)
-               cbptr = &((*cbptr)->next);
-       *cbptr = cb;
-       return 0;
-}
-
-int ckrm_unregister_event_cb(enum ckrm_event ev, struct ckrm_hook_cb *cb)
-{
-       struct ckrm_hook_cb **cbptr;
-
-       if ((ev < CKRM_LATCHABLE_EVENTS) || (ev >= CKRM_NONLATCHABLE_EVENTS))
-               return -1;
-       cbptr = &ckrm_event_callbacks[ev];
-       while ((*cbptr != NULL) && (*cbptr != cb))
-               cbptr = &((*cbptr)->next);
-       if (*cbptr)
-               (*cbptr)->next = cb->next;
-       return (*cbptr == NULL);
-}
-
-int ckrm_register_event_set(struct ckrm_event_spec especs[])
-{
-       struct ckrm_event_spec *espec = especs;
-
-       for (espec = especs; espec->ev != -1; espec++)
-               ckrm_register_event_cb(espec->ev, &espec->cb);
-       return 0;
-}
-
-int ckrm_unregister_event_set(struct ckrm_event_spec especs[])
-{
-       struct ckrm_event_spec *espec = especs;
-
-       for (espec = especs; espec->ev != -1; espec++)
-               ckrm_unregister_event_cb(espec->ev, &espec->cb);
-       return 0;
-}
-
-#define ECC_PRINTK(fmt, args...) \
-// printk("%s: " fmt, __FUNCTION__ , ## args)
-
-void ckrm_invoke_event_cb_chain(enum ckrm_event ev, void *arg)
-{
-       struct ckrm_hook_cb *cb, *anchor;
-
-       ECC_PRINTK("%d %x\n", current, ev, arg);
-       if ((anchor = ckrm_event_callbacks[ev]) != NULL) {
-               for (cb = anchor; cb; cb = cb->next)
-                       (*cb->fct) (arg);
-       }
-}
-
index 736b579..01d38c2 100644 (file)
@@ -5,7 +5,7 @@
  * Provides a Memory Resource controller for CKRM
  *
  * Latest version, more details at http://ckrm.sf.net
- *
+ * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  *
  */
 
+/* Code Description: TBD
+ *
+ */
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <asm/errno.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/pagemap.h>
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/pagevec.h>
-#include <linux/parser.h>
+
 #include <linux/ckrm_mem_inline.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
-#include <asm/errno.h>
 
 #define MEM_NAME "mem"
 
 #define CKRM_MEM_MAX_HIERARCHY 2 // allows only upto 2 levels - 0, 1 & 2
 
 /* all 1-level memory_share_class are chained together */
-LIST_HEAD(ckrm_memclass_list);
+static LIST_HEAD(ckrm_memclass_list);
 LIST_HEAD(ckrm_shrink_list);
-spinlock_t ckrm_mem_lock; // protects both lists above
+EXPORT_SYMBOL(ckrm_shrink_list);
+spinlock_t ckrm_mem_lock = SPIN_LOCK_UNLOCKED; // protects both lists above
+EXPORT_SYMBOL(ckrm_mem_lock);
 unsigned int ckrm_tot_lru_pages; // total # of pages in the system
-                                // currently doesn't handle memory add/remove
-struct ckrm_mem_res *ckrm_mem_root_class;
-atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
-static void ckrm_mem_evaluate_all_pages(struct ckrm_mem_res *);
-int ckrm_nr_mem_classes = 0;
+                                                        // currently doesn't handle memory add/remove
+EXPORT_SYMBOL(ckrm_tot_lru_pages);
 
-EXPORT_SYMBOL_GPL(ckrm_memclass_list);
-EXPORT_SYMBOL_GPL(ckrm_shrink_list);
-EXPORT_SYMBOL_GPL(ckrm_mem_lock);
-EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
-EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
-EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
-EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
+static ckrm_mem_res_t *ckrm_mem_root_class;
+atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ckrm_mem_real_count);
+static void ckrm_mem_evaluate_all_pages(void);
 
 /* Initialize rescls values
  * May be called on each rcfs unmount or as part of error recovery
@@ -60,15 +60,6 @@ EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
  * Does not traverse hierarchy reinitializing children.
  */
 
-void
-memclass_release(struct kref *kref)
-{
-       struct ckrm_mem_res *cls = container_of(kref, struct ckrm_mem_res, nr_users);
-       BUG_ON(ckrm_memclass_valid(cls));
-       kfree(cls);
-}
-EXPORT_SYMBOL_GPL(memclass_release);
-
 static void
 set_ckrm_tot_pages(void)
 {
@@ -84,12 +75,11 @@ set_ckrm_tot_pages(void)
 }
 
 static void
-mem_res_initcls_one(struct ckrm_mem_res *res)
+mem_res_initcls_one(void *my_res)
 {
-       int zindex = 0;
-       struct zone *zone;
+       ckrm_mem_res_t *res = my_res;
 
-       memset(res, 0, sizeof(struct ckrm_mem_res));
+       memset(res, 0, sizeof(ckrm_mem_res_t));
 
        res->shares.my_guarantee     = CKRM_SHARE_DONTCARE;
        res->shares.my_limit         = CKRM_SHARE_DONTCARE;
@@ -100,115 +90,21 @@ mem_res_initcls_one(struct ckrm_mem_res *res)
 
        res->pg_guar = CKRM_SHARE_DONTCARE;
        res->pg_limit = CKRM_SHARE_DONTCARE;
-
-       INIT_LIST_HEAD(&res->shrink_list);
-       INIT_LIST_HEAD(&res->mcls_list);
-
-       for_each_zone(zone) {
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
-               res->ckrm_zone[zindex].nr_active = 0;
-               res->ckrm_zone[zindex].nr_inactive = 0;
-               res->ckrm_zone[zindex].zone = zone;
-               res->ckrm_zone[zindex].memcls = res;
-               zindex++;
-       }
-
        res->pg_unused = 0;
-       res->nr_dontcare = 1; // for default class
-       kref_init(&res->nr_users);
-}
-
-static void
-set_impl_guar_children(struct ckrm_mem_res *parres)
-{
-       ckrm_core_class_t *child = NULL;
-       struct ckrm_mem_res *cres;
-       int nr_dontcare = 1; // for defaultclass
-       int guar, impl_guar;
-       int resid = mem_rcbs.resid;
-
-       ckrm_lock_hier(parres->core);
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               // treat NULL cres as don't care as that child is just being
-               // created.
-               // FIXME: need a better way to handle this case.
-               if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       nr_dontcare++;
-               }
-       }
-
-       parres->nr_dontcare = nr_dontcare;
-       guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                       parres->impl_guar : parres->pg_unused;
-       impl_guar = guar / parres->nr_dontcare;
-
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       cres->impl_guar = impl_guar;
-                       set_impl_guar_children(cres);
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-
-}
-
-void
-check_memclass(struct ckrm_mem_res *res, char *str)
-{
-       int i, act = 0, inact = 0;
-       struct zone *zone;
-       struct ckrm_zone *ckrm_zone;
-       struct list_head *pos;
-       struct page *page;
-
-#if 0
-       printk("Check<%s> %s: total=%d\n",
-               str, res->core->name, atomic_read(&res->pg_total));
-#endif
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               act = 0; inact = 0;
-               ckrm_zone = &res->ckrm_zone[i];
-               zone = ckrm_zone->zone;
-               spin_lock_irq(&zone->lru_lock);
-               pos = ckrm_zone->inactive_list.next;
-               while (pos != &ckrm_zone->inactive_list) {
-                       page = list_entry(pos, struct page, lru);
-                       pos = pos->next;
-                       inact++;
-               }
-               pos = ckrm_zone->active_list.next;
-               while (pos != &ckrm_zone->active_list) {
-                       page = list_entry(pos, struct page, lru);
-                       pos = pos->next;
-                       act++;
-               }
-               spin_unlock_irq(&zone->lru_lock);
-#if 0
-               printk("Check<%s>(zone=%d): act %ld, inae %ld lact %d lina %d\n",
-                       str, i, ckrm_zone->nr_active, ckrm_zone->nr_inactive,
-                       act, inact);
-#endif
-       }
 }
-EXPORT_SYMBOL_GPL(check_memclass);
 
 static void *
 mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
 {
-       struct ckrm_mem_res *res, *pres;
+       ckrm_mem_res_t *res, *parres;
 
        if (mem_rcbs.resid == -1) {
                return NULL;
        }
 
-       pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
-       if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
-               printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n",
-                                               CKRM_MEM_MAX_HIERARCHY);
+       parres = ckrm_get_res_class(parent, mem_rcbs.resid, ckrm_mem_res_t);
+       if (parres && (parres->hier == CKRM_MEM_MAX_HIERARCHY)) {
+               // allows only up to CKRM_MEM_MAX_HIERARCHY
                return NULL;
        }
 
@@ -216,23 +112,23 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
                printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
                return NULL;
        }
-
+               
        if (unlikely((parent != NULL) && (ckrm_mem_root_class == NULL))) {
-               printk(KERN_ERR "MEM_RC: child class with no root class!!");
+               printk(KERN_ERR "MEM_RC: creating child class without root class\n");
                return NULL;
        }
-
-       res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
-
+               
+       res = kmalloc(sizeof(ckrm_mem_res_t), GFP_ATOMIC);
+       
        if (res) {
                mem_res_initcls_one(res);
                res->core = core;
                res->parent = parent;
-               spin_lock_irq(&ckrm_mem_lock);
+               spin_lock(&ckrm_mem_lock);
                list_add(&res->mcls_list, &ckrm_memclass_list);
-               spin_unlock_irq(&ckrm_mem_lock);
+               spin_unlock(&ckrm_mem_lock);
                if (parent == NULL) {
-                       // I am part of the root class. So, set the max to
+                       // I am part of the root class. So, set the max to 
                        // number of pages available
                        res->pg_guar = ckrm_tot_lru_pages;
                        res->pg_unused = ckrm_tot_lru_pages;
@@ -240,17 +136,12 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
                        res->hier = 0;
                        ckrm_mem_root_class = res;
                } else {
-                       int guar;
-                       res->hier = pres->hier + 1;
-                       set_impl_guar_children(pres);
-                       guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                               pres->impl_guar : pres->pg_unused;
-                       res->impl_guar = guar / pres->nr_dontcare;
+                       res->hier = parres->hier + 1;
                }
-               ckrm_nr_mem_classes++;
+               mem_class_get(res);
        }
        else
-               printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
+               printk(KERN_ERR "mem_res_alloc: failed GFP_ATOMIC alloc\n");
        return res;
 }
 
@@ -261,17 +152,17 @@ mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
  * child is deleted this should be called after the child is removed.
  */
 static void
-child_maxlimit_changed_local(struct ckrm_mem_res *parres)
+child_maxlimit_changed_local(ckrm_mem_res_t *parres)
 {
        int maxlimit = 0;
-       struct ckrm_mem_res *childres;
+       ckrm_mem_res_t *childres;
        ckrm_core_class_t *child = NULL;
 
        // run thru parent's children and get the new max_limit of the parent
        ckrm_lock_hier(parres->core);
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
                childres = ckrm_get_res_class(child, mem_rcbs.resid,
-                               struct ckrm_mem_res);
+                               ckrm_mem_res_t);
                if (maxlimit < childres->shares.my_limit) {
                        maxlimit = childres->shares.my_limit;
                }
@@ -280,16 +171,47 @@ child_maxlimit_changed_local(struct ckrm_mem_res *parres)
        parres->shares.cur_max_limit = maxlimit;
 }
 
+static void
+mem_res_free(void *my_res)
+{
+       ckrm_mem_res_t *res = my_res;
+       ckrm_mem_res_t *parres;
+
+       if (!res) 
+               return;
+
+       res->shares.my_guarantee = 0;
+       res->shares.my_limit = 0;
+       res->pg_guar = 0;
+       res->pg_limit = 0;
+       res->pg_unused = 0;
+
+       parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, ckrm_mem_res_t);
+       // return child's limit/guarantee to parent node
+       if (parres) {
+               child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0);
+               child_maxlimit_changed_local(parres);
+       }
+       ckrm_mem_evaluate_all_pages();
+       res->core = NULL;
+
+       spin_lock(&ckrm_mem_lock);
+       list_del(&res->mcls_list);
+       spin_unlock(&ckrm_mem_lock);
+       mem_class_put(res);
+       return;
+}
+
 /*
  * Recalculate the guarantee and limit in # of pages... and propagate the
  * same to children.
  * Caller is responsible for protecting res and for the integrity of parres
  */
 static void
-recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
+recalc_and_propagate(ckrm_mem_res_t * res, ckrm_mem_res_t * parres)
 {
        ckrm_core_class_t *child = NULL;
-       struct ckrm_mem_res *cres;
+       ckrm_mem_res_t *childres;
        int resid = mem_rcbs.resid;
        struct ckrm_shares *self = &res->shares;
 
@@ -305,10 +227,8 @@ recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
                        u64 temp = (u64) self->my_guarantee * parres->pg_guar;
                        do_div(temp, par->total_guarantee);
                        res->pg_guar = (int) temp;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
                } else {
                        res->pg_guar = 0;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
                }
 
                if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
@@ -337,112 +257,64 @@ recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
        // propagate to children
        ckrm_lock_hier(res->core);
        while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               recalc_and_propagate(cres, res);
+               childres = ckrm_get_res_class(child, resid, ckrm_mem_res_t);
+               recalc_and_propagate(childres, res);
        }
        ckrm_unlock_hier(res->core);
        return;
 }
 
-static void
-mem_res_free(void *my_res)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *pres;
-
-       if (!res)
-               return;
-
-       ckrm_mem_evaluate_all_pages(res);
-
-       pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-                       struct ckrm_mem_res);
-
-       if (pres) {
-               child_guarantee_changed(&pres->shares,
-                               res->shares.my_guarantee, 0);
-               child_maxlimit_changed_local(pres);
-               recalc_and_propagate(pres, NULL);
-               set_impl_guar_children(pres);
-       }
-
-       res->shares.my_guarantee = 0;
-       res->shares.my_limit = 0;
-       res->pg_guar = 0;
-       res->pg_limit = 0;
-       res->pg_unused = 0;
-
-       spin_lock_irq(&ckrm_mem_lock);
-       list_del_init(&res->mcls_list);
-       spin_unlock_irq(&ckrm_mem_lock);
-
-       res->core = NULL;
-       res->parent = NULL;
-       kref_put(&res->nr_users, memclass_release);
-       ckrm_nr_mem_classes--;
-       return;
-}
-
 static int
 mem_set_share_values(void *my_res, struct ckrm_shares *shares)
 {
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *parres;
-       int rc;
+       ckrm_mem_res_t *res = my_res;
+       ckrm_mem_res_t *parres;
+       int rc = EINVAL;
 
-       if (!res)
+       if (!res) 
                return -EINVAL;
 
-       parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-                       struct ckrm_mem_res);
+       parres = ckrm_get_res_class(res->parent, mem_rcbs.resid, ckrm_mem_res_t);
 
        rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);
 
        if ((rc == 0) && (parres != NULL)) {
                child_maxlimit_changed_local(parres);
                recalc_and_propagate(parres, NULL);
-               set_impl_guar_children(parres);
        }
-
        return rc;
 }
 
 static int
 mem_get_share_values(void *my_res, struct ckrm_shares *shares)
 {
-       struct ckrm_mem_res *res = my_res;
+       ckrm_mem_res_t *res = my_res;
 
-       if (!res)
+       if (!res) 
                return -EINVAL;
        *shares = res->shares;
        return 0;
 }
 
-static int
+static int  
 mem_get_stats(void *my_res, struct seq_file *sfile)
 {
-       struct ckrm_mem_res *res = my_res;
-       struct zone *zone;
-       int active = 0, inactive = 0, fr = 0;
+       ckrm_mem_res_t *res = my_res;
 
-       if (!res)
+       if (!res) 
                return -EINVAL;
 
-       seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
-       if (res == ckrm_mem_root_class) {
-               int i = 0;
-               for_each_zone(zone) {
-                       active += zone->nr_active;
-                       inactive += zone->nr_inactive;
-                       fr += zone->free_pages;
-                       i++;
-               }
-               seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d"
-                               ",free=%d\n", ckrm_tot_lru_pages,
-                               active, inactive, fr);
-       }
-       seq_printf(sfile, "Number of pages used(including pages lent to"
-                       " children): %d\n", atomic_read(&res->pg_total));
+#if 0
+       seq_printf(sfile, "tot %6d;gua %6d;lmt %6d;unu %6d;"
+                       "lnt %6d;bor %6d;rlt %6d\n", atomic_read(&res->pg_total),
+                       res->pg_guar, res->pg_limit, res->pg_unused, res->pg_lent,
+                       res->pg_borrowed, atomic_read(&ckrm_mem_real_count));
+#endif
+
+
+       seq_printf(sfile, "----------- Memory Resource stats start -----------\n");
+       seq_printf(sfile, "Number of pages used(including pages lent to children):"
+                       " %d\n", atomic_read(&res->pg_total));
        seq_printf(sfile, "Number of pages guaranteed: %d\n",
                        res->pg_guar);
        seq_printf(sfile, "Maximum limit of pages: %d\n",
@@ -454,7 +326,7 @@ mem_get_stats(void *my_res, struct seq_file *sfile)
                        res->pg_lent);
        seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
                        res->pg_borrowed);
-       seq_printf(sfile, "---------- Memory Resource stats end ----------\n");
+       seq_printf(sfile, "----------- Memory Resource stats end -----------\n");
 
        return 0;
 }
@@ -465,14 +337,14 @@ mem_change_resclass(void *tsk, void *old, void *new)
        struct mm_struct *mm;
        struct task_struct *task = tsk, *t1;
        struct ckrm_mem_res *prev_mmcls;
-
+       
        if (!task->mm || (new == old) || (old == (void *) -1))
                return;
 
        mm = task->active_mm;
        spin_lock(&mm->peertask_lock);
        prev_mmcls = mm->memclass;
-
+               
        if (new == NULL) {
                list_del_init(&task->mm_peers);
        } else {
@@ -490,130 +362,55 @@ mem_change_resclass(void *tsk, void *old, void *new)
        }
 
        spin_unlock(&mm->peertask_lock);
-       ckrm_mem_evaluate_mm(mm, (struct ckrm_mem_res *) new);
+       ckrm_mem_evaluate_mm(mm);
+       /*
+       printk("chg_cls: task <%s:%d> mm %p oldmm %s newmm %s o %s n %s\n",
+               task->comm, task->pid, mm, prev_mmcls ? prev_mmcls->core->name:
+               "NULL", mm->memclass ? mm->memclass->core->name : "NULL",
+               o ? o->core->name: "NULL", n ? n->core->name: "NULL");  
+       */
        return;
 }
 
-#define MEM_FAIL_OVER "fail_over"
-#define MEM_SHRINK_AT "shrink_at"
-#define MEM_SHRINK_TO "shrink_to"
-#define MEM_SHRINK_COUNT "num_shrinks"
-#define MEM_SHRINK_INTERVAL "shrink_interval"
-
-int ckrm_mem_fail_over = 110;
-int ckrm_mem_shrink_at = 90;
-static int ckrm_mem_shrink_to = 80;
-static int ckrm_mem_shrink_count = 10;
-static int ckrm_mem_shrink_interval = 10;
-
-EXPORT_SYMBOL_GPL(ckrm_mem_fail_over);
-EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
-
+// config file is available only at the root level,
+// so assuming my_res to be the system level class
 static int
-mem_show_config(void *my_res, struct seq_file *sfile)
+mem_set_config(void *my_res, const char *cfgstr)
 {
-       struct ckrm_mem_res *res = my_res;
-
-       if (!res)
-               return -EINVAL;
-
-       seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
-               MEM_NAME,
-               MEM_FAIL_OVER, ckrm_mem_fail_over,
-               MEM_SHRINK_AT, ckrm_mem_shrink_at,
-               MEM_SHRINK_TO, ckrm_mem_shrink_to,
-               MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
-               MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
+       ckrm_mem_res_t *res = my_res;
 
+       printk(KERN_INFO "%s class of %s is called with config<%s>\n",
+                       MEM_NAME, res->core->name, cfgstr);
        return 0;
 }
 
-// config file is available only at the root level,
-// so assuming my_res to be the system level class
-enum memclass_token {
-       mem_fail_over,
-       mem_shrink_at,
-       mem_shrink_to,
-       mem_shrink_count,
-       mem_shrink_interval,
-       mem_err
-};
-
-static match_table_t mem_tokens = {
-       {mem_fail_over, MEM_FAIL_OVER "=%d"},
-       {mem_shrink_at, MEM_SHRINK_AT "=%d"},
-       {mem_shrink_to, MEM_SHRINK_TO "=%d"},
-       {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
-       {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
-       {mem_err, NULL},
-};
-
-static int
-mem_set_config(void *my_res, const char *cfgstr)
+static int 
+mem_show_config(void *my_res, struct seq_file *sfile)
 {
-       char *p;
-       struct ckrm_mem_res *res = my_res;
-       int err = 0, val;
+       struct zone *zone;
+       ckrm_mem_res_t *res = my_res;
+       int active = 0, inactive = 0, fr = 0;
 
        if (!res)
                return -EINVAL;
 
-       while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
-               substring_t args[MAX_OPT_ARGS];
-               int token;
-               if (!*p)
-                       continue;
-
-               token = match_token(p, mem_tokens, args);
-               switch (token) {
-               case mem_fail_over:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_fail_over = val;
-                       }
-                       break;
-               case mem_shrink_at:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_at = val;
-                       }
-                       break;
-               case mem_shrink_to:
-                       if (match_int(args, &val) || (val < 0) || (val > 100)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_to = val;
-                       }
-                       break;
-               case mem_shrink_count:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_count = val;
-                       }
-                       break;
-               case mem_shrink_interval:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_interval = val;
-                       }
-                       break;
-               default:
-                       err = -EINVAL;
-               }
+       for_each_zone(zone) {
+               active += zone->nr_active;
+               inactive += zone->nr_inactive;
+               fr += zone->free_pages;
        }
-       return err;
+       seq_printf(sfile, "res=%s;tot_pages=%d,active=%d,inactive=%d,free=%d\n",
+                       MEM_NAME, ckrm_tot_lru_pages,active,inactive,fr);
+
+
+       return 0;
 }
 
 static int
 mem_reset_stats(void *my_res)
 {
-       struct ckrm_mem_res *res = my_res;
-       printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
-                               res->core->name);
+       ckrm_mem_res_t *res = my_res;
+       printk(KERN_INFO " memclass of %s called for reset\n", res->core->name);
        return 0;
 }
 
@@ -632,7 +429,7 @@ struct ckrm_res_ctlr mem_rcbs = {
        .reset_stats       = mem_reset_stats,
 };
 
-EXPORT_SYMBOL_GPL(mem_rcbs);
+EXPORT_SYMBOL(mem_rcbs);
 
 int __init
 init_ckrm_mem_res(void)
@@ -641,7 +438,6 @@ init_ckrm_mem_res(void)
        int resid = mem_rcbs.resid;
 
        set_ckrm_tot_pages();
-       spin_lock_init(&ckrm_mem_lock);
        clstype = ckrm_find_classtype_by_name("taskclass");
        if (clstype == NULL) {
                printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
@@ -655,7 +451,7 @@ init_ckrm_mem_res(void)
                }
        }
        return ((resid < 0) ? resid : 0);
-}
+}      
 
 void __exit
 exit_ckrm_mem_res(void)
@@ -667,229 +463,360 @@ exit_ckrm_mem_res(void)
 module_init(init_ckrm_mem_res)
 module_exit(exit_ckrm_mem_res)
 
-int
-ckrm_mem_get_shrink_to(void)
+static void
+set_flags_of_children(ckrm_mem_res_t *parres, unsigned int flag)
+{
+       ckrm_mem_res_t *childres;
+       ckrm_core_class_t *child = NULL;
+
+       parres->reclaim_flags |= flag;
+       ckrm_lock_hier(parres->core);
+       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
+               childres = ckrm_get_res_class(child, mem_rcbs.resid,
+                               ckrm_mem_res_t);
+               set_flags_of_children(childres, flag);
+       }
+       ckrm_unlock_hier(parres->core);
+       return;
+}
+
+// FIXME: more attention is needed to this function
+static unsigned int
+set_usage_flags(ckrm_mem_res_t *res)
+{
+       int tot_usage, cls_usage, range, guar;
+
+       if (res->pg_limit == CKRM_SHARE_DONTCARE) {
+                       // No limit is set for the class. don't bother it
+                       res->reclaim_flags = 0;
+                       return res->reclaim_flags;
+       }
+
+       tot_usage = atomic_read(&res->pg_total);
+       cls_usage = tot_usage - res->pg_lent;
+       guar = (res->pg_guar > 0) ? res->pg_guar : 0;
+       range = res->pg_limit - guar;
+
+       if ((tot_usage > (guar + ((110 * range) / 100))) &&
+                               (res->pg_lent > (guar + ((25 * range) / 100)))) {
+               set_flags_of_children(res, CLS_PARENT_OVER);
+       }
+
+       if (cls_usage > (guar + ((110 * range) / 100))) {
+               res->reclaim_flags |= CLS_OVER_110;
+       } else if (cls_usage > (guar + range)) {
+               res->reclaim_flags |= CLS_OVER_100;
+       } else if (cls_usage > (guar + ((3 * range) / 4))) {
+               res->reclaim_flags |= CLS_OVER_75;
+       } else if (cls_usage > (guar + (range / 2))) {
+               res->reclaim_flags |= CLS_OVER_50;
+       } else if (cls_usage > (guar + (range / 4))) {
+               res->reclaim_flags |= CLS_OVER_25;
+       } else if (cls_usage > guar) {
+               res->reclaim_flags |= CLS_OVER_GUAR;
+       } else {
+               res->reclaim_flags = 0;
+       }
+       return res->reclaim_flags;
+}
+
+/*
+ * The functions ckrm_setup_reclamation(), ckrm_teardown_reclamation(),
+ * ckrm_get_reclaim_bits() and the macro ckrm_kick_page() along with the 
+ * macros CLS_* define how the pages are reclaimed.
+ * Keeping this logic behind these interfaces eliminates the need to
+ * change the reclamation code in the VM if we want to change the logic.
+ */
+unsigned int
+ckrm_setup_reclamation(void)
+{
+       ckrm_mem_res_t *res;
+       unsigned int ret = 0;
+
+       spin_lock(&ckrm_mem_lock);
+       set_ckrm_tot_pages();
+       ckrm_mem_root_class->pg_guar = ckrm_tot_lru_pages;
+       ckrm_mem_root_class->pg_unused = ckrm_tot_lru_pages;
+       ckrm_mem_root_class->pg_limit = ckrm_tot_lru_pages;
+       recalc_and_propagate(ckrm_mem_root_class, NULL);
+       list_for_each_entry(res, &ckrm_memclass_list, mcls_list) {
+               ret |= set_usage_flags(res);
+       }
+       spin_unlock(&ckrm_mem_lock);
+       return ret;
+}
+
+void
+ckrm_teardown_reclamation(void)
+{
+       ckrm_mem_res_t *res;
+       spin_lock(&ckrm_mem_lock);
+       list_for_each_entry(res, &ckrm_memclass_list, mcls_list) {
+               res->reclaim_flags = 0;
+       }
+       spin_unlock(&ckrm_mem_lock);
+}
+
+void
+ckrm_get_reclaim_bits(unsigned int *flags, unsigned int *extract)
 {
-       return ckrm_mem_shrink_to;
+       int i, j, mask = 0;
+
+       if (*flags == 0) {
+               *extract = 0;
+               return;
+       }
+
+       if (*flags & CLS_SHRINK) {
+               *extract = CLS_SHRINK;
+               *flags = 0;
+               return;
+       }
+
+       i = fls(*flags);
+       for (j = i-1; j > 0; j--) {
+               mask = (mask<<1) | 1;
+       }
+       *extract = (CLS_FLAGS_ALL & ~mask);
+       *flags &= ~*extract;
+       return;
 }
 
 void
-ckrm_at_limit(struct ckrm_mem_res *cls)
+ckrm_at_limit(ckrm_mem_res_t *cls)
 {
+#ifndef AT_LIMIT_SUPPORT
+#warning "ckrm_at_limit disabled due to problems with memory hog tests"
+#else
        struct zone *zone;
        unsigned long now = jiffies;
 
-       if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
+       if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) || 
                        ((cls->flags & MEM_AT_LIMIT) == MEM_AT_LIMIT)) {
                return;
        }
-       if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
-                  (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
+       if ((cls->last_shrink + (10 * HZ)) < now) { // 10 seconds since last ?
                cls->last_shrink = now;
                cls->shrink_count = 0;
        }
        cls->shrink_count++;
-       if (cls->shrink_count > ckrm_mem_shrink_count) {
+       if (cls->shrink_count > 10) {
                return;
        }
-       spin_lock_irq(&ckrm_mem_lock);
+       spin_lock(&ckrm_mem_lock);
        list_add(&cls->shrink_list, &ckrm_shrink_list);
-       spin_unlock_irq(&ckrm_mem_lock);
+       spin_unlock(&ckrm_mem_lock);
        cls->flags |= MEM_AT_LIMIT;
        for_each_zone(zone) {
                wakeup_kswapd(zone);
                break; // only once is enough
        }
+#endif // AT_LIMIT_SUPPORT
 }
 
-static int
+static int unmapped = 0, changed = 0, unchanged = 0, maxnull = 0,
+anovma = 0, fnovma = 0;
+static void
 ckrm_mem_evaluate_page_anon(struct page* page)
 {
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
+       ckrm_mem_res_t* pgcls = page_class(page);
+       ckrm_mem_res_t* maxshareclass = NULL;
        struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
        struct vm_area_struct *vma;
        struct mm_struct* mm;
-       int ret = 0;
+       int v = 0;
 
        spin_lock(&anon_vma->lock);
        BUG_ON(list_empty(&anon_vma->head));
        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+               v++;
                mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass) < 0) {
+               if (!maxshareclass ||
+                               ckrm_mem_share_compare(maxshareclass, mm->memclass) < 0) {
                        maxshareclass = mm->memclass;
                }
        }
        spin_unlock(&anon_vma->lock);
+       if (!v)
+               anovma++;
 
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
+       if (!maxshareclass)
+               maxnull++;
+       if (maxshareclass && (pgcls != maxshareclass)) {
                ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
+               changed++;
+       } else 
+               unchanged++;
+       return;
 }
 
-static int
-ckrm_mem_evaluate_page_file(struct page* page)
+static void
+ckrm_mem_evaluate_page_file(struct page* page) 
 {
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
+       ckrm_mem_res_t* pgcls = page_class(page);
+       ckrm_mem_res_t* maxshareclass = NULL;
        struct address_space *mapping = page->mapping;
        struct vm_area_struct *vma = NULL;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct prio_tree_iter iter;
        struct mm_struct* mm;
-       int ret = 0;
+       int v = 0;
 
        if (!mapping)
-               return 0;
+               return;
 
        if (!spin_trylock(&mapping->i_mmap_lock))
-               return 0;
+               return;
 
-       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
-                                       pgoff, pgoff) {
+       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,pgoff,pgoff) {
                mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass)<0)
+               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,mm->memclass)<0)
                        maxshareclass = mm->memclass;
        }
        spin_unlock(&mapping->i_mmap_lock);
 
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
+       if (!v)
+               fnovma++;
+       if (!maxshareclass)
+               maxnull++;
+
+       if (maxshareclass && pgcls != maxshareclass) {
                ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
+               changed++;
+       } else 
+               unchanged++;
+       return;
 }
 
-static int
-ckrm_mem_evaluate_page(struct page* page)
+static void
+ckrm_mem_evaluate_page(struct page* page) 
 {
-       int ret = 0;
-       BUG_ON(page->ckrm_zone == NULL);
        if (page->mapping) {
                if (PageAnon(page))
-                       ret = ckrm_mem_evaluate_page_anon(page);
+                       ckrm_mem_evaluate_page_anon(page);
                else
-                       ret = ckrm_mem_evaluate_page_file(page);
-       }
-       return ret;
+                       ckrm_mem_evaluate_page_file(page);
+       } else
+               unmapped++;
+       return;
 }
 
 static void
-ckrm_mem_evaluate_all_pages(struct ckrm_mem_res* res)
+ckrm_mem_evaluate_all_pages()
 {
        struct page *page;
-       struct ckrm_zone *ckrm_zone;
        struct zone *zone;
-       struct list_head *pos, *next;
-       int i;
+       int active = 0, inactive = 0, cleared = 0;
+       int act_cnt, inact_cnt, idx;
+       ckrm_mem_res_t *res;
+
+       spin_lock(&ckrm_mem_lock);
+       list_for_each_entry(res, &ckrm_memclass_list, mcls_list) {
+               res->tmp_cnt = 0;
+       }
+       spin_unlock(&ckrm_mem_lock);
 
-       check_memclass(res, "bef_eval_all_pgs");
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               ckrm_zone = &res->ckrm_zone[i];
-               zone = ckrm_zone->zone;
+       for_each_zone(zone) {
                spin_lock_irq(&zone->lru_lock);
-               pos = ckrm_zone->inactive_list.next;
-               while (pos != &ckrm_zone->inactive_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (!ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page,
-                                               ckrm_mem_root_class);
-                       pos = next;
+               list_for_each_entry(page, &zone->inactive_list, lru) {
+                       ckrm_mem_evaluate_page(page);
+                       active++;
+                       page_class(page)->tmp_cnt++;
+                       if (!test_bit(PG_ckrm_account, &page->flags))
+                               cleared++;
                }
-               pos = ckrm_zone->active_list.next;
-               while (pos != &ckrm_zone->active_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (!ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page,
-                                               ckrm_mem_root_class);
-                       pos = next;
+               list_for_each_entry(page, &zone->active_list, lru) {
+                       ckrm_mem_evaluate_page(page);
+                       inactive++;
+                       page_class(page)->tmp_cnt++;
+                       if (!test_bit(PG_ckrm_account, &page->flags))
+                               cleared++;
                }
                spin_unlock_irq(&zone->lru_lock);
        }
-       check_memclass(res, "aft_eval_all_pgs");
+       printk(KERN_DEBUG "all_pages: active %d inactive %d cleared %d\n", 
+                       active, inactive, cleared);
+       spin_lock(&ckrm_mem_lock);
+       list_for_each_entry(res, &ckrm_memclass_list, mcls_list) {
+               act_cnt = 0; inact_cnt = 0; idx = 0;
+               for_each_zone(zone) {
+                       act_cnt += res->nr_active[idx];
+                       inact_cnt += res->nr_inactive[idx];
+                       idx++;
+               }
+               printk(KERN_DEBUG "all_pages: %s: tmp_cnt %d; act_cnt %d inact_cnt %d\n",
+                       res->core->name, res->tmp_cnt, act_cnt, inact_cnt);
+       }
+       spin_unlock(&ckrm_mem_lock);
+
+       // check all mm's in the system to see which memclass they are attached
+       // to.
        return;
 }
 
-static inline int
+static /*inline*/ int
 class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma,
                pmd_t* pmdir, unsigned long address, unsigned long end)
 {
-       pte_t *pte;
+       pte_t *pte, *orig_pte;
        unsigned long pmd_end;
-
+       
        if (pmd_none(*pmdir))
                return 0;
        BUG_ON(pmd_bad(*pmdir));
-
+       
+       orig_pte = pte = pte_offset_map(pmdir,address);
        pmd_end = (address+PMD_SIZE)&PMD_MASK;
        if (end>pmd_end)
                end = pmd_end;
-
+       
        do {
-               pte = pte_offset_map(pmdir,address);
                if (pte_present(*pte)) {
-                       struct page *page = pte_page(*pte);
                        BUG_ON(mm->memclass == NULL);
-                       if (page->mapping && page->ckrm_zone) {
-                               struct zone *zone = page->ckrm_zone->zone;
-                               spin_lock_irq(&zone->lru_lock);
-                               ckrm_change_page_class(page, mm->memclass);
-                               spin_unlock_irq(&zone->lru_lock);
-                       }
+                       ckrm_change_page_class(pte_page(*pte), mm->memclass);
+                       // ckrm_mem_evaluate_page(pte_page(*pte));
                }
                address += PAGE_SIZE;
-               pte_unmap(pte);
                pte++;
        } while(address && (address<end));
+       pte_unmap(orig_pte);
        return 0;
 }
 
-static inline int
+static /*inline*/ int
 class_migrate_pgd(struct mm_struct* mm, struct vm_area_struct* vma,
                pgd_t* pgdir, unsigned long address, unsigned long end)
 {
        pmd_t* pmd;
        unsigned long pgd_end;
-
+       
        if (pgd_none(*pgdir))
                return 0;
        BUG_ON(pgd_bad(*pgdir));
-
+       
        pmd = pmd_offset(pgdir,address);
        pgd_end = (address+PGDIR_SIZE)&PGDIR_MASK;
-
+       
        if (pgd_end && (end>pgd_end))
                end = pgd_end;
-
+       
        do {
                class_migrate_pmd(mm,vma,pmd,address,end);
-               address = (address+PMD_SIZE)&PMD_MASK;
+               address =  (address+PMD_SIZE)&PMD_MASK;
                pmd++;
        } while (address && (address<end));
        return 0;
 }
 
-static inline int
+static /*inline*/ int
 class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
 {
        pgd_t* pgdir;
        unsigned long address, end;
-
+       
        address = vma->vm_start;
        end = vma->vm_end;
-
+       
        pgdir = pgd_offset(vma->vm_mm, address);
        do {
                class_migrate_pgd(mm,vma,pgdir,address,end);
@@ -901,36 +828,34 @@ class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
 
 /* this function is called with mm->peertask_lock hold */
 void
-ckrm_mem_evaluate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
+ckrm_mem_evaluate_mm(struct mm_struct* mm)
 {
        struct task_struct *task;
-       struct ckrm_mem_res *maxshareclass = def;
+       struct ckrm_mem_res *maxshareclass = NULL;
        struct vm_area_struct *vma;
-
+       
        if (list_empty(&mm->tasklist)) {
                /* We leave the mm->memclass untouched since we believe that one
                 * mm with no task associated will be deleted soon or attach
                 * with another task later.
                 */
-               return;
+               return; 
        }
 
        list_for_each_entry(task, &mm->tasklist, mm_peers) {
-               struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
+               ckrm_mem_res_t* cls = GET_MEM_CLASS(task);
                if (!cls)
                        continue;
-               if (!maxshareclass ||
-                               ckrm_mem_share_compare(maxshareclass,cls)<0 )
+               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,cls)<0 ) 
                        maxshareclass = cls;
        }
 
-       if (maxshareclass && (mm->memclass != maxshareclass)) {
-               if (mm->memclass) {
-                       kref_put(&mm->memclass->nr_users, memclass_release);
-               }
+       if (maxshareclass && (mm->memclass != (void *)maxshareclass)) {
+               if (mm->memclass)
+                       mem_class_put(mm->memclass);
                mm->memclass = maxshareclass;
-               kref_get(&maxshareclass->nr_users);
-
+               mem_class_get(maxshareclass);
+               
                /* Go through all VMA to migrate pages */
                down_read(&mm->mmap_sem);
                vma = mm->mmap;
@@ -948,33 +873,29 @@ ckrm_init_mm_to_task(struct mm_struct * mm, struct task_struct *task)
 {
        spin_lock(&mm->peertask_lock);
        if (!list_empty(&task->mm_peers)) {
-               printk(KERN_ERR "MEM_RC: Task list NOT empty!! emptying...\n");
+               printk(KERN_ERR "CKRM_MEM: Task list should be empty, but is not!!\n");
                list_del_init(&task->mm_peers);
        }
        list_add_tail(&task->mm_peers, &mm->tasklist);
        spin_unlock(&mm->peertask_lock);
-       if (mm->memclass != ckrm_get_mem_class(task))
-               ckrm_mem_evaluate_mm(mm, NULL);
+       if (mm->memclass != GET_MEM_CLASS(task))
+               ckrm_mem_evaluate_mm(mm);
        return;
 }
 
 int
-ckrm_memclass_valid(struct ckrm_mem_res *cls)
+ckrm_memclass_valid(ckrm_mem_res_t *cls)
 {
-       struct ckrm_mem_res *tmp;
-       unsigned long flags;
+       ckrm_mem_res_t *tmp;
 
-       if (!cls || list_empty(&cls->mcls_list)) {
-               return 0;
-       }
-       spin_lock_irqsave(&ckrm_mem_lock, flags);
+       spin_lock(&ckrm_mem_lock);
        list_for_each_entry(tmp, &ckrm_memclass_list, mcls_list) {
                if (tmp == cls) {
                        spin_unlock(&ckrm_mem_lock);
                        return 1;
                }
        }
-       spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+       spin_unlock(&ckrm_mem_lock);
        return 0;
 }
 
diff --git a/kernel/ckrm/ckrm_memcore.c b/kernel/ckrm/ckrm_memcore.c
deleted file mode 100644 (file)
index eeeba24..0000000
+++ /dev/null
@@ -1,628 +0,0 @@
-/* ckrm_memcore.c - Memory Resource Manager for CKRM
- *
- * Copyright (C) Jiantao Kong, IBM Corp. 2003
- *           (C) Chandra Seetharaman, IBM Corp. 2004
- *
- * Provides a Memory Resource controller for CKRM
- *
- * Latest version, more details at http://ckrm.sf.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
-#include <linux/cache.h>
-#include <linux/percpu.h>
-#include <linux/pagevec.h>
-#include <linux/parser.h>
-#include <linux/ckrm_mem_inline.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/errno.h>
-
-#define MEM_RES_NAME "mem"
-
-#define CKRM_MEM_MAX_HIERARCHY 2 /* allows only upto 2 levels - 0, 1 & 2 */
-
-/* all 1-level memory_share_class are chained together */
-LIST_HEAD(ckrm_memclass_list);
-spinlock_t ckrm_mem_lock; /* protects list above */
-unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
-int ckrm_nr_mem_classes = 0;
-struct ckrm_mem_res *ckrm_mem_root_class;
-atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
-
-EXPORT_SYMBOL_GPL(ckrm_memclass_list);
-EXPORT_SYMBOL_GPL(ckrm_mem_lock);
-EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
-EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
-EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
-EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
-
-void
-memclass_release(struct kref *kref)
-{
-       struct ckrm_mem_res *cls = container_of(kref, 
-                               struct ckrm_mem_res, nr_users);
-       kfree(cls);
-}
-EXPORT_SYMBOL_GPL(memclass_release);
-
-static void
-set_ckrm_tot_pages(void)
-{
-       struct zone *zone;
-       int tot_lru_pages = 0;
-
-       for_each_zone(zone) {
-               tot_lru_pages += zone->nr_active;
-               tot_lru_pages += zone->nr_inactive;
-               tot_lru_pages += zone->free_pages;
-       }
-       ckrm_tot_lru_pages = tot_lru_pages;
-}
-
-/* Initialize rescls values
- * May be called on each rcfs unmount or as part of error recovery
- * to make share values sane.
- * Does not traverse hierarchy reinitializing children.
- */
-static void
-mem_res_initcls_one(struct ckrm_mem_res *res)
-{
-       int zindex = 0;
-       struct zone *zone;
-
-       memset(res, 0, sizeof(struct ckrm_mem_res));
-
-       res->shares.my_guarantee     = CKRM_SHARE_DONTCARE;
-       res->shares.my_limit         = CKRM_SHARE_DONTCARE;
-       res->shares.total_guarantee  = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       res->shares.max_limit        = CKRM_SHARE_DFLT_MAX_LIMIT;
-       res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       res->shares.cur_max_limit    = 0;
-
-       res->pg_guar = CKRM_SHARE_DONTCARE;
-       res->pg_limit = CKRM_SHARE_DONTCARE;
-
-       INIT_LIST_HEAD(&res->mcls_list);
-       INIT_LIST_HEAD(&res->shrink_list);
-
-       for_each_zone(zone) {
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
-               INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
-               res->ckrm_zone[zindex].nr_active = 0;
-               res->ckrm_zone[zindex].nr_inactive = 0;
-               res->ckrm_zone[zindex].zone = zone;
-               res->ckrm_zone[zindex].memcls = res;
-               zindex++;
-       }
-
-       res->pg_unused = 0;
-       res->nr_dontcare = 1; /* for default class */
-       kref_init(&res->nr_users);
-}
-
-static void
-set_impl_guar_children(struct ckrm_mem_res *parres)
-{
-       struct ckrm_core_class *child = NULL;
-       struct ckrm_mem_res *cres;
-       int nr_dontcare = 1; // for defaultclass
-       int guar, impl_guar;
-       int resid = mem_rcbs.resid;
-
-       ckrm_lock_hier(parres->core);
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               // treat NULL cres as don't care as that child is just being
-               // created.
-               // FIXME: need a better way to handle this case.
-               if (!cres || cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       nr_dontcare++;
-               }
-       }
-
-       parres->nr_dontcare = nr_dontcare;
-       guar = (parres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                       parres->impl_guar : parres->pg_unused;
-       impl_guar = guar / parres->nr_dontcare;
-
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               if (cres && cres->pg_guar == CKRM_SHARE_DONTCARE) {
-                       cres->impl_guar = impl_guar;
-                       set_impl_guar_children(cres);
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-
-}
-
-static void *
-mem_res_alloc(struct ckrm_core_class *core, struct ckrm_core_class *parent)
-{
-       struct ckrm_mem_res *res, *pres;
-
-       BUG_ON(mem_rcbs.resid == -1);
-
-       pres = ckrm_get_res_class(parent, mem_rcbs.resid, struct ckrm_mem_res);
-       if (pres && (pres->hier == CKRM_MEM_MAX_HIERARCHY)) {
-               printk(KERN_ERR "MEM_RC: only allows hieararchy of %d\n",
-                                               CKRM_MEM_MAX_HIERARCHY);
-               return NULL;
-       }
-
-       if ((parent == NULL) && (ckrm_mem_root_class != NULL)) {
-               printk(KERN_ERR "MEM_RC: Only one root class is allowed\n");
-               return NULL;
-       }
-
-       if ((parent != NULL) && (ckrm_mem_root_class == NULL)) {
-               printk(KERN_ERR "MEM_RC: child class with no root class!!");
-               return NULL;
-       }
-
-       res = kmalloc(sizeof(struct ckrm_mem_res), GFP_ATOMIC);
-
-       if (res) {
-               mem_res_initcls_one(res);
-               res->core = core;
-               res->parent = parent;
-               spin_lock(&ckrm_mem_lock);
-               list_add(&res->mcls_list, &ckrm_memclass_list);
-               spin_unlock(&ckrm_mem_lock);
-               if (parent == NULL) {
-                       /* I am the root class. So, set the max to *
-                        * number of pages available in the system */
-                       res->pg_guar = ckrm_tot_lru_pages;
-                       res->pg_unused = ckrm_tot_lru_pages;
-                       res->pg_limit = ckrm_tot_lru_pages;
-                       res->hier = 0;
-                       ckrm_mem_root_class = res;
-               } else {
-                       int guar;
-                       res->hier = pres->hier + 1;
-                       set_impl_guar_children(pres);
-                       guar = (pres->pg_guar == CKRM_SHARE_DONTCARE) ?
-                               pres->impl_guar : pres->pg_unused;
-                       res->impl_guar = guar / pres->nr_dontcare;
-               }
-               ckrm_nr_mem_classes++;
-       } else
-               printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
-       return res;
-}
-
-/*
- * It is the caller's responsibility to make sure that the parent only
- * has chilren that are to be accounted. i.e if a new child is added
- * this function should be called after it has been added, and if a
- * child is deleted this should be called after the child is removed.
- */
-static void
-child_maxlimit_changed_local(struct ckrm_mem_res *parres)
-{
-       int maxlimit = 0;
-       struct ckrm_mem_res *childres;
-       struct ckrm_core_class *child = NULL;
-
-       /* run thru parent's children and get new max_limit of parent */
-       ckrm_lock_hier(parres->core);
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, mem_rcbs.resid,
-                               struct ckrm_mem_res);
-               if (maxlimit < childres->shares.my_limit) {
-                       maxlimit = childres->shares.my_limit;
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-       parres->shares.cur_max_limit = maxlimit;
-}
-
-/*
- * Recalculate the guarantee and limit in # of pages... and propagate the
- * same to children.
- * Caller is responsible for protecting res and for the integrity of parres
- */
-static void
-recalc_and_propagate(struct ckrm_mem_res * res, struct ckrm_mem_res * parres)
-{
-       struct ckrm_core_class *child = NULL;
-       struct ckrm_mem_res *cres;
-       int resid = mem_rcbs.resid;
-       struct ckrm_shares *self = &res->shares;
-
-       if (parres) {
-               struct ckrm_shares *par = &parres->shares;
-
-               /* calculate pg_guar and pg_limit */
-               if (parres->pg_guar == CKRM_SHARE_DONTCARE ||
-                               self->my_guarantee == CKRM_SHARE_DONTCARE) {
-                       res->pg_guar = CKRM_SHARE_DONTCARE;
-               } else if (par->total_guarantee) {
-                       u64 temp = (u64) self->my_guarantee * parres->pg_guar;
-                       do_div(temp, par->total_guarantee);
-                       res->pg_guar = (int) temp;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
-               } else {
-                       res->pg_guar = 0;
-                       res->impl_guar = CKRM_SHARE_DONTCARE;
-               }
-
-               if (parres->pg_limit == CKRM_SHARE_DONTCARE ||
-                               self->my_limit == CKRM_SHARE_DONTCARE) {
-                       res->pg_limit = CKRM_SHARE_DONTCARE;
-               } else if (par->max_limit) {
-                       u64 temp = (u64) self->my_limit * parres->pg_limit;
-                       do_div(temp, par->max_limit);
-                       res->pg_limit = (int) temp;
-               } else {
-                       res->pg_limit = 0;
-               }
-       }
-
-       /* Calculate unused units */
-       if (res->pg_guar == CKRM_SHARE_DONTCARE) {
-               res->pg_unused = CKRM_SHARE_DONTCARE;
-       } else if (self->total_guarantee) {
-               u64 temp = (u64) self->unused_guarantee * res->pg_guar;
-               do_div(temp, self->total_guarantee);
-               res->pg_unused = (int) temp;
-       } else {
-               res->pg_unused = 0;
-       }
-
-       /* propagate to children */
-       ckrm_lock_hier(res->core);
-       while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
-               cres = ckrm_get_res_class(child, resid, struct ckrm_mem_res);
-               recalc_and_propagate(cres, res);
-       }
-       ckrm_unlock_hier(res->core);
-       return;
-}
-
-static void
-mem_res_free(void *my_res)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *pres;
-
-       if (!res)
-               return;
-
-       ckrm_mem_migrate_all_pages(res, ckrm_mem_root_class);
-
-       pres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-                       struct ckrm_mem_res);
-
-       if (pres) {
-               child_guarantee_changed(&pres->shares,
-                               res->shares.my_guarantee, 0);
-               child_maxlimit_changed_local(pres);
-               recalc_and_propagate(pres, NULL);
-               set_impl_guar_children(pres);
-       }
-
-       /*
-        * Making it all zero as freeing of data structure could 
-        * happen later.
-        */
-       res->shares.my_guarantee = 0;
-       res->shares.my_limit = 0;
-       res->pg_guar = 0;
-       res->pg_limit = 0;
-       res->pg_unused = 0;
-
-       spin_lock(&ckrm_mem_lock);
-       list_del_init(&res->mcls_list);
-       spin_unlock(&ckrm_mem_lock);
-
-       res->core = NULL;
-       res->parent = NULL;
-       kref_put(&res->nr_users, memclass_release);
-       ckrm_nr_mem_classes--;
-       return;
-}
-
-static int
-mem_set_share_values(void *my_res, struct ckrm_shares *shares)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct ckrm_mem_res *parres;
-       int rc;
-
-       if (!res)
-               return -EINVAL;
-
-       parres = ckrm_get_res_class(res->parent, mem_rcbs.resid,
-               struct ckrm_mem_res);
-
-       rc = set_shares(shares, &res->shares, parres ? &parres->shares : NULL);
-
-       if ((rc == 0) && (parres != NULL)) {
-               child_maxlimit_changed_local(parres);
-               recalc_and_propagate(parres, NULL);
-               set_impl_guar_children(parres);
-       }
-
-       return rc;
-}
-
-static int
-mem_get_share_values(void *my_res, struct ckrm_shares *shares)
-{
-       struct ckrm_mem_res *res = my_res;
-
-       if (!res)
-               return -EINVAL;
-       printk(KERN_INFO "get_share called for %s resource of class %s\n",
-                       MEM_RES_NAME, res->core->name);
-       *shares = res->shares;
-       return 0;
-}
-
-static int
-mem_get_stats(void *my_res, struct seq_file *sfile)
-{
-       struct ckrm_mem_res *res = my_res;
-       struct zone *zone;
-       int active = 0, inactive = 0, fr = 0;
-
-       if (!res)
-               return -EINVAL;
-
-       seq_printf(sfile, "--------- Memory Resource stats start ---------\n");
-       if (res == ckrm_mem_root_class) {
-               int i = 0;
-               for_each_zone(zone) {
-                       active += zone->nr_active;
-                       inactive += zone->nr_inactive;
-                       fr += zone->free_pages;
-                       i++;
-               }
-               seq_printf(sfile,"System: tot_pages=%d,active=%d,inactive=%d"
-                               ",free=%d\n", ckrm_tot_lru_pages,
-                               active, inactive, fr);
-       }
-       seq_printf(sfile, "Number of pages used(including pages lent to"
-                       " children): %d\n", atomic_read(&res->pg_total));
-       seq_printf(sfile, "Number of pages guaranteed: %d\n",
-                       res->pg_guar);
-       seq_printf(sfile, "Maximum limit of pages: %d\n",
-                       res->pg_limit);
-       seq_printf(sfile, "Total number of pages available"
-                       "(after serving guarantees to children): %d\n",
-                       res->pg_unused);
-       seq_printf(sfile, "Number of pages lent to children: %d\n",
-                       res->pg_lent);
-       seq_printf(sfile, "Number of pages borrowed from the parent: %d\n",
-                       res->pg_borrowed);
-       seq_printf(sfile, "---------- Memory Resource stats end ----------\n");
-
-       return 0;
-}
-
-static void
-mem_change_resclass(void *tsk, void *old, void *new)
-{
-       struct mm_struct *mm;
-       struct task_struct *task = tsk, *t1;
-       struct ckrm_mem_res *prev_mmcls;
-
-       if (!task->mm || (new == old) || (old == (void *) -1))
-               return;
-
-       mm = task->active_mm;
-       spin_lock(&mm->peertask_lock);
-       prev_mmcls = mm->memclass;
-
-       if (new == NULL) {
-               list_del_init(&task->mm_peers);
-       } else {
-               int found = 0;
-               list_for_each_entry(t1, &mm->tasklist, mm_peers) {
-                       if (t1 == task) {
-                               found++;
-                               break;
-                       }
-               }
-               if (!found) {
-                       list_del_init(&task->mm_peers);
-                       list_add_tail(&task->mm_peers, &mm->tasklist);
-               }
-       }
-
-       spin_unlock(&mm->peertask_lock);
-       ckrm_mem_migrate_mm(mm, (struct ckrm_mem_res *) new);
-       return;
-}
-
-#define MEM_FAIL_OVER "fail_over"
-#define MEM_SHRINK_AT "shrink_at"
-#define MEM_SHRINK_TO "shrink_to"
-#define MEM_SHRINK_COUNT "num_shrinks"
-#define MEM_SHRINK_INTERVAL "shrink_interval"
-
-int ckrm_mem_fail_at = 110;
-int ckrm_mem_shrink_at = 90;
-int ckrm_mem_shrink_to = 80;
-int ckrm_mem_shrink_count = 10;
-int ckrm_mem_shrink_interval = 10;
-
-EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
-EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
-EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
-
-static int
-mem_show_config(void *my_res, struct seq_file *sfile)
-{
-       struct ckrm_mem_res *res = my_res;
-
-       if (!res)
-               return -EINVAL;
-
-       seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
-               MEM_RES_NAME,
-               MEM_FAIL_OVER, ckrm_mem_fail_at,
-               MEM_SHRINK_AT, ckrm_mem_shrink_at,
-               MEM_SHRINK_TO, ckrm_mem_shrink_to,
-               MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
-               MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
-
-       return 0;
-}
-
-typedef int __bitwise memclass_token_t;
-
-enum memclass_token {
-       mem_fail_over = (__force memclass_token_t) 1,
-       mem_shrink_at = (__force memclass_token_t) 2,
-       mem_shrink_to = (__force memclass_token_t) 3,
-       mem_shrink_count = (__force memclass_token_t) 4,
-       mem_shrink_interval = (__force memclass_token_t) 5,
-       mem_err = (__force memclass_token_t) 6
-};
-
-static match_table_t mem_tokens = {
-       {mem_fail_over, MEM_FAIL_OVER "=%d"},
-       {mem_shrink_at, MEM_SHRINK_AT "=%d"},
-       {mem_shrink_to, MEM_SHRINK_TO "=%d"},
-       {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
-       {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
-       {mem_err, NULL},
-};
-
-static int
-mem_set_config(void *my_res, const char *cfgstr)
-{
-       char *p;
-       struct ckrm_mem_res *res = my_res;
-       int err = 0, val;
-
-       if (!res)
-               return -EINVAL;
-
-       while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
-               substring_t args[MAX_OPT_ARGS];
-               int token;
-               if (!*p)
-                       continue;
-
-               token = match_token(p, mem_tokens, args);
-               switch (token) {
-               case mem_fail_over:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_fail_at = val;
-                       }
-                       break;
-               case mem_shrink_at:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_at = val;
-                       }
-                       break;
-               case mem_shrink_to:
-                       if (match_int(args, &val) || (val < 0) || (val > 100)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_to = val;
-                       }
-                       break;
-               case mem_shrink_count:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_count = val;
-                       }
-                       break;
-               case mem_shrink_interval:
-                       if (match_int(args, &val) || (val <= 0)) {
-                               err = -EINVAL;
-                       } else {
-                               ckrm_mem_shrink_interval = val;
-                       }
-                       break;
-               default:
-                       err = -EINVAL;
-               }
-       }
-       return err;
-}
-
-static int
-mem_reset_stats(void *my_res)
-{
-       struct ckrm_mem_res *res = my_res;
-       printk(KERN_INFO "MEM_RC: reset stats called for class %s\n",
-                               res->core->name);
-       return 0;
-}
-
-struct ckrm_res_ctlr mem_rcbs = {
-       .res_name          = MEM_RES_NAME,
-       .res_hdepth        = CKRM_MEM_MAX_HIERARCHY,
-       .resid             = -1,
-       .res_alloc         = mem_res_alloc,
-       .res_free          = mem_res_free,
-       .set_share_values  = mem_set_share_values,
-       .get_share_values  = mem_get_share_values,
-       .get_stats         = mem_get_stats,
-       .change_resclass   = mem_change_resclass,
-       .show_config       = mem_show_config,
-       .set_config        = mem_set_config,
-       .reset_stats       = mem_reset_stats,
-};
-
-EXPORT_SYMBOL_GPL(mem_rcbs);
-
-int __init
-init_ckrm_mem_res(void)
-{
-       struct ckrm_classtype *clstype;
-       int resid = mem_rcbs.resid;
-
-       set_ckrm_tot_pages();
-       spin_lock_init(&ckrm_mem_lock);
-       clstype = ckrm_find_classtype_by_name("taskclass");
-       if (clstype == NULL) {
-               printk(KERN_INFO " Unknown ckrm classtype<taskclass>");
-               return -ENOENT;
-       }
-
-       if (resid == -1) {
-               resid = ckrm_register_res_ctlr(clstype, &mem_rcbs);
-               if (resid != -1) {
-                       mem_rcbs.classtype = clstype;
-               }
-       }
-       return ((resid < 0) ? resid : 0);
-}
-
-void __exit
-exit_ckrm_mem_res(void)
-{
-       ckrm_unregister_res_ctlr(&mem_rcbs);
-       mem_rcbs.resid = -1;
-}
-
-module_init(init_ckrm_mem_res)
-module_exit(exit_ckrm_mem_res)
-MODULE_LICENSE("GPL");
diff --git a/kernel/ckrm/ckrm_memctlr.c b/kernel/ckrm/ckrm_memctlr.c
deleted file mode 100644 (file)
index a8ae7a6..0000000
+++ /dev/null
@@ -1,439 +0,0 @@
-/* ckrm_memctlr.c - Basic routines for the CKRM memory controller
- *
- * Copyright (C) Jiantao Kong, IBM Corp. 2003
- *           (C) Chandra Seetharaman, IBM Corp. 2004
- *
- * Provides a Memory Resource controller for CKRM
- *
- * Latest version, more details at http://ckrm.sf.net
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/ckrm_mem_inline.h>
-
-static int
-ckrm_mem_evaluate_page_anon(struct page* page)
-{
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
-       struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
-       struct vm_area_struct *vma;
-       struct mm_struct* mm;
-       int ret = 0;
-
-       if (!spin_trylock(&anon_vma->lock))
-               return 0;
-       BUG_ON(list_empty(&anon_vma->head));
-       list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-               mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass) < 0) {
-                       maxshareclass = mm->memclass;
-               }
-       }
-       spin_unlock(&anon_vma->lock);
-
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
-               ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
-}
-
-static int
-ckrm_mem_evaluate_page_file(struct page* page)
-{
-       struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
-       struct ckrm_mem_res* maxshareclass = NULL;
-       struct address_space *mapping = page->mapping;
-       struct vm_area_struct *vma = NULL;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct prio_tree_iter iter;
-       struct mm_struct* mm;
-       int ret = 0;
-
-       if (!mapping)
-               return 0;
-
-       if (!spin_trylock(&mapping->i_mmap_lock))
-               return 0;
-
-       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
-                                       pgoff, pgoff) {
-               mm = vma->vm_mm;
-               if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
-                               mm->memclass)<0)
-                       maxshareclass = mm->memclass;
-       }
-       spin_unlock(&mapping->i_mmap_lock);
-
-       if (!maxshareclass) {
-               maxshareclass = ckrm_mem_root_class;
-       }
-       if (pgcls != maxshareclass) {
-               ckrm_change_page_class(page, maxshareclass);
-               ret = 1;
-       }
-       return ret;
-}
-
-static int
-ckrm_mem_evaluate_page(struct page* page)
-{
-       int ret = 0;
-       if (page->mapping) {
-               if (PageAnon(page))
-                       ret = ckrm_mem_evaluate_page_anon(page);
-               else
-                       ret = ckrm_mem_evaluate_page_file(page);
-       }
-       return ret;
-}
-
-void
-ckrm_mem_migrate_all_pages(struct ckrm_mem_res* from, struct ckrm_mem_res* def)
-{
-       int i;
-       struct page *page;
-       struct zone *zone;
-       struct list_head *pos, *next;
-       struct ckrm_zone *ckrm_zone;
-
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               ckrm_zone = &from->ckrm_zone[i];
-               zone = ckrm_zone->zone;
-               spin_lock_irq(&zone->lru_lock);
-               pos = ckrm_zone->inactive_list.next;
-               while (pos != &ckrm_zone->inactive_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page, def);
-                       pos = next;
-               }
-               pos = ckrm_zone->active_list.next;
-               while (pos != &ckrm_zone->active_list) {
-                       next = pos->next;
-                       page = list_entry(pos, struct page, lru);
-                       if (ckrm_mem_evaluate_page(page))
-                               ckrm_change_page_class(page, def);
-                       pos = next;
-               }
-               spin_unlock_irq(&zone->lru_lock);
-       }
-       return;
-}
-
-static inline int
-class_migrate_pmd(struct mm_struct* mm, struct vm_area_struct* vma,
-               pmd_t* pmdir, unsigned long address, unsigned long end)
-{
-       pte_t *pte;
-       unsigned long pmd_end;
-
-       if (pmd_none(*pmdir))
-               return 0;
-       BUG_ON(pmd_bad(*pmdir));
-
-       pmd_end = (address+ PMD_SIZE) & PMD_MASK;
-       if (end > pmd_end)
-               end = pmd_end;
-
-       do {
-               pte = pte_offset_map(pmdir, address);
-               if (pte_present(*pte)) {
-                       struct page *page = pte_page(*pte);
-                       struct ckrm_zone *czone = page_ckrmzone(page);
-                       if (page->mapping && czone) {
-                               struct zone *zone = czone->zone;
-                               spin_lock_irq(&zone->lru_lock);
-                               ckrm_change_page_class(page, mm->memclass);
-                               spin_unlock_irq(&zone->lru_lock);
-                       }
-               }
-               address += PAGE_SIZE;
-               pte_unmap(pte);
-               pte++;
-       } while(address && (address < end));
-       return 0;
-}
-
-static inline int
-class_migrate_pgd(struct mm_struct* mm, struct vm_area_struct* vma,
-               pgd_t* pgdir, unsigned long address, unsigned long end)
-{
-       pmd_t* pmd;
-       unsigned long pgd_end;
-
-       if (pgd_none(*pgdir))
-               return 0;
-       BUG_ON(pgd_bad(*pgdir));
-
-       pmd = pmd_offset(pgdir, address);
-       pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
-
-       if (pgd_end && (end > pgd_end))
-               end = pgd_end;
-
-       do {
-               class_migrate_pmd(mm, vma, pmd, address, end);
-               address = (address + PMD_SIZE) & PMD_MASK;
-               pmd++;
-       } while (address && (address < end));
-       return 0;
-}
-
-static inline int
-class_migrate_vma(struct mm_struct* mm, struct vm_area_struct* vma)
-{
-       pgd_t* pgdir;
-       unsigned long address, end;
-
-       address = vma->vm_start;
-       end = vma->vm_end;
-
-       pgdir = pgd_offset(vma->vm_mm, address);
-       do {
-               class_migrate_pgd(mm, vma, pgdir, address, end);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               pgdir++;
-       } while(address && (address < end));
-       return 0;
-}
-
-/* this function is called with mm->peertask_lock hold */
-void
-ckrm_mem_migrate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
-{
-       struct task_struct *task;
-       struct vm_area_struct *vma;
-       struct ckrm_mem_res *maxshareclass = def;
-
-       if (list_empty(&mm->tasklist)) {
-               /* We leave the mm->memclass untouched since we believe that one
-                * mm with no task associated will be deleted soon or attach
-                * with another task later.
-                */
-               return;
-       }
-
-       list_for_each_entry(task, &mm->tasklist, mm_peers) {
-               struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
-               if (!cls)
-                       continue;
-               if (!maxshareclass ||
-                               ckrm_mem_share_compare(maxshareclass,cls)<0 )
-                       maxshareclass = cls;
-       }
-
-       if (maxshareclass && (mm->memclass != maxshareclass)) {
-               if (mm->memclass) {
-                       kref_put(&mm->memclass->nr_users, memclass_release);
-               }
-               mm->memclass = maxshareclass;
-               kref_get(&maxshareclass->nr_users);
-
-               /* Go through all VMA to migrate pages */
-               down_read(&mm->mmap_sem);
-               vma = mm->mmap;
-               while(vma) {
-                       class_migrate_vma(mm, vma);
-                       vma = vma->vm_next;
-               }
-               up_read(&mm->mmap_sem);
-       }
-       return;
-}
-
-static int
-shrink_weight(struct ckrm_zone *czone)
-{
-       u64 temp;
-       struct zone *zone = czone->zone;
-       struct ckrm_mem_res *cls = czone->memcls;
-       int zone_usage, zone_guar, zone_total, guar, ret, cnt;
-
-       zone_usage = czone->nr_active + czone->nr_inactive;
-       czone->active_over = czone->inactive_over = 0;
-
-       if (zone_usage < SWAP_CLUSTER_MAX * 4)
-               return 0;
-
-       if (cls->pg_guar == CKRM_SHARE_DONTCARE) {
-               // no guarantee for this class. use implicit guarantee
-               guar = cls->impl_guar / cls->nr_dontcare;
-       } else {
-               guar = cls->pg_unused / cls->nr_dontcare;
-       }
-       zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
-       temp = (u64) guar * zone_total;
-       do_div(temp, ckrm_tot_lru_pages);
-       zone_guar = (int) temp;
-
-       ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
-                               (zone_usage - zone_guar) : 0;
-       if (ret) {
-               cnt = czone->nr_active - (2 * zone_guar / 3);
-               if (cnt > 0)
-                       czone->active_over = cnt;
-               cnt = czone->active_over + czone->nr_inactive
-                                       - zone_guar / 3;
-               if (cnt > 0)
-                       czone->inactive_over = cnt;
-       }
-       return ret;
-}
-
-/* insert an entry to the list and sort decendently*/
-static void
-list_add_sort(struct list_head *entry, struct list_head *head)
-{
-       struct ckrm_zone *czone, *new =
-                       list_entry(entry, struct ckrm_zone, victim_list);
-       struct list_head* pos = head->next;
-
-       while (pos != head) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               if (new->shrink_weight > czone->shrink_weight) {
-                       __list_add(entry, pos->prev, pos);
-                       return;
-               }
-               pos = pos->next;
-       }
-       list_add_tail(entry, head);
-       return; 
-}
-
-static void
-shrink_choose_victims(struct list_head *victims,
-               unsigned long nr_active, unsigned long nr_inactive)
-{
-       unsigned long nr;
-       struct ckrm_zone* czone;
-       struct list_head *pos, *next;
-
-       pos = victims->next;
-       while ((pos != victims) && (nr_active || nr_inactive)) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               
-               if (nr_active && czone->active_over) {
-                       nr = min(nr_active, czone->active_over);
-                       czone->shrink_active += nr;
-                       czone->active_over -= nr;
-                       nr_active -= nr;
-               }
-
-               if (nr_inactive && czone->inactive_over) {
-                       nr = min(nr_inactive, czone->inactive_over);
-                       czone->shrink_inactive += nr;
-                       czone->inactive_over -= nr;
-                       nr_inactive -= nr;
-               }
-               pos = pos->next;
-       }
-
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               next = pos->next;
-               if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
-                       list_del_init(pos);
-                       ckrm_clear_shrink(czone);
-               }
-               pos = next;
-       }       
-       return;
-}
-
-void
-shrink_get_victims(struct zone *zone, unsigned long nr_active,
-               unsigned long nr_inactive, struct list_head *victims)
-{
-       struct list_head *pos;
-       struct ckrm_mem_res *cls;
-       struct ckrm_zone *czone;
-       int zoneindex = zone_idx(zone);
-       
-       if (ckrm_nr_mem_classes <= 1) {
-               if (ckrm_mem_root_class) {
-                       czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
-                       if (!ckrm_test_set_shrink(czone)) {
-                               list_add(&czone->victim_list, victims);
-                               czone->shrink_active = nr_active;
-                               czone->shrink_inactive = nr_inactive;
-                       }
-               }
-               return;
-       }
-       spin_lock(&ckrm_mem_lock);
-       list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
-               czone = cls->ckrm_zone + zoneindex;
-               if (ckrm_test_set_shrink(czone))
-                       continue;
-
-               czone->shrink_active = 0;
-               czone->shrink_inactive = 0;
-               czone->shrink_weight = shrink_weight(czone);
-               if (czone->shrink_weight) {
-                       list_add_sort(&czone->victim_list, victims);
-               } else {
-                       ckrm_clear_shrink(czone);
-               }
-       }
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
-       }
-       shrink_choose_victims(victims, nr_active, nr_inactive);
-       spin_unlock(&ckrm_mem_lock);
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
-       }
-}
-
-LIST_HEAD(ckrm_shrink_list);
-void
-ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
-{
-       struct zone *zone;
-       unsigned long now = jiffies;
-       int order;
-
-       if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
-                       ((cls->flags & CLS_AT_LIMIT) == CLS_AT_LIMIT)) {
-               return;
-       }
-       if ((cls->last_shrink > now) /* jiffies wrapped around */ ||
-                  (cls->last_shrink + (ckrm_mem_shrink_interval * HZ)) < now) {
-               cls->last_shrink = now;
-               cls->shrink_count = 0;
-       }
-       cls->shrink_count++;
-       if (cls->shrink_count > ckrm_mem_shrink_count) {
-               return;
-       }
-       spin_lock(&ckrm_mem_lock);
-       list_add(&cls->shrink_list, &ckrm_shrink_list);
-       spin_unlock(&ckrm_mem_lock);
-       cls->flags |= CLS_AT_LIMIT;
-       for_each_zone(zone) {
-               /* This is just a number to get to wakeup kswapd */
-               order = atomic_read(&cls->pg_total) -
-                       ((ckrm_mem_shrink_to * cls->pg_limit) / 100);
-               wakeup_kswapd(zone);
-               break; // only once is enough
-       }
-}
diff --git a/kernel/ckrm/ckrm_null_class.c b/kernel/ckrm/ckrm_null_class.c
deleted file mode 100644 (file)
index 7ea79d1..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-/* kernel/ckrm/ckrm_null_class.c - NULL TaskClass controller for CKRM
- *
- * Copyright (C) Haoqiang Zheng,     IBM Corp. 2004
- *           (C) Hubertus Franke,    IBM Corp. 2004
- *
- * Copyright (C) Marc E. Fiuczynski, Princeton University 2005
- *               Adapted from ckrm_cpu_class.c.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/errno.h>
-#include <linux/ckrm_events.h>
-#include <linux/ckrm_rc.h>
-#include <linux/ckrm_tc.h>
-#include <linux/ckrm_classqueue.h>
-#include <linux/seq_file.h>
-
-#define CKRM_NULL_CLASS_MAGIC 0xdeadbeef
-
-static struct ckrm_res_ctlr null_rcbs;
-
-/*
- * manages the class status
- * there should be only one instance of this object for each class in the whole system  
- */
-struct ckrm_null_class {
-       struct ckrm_core_class *core;
-       struct ckrm_core_class *parent;
-       struct ckrm_shares shares;
-       spinlock_t cnt_lock;    // always grab parent's lock first and then child's
-       unsigned long magic;    //for debugging
-};
-
-/*
- *  initialize a class object and its local queues
- */
-static void init_null_class(struct ckrm_null_class *cls,ckrm_shares_t* shares) 
-{
-       cls->shares = *shares;
-       cls->cnt_lock = SPIN_LOCK_UNLOCKED;
-       cls->magic = CKRM_NULL_CLASS_MAGIC;
-}
-
-static inline void set_default_share(ckrm_shares_t *shares)
-{
-       shares->my_guarantee     = 0;
-       shares->total_guarantee  = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       shares->unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
-       shares->my_limit         = CKRM_SHARE_DFLT_MAX_LIMIT;
-       shares->max_limit        = CKRM_SHARE_DFLT_MAX_LIMIT;
-       shares->cur_max_limit    = 0;
-}
-
-static inline int valid_null_class(struct ckrm_null_class * cls)
-{
-       return (cls && cls->magic == CKRM_NULL_CLASS_MAGIC);
-}
-
-
-static struct ckrm_null_class * ckrm_get_null_class(struct ckrm_core_class *core)
-{
-       struct ckrm_null_class * cls;
-       cls = ckrm_get_res_class(core, null_rcbs.resid, struct ckrm_null_class);
-       if (valid_null_class(cls))
-               return cls;
-       else
-               return NULL;
-}
-
-
-static struct ckrm_null_class default_null_class_obj;
-
-static struct ckrm_null_class * get_default_null_class(void) {
-       return (&default_null_class_obj);
-}
-
-
-static void* ckrm_alloc_null_class(struct ckrm_core_class *core, struct ckrm_core_class *parent) 
-{              
-       struct ckrm_null_class *cls;
-
-       if (! parent) /*root class*/
-               cls =  get_default_null_class();
-       else
-               cls = (struct ckrm_null_class *) kmalloc(sizeof(struct ckrm_null_class),GFP_ATOMIC);
-
-       if (cls) {
-               ckrm_shares_t shares;           
-               if ((! parent) && (core)) { 
-                       /*
-                        * the default class is already initialized
-                        * so only update the core structure
-                        */
-                       cls->core = core;                       
-               } else {
-                       set_default_share(&shares);
-                       init_null_class(cls,&shares);
-                       cls->core = core;
-                       cls->parent = parent;                   
-               }
-       } else
-               printk(KERN_ERR"alloc_null_class failed\n");
-
-       return cls;
-}              
-
-/*
- * hzheng: this is not a stable implementation
- *         need to check race condition issue here
- */            
-static void ckrm_free_null_class(void *my_res) 
-{                      
-       struct ckrm_null_class *cls = my_res, *parres, *childres;
-       ckrm_core_class_t *child = NULL;
-       int maxlimit;
-
-       if (!cls) 
-               return;
-
-       /*the default class can't be freed*/
-       if (cls == get_default_null_class()) 
-               return;
-
-       // Assuming there will be no children when this function is called
-       parres = ckrm_get_null_class(cls->parent);
-
-       // return child's limit/guarantee to parent node
-       spin_lock(&parres->cnt_lock);
-       child_guarantee_changed(&parres->shares, cls->shares.my_guarantee, 0);
-
-       // run thru parent's children and get the new max_limit of the parent
-       ckrm_lock_hier(parres->core);
-       maxlimit = 0;
-       while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               childres = ckrm_get_null_class(child);
-               if (maxlimit < childres->shares.my_limit) {
-                       maxlimit = childres->shares.my_limit;
-               }
-       }
-       ckrm_unlock_hier(parres->core);
-       if (parres->shares.cur_max_limit < maxlimit) {
-               parres->shares.cur_max_limit = maxlimit;
-       }
-
-       spin_unlock(&parres->cnt_lock);
-       kfree(cls);
-}                              
-
-/*
- *  the system will adjust to the new share automatically  
- */                    
-static int ckrm_null_set_share(void *my_res, struct ckrm_shares *new_share) 
-{      
-        struct ckrm_null_class *parres, *cls = my_res;
-        struct ckrm_shares *cur = &cls->shares, *par;
-        int rc = -EINVAL;
-
-        if (!cls)
-               return rc;
-
-        if (cls->parent) {
-                parres = ckrm_get_null_class(cls->parent);
-                spin_lock(&parres->cnt_lock);
-                spin_lock(&cls->cnt_lock);
-                par = &parres->shares;
-        } else {
-                spin_lock(&cls->cnt_lock);
-                par = NULL;
-                parres = NULL;
-        }
-
-       /*
-        * hzheng: CKRM_SHARE_DONTCARE should be handled
-        */
-       if (new_share->my_guarantee == CKRM_SHARE_DONTCARE)
-               new_share->my_guarantee = 0;
-
-       rc = set_shares(new_share, cur, par);
-       if (cur->my_limit == CKRM_SHARE_DONTCARE)
-               cur->my_limit = cur->max_limit;
-
-
-       spin_unlock(&cls->cnt_lock);
-       if (cls->parent) {
-               spin_unlock(&parres->cnt_lock);
-       }
-
-       return rc;
-}                                                      
-                       
-static int ckrm_null_get_share(void *my_res,
-                             struct ckrm_shares *shares)
-{                      
-       struct ckrm_null_class *cls = my_res;
-
-        if (!cls)
-               return -EINVAL;
-       *shares = cls->shares;
-       return 0;
-}                              
-
-static int ckrm_null_get_stats(void *my_res, struct seq_file * sfile)
-{
-       struct ckrm_null_class *cls = my_res;
-
-       if (!cls) 
-               return -EINVAL;
-
-       seq_printf(sfile, "-------- Null Class Status Start---------\n");
-       seq_printf(sfile, "Share:\n\tgrt= %d limit= %d total_grt= %d max_limit= %d\n",
-                  cls->shares.my_guarantee,
-                  cls->shares.my_limit,
-                  cls->shares.total_guarantee,
-                  cls->shares.max_limit);
-       seq_printf(sfile, "\tunused_grt= %d cur_max_limit= %d\n",
-                  cls->shares.unused_guarantee,
-                  cls->shares.cur_max_limit);
-
-       seq_printf(sfile, "-------- Null Class Status END ---------\n");
-
-       return 0;
-}
-
-/*
- * task will remain in the same null but on a different local runqueue
- */
-static void ckrm_null_change_class(void *task, void *old, void *new)
-{              
-       /*sanity checking*/
-       if (!task || ! old || !new)
-               return; 
-
-       /* hook to controller */
-}                                                      
-
-/*dummy function, not used*/
-static int ckrm_null_show_config(void *my_res, struct seq_file *sfile)
-{
-       struct ckrm_null_class *cls = my_res;
-
-       if (!cls) 
-               return -EINVAL;
-
-       seq_printf(sfile, "cls=%s,parameter=somevalue\n","ckrm_null class");
-       return 0;
-}
-
-/*dummy function, not used*/
-static int ckrm_null_set_config(void *my_res, const char *cfgstr)
-{
-       struct ckrm_nullclass *cls = my_res;
-
-       if (!cls) 
-               return -EINVAL;
-       printk(KERN_DEBUG "ckrm_null config='%s'\n",cfgstr);
-       return 0;
-}
-       
-static struct ckrm_res_ctlr null_rcbs = {
-       .res_name          = "null",
-       .res_hdepth        = 1,
-       .resid             = -1,
-       .res_alloc         = ckrm_alloc_null_class,
-       .res_free          = ckrm_free_null_class,
-       .set_share_values  = ckrm_null_set_share,
-       .get_share_values  = ckrm_null_get_share,
-       .get_stats         = ckrm_null_get_stats,
-       .show_config       = ckrm_null_show_config,
-       .set_config        = ckrm_null_set_config,
-       .change_resclass   = ckrm_null_change_class,
-};
-
-int __init init_ckrm_null_res(void)
-{
-       struct ckrm_classtype *clstype;
-       int resid = null_rcbs.resid;
-
-       clstype = ckrm_find_classtype_by_name("taskclass");
-       if (clstype == NULL) {
-               printk(KERN_INFO" Unknown ckrm classtype<taskclass>");
-               return -ENOENT;
-       }
-
-       /* Initialize default class obj before registering with core */
-       ckrm_alloc_null_class(NULL,NULL);
-
-       if (resid == -1) { /*not registered */
-               resid = ckrm_register_res_ctlr(clstype,&null_rcbs);
-               printk(KERN_DEBUG "........init_ckrm_null_res , resid= %d\n",resid);
-       }
-       return 0;
-}
-
-void __exit exit_ckrm_null_res(void)
-{
-       ckrm_unregister_res_ctlr(&null_rcbs);
-       null_rcbs.resid = -1;
-}
-
-module_init(init_ckrm_null_res)
-module_exit(exit_ckrm_null_res)
index c058305..61517ae 100644 (file)
  *
  */
 
+/* Changes
+ * 
+ * 31 Mar 2004: Created
+ * 
+ */
+
 /*
- * CKRM Resource controller for tracking number of tasks in a class.
+ * Code Description: TBD
  */
 
 #include <linux/module.h>
 #include <asm/div64.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_rc.h>
 #include <linux/ckrm_tc.h>
 #include <linux/ckrm_tsk.h>
 
-#define TOTAL_NUM_TASKS (131072)       /* 128 K */
+#define TOTAL_NUM_TASKS (131072)       // 128 K
 #define NUMTASKS_DEBUG
 #define NUMTASKS_NAME "numtasks"
 
-struct ckrm_numtasks {
-       struct ckrm_core_class *core;   /* the core i am part of... */
-       struct ckrm_core_class *parent; /* parent of the core above. */
+typedef struct ckrm_numtasks {
+       struct ckrm_core_class *core;   // the core i am part of...
+       struct ckrm_core_class *parent; // parent of the core above.
        struct ckrm_shares shares;
-       spinlock_t cnt_lock;    /* always grab parent's lock before child's */
-       int cnt_guarantee;      /* num_tasks guarantee in local units */
-       int cnt_unused;         /* has to borrow if more than this is needed */
-       int cnt_limit;          /* no tasks over this limit. */
-       atomic_t cnt_cur_alloc; /* current alloc from self */
-       atomic_t cnt_borrowed;  /* borrowed from the parent */
-
-       int over_guarantee;     /* turn on/off when cur_alloc goes  */
-                               /* over/under guarantee */
-
-       /* internally maintained statictics to compare with max numbers */
-       int limit_failures;     /* # failures as request was over the limit */
-       int borrow_sucesses;    /* # successful borrows */
-       int borrow_failures;    /* # borrow failures */
-
-       /* Maximum the specific statictics has reached. */
+       spinlock_t cnt_lock;    // always grab parent's lock before child's
+       int cnt_guarantee;      // num_tasks guarantee in local units
+       int cnt_unused;         // has to borrow if more than this is needed
+       int cnt_limit;          // no tasks over this limit.
+       atomic_t cnt_cur_alloc; // current alloc from self
+       atomic_t cnt_borrowed;  // borrowed from the parent
+
+       int over_guarantee;     // turn on/off when cur_alloc goes 
+                               // over/under guarantee
+
+       // internally maintained statictics to compare with max numbers
+       int limit_failures;     // # failures as request was over the limit
+       int borrow_sucesses;    // # successful borrows
+       int borrow_failures;    // # borrow failures
+
+       // Maximum the specific statictics has reached.
        int max_limit_failures;
        int max_borrow_sucesses;
        int max_borrow_failures;
 
-       /* Total number of specific statistics */
+       // Total number of specific statistics
        int tot_limit_failures;
        int tot_borrow_sucesses;
        int tot_borrow_failures;
-};
+} ckrm_numtasks_t;
 
 struct ckrm_res_ctlr numtasks_rcbs;
 
@@ -67,7 +74,7 @@ struct ckrm_res_ctlr numtasks_rcbs;
  * to make share values sane.
  * Does not traverse hierarchy reinitializing children.
  */
-static void numtasks_res_initcls_one(struct ckrm_numtasks * res)
+static void numtasks_res_initcls_one(ckrm_numtasks_t * res)
 {
        res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
        res->shares.my_limit = CKRM_SHARE_DONTCARE;
@@ -99,15 +106,27 @@ static void numtasks_res_initcls_one(struct ckrm_numtasks * res)
        return;
 }
 
-static int numtasks_get_ref_local(struct ckrm_core_class *core, int force)
+#if 0
+static void numtasks_res_initcls(void *my_res)
+{
+       ckrm_numtasks_t *res = my_res;
+
+       /* Write a version which propagates values all the way down 
+          and replace rcbs callback with that version */
+
+}
+#endif
+
+static int numtasks_get_ref_local(void *arg, int force)
 {
        int rc, resid = numtasks_rcbs.resid;
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res;
+       ckrm_core_class_t *core = arg;
 
        if ((resid < 0) || (core == NULL))
                return 1;
 
-       res = ckrm_get_res_class(core, resid, struct ckrm_numtasks);
+       res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
        if (res == NULL)
                return 1;
 
@@ -133,44 +152,51 @@ static int numtasks_get_ref_local(struct ckrm_core_class *core, int force)
                                res->borrow_failures++;
                                res->tot_borrow_failures++;
                        }
-               } else
+               } else {
                        rc = force;
+               }
        } else if (res->over_guarantee) {
                res->over_guarantee = 0;
 
-               if (res->max_limit_failures < res->limit_failures)
+               if (res->max_limit_failures < res->limit_failures) {
                        res->max_limit_failures = res->limit_failures;
-               if (res->max_borrow_sucesses < res->borrow_sucesses)
+               }
+               if (res->max_borrow_sucesses < res->borrow_sucesses) {
                        res->max_borrow_sucesses = res->borrow_sucesses;
-               if (res->max_borrow_failures < res->borrow_failures)
+               }
+               if (res->max_borrow_failures < res->borrow_failures) {
                        res->max_borrow_failures = res->borrow_failures;
+               }
                res->limit_failures = 0;
                res->borrow_sucesses = 0;
                res->borrow_failures = 0;
        }
 
-       if (!rc)
+       if (!rc) {
                atomic_dec(&res->cnt_cur_alloc);
+       }
        return rc;
 }
 
-static void numtasks_put_ref_local(struct ckrm_core_class *core)
+static void numtasks_put_ref_local(void *arg)
 {
        int resid = numtasks_rcbs.resid;
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res;
+       ckrm_core_class_t *core = arg;
 
-       if ((resid == -1) || (core == NULL))
+       if ((resid == -1) || (core == NULL)) {
                return;
+       }
 
-       res = ckrm_get_res_class(core, resid, struct ckrm_numtasks);
+       res = ckrm_get_res_class(core, resid, ckrm_numtasks_t);
        if (res == NULL)
                return;
-
-       if (atomic_read(&res->cnt_cur_alloc)==0)
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) == 0)) {
+               printk(KERN_WARNING "numtasks_put_ref: Trying to decrement "
+                                       "counter below 0\n");
                return;
-
+       }
        atomic_dec(&res->cnt_cur_alloc);
-
        if (atomic_read(&res->cnt_borrowed) > 0) {
                atomic_dec(&res->cnt_borrowed);
                numtasks_put_ref_local(res->parent);
@@ -181,21 +207,19 @@ static void numtasks_put_ref_local(struct ckrm_core_class *core)
 static void *numtasks_res_alloc(struct ckrm_core_class *core,
                                struct ckrm_core_class *parent)
 {
-       struct ckrm_numtasks *res;
+       ckrm_numtasks_t *res;
 
-       res = kmalloc(sizeof(struct ckrm_numtasks), GFP_ATOMIC);
+       res = kmalloc(sizeof(ckrm_numtasks_t), GFP_ATOMIC);
 
        if (res) {
-               memset(res, 0, sizeof(struct ckrm_numtasks));
+               memset(res, 0, sizeof(ckrm_numtasks_t));
                res->core = core;
                res->parent = parent;
                numtasks_res_initcls_one(res);
                res->cnt_lock = SPIN_LOCK_UNLOCKED;
                if (parent == NULL) {
-                       /*
-                        * I am part of root class. So set the max tasks 
-                        * to available default.
-                        */
+                       // I am part of root class. So set the max tasks 
+                       // to available default
                        res->cnt_guarantee = TOTAL_NUM_TASKS;
                        res->cnt_unused = TOTAL_NUM_TASKS;
                        res->cnt_limit = TOTAL_NUM_TASKS;
@@ -214,36 +238,47 @@ static void *numtasks_res_alloc(struct ckrm_core_class *core,
  */
 static void numtasks_res_free(void *my_res)
 {
-       struct ckrm_numtasks *res = my_res, *parres, *childres;
-       struct ckrm_core_class *child = NULL;
+       ckrm_numtasks_t *res = my_res, *parres, *childres;
+       ckrm_core_class_t *child = NULL;
        int i, borrowed, maxlimit, resid = numtasks_rcbs.resid;
 
        if (!res)
                return;
 
-       /* Assuming there will be no children when this function is called */
-
-       parres = ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks);
+       // Assuming there will be no children when this function is called
 
-       if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0)
-               for (i = 0; i < borrowed; i++)
-                       numtasks_put_ref_local(parres->core);
+       parres = ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
 
-       /* return child's limit/guarantee to parent node */
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) < 0)) {
+               printk(KERN_WARNING "numtasks_res: counter below 0\n");
+       }
+       if (unlikely(atomic_read(&res->cnt_cur_alloc) > 0 ||
+                               atomic_read(&res->cnt_borrowed) > 0)) {
+               printk(KERN_WARNING "numtasks_res_free: resource still "
+                      "alloc'd %p\n", res);
+               if ((borrowed = atomic_read(&res->cnt_borrowed)) > 0) {
+                       for (i = 0; i < borrowed; i++) {
+                               numtasks_put_ref_local(parres->core);
+                       }
+               }
+       }
+       // return child's limit/guarantee to parent node
        spin_lock(&parres->cnt_lock);
        child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0);
 
-       /* run thru parent's children and get the new max_limit of the parent */
+       // run thru parent's children and get the new max_limit of the parent
        ckrm_lock_hier(parres->core);
        maxlimit = 0;
        while ((child = ckrm_get_next_child(parres->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks);
-               if (maxlimit < childres->shares.my_limit)
+               childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
+               if (maxlimit < childres->shares.my_limit) {
                        maxlimit = childres->shares.my_limit;
+               }
        }
        ckrm_unlock_hier(parres->core);
-       if (parres->shares.cur_max_limit < maxlimit)
+       if (parres->shares.cur_max_limit < maxlimit) {
                parres->shares.cur_max_limit = maxlimit;
+       }
 
        spin_unlock(&parres->cnt_lock);
        kfree(res);
@@ -257,53 +292,53 @@ static void numtasks_res_free(void *my_res)
  * Caller is responsible for protecting res and for the integrity of parres
  */
 static void
-recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres)
+recalc_and_propagate(ckrm_numtasks_t * res, ckrm_numtasks_t * parres)
 {
-       struct ckrm_core_class *child = NULL;
-       struct ckrm_numtasks *childres;
+       ckrm_core_class_t *child = NULL;
+       ckrm_numtasks_t *childres;
        int resid = numtasks_rcbs.resid;
 
        if (parres) {
                struct ckrm_shares *par = &parres->shares;
                struct ckrm_shares *self = &res->shares;
 
-               /* calculate cnt_guarantee and cnt_limit */
-               if ((parres->cnt_guarantee == CKRM_SHARE_DONTCARE) ||
-                               (self->my_guarantee == CKRM_SHARE_DONTCARE))
+               // calculate cnt_guarantee and cnt_limit
+               //
+               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        res->cnt_guarantee = CKRM_SHARE_DONTCARE;
-               else if (par->total_guarantee) {
+               } else if (par->total_guarantee) {
                        u64 temp = (u64) self->my_guarantee * parres->cnt_guarantee;
                        do_div(temp, par->total_guarantee);
                        res->cnt_guarantee = (int) temp;
-               } else
+               } else {
                        res->cnt_guarantee = 0;
+               }
 
-               if ((parres->cnt_limit == CKRM_SHARE_DONTCARE) ||
-                               (self->my_limit == CKRM_SHARE_DONTCARE))
+               if (parres->cnt_limit == CKRM_SHARE_DONTCARE) {
                        res->cnt_limit = CKRM_SHARE_DONTCARE;
-               else if (par->max_limit) {
+               } else if (par->max_limit) {
                        u64 temp = (u64) self->my_limit * parres->cnt_limit;
                        do_div(temp, par->max_limit);
                        res->cnt_limit = (int) temp;
-               } else
+               } else {
                        res->cnt_limit = 0;
+               }
 
-               /* Calculate unused units */
-               if ((res->cnt_guarantee == CKRM_SHARE_DONTCARE) ||
-                               (self->my_guarantee == CKRM_SHARE_DONTCARE))
+               // Calculate unused units
+               if (res->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        res->cnt_unused = CKRM_SHARE_DONTCARE;
-               else if (self->total_guarantee) {
+               } else if (self->total_guarantee) {
                        u64 temp = (u64) self->unused_guarantee * res->cnt_guarantee;
                        do_div(temp, self->total_guarantee);
                        res->cnt_unused = (int) temp;
-               } else
+               } else {
                        res->cnt_unused = 0;
+               }
        }
-
-       /* propagate to children */
+       // propagate to children
        ckrm_lock_hier(res->core);
        while ((child = ckrm_get_next_child(res->core, child)) != NULL) {
-               childres = ckrm_get_res_class(child, resid, struct ckrm_numtasks);
+               childres = ckrm_get_res_class(child, resid, ckrm_numtasks_t);
 
                spin_lock(&childres->cnt_lock);
                recalc_and_propagate(childres, res);
@@ -315,7 +350,7 @@ recalc_and_propagate(struct ckrm_numtasks * res, struct ckrm_numtasks * parres)
 
 static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
 {
-       struct ckrm_numtasks *parres, *res = my_res;
+       ckrm_numtasks_t *parres, *res = my_res;
        struct ckrm_shares *cur = &res->shares, *par;
        int rc = -EINVAL, resid = numtasks_rcbs.resid;
 
@@ -324,7 +359,7 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
 
        if (res->parent) {
                parres =
-                   ckrm_get_res_class(res->parent, resid, struct ckrm_numtasks);
+                   ckrm_get_res_class(res->parent, resid, ckrm_numtasks_t);
                spin_lock(&parres->cnt_lock);
                spin_lock(&res->cnt_lock);
                par = &parres->shares;
@@ -337,26 +372,28 @@ static int numtasks_set_share_values(void *my_res, struct ckrm_shares *new)
        rc = set_shares(new, cur, par);
 
        if ((rc == 0) && parres) {
-               /* Calculate parent's unused units */
-               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE)
+               // Calculate parent's unused units
+               if (parres->cnt_guarantee == CKRM_SHARE_DONTCARE) {
                        parres->cnt_unused = CKRM_SHARE_DONTCARE;
-               else if (par->total_guarantee) {
+               } else if (par->total_guarantee) {
                        u64 temp = (u64) par->unused_guarantee * parres->cnt_guarantee;
                        do_div(temp, par->total_guarantee);
                        parres->cnt_unused = (int) temp;
-               } else
+               } else {
                        parres->cnt_unused = 0;
+               }
                recalc_and_propagate(res, parres);
        }
        spin_unlock(&res->cnt_lock);
-       if (res->parent)
+       if (res->parent) {
                spin_unlock(&parres->cnt_lock);
+       }
        return rc;
 }
 
 static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
@@ -366,12 +403,12 @@ static int numtasks_get_share_values(void *my_res, struct ckrm_shares *shares)
 
 static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
 
-       seq_printf(sfile, "---------Number of tasks stats start---------\n");
+       seq_printf(sfile, "Number of tasks resource:\n");
        seq_printf(sfile, "Total Over limit failures: %d\n",
                   res->tot_limit_failures);
        seq_printf(sfile, "Total Over guarantee sucesses: %d\n",
@@ -385,7 +422,6 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
                   res->max_borrow_sucesses);
        seq_printf(sfile, "Maximum Over guarantee failures: %d\n",
                   res->max_borrow_failures);
-       seq_printf(sfile, "---------Number of tasks stats end---------\n");
 #ifdef NUMTASKS_DEBUG
        seq_printf(sfile,
                   "cur_alloc %d; borrowed %d; cnt_guar %d; cnt_limit %d "
@@ -402,7 +438,7 @@ static int numtasks_get_stats(void *my_res, struct seq_file *sfile)
 
 static int numtasks_show_config(void *my_res, struct seq_file *sfile)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
@@ -413,18 +449,18 @@ static int numtasks_show_config(void *my_res, struct seq_file *sfile)
 
 static int numtasks_set_config(void *my_res, const char *cfgstr)
 {
-       struct ckrm_numtasks *res = my_res;
+       ckrm_numtasks_t *res = my_res;
 
        if (!res)
                return -EINVAL;
-       printk("numtasks config='%s'\n", cfgstr);
+       printk(KERN_DEBUG "numtasks config='%s'\n", cfgstr);
        return 0;
 }
 
 static void numtasks_change_resclass(void *task, void *old, void *new)
 {
-       struct ckrm_numtasks *oldres = old;
-       struct ckrm_numtasks *newres = new;
+       ckrm_numtasks_t *oldres = old;
+       ckrm_numtasks_t *newres = new;
 
        if (oldres != (void *)-1) {
                struct task_struct *tsk = task;
@@ -433,13 +469,13 @@ static void numtasks_change_resclass(void *task, void *old, void *new)
                            &(tsk->parent->taskclass->core);
                        oldres =
                            ckrm_get_res_class(old_core, numtasks_rcbs.resid,
-                                              struct ckrm_numtasks);
+                                              ckrm_numtasks_t);
                }
-               if (oldres)
-                       numtasks_put_ref_local(oldres->core);
+               numtasks_put_ref_local(oldres->core);
        }
-       if (newres)
+       if (newres) {
                (void)numtasks_get_ref_local(newres->core, 1);
+       }
 }
 
 struct ckrm_res_ctlr numtasks_rcbs = {
@@ -469,7 +505,7 @@ int __init init_ckrm_numtasks_res(void)
 
        if (resid == -1) {
                resid = ckrm_register_res_ctlr(clstype, &numtasks_rcbs);
-               printk("........init_ckrm_numtasks_res -> %d\n", resid);
+               printk(KERN_DEBUG "........init_ckrm_numtasks_res -> %d\n", resid);
                if (resid != -1) {
                        ckrm_numtasks_register(numtasks_get_ref_local,
                                               numtasks_put_ref_local);
@@ -481,13 +517,14 @@ int __init init_ckrm_numtasks_res(void)
 
 void __exit exit_ckrm_numtasks_res(void)
 {
-       if (numtasks_rcbs.resid != -1)
+       if (numtasks_rcbs.resid != -1) {
                ckrm_numtasks_register(NULL, NULL);
+       }
        ckrm_unregister_res_ctlr(&numtasks_rcbs);
        numtasks_rcbs.resid = -1;
 }
 
 module_init(init_ckrm_numtasks_res)
-module_exit(exit_ckrm_numtasks_res)
+    module_exit(exit_ckrm_numtasks_res)
 
-MODULE_LICENSE("GPL");
+    MODULE_LICENSE("GPL");
index d9f15c9..179e6b5 100644 (file)
  *
  */
 
+/* Changes
+ * 
+ * 16 May 2004: Created
+ * 
+ */
+
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/ckrm_tsk.h>
@@ -28,7 +34,7 @@ void ckrm_numtasks_register(get_ref_t gr, put_ref_t pr)
        spin_unlock(&stub_lock);
 }
 
-int numtasks_get_ref(struct ckrm_core_class *arg, int force)
+int numtasks_get_ref(void *arg, int force)
 {
        int ret = 1;
        spin_lock(&stub_lock);
@@ -39,7 +45,7 @@ int numtasks_get_ref(struct ckrm_core_class *arg, int force)
        return ret;
 }
 
-void numtasks_put_ref(struct ckrm_core_class *arg)
+void numtasks_put_ref(void *arg)
 {
        spin_lock(&stub_lock);
        if (real_put_ref) {
index 77b565a..2b12532 100644 (file)
@@ -685,9 +685,6 @@ void __init ckrm_meta_init_taskclass(void)
 
        // note registeration of all resource controllers will be done 
        // later dynamically as these are specified as modules
-
-       // prepare init_task and then rely on inheritance of properties
-       ckrm_set_taskclass(&init_task, NULL, NULL, CKRM_EVENT_NEWTASK);
 }
 
 static int tc_show_members(struct ckrm_core_class *core, struct seq_file *seq)
index 5e5bf29..2b2683c 100644 (file)
@@ -1,5 +1,4 @@
-/*
- * ckrmutils.c - Utility functions for CKRM
+/* ckrmutils.c - Utility functions for CKRM
  *
  * Copyright (C) Chandra Seetharaman,  IBM Corp. 2003
  *           (C) Hubertus Franke    ,  IBM Corp. 2004
@@ -16,8 +15,7 @@
  *
  */
 
-/*
- * Changes
+/* Changes
  * 
  * 13 Nov 2003
  *        Created
@@ -42,6 +40,7 @@ int get_exe_path_name(struct task_struct *tsk, char *buf, int buflen)
        if (!mm) {
                return -EINVAL;
        }
+
        down_read(&mm->mmap_sem);
        vma = mm->mmap;
        while (vma) {
@@ -114,57 +113,55 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur,
        else
                increase_by = new->my_guarantee - cur->my_guarantee;
 
-       /* Check total_guarantee for correctness */
+       // Check total_guarantee for correctness
        if (new->total_guarantee <= CKRM_SHARE_DONTCARE) {
                goto set_share_err;
        } else if (new->total_guarantee == CKRM_SHARE_UNCHANGED) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (cur_usage_guar > new->total_guarantee) {
                goto set_share_err;
        }
-       /* Check max_limit for correctness */
+       // Check max_limit for correctness
        if (new->max_limit <= CKRM_SHARE_DONTCARE) {
                goto set_share_err;
        } else if (new->max_limit == CKRM_SHARE_UNCHANGED) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (cur->cur_max_limit > new->max_limit) {
                goto set_share_err;
        }
-       /* Check my_guarantee for correctness */
+       // Check my_guarantee for correctness
        if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (par && increase_by > par->unused_guarantee) {
                goto set_share_err;
        }
-       /* Check my_limit for correctness */
+       // Check my_limit for correctness
        if (new->my_limit == CKRM_SHARE_UNCHANGED) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (new->my_limit == CKRM_SHARE_DONTCARE) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (par && new->my_limit > par->max_limit) {
-               /* I can't get more limit than my parent's limit */
+               // I can't get more limit than my parent's limit
                goto set_share_err;
 
        }
-       /* make sure guarantee is lesser than limit */
+       // make sure guarantee is lesser than limit
        if (new->my_limit == CKRM_SHARE_DONTCARE) {
-               /* do nothing */;
+               ;               // do nothing
        } else if (new->my_limit == CKRM_SHARE_UNCHANGED) {
                if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
-                       /* do nothing */;
+                       ;       // do nothing
                } else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
-                       /*
-                        * do nothing; earlier setting would have
-                        * taken care of it
-                        */;
+                       ;       // do nothing earlier setting would've 
+                               // taken care of it
                } else if (new->my_guarantee > cur->my_limit) {
                        goto set_share_err;
                }
-       } else {                /* new->my_limit has a valid value */
+       } else {                // new->my_limit has a valid value
                if (new->my_guarantee == CKRM_SHARE_DONTCARE) {
-                       /* do nothing */;
+                       ;       // do nothing
                } else if (new->my_guarantee == CKRM_SHARE_UNCHANGED) {
                        if (cur->my_guarantee > new->my_limit) {
                                goto set_share_err;
@@ -173,28 +170,33 @@ set_shares(struct ckrm_shares *new, struct ckrm_shares *cur,
                        goto set_share_err;
                }
        }
+
        if (new->my_guarantee != CKRM_SHARE_UNCHANGED) {
                child_guarantee_changed(par, cur->my_guarantee,
                                        new->my_guarantee);
                cur->my_guarantee = new->my_guarantee;
        }
+
        if (new->my_limit != CKRM_SHARE_UNCHANGED) {
                child_maxlimit_changed(par, new->my_limit);
                cur->my_limit = new->my_limit;
        }
+
        if (new->total_guarantee != CKRM_SHARE_UNCHANGED) {
                cur->unused_guarantee = new->total_guarantee - cur_usage_guar;
                cur->total_guarantee = new->total_guarantee;
        }
+
        if (new->max_limit != CKRM_SHARE_UNCHANGED) {
                cur->max_limit = new->max_limit;
        }
+
        rc = 0;
-set_share_err:
+      set_share_err:
        return rc;
 }
 
-EXPORT_SYMBOL_GPL(get_exe_path_name);
-EXPORT_SYMBOL_GPL(child_guarantee_changed);
-EXPORT_SYMBOL_GPL(child_maxlimit_changed);
-EXPORT_SYMBOL_GPL(set_shares);
+EXPORT_SYMBOL(get_exe_path_name);
+EXPORT_SYMBOL(child_guarantee_changed);
+EXPORT_SYMBOL(child_maxlimit_changed);
+EXPORT_SYMBOL(set_shares);
index 8631bd1..187e7cd 100644 (file)
@@ -89,7 +89,7 @@ rbce_write(struct file *file, const char __user * buf,
        }
        // printk("kernel read |%s|\n", line);
        // printk("kernel read-2 |%s|\n", line+1000);
-       // printk prints only 1024 bytes once  :) 
+       // printk prints only 1024 bytes once :)
        //
        kfree(line);
        return len;
index 98f624f..21dd3cf 100644 (file)
@@ -382,12 +382,10 @@ static int insert_rule(struct rbce_rule *rule, int order)
  */
 static int reinsert_rule(struct rbce_rule *rule, int order)
 {
-       if (!list_empty(&rule->obj.link)) {
-               list_del_init(&rule->obj.link);
-               gl_num_rules--;
-               gl_rules_version++;
-               module_put(THIS_MODULE);
-       }
+       list_del(&rule->obj.link);
+       gl_num_rules--;
+       gl_rules_version++;
+       module_put(THIS_MODULE);
        return insert_rule(rule, order);
 }
 
@@ -428,7 +426,7 @@ static struct rbce_class *create_rbce_class(const char *classname,
        return cls;
 }
 
-static struct rbce_class *get_class(const char *classname, int *classtype)
+static struct rbce_class *get_class(char *classname, int *classtype)
 {
        struct rbce_class *cls;
        void *classobj;
@@ -480,11 +478,14 @@ static void rbce_class_addcb(const char *classname, void *clsobj, int classtype)
        struct rbce_class *cls;
 
        write_lock(&global_rwlock);
-       cls = get_class(classname, &classtype);
+       cls = find_class_name((char *)classname);
        if (cls) {
                cls->classobj = clsobj;
-               notify_class_action(cls, 1);
+       } else {
+               cls = create_rbce_class(classname, classtype, clsobj);
        }
+       if (cls)
+               notify_class_action(cls, 1);
        write_unlock(&global_rwlock);
        return;
 }
@@ -503,9 +504,6 @@ rbce_class_deletecb(const char *classname, void *classobj, int classtype)
        write_lock(&global_rwlock);
        cls = find_class_name(classname);
        if (cls) {
-#ifdef RBCE_EXTENSION
-               put_class(cls);
-#endif
                if (cls->classobj != classobj) {
                        printk(KERN_ERR "rbce: class %s changed identity\n",
                               classname);
@@ -631,16 +629,13 @@ static void __release_rule(struct rbce_rule *rule)
 static inline int __delete_rule(struct rbce_rule *rule)
 {
        // make sure we are not referenced by other rules
-       if (list_empty(&rule->obj.link)) {
-               return 0;
-       }
        if (GET_REF(rule)) {
                return -EBUSY;
        }
        __release_rule(rule);
        put_class(rule->target_class);
        release_term_index(rule->index);
-       list_del_init(&rule->obj.link);
+       list_del(&rule->obj.link);
        gl_num_rules--;
        gl_rules_version++;
        module_put(THIS_MODULE);
index d0c97ea..3cae550 100644 (file)
@@ -27,7 +27,6 @@
  ******************************************************************************/
 
 #include <linux/relayfs_fs.h>
-#include <linux/jiffies.h>
 
 #define PSAMPLE(pdata)    (&((pdata)->ext_data.sample))
 #define UKCC_N_SUB_BUFFERS     (4)
@@ -129,8 +128,7 @@ static inline void close_ukcc_channel(void)
 
 #define rec_set_hdr(r,t,p)      ((r)->hdr.type = (t), (r)->hdr.pid = (p))
 #define rec_set_timehdr(r,t,p,c)  (rec_set_hdr(r,t,p), \
-(r)->hdr.timestamp = jiffies_to_msecs(jiffies), (r)->hdr.cls=(unsigned long)(c) )
-
+(r)->hdr.jiffies = jiffies, (r)->hdr.cls=(unsigned long)(c) )
 
 #if CHANNEL_AUTO_CONT
 
diff --git a/kernel/crash.c b/kernel/crash.c
deleted file mode 100644 (file)
index 885def9..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- *     kernel/crash.c - Memory preserving reboot related code.
- *
- *     Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
- *     Copyright (C) IBM Corporation, 2004. All rights reserved
- */
-
-#include <linux/smp_lock.h>
-#include <linux/kexec.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/bootmem.h>
-#include <linux/highmem.h>
-#include <linux/crash_dump.h>
-
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
-#ifdef CONFIG_PROC_FS
-/*
- * Enable kexec reboot upon panic; for dumping
- */
-static ssize_t write_crash_dump_on(struct file *file, const char __user *buf,
-                                       size_t count, loff_t *ppos)
-{
-       if (count) {
-               if (get_user(crash_dump_on, buf))
-                       return -EFAULT;
-       }
-       return count;
-}
-
-static struct file_operations proc_crash_dump_on_operations = {
-       .write = write_crash_dump_on,
-};
-
-extern struct file_operations proc_vmcore_operations;
-extern struct proc_dir_entry *proc_vmcore;
-
-void crash_enable_by_proc(void)
-{
-       struct proc_dir_entry *entry;
-
-       entry = create_proc_entry("kexec-dump", S_IWUSR, NULL);
-       if (entry)
-               entry->proc_fops = &proc_crash_dump_on_operations;
-}
-
-void crash_create_proc_entry(void)
-{
-       if (dump_enabled) {
-               proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
-               if (proc_vmcore) {
-                       proc_vmcore->proc_fops = &proc_vmcore_operations;
-                       proc_vmcore->size =
-                       (size_t)(saved_max_pfn << PAGE_SHIFT);
-               }
-       }
-}
-
-#endif /* CONFIG_PROC_FS */
-
-void __crash_machine_kexec(void)
-{
-       struct kimage *image;
-
-       if ((!crash_dump_on) || (crashed))
-               return;
-
-       image = xchg(&kexec_crash_image, 0);
-       if (image) {
-               crashed = 1;
-               printk(KERN_EMERG "kexec: opening parachute\n");
-               crash_dump_stop_cpus();
-               crash_dump_save_registers();
-
-       /* If we are here to do a crash dump, save the memory from
-        * 0-640k before we copy over the kexec kernel image.  Otherwise
-        * our dump will show the wrong kernel entirely.
-        */
-               crash_relocate_mem();
-
-               machine_kexec(image);
-       } else {
-               printk(KERN_EMERG "kexec: No kernel image loaded!\n");
-       }
-}
-
-/*
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                               size_t csize, int userbuf)
-{
-       void *page, *vaddr;
-
-       if (!csize)
-               return 0;
-
-       page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-
-       vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
-       copy_page(page, vaddr);
-       kunmap_atomic(vaddr, KM_PTE0);
-
-       if (userbuf) {
-               if (copy_to_user(buf, page, csize)) {
-                       kfree(page);
-                       return -EFAULT;
-               }
-       } else
-               memcpy(buf, page, csize);
-       kfree(page);
-
-       return 0;
-}
index 8ca3c17..764c1ad 100644 (file)
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #include <linux/ckrm_tsk.h>
-#include <linux/ckrm_mem_inline.h>
-#include <linux/syscalls.h>
 #include <linux/vs_limit.h>
+#include <linux/ckrm_mem.h>
+#include <linux/syscalls.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -382,7 +382,6 @@ static inline void close_files(struct files_struct * files)
                                struct file * file = xchg(&files->fd[i], NULL);
                                if (file) 
                                        filp_close(file, files);
-                               // vx_openfd_dec(i);
                        }
                        i++;
                        set >>= 1;
@@ -514,7 +513,12 @@ static inline void __exit_mm(struct task_struct * tsk)
        task_lock(tsk);
        tsk->mm = NULL;
        up_read(&mm->mmap_sem);
-       ckrm_task_clear_mm(tsk, mm);
+#ifdef CONFIG_CKRM_RES_MEM
+       spin_lock(&mm->peertask_lock);
+       list_del_init(&tsk->mm_peers);
+       ckrm_mem_evaluate_mm(mm);
+       spin_unlock(&mm->peertask_lock);
+#endif
        enter_lazy_tlb(mm, current);
        task_unlock(tsk);
        mmput(mm);
@@ -607,7 +611,6 @@ static inline void forget_original_parent(struct task_struct * father,
        struct task_struct *p, *reaper = father;
        struct list_head *_p, *_n;
 
-       /* FIXME handle vchild_reaper/initpid */
        do {
                reaper = next_thread(reaper);
                if (reaper == father) {
index 1902e9d..d19d14e 100644 (file)
 #include <linux/audit.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
-#include <linux/ckrm_events.h>
-#include <linux/ckrm_tsk.h>
-#include <linux/ckrm_mem_inline.h>
 #include <linux/vs_network.h>
 #include <linux/vs_limit.h>
 #include <linux/vs_memory.h>
+#include <linux/ckrm.h>
+#include <linux/ckrm_tsk.h>
+#include <linux/ckrm_mem_inline.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -161,9 +161,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
        ti->task = tsk;
 
        ckrm_cb_newtask(tsk);
-       ckrm_task_mm_init(tsk);
        /* One for us, one for whoever does the "release_task()" (usually parent) */
        atomic_set(&tsk->usage,2);
+#ifdef CONFIG_CKRM_RES_MEM     
+       INIT_LIST_HEAD(&tsk->mm_peers);
+#endif
        return tsk;
 }
 
@@ -309,7 +311,10 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
        mm->ioctx_list = NULL;
        mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
-       ckrm_mm_init(mm);
+#ifdef CONFIG_CKRM_RES_MEM
+       INIT_LIST_HEAD(&mm->tasklist);
+       mm->peertask_lock = SPIN_LOCK_UNLOCKED;
+#endif
 
        if (likely(!mm_alloc_pgd(mm))) {
                mm->def_flags = 0;
@@ -331,7 +336,10 @@ struct mm_struct * mm_alloc(void)
        if (mm) {
                memset(mm, 0, sizeof(*mm));
                mm = mm_init(mm);
-               ckrm_mm_setclass(mm, ckrm_get_mem_class(current));
+#ifdef CONFIG_CKRM_RES_MEM
+               mm->memclass = GET_MEM_CLASS(current);
+               mem_class_get(mm->memclass);
+#endif
        }
        return mm;
 }
@@ -346,8 +354,14 @@ void fastcall __mmdrop(struct mm_struct *mm)
        BUG_ON(mm == &init_mm);
        mm_free_pgd(mm);
        destroy_context(mm);
-       ckrm_mm_clearclass(mm);
        clr_vx_info(&mm->mm_vx_info);
+#ifdef CONFIG_CKRM_RES_MEM
+       /* class can be null and mm's tasklist can be empty here */
+       if (mm->memclass) {
+               mem_class_put(mm->memclass);
+               mm->memclass = NULL;
+       }
+#endif
        free_mm(mm);
 }
 
@@ -486,7 +500,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
                goto free_pt;
 
 good_mm:
-       ckrm_mm_setclass(mm, oldmm->memclass);
        tsk->mm = mm;
        tsk->active_mm = mm;
        ckrm_init_mm_to_task(mm, tsk);
@@ -856,23 +869,6 @@ static task_t *copy_process(unsigned long clone_flags,
                        goto bad_fork_cleanup_vm;
        }
 
-       p->vx_info = NULL;
-       set_vx_info(&p->vx_info, current->vx_info);
-       p->nx_info = NULL;
-       set_nx_info(&p->nx_info, current->nx_info);
-
-       /* check vserver memory */
-       if (p->mm && !(clone_flags & CLONE_VM)) {
-               if (vx_vmpages_avail(p->mm, p->mm->total_vm))
-                       vx_pages_add(p->mm->mm_vx_info, RLIMIT_AS, p->mm->total_vm);
-               else
-                       goto bad_fork_free;
-       }
-       if (p->mm && vx_flags(VXF_FORK_RSS, 0)) {
-               if (!vx_rsspages_avail(p->mm, p->mm->rss))
-                       goto bad_fork_cleanup_vm;
-       }
-
        retval = -EAGAIN;
        if (!vx_nproc_avail(1))
                goto bad_fork_cleanup_vm;
@@ -1195,11 +1191,13 @@ long do_fork(unsigned long clone_flags,
                        clone_flags |= CLONE_PTRACE;
        }
 
+#ifdef CONFIG_CKRM_TYPE_TASKCLASS
        if (numtasks_get_ref(current->taskclass, 0) == 0) {
                return -ENOMEM;
        }
-       p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+#endif
 
+       p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
@@ -1239,7 +1237,9 @@ long do_fork(unsigned long clone_flags,
                                ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
                }
        } else {
+#ifdef CONFIG_CKRM_TYPE_TASKCLASS
                numtasks_put_ref(current->taskclass);
+#endif
                free_pidmap(pid);
                pid = PTR_ERR(p);
        }
diff --git a/kernel/kexec.c b/kernel/kexec.c
deleted file mode 100644 (file)
index e838875..0000000
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * kexec.c - kexec system call
- * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2.  See the file COPYING for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/file.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/kexec.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <net/checksum.h>
-#include <asm/page.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/system.h>
-
-/*
- * When kexec transitions to the new kernel there is a one-to-one
- * mapping between physical and virtual addresses.  On processors
- * where you can disable the MMU this is trivial, and easy.  For
- * others it is still a simple predictable page table to setup.
- *
- * In that environment kexec copies the new kernel to its final
- * resting place.  This means I can only support memory whose
- * physical address can fit in an unsigned long.  In particular
- * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
- * If the assembly stub has more restrictive requirements
- * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
- * defined more restrictively in <asm/kexec.h>.
- *
- * The code for the transition from the current kernel to the
- * the new kernel is placed in the control_code_buffer, whose size
- * is given by KEXEC_CONTROL_CODE_SIZE.  In the best case only a single
- * page of memory is necessary, but some architectures require more.
- * Because this memory must be identity mapped in the transition from
- * virtual to physical addresses it must live in the range
- * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
- * modifiable.
- *
- * The assembly stub in the control code buffer is passed a linked list
- * of descriptor pages detailing the source pages of the new kernel,
- * and the destination addresses of those source pages.  As this data
- * structure is not used in the context of the current OS, it must
- * be self-contained.
- *
- * The code has been made to work with highmem pages and will use a
- * destination page in its final resting place (if it happens
- * to allocate it).  The end product of this is that most of the
- * physical address space, and most of RAM can be used.
- *
- * Future directions include:
- *  - allocating a page table with the control code buffer identity
- *    mapped, to simplify machine_kexec and make kexec_on_panic more
- *    reliable.
- */
-
-/*
- * KIMAGE_NO_DEST is an impossible destination address..., for
- * allocating pages whose destination address we do not care about.
- */
-#define KIMAGE_NO_DEST (-1UL)
-
-static int kimage_is_destination_range(
-       struct kimage *image, unsigned long start, unsigned long end);
-static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest);
-
-
-static int kimage_alloc(struct kimage **rimage,
-       unsigned long nr_segments, struct kexec_segment *segments)
-{
-       int result;
-       struct kimage *image;
-       size_t segment_bytes;
-       unsigned long i;
-
-       /* Allocate a controlling structure */
-       result = -ENOMEM;
-       image = kmalloc(sizeof(*image), GFP_KERNEL);
-       if (!image) {
-               goto out;
-       }
-       memset(image, 0, sizeof(*image));
-       image->head = 0;
-       image->entry = &image->head;
-       image->last_entry = &image->head;
-
-       /* Initialize the list of control pages */
-       INIT_LIST_HEAD(&image->control_pages);
-
-       /* Initialize the list of destination pages */
-       INIT_LIST_HEAD(&image->dest_pages);
-
-       /* Initialize the list of unuseable pages */
-       INIT_LIST_HEAD(&image->unuseable_pages);
-
-       /* Read in the segments */
-       image->nr_segments = nr_segments;
-       segment_bytes = nr_segments * sizeof*segments;
-       result = copy_from_user(image->segment, segments, segment_bytes);
-       if (result)
-               goto out;
-
-       /*
-        * Verify we have good destination addresses.  The caller is
-        * responsible for making certain we don't attempt to load
-        * the new image into invalid or reserved areas of RAM.  This
-        * just verifies it is an address we can use.
-        */
-       result = -EADDRNOTAVAIL;
-       for (i = 0; i < nr_segments; i++) {
-               unsigned long mend;
-               mend = ((unsigned long)(image->segment[i].mem)) +
-                       image->segment[i].memsz;
-               if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
-                       goto out;
-       }
-
-       /*
-        * Find a location for the control code buffer, and add it
-        * the vector of segments so that it's pages will also be
-        * counted as destination pages.
-        */
-       result = -ENOMEM;
-       image->control_code_page = kimage_alloc_control_pages(image,
-               get_order(KEXEC_CONTROL_CODE_SIZE));
-       if (!image->control_code_page) {
-               printk(KERN_ERR "Could not allocate control_code_buffer\n");
-               goto out;
-       }
-
-       result = 0;
- out:
-       if (result == 0) {
-               *rimage = image;
-       } else {
-               kfree(image);
-       }
-       return result;
-}
-
-static int kimage_is_destination_range(
-       struct kimage *image, unsigned long start, unsigned long end)
-{
-       unsigned long i;
-
-       for (i = 0; i < image->nr_segments; i++) {
-               unsigned long mstart, mend;
-               mstart = (unsigned long)image->segment[i].mem;
-               mend   = mstart + image->segment[i].memsz;
-               if ((end > mstart) && (start < mend)) {
-                       return 1;
-               }
-       }
-       return 0;
-}
-
-static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order)
-{
-       struct page *pages;
-       pages = alloc_pages(gfp_mask, order);
-       if (pages) {
-               unsigned int count, i;
-               pages->mapping = NULL;
-               pages->private = order;
-               count = 1 << order;
-               for(i = 0; i < count; i++) {
-                       SetPageReserved(pages + i);
-               }
-       }
-       return pages;
-}
-
-static void kimage_free_pages(struct page *page)
-{
-       unsigned int order, count, i;
-       order = page->private;
-       count = 1 << order;
-       for(i = 0; i < count; i++) {
-               ClearPageReserved(page + i);
-       }
-       __free_pages(page, order);
-}
-
-static void kimage_free_page_list(struct list_head *list)
-{
-       struct list_head *pos, *next;
-       list_for_each_safe(pos, next, list) {
-               struct page *page;
-
-               page = list_entry(pos, struct page, lru);
-               list_del(&page->lru);
-
-               kimage_free_pages(page);
-       }
-}
-
-struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order)
-{
-       /* Control pages are special, they are the intermediaries
-        * that are needed while we copy the rest of the pages
-        * to their final resting place.  As such they must
-        * not conflict with either the destination addresses
-        * or memory the kernel is already using.
-        *
-        * The only case where we really need more than one of
-        * these are for architectures where we cannot disable
-        * the MMU and must instead generate an identity mapped
-        * page table for all of the memory.
-        *
-        * At worst this runs in O(N) of the image size.
-        */
-       struct list_head extra_pages;
-       struct page *pages;
-       unsigned int count;
-
-       count = 1 << order;
-       INIT_LIST_HEAD(&extra_pages);
-
-       /* Loop while I can allocate a page and the page allocated
-        * is a destination page.
-        */
-       do {
-               unsigned long pfn, epfn, addr, eaddr;
-               pages = kimage_alloc_pages(GFP_KERNEL, order);
-               if (!pages)
-                       break;
-               pfn   = page_to_pfn(pages);
-               epfn  = pfn + count;
-               addr  = pfn << PAGE_SHIFT;
-               eaddr = epfn << PAGE_SHIFT;
-               if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
-                       kimage_is_destination_range(image, addr, eaddr))
-               {
-                       list_add(&pages->lru, &extra_pages);
-                       pages = NULL;
-               }
-       } while(!pages);
-       if (pages) {
-               /* Remember the allocated page... */
-               list_add(&pages->lru, &image->control_pages);
-
-               /* Because the page is already in it's destination
-                * location we will never allocate another page at
-                * that address.  Therefore kimage_alloc_pages
-                * will not return it (again) and we don't need
-                * to give it an entry in image->segment[].
-                */
-       }
-       /* Deal with the destination pages I have inadvertently allocated.
-        *
-        * Ideally I would convert multi-page allocations into single
-        * page allocations, and add everyting to image->dest_pages.
-        *
-        * For now it is simpler to just free the pages.
-        */
-       kimage_free_page_list(&extra_pages);
-       return pages;
-
-}
-
-static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
-{
-       if (*image->entry != 0) {
-               image->entry++;
-       }
-       if (image->entry == image->last_entry) {
-               kimage_entry_t *ind_page;
-               struct page *page;
-               page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
-               if (!page) {
-                       return -ENOMEM;
-               }
-               ind_page = page_address(page);
-               *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
-               image->entry = ind_page;
-               image->last_entry =
-                       ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
-       }
-       *image->entry = entry;
-       image->entry++;
-       *image->entry = 0;
-       return 0;
-}
-
-static int kimage_set_destination(
-       struct kimage *image, unsigned long destination)
-{
-       int result;
-
-       destination &= PAGE_MASK;
-       result = kimage_add_entry(image, destination | IND_DESTINATION);
-       if (result == 0) {
-               image->destination = destination;
-       }
-       return result;
-}
-
-
-static int kimage_add_page(struct kimage *image, unsigned long page)
-{
-       int result;
-
-       page &= PAGE_MASK;
-       result = kimage_add_entry(image, page | IND_SOURCE);
-       if (result == 0) {
-               image->destination += PAGE_SIZE;
-       }
-       return result;
-}
-
-
-static void kimage_free_extra_pages(struct kimage *image)
-{
-       /* Walk through and free any extra destination pages I may have */
-       kimage_free_page_list(&image->dest_pages);
-
-       /* Walk through and free any unuseable pages I have cached */
-       kimage_free_page_list(&image->unuseable_pages);
-
-}
-static int kimage_terminate(struct kimage *image)
-{
-       int result;
-
-       result = kimage_add_entry(image, IND_DONE);
-       if (result == 0) {
-               /* Point at the terminating element */
-               image->entry--;
-               kimage_free_extra_pages(image);
-       }
-       return result;
-}
-
-#define for_each_kimage_entry(image, ptr, entry) \
-       for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
-               ptr = (entry & IND_INDIRECTION)? \
-                       phys_to_virt((entry & PAGE_MASK)): ptr +1)
-
-static void kimage_free_entry(kimage_entry_t entry)
-{
-       struct page *page;
-
-       page = pfn_to_page(entry >> PAGE_SHIFT);
-       kimage_free_pages(page);
-}
-
-static void kimage_free(struct kimage *image)
-{
-       kimage_entry_t *ptr, entry;
-       kimage_entry_t ind = 0;
-
-       if (!image)
-               return;
-       kimage_free_extra_pages(image);
-       for_each_kimage_entry(image, ptr, entry) {
-               if (entry & IND_INDIRECTION) {
-                       /* Free the previous indirection page */
-                       if (ind & IND_INDIRECTION) {
-                               kimage_free_entry(ind);
-                       }
-                       /* Save this indirection page until we are
-                        * done with it.
-                        */
-                       ind = entry;
-               }
-               else if (entry & IND_SOURCE) {
-                       kimage_free_entry(entry);
-               }
-       }
-       /* Free the final indirection page */
-       if (ind & IND_INDIRECTION) {
-               kimage_free_entry(ind);
-       }
-
-       /* Handle any machine specific cleanup */
-       machine_kexec_cleanup(image);
-
-       /* Free the kexec control pages... */
-       kimage_free_page_list(&image->control_pages);
-       kfree(image);
-}
-
-static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page)
-{
-       kimage_entry_t *ptr, entry;
-       unsigned long destination = 0;
-
-       for_each_kimage_entry(image, ptr, entry) {
-               if (entry & IND_DESTINATION) {
-                       destination = entry & PAGE_MASK;
-               }
-               else if (entry & IND_SOURCE) {
-                       if (page == destination) {
-                               return ptr;
-                       }
-                       destination += PAGE_SIZE;
-               }
-       }
-       return 0;
-}
-
-static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination)
-{
-       /*
-        * Here we implement safeguards to ensure that a source page
-        * is not copied to its destination page before the data on
-        * the destination page is no longer useful.
-        *
-        * To do this we maintain the invariant that a source page is
-        * either its own destination page, or it is not a
-        * destination page at all.
-        *
-        * That is slightly stronger than required, but the proof
-        * that no problems will not occur is trivial, and the
-        * implementation is simply to verify.
-        *
-        * When allocating all pages normally this algorithm will run
-        * in O(N) time, but in the worst case it will run in O(N^2)
-        * time.   If the runtime is a problem the data structures can
-        * be fixed.
-        */
-       struct page *page;
-       unsigned long addr;
-
-       /*
-        * Walk through the list of destination pages, and see if I
-        * have a match.
-        */
-       list_for_each_entry(page, &image->dest_pages, lru) {
-               addr = page_to_pfn(page) << PAGE_SHIFT;
-               if (addr == destination) {
-                       list_del(&page->lru);
-                       return page;
-               }
-       }
-       page = NULL;
-       while (1) {
-               kimage_entry_t *old;
-
-               /* Allocate a page, if we run out of memory give up */
-               page = kimage_alloc_pages(gfp_mask, 0);
-               if (!page) {
-                       return 0;
-               }
-               /* If the page cannot be used file it away */
-               if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
-                       list_add(&page->lru, &image->unuseable_pages);
-                       continue;
-               }
-               addr = page_to_pfn(page) << PAGE_SHIFT;
-
-               /* If it is the destination page we want use it */
-               if (addr == destination)
-                       break;
-
-               /* If the page is not a destination page use it */
-               if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE))
-                       break;
-
-               /*
-                * I know that the page is someones destination page.
-                * See if there is already a source page for this
-                * destination page.  And if so swap the source pages.
-                */
-               old = kimage_dst_used(image, addr);
-               if (old) {
-                       /* If so move it */
-                       unsigned long old_addr;
-                       struct page *old_page;
-
-                       old_addr = *old & PAGE_MASK;
-                       old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
-                       copy_highpage(page, old_page);
-                       *old = addr | (*old & ~PAGE_MASK);
-
-                       /* The old page I have found cannot be a
-                        * destination page, so return it.
-                        */
-                       addr = old_addr;
-                       page = old_page;
-                       break;
-               }
-               else {
-                       /* Place the page on the destination list I
-                        * will use it later.
-                        */
-                       list_add(&page->lru, &image->dest_pages);
-               }
-       }
-       return page;
-}
-
-static int kimage_load_segment(struct kimage *image,
-       struct kexec_segment *segment)
-{
-       unsigned long mstart;
-       int result;
-       unsigned long offset;
-       unsigned long offset_end;
-       unsigned char *buf;
-
-       result = 0;
-       buf = segment->buf;
-       mstart = (unsigned long)segment->mem;
-
-       offset_end = segment->memsz;
-
-       result = kimage_set_destination(image, mstart);
-       if (result < 0) {
-               goto out;
-       }
-       for (offset = 0;  offset < segment->memsz; offset += PAGE_SIZE) {
-               struct page *page;
-               char *ptr;
-               size_t size, leader;
-               page = kimage_alloc_page(image, GFP_HIGHUSER, mstart + offset);
-               if (page == 0) {
-                       result  = -ENOMEM;
-                       goto out;
-               }
-               result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT);
-               if (result < 0) {
-                       goto out;
-               }
-               ptr = kmap(page);
-               if (segment->bufsz < offset) {
-                       /* We are past the end zero the whole page */
-                       memset(ptr, 0, PAGE_SIZE);
-                       kunmap(page);
-                       continue;
-               }
-               size = PAGE_SIZE;
-               leader = 0;
-               if ((offset == 0)) {
-                       leader = mstart & ~PAGE_MASK;
-               }
-               if (leader) {
-                       /* We are on the first page zero the unused portion */
-                       memset(ptr, 0, leader);
-                       size -= leader;
-                       ptr += leader;
-               }
-               if (size > (segment->bufsz - offset)) {
-                       size = segment->bufsz - offset;
-               }
-               if (size < (PAGE_SIZE - leader)) {
-                       /* zero the trailing part of the page */
-                       memset(ptr + size, 0, (PAGE_SIZE - leader) - size);
-               }
-               result = copy_from_user(ptr, buf + offset, size);
-               kunmap(page);
-               if (result) {
-                       result = (result < 0) ? result : -EIO;
-                       goto out;
-               }
-       }
- out:
-       return result;
-}
-
-/*
- * Exec Kernel system call: for obvious reasons only root may call it.
- *
- * This call breaks up into three pieces.
- * - A generic part which loads the new kernel from the current
- *   address space, and very carefully places the data in the
- *   allocated pages.
- *
- * - A generic part that interacts with the kernel and tells all of
- *   the devices to shut down.  Preventing on-going dmas, and placing
- *   the devices in a consistent state so a later kernel can
- *   reinitialize them.
- *
- * - A machine specific part that includes the syscall number
- *   and the copies the image to it's final destination.  And
- *   jumps into the image at entry.
- *
- * kexec does not sync, or unmount filesystems so if you need
- * that to happen you need to do that yourself.
- */
-struct kimage *kexec_image = NULL;
-struct kimage *kexec_crash_image = NULL;
-
-asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
-       struct kexec_segment *segments, unsigned long flags)
-{
-       struct kimage *image;
-       int result;
-
-       /* We only trust the superuser with rebooting the system. */
-       if (!capable(CAP_SYS_BOOT))
-               return -EPERM;
-
-       if (nr_segments > KEXEC_SEGMENT_MAX)
-               return -EINVAL;
-
-       image = NULL;
-       result = 0;
-
-       if (nr_segments > 0) {
-               unsigned long i;
-               result = kimage_alloc(&image, nr_segments, segments);
-               if (result) {
-                       goto out;
-               }
-               result = machine_kexec_prepare(image);
-               if (result) {
-                       goto out;
-               }
-               image->start = entry;
-               for (i = 0; i < nr_segments; i++) {
-                       result = kimage_load_segment(image, &image->segment[i]);
-                       if (result) {
-                               goto out;
-                       }
-               }
-               result = kimage_terminate(image);
-               if (result) {
-                       goto out;
-               }
-       }
-
-       if (!flags)
-               image = xchg(&kexec_image, image);
-       else
-               image = xchg(&kexec_crash_image, image);
-
- out:
-       kimage_free(image);
-       return result;
-}
index 3fea0f2..2bdd2cf 100644 (file)
 #include <linux/sysrq.h>
 #include <linux/interrupt.h>
 #include <linux/nmi.h>
+#ifdef CONFIG_KEXEC
 #include <linux/kexec.h>
-#include <linux/crash_dump.h>
+#endif
 
 int panic_timeout = 900;
 int panic_on_oops = 1;
 int tainted;
-unsigned int crashed;
-int crash_dump_on;
 void (*dump_function_ptr)(const char *, const struct pt_regs *) = 0;
 
 EXPORT_SYMBOL(panic_timeout);
@@ -79,9 +78,6 @@ NORET_TYPE void panic(const char * fmt, ...)
                BUG();
        bust_spinlocks(0);
 
-       /* If we have crashed, perform a kexec reboot, for dump write-out */
-       crash_machine_kexec();
-
         notifier_call_chain(&panic_notifier_list, 0, buf);
        
 #ifdef CONFIG_SMP
index 663980a..2d04567 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
-#include <linux/vs_cvirt.h>
 
 #ifndef div_long_long_rem
 #include <asm/div64.h>
index 2b80f44..31e1731 100644 (file)
@@ -539,8 +539,6 @@ asmlinkage int printk(const char *fmt, ...)
        return r;
 }
 
-static volatile int printk_cpu = -1;
-
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
        unsigned long flags;
@@ -549,12 +547,11 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        static char printk_buf[1024];
        static int log_level_unknown = 1;
 
-       if (unlikely(oops_in_progress && printk_cpu == smp_processor_id()))
+       if (unlikely(oops_in_progress))
                zap_locks();
 
        /* This stops the holder of console_sem just where we want him */
        spin_lock_irqsave(&logbuf_lock, flags);
-       printk_cpu = smp_processor_id();
 
        /* Emit the output into the temporary buffer */
        printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
index 1054838..f609197 100644 (file)
@@ -50,9 +50,6 @@
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
-#include <linux/vs_context.h>
-#include <linux/vs_cvirt.h>
-#include <linux/vs_sched.h>
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
@@ -265,6 +262,7 @@ struct runqueue {
        task_t *migration_thread;
        struct list_head migration_queue;
 #endif
+
 #ifdef CONFIG_VSERVER_HARDCPU
        struct list_head hold_queue;
        int idle_tokens;
@@ -740,10 +738,12 @@ static int effective_prio(task_t *p)
        bonus = CURRENT_BONUS(p) - MAX_BONUS / 2;
 
        prio = p->static_prio - bonus;
+
 #ifdef CONFIG_VSERVER_HARDCPU
        if (task_vx_flags(p, VXF_SCHED_PRIO, 0))
                prio += effective_vavavoom(p, MAX_USER_PRIO);
 #endif
+
        if (prio < MAX_RT_PRIO)
                prio = MAX_RT_PRIO;
        if (prio > MAX_PRIO-1)
@@ -904,11 +904,10 @@ static void __deactivate_task(struct task_struct *p, runqueue_t *rq)
        p->array = NULL;
 }
 
-static inline
-void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-       vx_deactivate_task(p);
        __deactivate_task(p, rq);
+       vx_deactivate_task(p);
 }
 
 /*
@@ -1245,9 +1244,6 @@ out_activate:
         * to be considered on this CPU.)
         */
        activate_task(p, rq, cpu == this_cpu);
-       /* this is to get the accounting behind the load update */
-       if (old_state == TASK_UNINTERRUPTIBLE)
-               vx_uninterruptible_dec(p);
        if (!sync || cpu != this_cpu) {
                if (TASK_PREEMPTS_CURR(p, rq))
                        resched_task(rq->curr);
@@ -2890,6 +2886,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_ticks);
 
+
        if (vxi) {
                vxi->sched.cpu[cpu].user_ticks += user_ticks;
                vxi->sched.cpu[cpu].sys_ticks += sys_ticks;
@@ -2914,7 +2911,6 @@ void scheduler_tick(int user_ticks, int sys_ticks)
 
                if (wake_priority_sleeper(rq))
                        goto out;
-
                ckrm_sched_tick(jiffies,cpu,rq_ckrm_load(rq));
 
 #ifdef CONFIG_VSERVER_HARDCPU_IDLE
@@ -2959,7 +2955,6 @@ void scheduler_tick(int user_ticks, int sys_ticks)
                }
                goto out_unlock;
        }
-#warning MEF: vx_need_resched incorpates standard kernel code, which it should not.
        if (vx_need_resched(p)) {
 #ifdef CONFIG_CKRM_CPU_SCHEDULE
                /* Hubertus ... we can abstract this out */
@@ -3163,11 +3158,11 @@ asmlinkage void __sched schedule(void)
        prio_array_t *array;
        unsigned long long now;
        unsigned long run_time;
+       int cpu;
 #ifdef CONFIG_VSERVER_HARDCPU
        struct vx_info *vxi;
        int maxidle = -HZ;
 #endif
-       int cpu;
 
        /*
         * If crash dump is in progress, this other cpu's
@@ -3178,6 +3173,7 @@ asmlinkage void __sched schedule(void)
         if (unlikely(dump_oncpu))
                 goto dump_scheduling_disabled;
 
+
        /*
         * Test if we are atomic.  Since do_exit() needs to call into
         * schedule() atomically, we ignore that path for now.
@@ -3253,10 +3249,8 @@ need_resched_nonpreemptible:
                                unlikely(signal_pending(prev))))
                        prev->state = TASK_RUNNING;
                else {
-                       if (prev->state == TASK_UNINTERRUPTIBLE) {
+                       if (prev->state == TASK_UNINTERRUPTIBLE)
                                rq->nr_uninterruptible++;
-                               vx_uninterruptible_inc(prev);
-                       }
                        deactivate_task(prev, rq);
                }
        }
index e74c821..a56f3d9 100644 (file)
@@ -626,6 +626,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 
        if (sig < 0 || sig > _NSIG)
                return error;
+
        user = (!info ||
                (info != SEND_SIG_PRIV &&
                 info != SEND_SIG_FORCED &&
index 85a4489..3792340 100644 (file)
@@ -17,8 +17,6 @@
 #include <linux/init.h>
 #include <linux/highuid.h>
 #include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/kexec.h>
 #include <linux/workqueue.h>
 #include <linux/device.h>
 #include <linux/key.h>
 #include <linux/security.h>
 #include <linux/dcookies.h>
 #include <linux/suspend.h>
-#include <linux/ckrm_events.h>
-#include <linux/tty.h>
+#include <linux/ckrm.h>
+#include <linux/vs_base.h>
 #include <linux/vs_cvirt.h>
+#include <linux/tty.h>
+
 #include <linux/compat.h>
 #include <linux/syscalls.h>
 
@@ -362,8 +362,7 @@ asmlinkage long sys_getpriority(int which, int who)
 out_unlock:
        read_unlock(&tasklist_lock);
 
-       key_fsgid_changed(current);
-       return 0;
+       return retval;
 }
 
 long vs_reboot(unsigned int, void *);
@@ -506,7 +505,6 @@ void ctrl_alt_del(void)
 }
        
 
-
 /*
  * Unprivileged users may change the real gid to the effective gid
  * or vice versa.  (BSD-style)
@@ -567,8 +565,9 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
        current->egid = new_egid;
        current->gid = new_rgid;
 
-       key_fsgid_changed(current);
        ckrm_cb_gid();
+
+       key_fsgid_changed(current);
        return 0;
 }
 
@@ -607,9 +606,9 @@ asmlinkage long sys_setgid(gid_t gid)
        else
                return -EPERM;
 
-       key_fsgid_changed(current);
        ckrm_cb_gid();
 
+       key_fsgid_changed(current);
        return 0;
 }
   
@@ -698,9 +697,10 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
                current->suid = current->euid;
        current->fsuid = current->euid;
 
-       key_fsuid_changed(current);
        ckrm_cb_uid();
 
+       key_fsuid_changed(current);
+
        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
 }
 
@@ -746,9 +746,10 @@ asmlinkage long sys_setuid(uid_t uid)
        current->fsuid = current->euid = uid;
        current->suid = new_suid;
 
-       key_fsuid_changed(current);
        ckrm_cb_uid();
 
+       key_fsuid_changed(current);
+
        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
 }
 
@@ -795,9 +796,10 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
        if (suid != (uid_t) -1)
                current->suid = suid;
 
-       key_fsuid_changed(current);
        ckrm_cb_uid();
 
+       key_fsuid_changed(current);
+
        return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
 }
 
@@ -848,8 +850,10 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
        if (sgid != (gid_t) -1)
                current->sgid = sgid;
 
-       key_fsgid_changed(current);
        ckrm_cb_gid();
+
+
+       key_fsgid_changed(current);
        return 0;
 }
 
index 41a327d..9f36b40 100644 (file)
@@ -18,7 +18,6 @@ cond_syscall(sys_acct)
 cond_syscall(sys_lookup_dcookie)
 cond_syscall(sys_swapon)
 cond_syscall(sys_swapoff)
-cond_syscall(sys_kexec_load)
 cond_syscall(sys_init_module)
 cond_syscall(sys_delete_module)
 cond_syscall(sys_socketpair)
index 0651158..aea29b2 100644 (file)
@@ -4,15 +4,6 @@
 
 menu "Linux VServer"
 
-config VSERVER_FILESHARING
-       bool    "(PLANETLAB) Disable Hidden File Module"
-       default y
-       help
-         This retains the module that when a vserver can
-         see a file, it can also stat and read it.  The
-         assumption is that chroot() works and vservers
-         are contained that way.
-
 config VSERVER_LEGACY
        bool    "Enable Legacy Kernel API"
        default y
@@ -101,23 +92,5 @@ config      VSERVER_DEBUG
          overhead (~ ??%) to all vserver related functions and
          increases the kernel size by about 20k.
 
-config VSERVER_HISTORY
-       bool    "Compile History Tracing"
-       depends on VSERVER_DEBUG
-       default n
-       help
-         Set this to yes if you want to record the history of
-         linux-vserver activities, so they can be replayed on
-         a kernel panic (oops)
-
-config VSERVER_HISTORY_SIZE
-       int "Per CPU History Size (32-65536)"
-       depends on VSERVER_HISTORY
-       range 32 65536
-       default 64
-       help
-         This allows you to specify the number of entries in
-         the per CPU history buffer.
-
 endmenu
 
index 1cee3de..577c3ca 100644 (file)
@@ -10,5 +10,4 @@ vserver-y     := switch.o context.o namespace.o sched.o network.o inode.o \
 
 vserver-$(CONFIG_VSERVER_DEBUG) += sysctl.o
 vserver-$(CONFIG_VSERVER_LEGACY) += legacy.o
-vserver-$(CONFIG_VSERVER_HISTORY) += history.o
 
index 6b1c9be..d56d362 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  Virtual Server: Context Support
  *
- *  Copyright (C) 2003-2005  Herbert Pötzl
+ *  Copyright (C) 2003-2004  Herbert Pötzl
  *
  *  V0.01  context helper
  *  V0.02  vx_ctx_kill syscall command
  *  V0.06  task_xid and info commands
  *  V0.07  context flags and caps
  *  V0.08  switch to RCU based hash
- *  V0.09  revert to non RCU for now
- *  V0.10  and back to working RCU hash
  *
  */
 
 #include <linux/config.h>
 #include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/namespace.h>
-
-#include <linux/sched.h>
-#include <linux/vserver/network.h>
+#include <linux/vserver.h>
 #include <linux/vserver/legacy.h>
-#include <linux/vserver/limit.h>
-#include <linux/vserver/debug.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
-#include <linux/vserver/context_cmd.h>
-#include <linux/ckrm_events.h> /* needed for ckrm_cb_xid() */
+#include <linux/kernel_stat.h>
+#include <linux/namespace.h>
+#include <linux/rcupdate.h>
 
-#include <asm/errno.h>
+#define CKRM_VSERVER_INTEGRATION
+#ifdef CKRM_VSERVER_INTEGRATION
+#include <linux/ckrm.h>
+#endif //CKRM_VSERVER_INTEGRATION
 
-#include "cvirt_init.h"
-#include "limit_init.h"
-#include "sched_init.h"
+#include <asm/errno.h>
 
 
 /*     __alloc_vx_info()
@@ -79,7 +74,6 @@ static struct vx_info *__alloc_vx_info(xid_t xid)
 
        vxdprintk(VXD_CBIT(xid, 0),
                "alloc_vx_info(%d) = %p", xid, new);
-       vxh_alloc_vx_info(new);
        return new;
 }
 
@@ -91,7 +85,6 @@ static void __dealloc_vx_info(struct vx_info *vxi)
 {
        vxdprintk(VXD_CBIT(xid, 0),
                "dealloc_vx_info(%p)", vxi);
-       vxh_dealloc_vx_info(vxi);
 
        vxi->vx_hlist.next = LIST_POISON1;
        vxi->vx_id = -1;
@@ -129,47 +122,40 @@ static inline int __free_vx_info(struct vx_info *vxi)
        return usecnt;
 }
 
-static void __rcu_put_vx_info(struct rcu_head *head)
+#if 0
+
+static void __rcu_free_vx_info(struct rcu_head *head)
 {
        struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
 
+       BUG_ON(!head);
        vxdprintk(VXD_CBIT(xid, 3),
-               "__rcu_put_vx_info(%p[#%d]): %d,%d",
-               vxi, vxi->vx_id,
-               atomic_read(&vxi->vx_usecnt),
-               atomic_read(&vxi->vx_refcnt));
-       put_vx_info(vxi);
+               "rcu_free_vx_info(%p): uc=%d", vxi,
+               atomic_read(&vxi->vx_usecnt));
+
+       __free_vx_info(vxi);
 }
 
-void __shutdown_vx_info(struct vx_info *vxi)
+#endif
+
+void free_vx_info(struct vx_info *vxi)
 {
        struct namespace *namespace;
        struct fs_struct *fs;
 
-       might_sleep();
+       /* context shutdown is mandatory */
+       // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
 
        namespace = xchg(&vxi->vx_namespace, NULL);
+       fs = xchg(&vxi->vx_fs, NULL);
+
        if (namespace)
                put_namespace(namespace);
-
-       fs = xchg(&vxi->vx_fs, NULL);
        if (fs)
                put_fs_struct(fs);
-}
-
-/* exported stuff */
-
-void free_vx_info(struct vx_info *vxi)
-{
-       /* context shutdown is mandatory */
-       // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
-
-       BUG_ON(vxi->vx_state & VXS_HASHED);
-
-       BUG_ON(vxi->vx_namespace);
-       BUG_ON(vxi->vx_fs);
 
        BUG_ON(__free_vx_info(vxi));
+       // call_rcu(&i->vx_rcu, __rcu_free_vx_info);
 }
 
 
@@ -200,8 +186,6 @@ static inline void __hash_vx_info(struct vx_info *vxi)
 
        vxdprintk(VXD_CBIT(xid, 4),
                "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
-       vxh_hash_vx_info(vxi);
-
        get_vx_info(vxi);
        vxi->vx_state |= VXS_HASHED;
        head = &vx_info_hash[__hashval(vxi->vx_id)];
@@ -217,12 +201,9 @@ static inline void __unhash_vx_info(struct vx_info *vxi)
 {
        vxdprintk(VXD_CBIT(xid, 4),
                "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
-       vxh_unhash_vx_info(vxi);
-
        vxi->vx_state &= ~VXS_HASHED;
        hlist_del_rcu(&vxi->vx_hlist);
-
-       call_rcu(&vxi->vx_rcu, __rcu_put_vx_info);
+       put_vx_info(vxi);
 }
 
 
@@ -235,29 +216,22 @@ static inline struct vx_info *__lookup_vx_info(xid_t xid)
 {
        struct hlist_head *head = &vx_info_hash[__hashval(xid)];
        struct hlist_node *pos;
-       struct vx_info *vxi;
 
        hlist_for_each_rcu(pos, head) {
-               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
+               struct vx_info *vxi =
+                       hlist_entry(pos, struct vx_info, vx_hlist);
 
                if ((vxi->vx_id == xid) &&
                        vx_info_state(vxi, VXS_HASHED))
-                       goto found;
+                       return vxi;
        }
-       vxi = NULL;
-found:
-       vxdprintk(VXD_CBIT(xid, 0),
-               "__lookup_vx_info(#%u): %p[#%u]",
-               xid, vxi, vxi?vxi->vx_id:0);
-       vxh_lookup_vx_info(xid, vxi);
-       return vxi;
+       return NULL;
 }
 
 
 /*     __vx_dynamic_id()
 
        * find unused dynamic xid
-       * requires the rcu_read_lock()
        * requires the hash_lock to be held                     */
 
 static inline xid_t __vx_dynamic_id(void)
@@ -293,9 +267,6 @@ static struct vx_info * __loc_vx_info(int id, int *err)
                return NULL;
        }
 
-       /* FIXME is this required at all ? */
-       rcu_read_lock();
-       /* required to make dynamic xids unique */
        spin_lock(&vx_info_hash_lock);
 
        /* dynamic context requested */
@@ -333,8 +304,6 @@ static struct vx_info * __loc_vx_info(int id, int *err)
 
 out_unlock:
        spin_unlock(&vx_info_hash_lock);
-       rcu_read_unlock();
-       vxh_loc_vx_info(id, vxi);
        if (new)
                __dealloc_vx_info(new);
        return vxi;
@@ -347,7 +316,6 @@ out_unlock:
 
 void unhash_vx_info(struct vx_info *vxi)
 {
-       __shutdown_vx_info(vxi);
        spin_lock(&vx_info_hash_lock);
        __unhash_vx_info(vxi);
        spin_unlock(&vx_info_hash_lock);
@@ -566,7 +534,12 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
 out:
 
 
-       ckrm_cb_xid(p);
+#ifdef CKRM_VSERVER_INTEGRATION
+       do {
+         ckrm_cb_xid(p);
+       } while (0);
+#endif //CKRM_VSERVER_INTEGRATION
+
 
        put_vx_info(old_vxi);
        return ret;
@@ -611,7 +584,7 @@ int vc_task_xid(uint32_t id, void __user *data)
                read_unlock(&tasklist_lock);
        }
        else
-               xid = vx_current_xid();
+               xid = current->xid;
        return xid;
 }
 
@@ -795,6 +768,8 @@ int vc_set_ccaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
+// EXPORT_SYMBOL_GPL(rcu_free_vx_info);
 EXPORT_SYMBOL_GPL(free_vx_info);
+EXPORT_SYMBOL_GPL(vx_info_hash_lock);
 EXPORT_SYMBOL_GPL(unhash_vx_info);
 
index 1cb3eda..6b5f100 100644 (file)
  */
 
 #include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/types.h>
+#include <linux/vserver/cvirt.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/switch.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_cvirt.h>
-#include <linux/vserver/switch.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -35,7 +36,7 @@ void vx_vsi_uptime(struct timespec *uptime, struct timespec *idle)
        return;
 }
 
-uint64_t vx_idle_jiffies(void)
+uint64_t vx_idle_jiffies()
 {
        return init_task.utime + init_task.stime;
 }
@@ -45,17 +46,14 @@ uint64_t vx_idle_jiffies(void)
 static inline uint32_t __update_loadavg(uint32_t load,
        int wsize, int delta, int n)
 {
-       unsigned long long calc, prev;
+       unsigned long long calc;
 
        /* just set it to n */
        if (unlikely(delta >= wsize))
                return (n << FSHIFT);
 
-       calc = delta * n;
-       calc <<= FSHIFT;
-       prev = (wsize - delta);
-       prev *= load;
-       calc += prev;
+       calc = (delta * n) << FSHIFT;
+       calc += (wsize - delta) * load;
        do_div(calc, wsize);
        return calc;
 }
@@ -64,8 +62,6 @@ static inline uint32_t __update_loadavg(uint32_t load,
 void vx_update_load(struct vx_info *vxi)
 {
        uint32_t now, last, delta;
-       unsigned int nr_running, nr_uninterruptible;
-       unsigned int total;
 
        spin_lock(&vxi->cvirt.load_lock);
 
@@ -73,23 +69,14 @@ void vx_update_load(struct vx_info *vxi)
        last = vxi->cvirt.load_last;
        delta = now - last;
 
-       if (delta < 5*HZ)
-               goto out;
-
-       nr_running = atomic_read(&vxi->cvirt.nr_running);
-       nr_uninterruptible = atomic_read(&vxi->cvirt.nr_uninterruptible);
-       total = nr_running + nr_uninterruptible;
-
        vxi->cvirt.load[0] = __update_loadavg(vxi->cvirt.load[0],
-               60*HZ, delta, total);
+               60*HZ, delta, atomic_read(&vxi->cvirt.nr_running));
        vxi->cvirt.load[1] = __update_loadavg(vxi->cvirt.load[1],
-               5*60*HZ, delta, total);
+               5*60*HZ, delta, atomic_read(&vxi->cvirt.nr_running));
        vxi->cvirt.load[2] = __update_loadavg(vxi->cvirt.load[2],
-               15*60*HZ, delta, total);
+               15*60*HZ, delta, atomic_read(&vxi->cvirt.nr_running));
 
        vxi->cvirt.load_last = now;
-out:
-       atomic_inc(&vxi->cvirt.load_updates);
        spin_unlock(&vxi->cvirt.load_lock);
 }
 
diff --git a/kernel/vserver/cvirt_init.h b/kernel/vserver/cvirt_init.h
deleted file mode 100644 (file)
index ecc34e1..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-
-extern uint64_t vx_idle_jiffies(void);
-
-static inline void vx_info_init_cvirt(struct _vx_cvirt *cvirt)
-{
-       uint64_t idle_jiffies = vx_idle_jiffies();
-       uint64_t nsuptime;
-
-       do_posix_clock_monotonic_gettime(&cvirt->bias_uptime);
-       nsuptime = (unsigned long long)cvirt->bias_uptime.tv_sec
-               * NSEC_PER_SEC + cvirt->bias_uptime.tv_nsec;
-       cvirt->bias_clock = nsec_to_clock_t(nsuptime);
-
-       jiffies_to_timespec(idle_jiffies, &cvirt->bias_idle);
-       atomic_set(&cvirt->nr_threads, 0);
-       atomic_set(&cvirt->nr_running, 0);
-       atomic_set(&cvirt->nr_uninterruptible, 0);
-       atomic_set(&cvirt->nr_onhold, 0);
-
-       down_read(&uts_sem);
-       cvirt->utsname = system_utsname;
-       up_read(&uts_sem);
-
-       spin_lock_init(&cvirt->load_lock);
-       cvirt->load_last = jiffies;
-       atomic_set(&cvirt->load_updates, 0);
-       cvirt->load[0] = 0;
-       cvirt->load[1] = 0;
-       cvirt->load[2] = 0;
-}
-
-static inline void vx_info_exit_cvirt(struct _vx_cvirt *cvirt)
-{
-#ifdef CONFIG_VSERVER_DEBUG
-       int value;
-
-       vxwprintk((value = atomic_read(&cvirt->nr_threads)),
-               "!!! cvirt: %p[nr_threads] = %d on exit.",
-               cvirt, value);
-       vxwprintk((value = atomic_read(&cvirt->nr_running)),
-               "!!! cvirt: %p[nr_running] = %d on exit.",
-               cvirt, value);
-       vxwprintk((value = atomic_read(&cvirt->nr_uninterruptible)),
-               "!!! cvirt: %p[nr_uninterruptible] = %d on exit.",
-               cvirt, value);
-#endif
-       return;
-}
-
-static inline void vx_info_init_cacct(struct _vx_cacct *cacct)
-{
-       int i,j;
-
-       for (i=0; i<5; i++) {
-               for (j=0; j<3; j++) {
-                       atomic_set(&cacct->sock[i][j].count, 0);
-                       atomic_set(&cacct->sock[i][j].total, 0);
-               }
-       }
-}
-
-static inline void vx_info_exit_cacct(struct _vx_cacct *cacct)
-{
-       return;
-}
-
diff --git a/kernel/vserver/cvirt_proc.h b/kernel/vserver/cvirt_proc.h
deleted file mode 100644 (file)
index ac67f98..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _VX_CVIRT_PROC_H
-#define _VX_CVIRT_PROC_H
-
-#include <linux/sched.h>
-
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
-static inline int vx_info_proc_cvirt(struct _vx_cvirt *cvirt, char *buffer)
-{
-       int length = 0;
-       int a, b, c;
-
-       length += sprintf(buffer + length,
-               "BiasUptime:\t%lu.%02lu\n",
-                       (unsigned long)cvirt->bias_uptime.tv_sec,
-                       (cvirt->bias_uptime.tv_nsec / (NSEC_PER_SEC / 100)));
-       length += sprintf(buffer + length,
-               "SysName:\t%.*s\n"
-               "NodeName:\t%.*s\n"
-               "Release:\t%.*s\n"
-               "Version:\t%.*s\n"
-               "Machine:\t%.*s\n"
-               "DomainName:\t%.*s\n"
-               ,__NEW_UTS_LEN, cvirt->utsname.sysname
-               ,__NEW_UTS_LEN, cvirt->utsname.nodename
-               ,__NEW_UTS_LEN, cvirt->utsname.release
-               ,__NEW_UTS_LEN, cvirt->utsname.version
-               ,__NEW_UTS_LEN, cvirt->utsname.machine
-               ,__NEW_UTS_LEN, cvirt->utsname.domainname
-               );
-
-       a = cvirt->load[0] + (FIXED_1/200);
-       b = cvirt->load[1] + (FIXED_1/200);
-       c = cvirt->load[2] + (FIXED_1/200);
-       length += sprintf(buffer + length,
-               "nr_threads:\t%d\n"
-               "nr_running:\t%d\n"
-               "nr_unintr:\t%d\n"
-               "nr_onhold:\t%d\n"
-               "load_updates:\t%d\n"
-               "loadavg:\t%d.%02d %d.%02d %d.%02d\n"
-               ,atomic_read(&cvirt->nr_threads)
-               ,atomic_read(&cvirt->nr_running)
-               ,atomic_read(&cvirt->nr_uninterruptible)
-               ,atomic_read(&cvirt->nr_onhold)
-               ,atomic_read(&cvirt->load_updates)
-               ,LOAD_INT(a), LOAD_FRAC(a)
-               ,LOAD_INT(b), LOAD_FRAC(b)
-               ,LOAD_INT(c), LOAD_FRAC(c)
-               );
-       return length;
-}
-
-
-static inline long vx_sock_count(struct _vx_cacct *cacct, int type, int pos)
-{
-       return atomic_read(&cacct->sock[type][pos].count);
-}
-
-
-static inline long vx_sock_total(struct _vx_cacct *cacct, int type, int pos)
-{
-       return atomic_read(&cacct->sock[type][pos].total);
-}
-
-static inline int vx_info_proc_cacct(struct _vx_cacct *cacct, char *buffer)
-{
-       int i,j, length = 0;
-       static char *type[] = { "UNSPEC", "UNIX", "INET", "INET6", "OTHER" };
-
-       for (i=0; i<5; i++) {
-               length += sprintf(buffer + length,
-                       "%s:", type[i]);
-               for (j=0; j<3; j++) {
-                       length += sprintf(buffer + length,
-                               "\t%12lu/%-12lu"
-                               ,vx_sock_count(cacct, i, j)
-                               ,vx_sock_total(cacct, i, j)
-                               );
-               }
-               buffer[length++] = '\n';
-       }
-       length += sprintf(buffer + length,
-               "forks:\t%lu\n", cacct->total_forks);
-       return length;
-}
-
-#endif /* _VX_CVIRT_PROC_H */
index 6b14494..11da06d 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/vserver/switch.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_dlimit.h>
 
@@ -388,7 +389,7 @@ void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
        __u64 blimit, bfree, bavail;
        __u32 ifree;
 
-       dli = locate_dl_info(sb, vx_current_xid());
+       dli = locate_dl_info(sb, current->xid);
        if (!dli)
                return;
 
index ce8f971..023bbee 100644 (file)
@@ -13,7 +13,8 @@
 #include <linux/errno.h>
 #include <linux/reboot.h>
 #include <linux/kmod.h>
-#include <linux/sched.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 
 #include <asm/uaccess.h>
@@ -89,34 +90,3 @@ long vs_reboot(unsigned int cmd, void * arg)
        return 0;
 }
 
-long vs_context_state(unsigned int cmd)
-{
-       char id_buf[8], cmd_buf[32];
-
-       char *argv[] = {vshelper_path, NULL, id_buf, NULL, 0};
-       char *envp[] = {"HOME=/", "TERM=linux",
-                       "PATH=/sbin:/usr/sbin:/bin:/usr/bin", cmd_buf, 0};
-
-       snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
-       snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
-
-       switch (cmd) {
-       case VS_CONTEXT_CREATED:
-               argv[1] = "startup";
-               break;
-       case VS_CONTEXT_DESTROY:
-               argv[1] = "shutdown";
-               break;
-       default:
-               return 0;
-       }
-
-       if (call_usermodehelper(*argv, argv, envp, 1)) {
-               printk( KERN_WARNING
-                       "vs_context_state(): failed to exec (%s %s %s %s)\n",
-                       vshelper_path, argv[1], argv[2], argv[3]);
-               return 0;
-       }
-       return 0;
-}
-
index 8c44b33..22cc818 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/errno.h>
+#include <linux/vserver.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
index ca16e0c..60e6fe1 100644 (file)
  */
 
 #include <linux/config.h>
-#include <linux/sched.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
-#include <linux/devpts_fs.h>
 #include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/parser.h>
 #include <linux/vserver/inode.h>
-#include <linux/vserver/xid.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -27,8 +23,6 @@
 
 static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint32_t *mask)
 {
-       struct proc_dir_entry *entry;
-
        if (!in || !in->i_sb)
                return -ESRCH;
 
@@ -46,9 +40,8 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
                *mask |= IATTR_XID;
        }
 
-       switch (in->i_sb->s_magic) {
-       case PROC_SUPER_MAGIC:
-               entry = PROC_I(in)->pde;
+       if (in->i_sb->s_magic == PROC_SUPER_MAGIC) {
+               struct proc_dir_entry *entry = PROC_I(in)->pde;
 
                // check for specific inodes ?
                if (entry)
@@ -57,15 +50,6 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
                        *flags |= (entry->vx_flags & IATTR_FLAGS);
                else
                        *flags |= (PROC_I(in)->vx_flags & IATTR_FLAGS);
-               break;
-
-       case DEVPTS_SUPER_MAGIC:
-               *xid = in->i_xid;
-               *mask |= IATTR_XID;
-               break;
-
-       default:
-               break;
        }
        return 0;
 }
@@ -73,7 +57,7 @@ static int __vc_get_iattr(struct inode *in, uint32_t *xid, uint32_t *flags, uint
 int vc_get_iattr(uint32_t id, void __user *data)
 {
        struct nameidata nd;
-       struct vcmd_ctx_iattr_v1 vc_data = { .xid = -1 };
+       struct vcmd_ctx_iattr_v1 vc_data;
        int ret;
 
        if (!vx_check(0, VX_ADMIN))
@@ -96,7 +80,7 @@ int vc_get_iattr(uint32_t id, void __user *data)
 static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uint32_t *mask)
 {
        struct inode *in = de->d_inode;
-       int error = 0, is_proc = 0, has_xid = 0;
+       int error = 0, is_proc = 0;
 
        if (!in || !in->i_sb)
                return -ESRCH;
@@ -104,10 +88,7 @@ static int __vc_set_iattr(struct dentry *de, uint32_t *xid, uint32_t *flags, uin
        is_proc = (in->i_sb->s_magic == PROC_SUPER_MAGIC);
        if ((*mask & IATTR_FLAGS) && !is_proc)
                return -EINVAL;
-
-       has_xid = (in->i_sb->s_flags & MS_TAGXID) ||
-               (in->i_sb->s_magic == DEVPTS_SUPER_MAGIC);
-       if ((*mask & IATTR_XID) && !has_xid)
+       if ((*mask & IATTR_XID) && !(in->i_sb->s_flags & MS_TAGXID))
                return -EINVAL;
 
        down(&in->i_sem);
@@ -222,6 +203,7 @@ int vc_iattr_ioctl(struct dentry *de, unsigned int cmd, unsigned long arg)
 
 
 #ifdef CONFIG_VSERVER_LEGACY
+#include <linux/proc_fs.h>
 
 #define PROC_DYNAMIC_FIRST 0xF0000000UL
 
@@ -270,69 +252,3 @@ int vx_proc_ioctl(struct inode * inode, struct file * filp,
 }
 #endif
 
-
-int vx_parse_xid(char *string, xid_t *xid, int remove)
-{
-       static match_table_t tokens = {
-               {1, "xid=%u"},
-               {0, NULL}
-       };
-       substring_t args[MAX_OPT_ARGS];
-       int token, option = 0;
-
-       if (!string)
-               return 0;
-
-       token = match_token(string, tokens, args);
-       if (token && xid && !match_int(args, &option))
-               *xid = option;
-
-       vxdprintk(VXD_CBIT(xid, 7),
-               "vx_parse_xid(»%s«): %d:#%d",
-               string, token, option);
-
-       if (token && remove) {
-               char *p = strstr(string, "xid=");
-               char *q = p;
-
-               if (p) {
-                       while (*q != '\0' && *q != ',')
-                               q++;
-                       while (*q)
-                               *p++ = *q++;
-                       while (*p)
-                               *p++ = '\0';
-               }
-       }
-       return token;
-}
-
-void vx_propagate_xid(struct nameidata *nd, struct inode *inode)
-{
-       xid_t new_xid = 0;
-       struct vfsmount *mnt;
-       int propagate;
-
-       if (!nd)
-               return;
-       mnt = nd->mnt;
-       if (!mnt)
-               return;
-
-       propagate = (mnt->mnt_flags & MNT_XID);
-       if (propagate)
-               new_xid = mnt->mnt_xid;
-
-       vxdprintk(VXD_CBIT(xid, 7),
-               "vx_propagate_xid(%p[#%lu.%d]): %d,%d",
-               inode, inode->i_ino, inode->i_xid,
-               new_xid, (propagate)?1:0);
-
-       if (propagate)
-               inode->i_xid = new_xid;
-}
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL_GPL(vx_propagate_xid);
-
index fe4c66d..e760653 100644 (file)
 
 #include <linux/config.h>
 #include <linux/sched.h>
-#include <linux/vs_context.h>
-#include <linux/vs_network.h>
+#include <linux/namespace.h>
 #include <linux/vserver/legacy.h>
 #include <linux/vserver/namespace.h>
-#include <linux/namespace.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_network.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
@@ -59,9 +61,8 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
                return ret;
        }
 
-       if (!vx_check(0, VX_ADMIN) || !capable(CAP_SYS_ADMIN)
-               /* might make sense in the future, or not ... */
-               || vx_flags(VX_INFO_LOCK, 0))
+       if (!vx_check(0, VX_ADMIN) ||
+               !capable(CAP_SYS_ADMIN) || vx_flags(VX_INFO_PRIVATE, 0))
                return -EPERM;
 
        /* ugly hack for Spectator */
@@ -81,12 +82,6 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
 
        if (!new_vxi)
                return -EINVAL;
-
-       ret = -EPERM;
-       if (!vx_info_flags(new_vxi, VXF_STATE_SETUP, 0) &&
-               vx_info_flags(new_vxi, VX_INFO_PRIVATE, 0))
-               goto out_put;
-
        new_vxi->vx_flags &= ~(VXF_STATE_SETUP|VXF_STATE_INIT);
 
        ret = vx_migrate_task(current, new_vxi);
@@ -104,7 +99,6 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
                                current->signal->rlim[RLIMIT_NPROC].rlim_max;
                ret = new_vxi->vx_id;
        }
-out_put:
        put_vx_info(new_vxi);
        return ret;
 }
index a1497be..4af1ee5 100644 (file)
 
 #include <linux/config.h>
 #include <linux/module.h>
-#include <linux/vs_context.h>
-#include <linux/vs_limit.h>
 #include <linux/vserver/limit.h>
+#include <linux/vserver/context.h>
 #include <linux/vserver/switch.h>
-#include <linux/vserver/limit_cmd.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
diff --git a/kernel/vserver/limit_init.h b/kernel/vserver/limit_init.h
deleted file mode 100644 (file)
index 0a9dcf4..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-
-static inline void vx_info_init_limit(struct _vx_limit *limit)
-{
-       int lim;
-
-       for (lim=0; lim<NUM_LIMITS; lim++) {
-               limit->rlim[lim] = RLIM_INFINITY;
-               limit->rmax[lim] = 0;
-               atomic_set(&limit->rcur[lim], 0);
-               atomic_set(&limit->lhit[lim], 0);
-       }
-}
-
-static inline void vx_info_exit_limit(struct _vx_limit *limit)
-{
-#ifdef CONFIG_VSERVER_DEBUG
-       unsigned long value;
-       unsigned int lim;
-
-       for (lim=0; lim<NUM_LIMITS; lim++) {
-               value = atomic_read(&limit->rcur[lim]);
-               vxwprintk(value,
-                       "!!! limit: %p[%s,%d] = %ld on exit.",
-                       limit, vlimit_name[lim], lim, value);
-       }
-#endif
-}
-
diff --git a/kernel/vserver/limit_proc.h b/kernel/vserver/limit_proc.h
deleted file mode 100644 (file)
index 97696e9..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef _VX_LIMIT_PROC_H
-#define _VX_LIMIT_PROC_H
-
-
-static inline void vx_limit_fixup(struct _vx_limit *limit)
-{
-       unsigned long value;
-       unsigned int lim;
-
-       for (lim=0; lim<NUM_LIMITS; lim++) {
-               value = atomic_read(&limit->rcur[lim]);
-               if (value > limit->rmax[lim])
-                       limit->rmax[lim] = value;
-               if (limit->rmax[lim] > limit->rlim[lim])
-                       limit->rmax[lim] = limit->rlim[lim];
-       }
-}
-
-#define VX_LIMIT_FMT   ":\t%10d\t%10ld\t%10ld\t%6d\n"
-
-#define VX_LIMIT_ARG(r)                                \
-               ,atomic_read(&limit->rcur[r])   \
-               ,limit->rmax[r]                 \
-               ,limit->rlim[r]                 \
-               ,atomic_read(&limit->lhit[r])
-
-static inline int vx_info_proc_limit(struct _vx_limit *limit, char *buffer)
-{
-       vx_limit_fixup(limit);
-       return sprintf(buffer,
-               "PROC"  VX_LIMIT_FMT
-               "VM"    VX_LIMIT_FMT
-               "VML"   VX_LIMIT_FMT
-               "RSS"   VX_LIMIT_FMT
-               "FILES" VX_LIMIT_FMT
-               "SOCK"  VX_LIMIT_FMT
-               VX_LIMIT_ARG(RLIMIT_NPROC)
-               VX_LIMIT_ARG(RLIMIT_AS)
-               VX_LIMIT_ARG(RLIMIT_MEMLOCK)
-               VX_LIMIT_ARG(RLIMIT_RSS)
-               VX_LIMIT_ARG(RLIMIT_NOFILE)
-               VX_LIMIT_ARG(VLIMIT_NSOCK)
-               );
-}
-
-#endif /* _VX_LIMIT_PROC_H */
-
-
index 6685161..ee60d4b 100644 (file)
 
 #include <linux/config.h>
 #include <linux/utsname.h>
-#include <linux/sched.h>
-#include <linux/vs_context.h>
 #include <linux/vserver/namespace.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/namespace.h>
 #include <linux/dcache.h>
-#include <linux/mount.h>
 #include <linux/fs.h>
 
 #include <asm/errno.h>
index f1a110b..e87c8b6 100644 (file)
@@ -14,7 +14,8 @@
 
 #include <linux/config.h>
 #include <linux/slab.h>
-#include <linux/vserver/network_cmd.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
 #include <linux/rcupdate.h>
 #include <net/tcp.h>
 
@@ -69,35 +70,6 @@ static void __dealloc_nx_info(struct nx_info *nxi)
        kfree(nxi);
 }
 
-static inline int __free_nx_info(struct nx_info *nxi)
-{
-       int usecnt, refcnt;
-
-       BUG_ON(!nxi);
-
-       usecnt = atomic_read(&nxi->nx_usecnt);
-       BUG_ON(usecnt < 0);
-
-       refcnt = atomic_read(&nxi->nx_refcnt);
-       BUG_ON(refcnt < 0);
-
-       if (!usecnt)
-               __dealloc_nx_info(nxi);
-       return usecnt;
-}
-
-static void __rcu_put_nx_info(struct rcu_head *head)
-{
-       struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
-
-       vxdprintk(VXD_CBIT(nid, 3),
-               "__rcu_put_nx_info(%p[#%d]): %d,%d",
-               nxi, nxi->nx_id,
-               atomic_read(&nxi->nx_usecnt),
-               atomic_read(&nxi->nx_refcnt));
-       put_nx_info(nxi);
-}
-
 
 /*     hash table for nx_info hash */
 
@@ -141,7 +113,7 @@ static inline void __unhash_nx_info(struct nx_info *nxi)
        vxdprintk(VXD_CBIT(nid, 4),
                "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
        hlist_del_rcu(&nxi->nx_hlist);
-       call_rcu(&nxi->nx_rcu, __rcu_put_nx_info);
+       put_nx_info(nxi);
 }
 
 
@@ -170,7 +142,6 @@ static inline struct nx_info *__lookup_nx_info(nid_t nid)
 /*     __nx_dynamic_id()
 
        * find unused dynamic nid
-       * requires the rcu_read_lock()
        * requires the hash_lock to be held                     */
 
 static inline nid_t __nx_dynamic_id(void)
@@ -206,9 +177,6 @@ static struct nx_info * __loc_nx_info(int id, int *err)
                return NULL;
        }
 
-       /* FIXME is this required at all ? */
-       rcu_read_lock();
-       /* required to make dynamic xids unique */
        spin_lock(&nx_info_hash_lock);
 
        /* dynamic context requested */
@@ -246,7 +214,6 @@ static struct nx_info * __loc_nx_info(int id, int *err)
 
 out_unlock:
        spin_unlock(&nx_info_hash_lock);
-       rcu_read_unlock();
        if (new)
                __dealloc_nx_info(new);
        return nxi;
@@ -256,9 +223,28 @@ out_unlock:
 
 /*     exported stuff                                          */
 
-void free_nx_info(struct nx_info *nxi)
+
+
+
+void rcu_free_nx_info(struct rcu_head *head)
 {
-       BUG_ON(__free_nx_info(nxi));
+       struct nx_info *nxi = container_of(head, struct nx_info, nx_rcu);
+       int usecnt, refcnt;
+
+       BUG_ON(!nxi || !head);
+
+       usecnt = atomic_read(&nxi->nx_usecnt);
+       BUG_ON(usecnt < 0);
+
+       refcnt = atomic_read(&nxi->nx_refcnt);
+       BUG_ON(refcnt < 0);
+
+       vxdprintk(VXD_CBIT(nid, 3),
+               "rcu_free_nx_info(%p): uc=%d", nxi, usecnt);
+       if (!usecnt)
+               __dealloc_nx_info(nxi);
+       else
+               printk("!!! rcu didn't free\n");
 }
 
 void unhash_nx_info(struct nx_info *nxi)
@@ -710,6 +696,7 @@ int vc_set_ncaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-EXPORT_SYMBOL_GPL(free_nx_info);
+EXPORT_SYMBOL_GPL(rcu_free_nx_info);
+EXPORT_SYMBOL_GPL(nx_info_hash_lock);
 EXPORT_SYMBOL_GPL(unhash_nx_info);
 
index 823226b..4408810 100644 (file)
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/proc_fs.h>
-#include <linux/sched.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vs_network.h>
 #include <linux/vs_cvirt.h>
 
-#include <linux/vserver/switch.h>
-
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-#include "cvirt_proc.h"
-#include "limit_proc.h"
-#include "sched_proc.h"
 
 static struct proc_dir_entry *proc_virtual;
 
@@ -712,7 +708,7 @@ int proc_virtual_readdir(struct file * filp,
                        filp->f_pos++;
                        /* fall through */
                case 3:
-                       if (vx_current_xid() > 1) {
+                       if (current->xid > 1) {
                                ino = fake_ino(1, PROC_XID_INO);
                                if (filldir(dirent, "current", 7,
                                        filp->f_pos, ino, DT_LNK) < 0)
@@ -780,7 +776,7 @@ int proc_vnet_readdir(struct file * filp,
                        filp->f_pos++;
                        /* fall through */
                case 3:
-                       if (vx_current_xid() > 1) {
+                       if (current->xid > 1) {
                                ino = fake_ino(1, PROC_NID_INO);
                                if (filldir(dirent, "current", 7,
                                        filp->f_pos, ino, DT_LNK) < 0)
@@ -828,7 +824,7 @@ void proc_vx_init(void)
        }
        proc_virtual = ent;
 
-       ent = proc_mkdir("virtnet", 0);
+       ent = proc_mkdir("vnet", 0);
        if (ent) {
                ent->proc_fops = &proc_vnet_dir_operations;
                ent->proc_iops = &proc_vnet_dir_inode_operations;
index 70e964e..fab4514 100644 (file)
 
 #include <linux/config.h>
 #include <linux/sched.h>
-// #include <linux/vs_base.h>
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
-#include <linux/vs_sched.h>
-#include <linux/vserver/sched_cmd.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/sched.h>
 
 #include <asm/errno.h>
 #include <asm/uaccess.h>
diff --git a/kernel/vserver/sched_init.h b/kernel/vserver/sched_init.h
deleted file mode 100644 (file)
index 3fbab7c..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-
-static inline void vx_info_init_sched(struct _vx_sched *sched)
-{
-       int i;
-
-       /* scheduling; hard code starting values as constants */
-       sched->fill_rate        = 1;
-       sched->interval         = 4;
-       sched->tokens_min       = HZ >> 4;
-       sched->tokens_max       = HZ >> 1;
-       sched->jiffies          = jiffies;
-       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
-
-       atomic_set(&sched->tokens, HZ >> 2);
-       sched->cpus_allowed     = CPU_MASK_ALL;
-       sched->priority_bias    = 0;
-
-       for_each_cpu(i) {
-               sched->cpu[i].user_ticks        = 0;
-               sched->cpu[i].sys_ticks         = 0;
-               sched->cpu[i].hold_ticks        = 0;
-       }
-}
-
-static inline void vx_info_exit_sched(struct _vx_sched *sched)
-{
-       return;
-}
-
diff --git a/kernel/vserver/sched_proc.h b/kernel/vserver/sched_proc.h
deleted file mode 100644 (file)
index 1da5fa3..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _VX_SCHED_PROC_H
-#define _VX_SCHED_PROC_H
-
-
-static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
-{
-       int length = 0;
-       int i;
-
-       length += sprintf(buffer,
-               "Token:\t\t%8d\n"
-               "FillRate:\t%8d\n"
-               "Interval:\t%8d\n"
-               "TokensMin:\t%8d\n"
-               "TokensMax:\t%8d\n"
-               "PrioBias:\t%8d\n"
-               ,atomic_read(&sched->tokens)
-               ,sched->fill_rate
-               ,sched->interval
-               ,sched->tokens_min
-               ,sched->tokens_max
-               ,sched->priority_bias
-               );
-
-       for_each_online_cpu(i) {
-               length += sprintf(buffer + length,
-                       "cpu %d: %lld %lld %lld\n"
-                       ,i
-                       ,(long long)sched->cpu[i].user_ticks
-                       ,(long long)sched->cpu[i].sys_ticks
-                       ,(long long)sched->cpu[i].hold_ticks
-                       );
-       }
-
-       return length;
-}
-
-#endif /* _VX_SCHED_PROC_H */
index bdf3c22..0499c9e 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/errno.h>
 #include <asm/uaccess.h>
 
+#include <linux/vs_base.h>
 #include <linux/vs_context.h>
 #include <linux/vserver/signal.h>
 
index 271f630..f72cb7e 100644 (file)
@@ -3,24 +3,22 @@
  *
  *  Virtual Server: Syscall Switch
  *
- *  Copyright (C) 2003-2005  Herbert Pötzl
+ *  Copyright (C) 2003-2004  Herbert Pötzl
  *
  *  V0.01  syscall switch
  *  V0.02  added signal to context
  *  V0.03  added rlimit functions
  *  V0.04  added iattr, task/xid functions
- *  V0.05  added debug/history stuff
  *
  */
 
 #include <linux/config.h>
 #include <linux/linkage.h>
-#include <linux/sched.h>
 #include <asm/errno.h>
 
-#include <linux/vserver/network.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
 #include <linux/vserver/switch.h>
-#include <linux/vserver/debug.h>
 
 
 static inline int
@@ -29,15 +27,13 @@ vc_get_version(uint32_t id)
        return VCI_VERSION;
 }
 
-#include <linux/vserver/context_cmd.h>
-#include <linux/vserver/cvirt_cmd.h>
-#include <linux/vserver/limit_cmd.h>
-#include <linux/vserver/network_cmd.h>
-#include <linux/vserver/sched_cmd.h>
-#include <linux/vserver/debug_cmd.h>
 
 #include <linux/vserver/legacy.h>
+#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
 #include <linux/vserver/namespace.h>
+#include <linux/vserver/sched.h>
+#include <linux/vserver/limit.h>
 #include <linux/vserver/inode.h>
 #include <linux/vserver/signal.h>
 #include <linux/vserver/dlimit.h>
@@ -46,32 +42,18 @@ vc_get_version(uint32_t id)
 extern asmlinkage long
 sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
 {
+       if (!capable(CAP_CONTEXT))
+               return -EPERM;
+
        vxdprintk(VXD_CBIT(switch, 0),
                "vc: VCMD_%02d_%d[%d], %d",
                VC_CATEGORY(cmd), VC_COMMAND(cmd),
                VC_VERSION(cmd), id);
 
-#ifdef CONFIG_VSERVER_LEGACY
-       if (!capable(CAP_CONTEXT) &&
-               /* dirty hack for capremove */
-               !(cmd==VCMD_new_s_context && id==-2))
-               return -EPERM;
-#else
-       if (!capable(CAP_CONTEXT))
-               return -EPERM;
-#endif
-
        switch (cmd) {
        case VCMD_get_version:
                return vc_get_version(id);
 
-       case VCMD_dump_history:
-#ifdef CONFIG_VSERVER_HISTORY
-               return vc_dump_history(id);
-#else
-               return -ENOSYS;
-#endif
-
 #ifdef CONFIG_VSERVER_LEGACY
        case VCMD_new_s_context:
                return vc_new_s_context(id, data);
@@ -169,11 +151,9 @@ sys_vserver(uint32_t cmd, uint32_t id, void __user *data)
        case VCMD_wait_exit:
                return vc_wait_exit(id, data);
 
-       case VCMD_create_context:
 #ifdef CONFIG_VSERVER_LEGACY
+       case VCMD_create_context:
                return vc_ctx_create(id, data);
-#else
-               return -ENOSYS;
 #endif
 
        case VCMD_get_iattr:
index 6a90067..fffc0dd 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/errno.h>
+#include <linux/vserver.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
@@ -51,6 +52,10 @@ void vserver_register_sysctl(void)
 {
        if (!vserver_table_header) {
                vserver_table_header = register_sysctl_table(vserver_table, 1);
+#ifdef CONFIG_PROC_FS
+//             if (vserver_table[0].de)
+//                     vserver_table[0].de->owner = THIS_MODULE;
+#endif
        }
 
 }
index 01cd3de..d2ca5bb 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_OOM_KILL)        += oom_kill.o
 obj-$(CONFIG_OOM_PANIC)        += oom_panic.o
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)        += hugetlb.o
+obj-$(CONFIG_PROC_MM)  += proc_mm.o
 obj-$(CONFIG_NUMA)     += mempolicy.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
index da73a9d..8d7ff9b 100644 (file)
@@ -28,11 +28,6 @@ unsigned long max_low_pfn;
 unsigned long min_low_pfn;
 EXPORT_SYMBOL(min_low_pfn);
 unsigned long max_pfn;
-/*
- * If we have booted due to a crash, max_pfn will be a very low value. We need
- * to know the amount of memory that the previous kernel used.
- */
-unsigned long saved_max_pfn;
 
 EXPORT_SYMBOL(max_pfn);                /* This is exported so
                                 * dma_get_required_mask(), which uses
index b7f0f91..2362ba2 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
-#include <linux/vs_memory.h>
 
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
index 3a911dd..9a4f695 100644 (file)
@@ -1584,9 +1584,9 @@ retry:
         */
        /* Only go through if we didn't race with anybody else... */
        if (pte_none(*page_table)) {
-               if (!PageReserved(new_page))
-                       // ++mm->rss;
-                       vx_rsspages_inc(mm);
+               if (!PageReserved(new_page)) 
+                       //++mm->rss;
+                       vx_rsspages_inc(mm);
                flush_icache_page(vma, new_page);
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
index fb3a1cf..3be348d 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/mm.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
-#include <linux/vs_memory.h>
 
 
 static int mlock_fixup(struct vm_area_struct * vma, 
index 432fd49..c17c39e 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -766,11 +766,11 @@ void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
  * The caller must hold down_write(current->mm->mmap_sem).
  */
 
-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
-                       unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff)
+unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, 
+                           unsigned long addr, unsigned long len,
+                           unsigned long prot, unsigned long flags,
+                           unsigned long pgoff)
 {
-       struct mm_struct * mm = current->mm;
        struct vm_area_struct * vma, * prev;
        struct inode *inode;
        unsigned int vm_flags;
@@ -1440,7 +1440,8 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
        address &= PAGE_MASK;
        grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
-       if (address < vma->vm_end) {
+       /* Someone beat us to it */
+       if (grow <= 0) {
                anon_vma_unlock(vma);
                return 0;
        }
@@ -1468,9 +1469,10 @@ int expand_stack(struct vm_area_struct * vma, unsigned long address)
        vma->vm_end = address;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED)
+       if (vma->vm_flags & VM_LOCKED) {
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
+       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        anon_vma_unlock(vma);
        return 0;
@@ -1516,7 +1518,8 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
        address &= PAGE_MASK;
        grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
-       if (address >= vma->vm_start) {
+       /* Someone beat us to it */
+       if (grow <= 0) {
                anon_vma_unlock(vma);
                return 0;
        }
@@ -1545,9 +1548,10 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
        vma->vm_pgoff -= grow;
        // vma->vm_mm->total_vm += grow;
        vx_vmpages_add(vma->vm_mm, grow);
-       if (vma->vm_flags & VM_LOCKED)
+       if (vma->vm_flags & VM_LOCKED) {
                // vma->vm_mm->locked_vm += grow;
                vx_vmlocked_add(vma->vm_mm, grow);
+       }
        __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
        anon_vma_unlock(vma);
        return 0;
@@ -1653,10 +1657,11 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 
        // area->vm_mm->total_vm -= len >> PAGE_SHIFT;
        vx_vmpages_sub(area->vm_mm, len >> PAGE_SHIFT);
-
-       if (area->vm_flags & VM_LOCKED)
+       
+       if (area->vm_flags & VM_LOCKED) {
                // area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
                vx_vmlocked_sub(area->vm_mm, len >> PAGE_SHIFT);
+       }
        vm_stat_unaccount(area);
        area->vm_mm->unmap_area(area);
        remove_vm_struct(area);
@@ -2002,6 +2007,7 @@ void exit_mmap(struct mm_struct *mm)
        vx_vmpages_sub(mm, mm->total_vm);
        // mm->locked_vm = 0;
        vx_vmlocked_sub(mm, mm->locked_vm);
+       arch_flush_exec_range(mm);
 
        spin_unlock(&mm->page_table_lock);
 
index b9bc487..8ad4f77 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/security.h>
 #include <linux/vs_memory.h>
 #include <linux/syscalls.h>
-#include <linux/vs_memory.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
index 834a364..1e780d5 100644 (file)
@@ -440,8 +440,7 @@ unsigned long do_mmap_pgoff(
 
        tblock->next = current->mm->context.tblock.next;
        current->mm->context.tblock.next = tblock;
-       // current->mm->total_vm += len >> PAGE_SHIFT;
-       vx_vmpages_add(current->mm, len >> PAGE_SHIFT);
+       current->mm->total_vm += len >> PAGE_SHIFT;
 
 #ifdef DEBUG
        printk("do_mmap:\n");
@@ -495,8 +494,7 @@ int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
        realalloc -= kobjsize(tblock);
        askedalloc -= sizeof(struct mm_tblock_struct);
        kfree(tblock);
-       // mm->total_vm -= len >> PAGE_SHIFT;
-       vx_vmpages_sub(mm, len >> PAGE_SHIFT);
+       mm->total_vm -= len >> PAGE_SHIFT;
 
 #ifdef DEBUG
        show_process_blocks();
@@ -509,8 +507,7 @@ int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
 void exit_mmap(struct mm_struct * mm)
 {
        struct mm_tblock_struct *tmp;
-       // mm->total_vm = 0;
-       vx_vmpages_sub(mm, mm->total_vm);
+       mm->total_vm = 0;
 
        if (!mm)
                return;
index 35e1084..abc73e0 100644 (file)
@@ -55,7 +55,7 @@ static unsigned long badness(struct task_struct *p, unsigned long uptime)
         * The memory size of the process is the basis for the badness.
         */
        points = p->mm->total_vm;
-       /* FIXME add vserver badness ;) */
+       /* add vserver badness ;) */
 
        /*
         * CPU time is in tens of seconds and run time is in thousands
index 8c206e4..be1d6dc 100644 (file)
@@ -31,9 +31,9 @@
 #include <linux/topology.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
-#include <linux/ckrm_mem_inline.h>
 #include <linux/vs_base.h>
 #include <linux/vs_limit.h>
+#include <linux/ckrm_mem_inline.h>
 #include <linux/nodemask.h>
 
 #include <asm/tlbflush.h>
@@ -50,7 +50,7 @@ int sysctl_lower_zone_protection = 0;
 EXPORT_SYMBOL(totalram_pages);
 EXPORT_SYMBOL(nr_swap_pages);
 
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_CRASH_DUMP_MODULE
 /* This symbol has to be exported to use 'for_each_pgdat' macro by modules. */
 EXPORT_SYMBOL(pgdat_list);
 #endif
@@ -105,7 +105,8 @@ static void bad_page(const char *function, struct page *page)
        tainted |= TAINT_BAD_PAGE;
 }
 
-#if !defined(CONFIG_HUGETLB_PAGE) && !defined(CONFIG_CRASH_DUMP)
+#if !defined(CONFIG_HUGETLB_PAGE) && !defined(CONFIG_CRASH_DUMP) \
+       && !defined(CONFIG_CRASH_DUMP_MODULE)
 #define prep_compound_page(page, order) do { } while (0)
 #define destroy_compound_page(page, order) do { } while (0)
 #else
@@ -366,14 +367,8 @@ static void prep_new_page(struct page *page, int order)
 
        page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                        1 << PG_referenced | 1 << PG_arch_1 |
-#ifdef CONFIG_CKRM_RES_MEM
-                       1 << PG_ckrm_account |
-#endif
                        1 << PG_checked | 1 << PG_mappedtodisk);
        page->private = 0;
-#ifdef CONFIG_CKRM_RES_MEM
-       page->ckrm_zone = NULL;
-#endif
        set_page_refs(page, order);
 }
 
@@ -629,6 +624,10 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 
        might_sleep_if(wait);
 
+       if (!ckrm_class_limit_ok((GET_MEM_CLASS(current)))) {
+               return NULL;
+       }
+
        /*
         * The caller may dip into page reserves a bit more if the caller
         * cannot run direct reclaim, or is the caller has realtime scheduling
@@ -636,10 +635,6 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
         */
        can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
 
-       if (!ckrm_class_limit_ok((ckrm_get_mem_class(current)))) {
-               return NULL;
-       }
-
        zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 
        if (unlikely(zones[0] == NULL)) {
@@ -757,6 +752,7 @@ nopage:
 got_pg:
        zone_statistics(zonelist, z);
        kernel_map_pages(page, 1 << order, 1);
+       ckrm_set_pages_class(page, 1 << order, GET_MEM_CLASS(current));
        return page;
 }
 
@@ -1573,10 +1569,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
                }
                printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
                                zone_names[j], realsize, batch);
-#ifndef CONFIG_CKRM_RES_MEM
                INIT_LIST_HEAD(&zone->active_list);
                INIT_LIST_HEAD(&zone->inactive_list);
-#endif
                zone->nr_scan_active = 0;
                zone->nr_scan_inactive = 0;
                zone->nr_active = 0;
index a7eb649..7771d28 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -30,7 +30,6 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
-#include <linux/ckrm_mem_inline.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -72,12 +71,7 @@ EXPORT_SYMBOL(put_page);
  */
 int rotate_reclaimable_page(struct page *page)
 {
-#ifdef CONFIG_CKRM_RES_MEM
-       struct ckrm_zone *ckrm_zone = page_ckrmzone(page);
-       struct zone *zone = ckrm_zone->zone;
-#else
-       struct zone *zone = page_zone(page);
-#endif
+       struct zone *zone;
        unsigned long flags;
 
        if (PageLocked(page))
@@ -89,14 +83,11 @@ int rotate_reclaimable_page(struct page *page)
        if (!PageLRU(page))
                return 1;
 
+       zone = page_zone(page);
        spin_lock_irqsave(&zone->lru_lock, flags);
        if (PageLRU(page) && !PageActive(page)) {
                list_del(&page->lru);
-#ifdef CONFIG_CKRM_RES_MEM
-               list_add_tail(&page->lru, &ckrm_zone->inactive_list);
-#else
                list_add_tail(&page->lru, &zone->inactive_list);
-#endif
                inc_page_state(pgrotated);
        }
        if (!test_clear_page_writeback(page))
index 1b4dae6..42288bb 100644 (file)
@@ -30,6 +30,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <linux/swapops.h>
+#include <linux/vs_base.h>
 #include <linux/vs_memory.h>
 
 spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
index 6f7fba5..ba42ce7 100644 (file)
 
 #include <linux/swapops.h>
 #include <linux/ckrm_mem.h>
-#include <linux/vs_cvirt.h>
 
+#ifndef AT_LIMIT_SUPPORT
+#warning "ckrm_at_limit disabled due to problems with memory hog tests -- setting ckrm_shrink_list_empty to true"
+#undef ckrm_shrink_list_empty  /* drop any earlier definition */
+#define ckrm_shrink_list_empty()               (1)     /* constant 1: list always reported empty */
+#endif /* !AT_LIMIT_SUPPORT */
 
 /* possible outcome of pageout() */
 typedef enum {
@@ -75,6 +79,9 @@ struct scan_control {
        /* This context's GFP mask */
        unsigned int gfp_mask;
 
+       /* Flag used by CKRM */
+       unsigned int ckrm_flags;
+
        int may_writepage;
 };
 
@@ -538,40 +545,32 @@ keep:
  * For pagecache intensive workloads, the first loop here is the hottest spot
  * in the kernel (apart from the copy_*_user functions).
  */
-#ifdef CONFIG_CKRM_RES_MEM
-static void shrink_cache(struct ckrm_zone *ckrm_zone, struct scan_control *sc)
-#else
 static void shrink_cache(struct zone *zone, struct scan_control *sc)
-#endif
 {
        LIST_HEAD(page_list);
        struct pagevec pvec;
-       int max_scan = sc->nr_to_scan;
-#ifdef CONFIG_CKRM_RES_MEM
-       struct zone *zone = ckrm_zone->zone;
-       struct list_head *inactive_list = &ckrm_zone->inactive_list;
-       struct list_head *active_list = &ckrm_zone->active_list;
-#else
-       struct list_head *inactive_list = &zone->inactive_list;
-       struct list_head *active_list = &zone->active_list;
-#endif
+       int max_scan = sc->nr_to_scan, nr_pass;
+       unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
 
        pagevec_init(&pvec, 1);
 
        lru_add_drain();
        spin_lock_irq(&zone->lru_lock);
+redo:
+       ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+       nr_pass = zone->nr_inactive;
        while (max_scan > 0) {
                struct page *page;
                int nr_taken = 0;
                int nr_scan = 0;
                int nr_freed;
 
-               while (nr_scan++ < SWAP_CLUSTER_MAX &&
-                               !list_empty(inactive_list)) {
-                       page = lru_to_page(inactive_list);
+               while (nr_pass-- && nr_scan++ < SWAP_CLUSTER_MAX &&
+                               !list_empty(&zone->inactive_list)) {
+                       page = lru_to_page(&zone->inactive_list);
 
                        prefetchw_prev_lru_page(page,
-                                               inactive_list, flags);
+                                               &zone->inactive_list, flags);
 
                        if (!TestClearPageLRU(page))
                                BUG();
@@ -582,17 +581,26 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                                 */
                                __put_page(page);
                                SetPageLRU(page);
-                               list_add(&page->lru, inactive_list);
+                               list_add(&page->lru, &zone->inactive_list);
+                               continue;
+                       } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+                               __put_page(page);
+                               SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+                               list_add_tail(&page->lru, &zone->inactive_list);
+#else
+                               list_add(&page->lru, &zone->inactive_list);
+#endif
                                continue;
                        }
                        list_add(&page->lru, &page_list);
+                       ckrm_mem_dec_inactive(page);
                        nr_taken++;
                }
                zone->nr_inactive -= nr_taken;
-               ckrm_zone_dec_inactive(ckrm_zone, nr_taken);
                spin_unlock_irq(&zone->lru_lock);
 
-               if (nr_taken == 0)
+               if ((bit_flag == 0) && (nr_taken == 0))
                        goto done;
 
                max_scan -= nr_scan;
@@ -615,21 +623,19 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
                        if (TestSetPageLRU(page))
                                BUG();
                        list_del(&page->lru);
-                       if (PageActive(page)) {
-                               ckrm_zone_inc_active(ckrm_zone, 1);
-                               zone->nr_active++;
-                               list_add(&page->lru, active_list);
-                       } else {
-                               ckrm_zone_inc_inactive(ckrm_zone, 1);
-                               zone->nr_inactive++;
-                               list_add(&page->lru, inactive_list);
-                       }
+                       if (PageActive(page))
+                               add_page_to_active_list(zone, page);
+                       else
+                               add_page_to_inactive_list(zone, page);
                        if (!pagevec_add(&pvec, page)) {
                                spin_unlock_irq(&zone->lru_lock);
                                __pagevec_release(&pvec);
                                spin_lock_irq(&zone->lru_lock);
                        }
                }
+               if (ckrm_flags && (nr_pass <= 0)) {
+                       goto redo;
+               }
        }
        spin_unlock_irq(&zone->lru_lock);
 done:
@@ -654,11 +660,7 @@ done:
  * But we had to alter page->flags anyway.
  */
 static void
-#ifdef CONFIG_CKRM_RES_MEM
-refill_inactive_zone(struct ckrm_zone *ckrm_zone, struct scan_control *sc)
-#else
 refill_inactive_zone(struct zone *zone, struct scan_control *sc)
-#endif
 {
        int pgmoved;
        int pgdeactivate = 0;
@@ -673,21 +675,19 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
        long mapped_ratio;
        long distress;
        long swap_tendency;
-#ifdef CONFIG_CKRM_RES_MEM
-       struct zone *zone = ckrm_zone->zone;
-       struct list_head *active_list = &ckrm_zone->active_list;
-       struct list_head *inactive_list = &ckrm_zone->inactive_list;
-#else
-       struct list_head *active_list = &zone->active_list;
-       struct list_head *inactive_list = &zone->inactive_list;
-#endif
+       unsigned int ckrm_flags = sc->ckrm_flags, bit_flag;
+       int nr_pass;
 
        lru_add_drain();
        pgmoved = 0;
        spin_lock_irq(&zone->lru_lock);
-       while (pgscanned < nr_pages && !list_empty(active_list)) {
-               page = lru_to_page(active_list);
-               prefetchw_prev_lru_page(page, active_list, flags);
+redo:
+       ckrm_get_reclaim_bits(&ckrm_flags, &bit_flag);
+       nr_pass = zone->nr_active;
+       while (pgscanned < nr_pages && !list_empty(&zone->active_list) &&
+                                               nr_pass) {
+               page = lru_to_page(&zone->active_list);
+               prefetchw_prev_lru_page(page, &zone->active_list, flags);
                if (!TestClearPageLRU(page))
                        BUG();
                list_del(&page->lru);
@@ -700,16 +700,28 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                         */
                        __put_page(page);
                        SetPageLRU(page);
-                       list_add(&page->lru, active_list);
+                       list_add(&page->lru, &zone->active_list);
+                       pgscanned++;
+               } else if (bit_flag && !ckrm_kick_page(page, bit_flag)) {
+                       __put_page(page);
+                       SetPageLRU(page);
+#ifdef CONFIG_CKRM_MEM_LRUORDER_CHANGE
+                       list_add_tail(&page->lru, &zone->active_list);
+#else
+                       list_add(&page->lru, &zone->active_list);
+#endif
                } else {
                        list_add(&page->lru, &l_hold);
+                       ckrm_mem_dec_active(page);
                        pgmoved++;
+                       pgscanned++;
+               }
+               if (!--nr_pass && ckrm_flags) {
+                       goto redo;
                }
-               pgscanned++;
        }
        zone->pages_scanned += pgscanned;
        zone->nr_active -= pgmoved;
-       ckrm_zone_dec_active(ckrm_zone, pgmoved);
        spin_unlock_irq(&zone->lru_lock);
 
        /*
@@ -767,10 +779,10 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        BUG();
                if (!TestClearPageActive(page))
                        BUG();
-               list_move(&page->lru, inactive_list);
+               list_move(&page->lru, &zone->inactive_list);
+               ckrm_mem_inc_inactive(page);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
-                       ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
                        zone->nr_inactive += pgmoved;
                        spin_unlock_irq(&zone->lru_lock);
                        pgdeactivate += pgmoved;
@@ -781,7 +793,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        spin_lock_irq(&zone->lru_lock);
                }
        }
-       ckrm_zone_inc_inactive(ckrm_zone, pgmoved);
        zone->nr_inactive += pgmoved;
        pgdeactivate += pgmoved;
        if (buffer_heads_over_limit) {
@@ -797,10 +808,10 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                if (TestSetPageLRU(page))
                        BUG();
                BUG_ON(!PageActive(page));
-               list_move(&page->lru, active_list);
+               list_move(&page->lru, &zone->active_list);
+               ckrm_mem_inc_active(page);
                pgmoved++;
                if (!pagevec_add(&pvec, page)) {
-                       ckrm_zone_inc_active(ckrm_zone, pgmoved);
                        zone->nr_active += pgmoved;
                        pgmoved = 0;
                        spin_unlock_irq(&zone->lru_lock);
@@ -808,7 +819,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                        spin_lock_irq(&zone->lru_lock);
                }
        }
-       ckrm_zone_inc_active(ckrm_zone, pgmoved);
        zone->nr_active += pgmoved;
        spin_unlock_irq(&zone->lru_lock);
        pagevec_release(&pvec);
@@ -817,183 +827,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
        mod_page_state(pgdeactivate, pgdeactivate);
 }
 
-#ifdef CONFIG_CKRM_RES_MEM
-static int
-shrink_weight(struct ckrm_zone *czone)
-{
-       u64 temp;
-       struct zone *zone = czone->zone;
-       struct ckrm_mem_res *cls = czone->memcls;
-       int zone_usage, zone_guar, zone_total, guar, ret, cnt;
-
-       zone_usage = czone->nr_active + czone->nr_inactive;
-       czone->active_over = czone->inactive_over = 0;
-
-       if (zone_usage < SWAP_CLUSTER_MAX * 4)
-               return 0;
-
-       if (cls->pg_guar == CKRM_SHARE_DONTCARE) {
-               // no guarantee for this class. use implicit guarantee
-               guar = cls->impl_guar / cls->nr_dontcare;
-       } else {
-               guar = cls->pg_unused / cls->nr_dontcare;
-       }
-       zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
-       temp = (u64) guar * zone_total;
-       do_div(temp, ckrm_tot_lru_pages);
-       zone_guar = (int) temp;
-
-       ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
-                               (zone_usage - zone_guar) : 0;
-       if (ret) {
-               cnt = czone->nr_active - (2 * zone_guar / 3);
-               if (cnt > 0)
-                       czone->active_over = cnt;
-               cnt = czone->active_over + czone->nr_inactive
-                                       - zone_guar / 3;
-               if (cnt > 0)
-                       czone->inactive_over = cnt;
-       }
-       return ret;
-}
-
-static void
-shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
-{
-       while (czone->shrink_active || czone->shrink_inactive) {
-               if (czone->shrink_active) {
-                       sc->nr_to_scan = min(czone->shrink_active,
-                                       (unsigned long)SWAP_CLUSTER_MAX);
-                       czone->shrink_active -= sc->nr_to_scan;
-                       refill_inactive_zone(czone, sc);
-               }
-               if (czone->shrink_inactive) {
-                       sc->nr_to_scan = min(czone->shrink_inactive,
-                                       (unsigned long)SWAP_CLUSTER_MAX);
-                       czone->shrink_inactive -= sc->nr_to_scan;
-                       shrink_cache(czone, sc);
-                       if (sc->nr_to_reclaim <= 0) {
-                               czone->shrink_active = 0;
-                               czone->shrink_inactive = 0;
-                               break;
-                       }
-               }
-
-               throttle_vm_writeout();
-       }
-}
-
-/* insert an entry to the list and sort decendently*/
-static void
-list_add_sort(struct list_head *entry, struct list_head *head)
-{
-       struct ckrm_zone *czone, *new =
-                       list_entry(entry, struct ckrm_zone, victim_list);
-       struct list_head* pos = head->next;
-
-       while (pos != head) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               if (new->shrink_weight > czone->shrink_weight) {
-                       __list_add(entry, pos->prev, pos);
-                       return;
-               }
-               pos = pos->next;
-       }
-       list_add_tail(entry, head);
-       return; 
-}
-
-static void
-shrink_choose_victims(struct list_head *victims,
-               unsigned long nr_active, unsigned long nr_inactive)
-{
-       unsigned long nr;
-       struct ckrm_zone* czone;
-       struct list_head *pos, *next;
-
-       pos = victims->next;
-       while ((pos != victims) && (nr_active || nr_inactive)) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               
-               if (nr_active && czone->active_over) {
-                       nr = min(nr_active, czone->active_over);
-                       czone->shrink_active += nr;
-                       czone->active_over -= nr;
-                       nr_active -= nr;
-               }
-
-               if (nr_inactive && czone->inactive_over) {
-                       nr = min(nr_inactive, czone->inactive_over);
-                       czone->shrink_inactive += nr;
-                       czone->inactive_over -= nr;
-                       nr_inactive -= nr;
-               }
-               pos = pos->next;
-       }
-
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               next = pos->next;
-               if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
-                       list_del_init(pos);
-                       ckrm_clear_shrink(czone);
-               }
-               pos = next;
-       }       
-       return;
-}
-
-static void
-shrink_get_victims(struct zone *zone, unsigned long nr_active,
-               unsigned long nr_inactive, struct list_head *victims)
-{
-       struct list_head *pos;
-       struct ckrm_mem_res *cls;
-       struct ckrm_zone *czone;
-       int zoneindex = zone_idx(zone);
-       
-       if (ckrm_nr_mem_classes <= 1) {
-               if (ckrm_mem_root_class) {
-                       czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
-                       if (!ckrm_test_set_shrink(czone)) {
-                               list_add(&czone->victim_list, victims);
-                               czone->shrink_active = nr_active;
-                               czone->shrink_inactive = nr_inactive;
-                       }
-               }
-               return;
-       }
-       spin_lock_irq(&ckrm_mem_lock);
-       list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
-               czone = cls->ckrm_zone + zoneindex;
-               if (ckrm_test_set_shrink(czone))
-                       continue;
-
-               czone->shrink_active = 0;
-               czone->shrink_inactive = 0;
-               czone->shrink_weight = shrink_weight(czone);
-               if (czone->shrink_weight) {
-                       list_add_sort(&czone->victim_list, victims);
-               } else {
-                       ckrm_clear_shrink(czone);
-               }
-       }
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
-       }
-       shrink_choose_victims(victims, nr_active, nr_inactive);
-       spin_unlock_irq(&ckrm_mem_lock);
-       pos = victims->next;
-       while (pos != victims) {
-               czone = list_entry(pos, struct ckrm_zone, victim_list);
-               pos = pos->next;
-       }
-}
-#endif /* CONFIG_CKRM_RES_MEM */
-
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
@@ -1002,9 +835,6 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
 {
        unsigned long nr_active;
        unsigned long nr_inactive;
-#ifdef CONFIG_CKRM_RES_MEM
-       struct ckrm_zone *czone;
-#endif
 
        /*
         * Add one to `nr_to_scan' just to make sure that the kernel will
@@ -1026,25 +856,8 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
 
        sc->nr_to_reclaim = SWAP_CLUSTER_MAX;
 
-#ifdef CONFIG_CKRM_RES_MEM
-       if (nr_active || nr_inactive) {
-               struct list_head *pos, *next;
-               LIST_HEAD(victims);
-
-               shrink_get_victims(zone, nr_active, nr_inactive, &victims);
-               pos = victims.next;
-               while (pos != &victims) {
-                       czone = list_entry(pos, struct ckrm_zone, victim_list);
-                       next = pos->next;
-                       list_del_init(pos);
-                       ckrm_clear_shrink(czone);
-                       sc->nr_to_reclaim = czone->shrink_inactive;
-                       shrink_ckrmzone(czone, sc);
-                       pos = next;
-               }
-       }
-#else 
        while (nr_active || nr_inactive) {
+               sc->ckrm_flags = ckrm_setup_reclamation();
                if (nr_active) {
                        sc->nr_to_scan = min(nr_active,
                                        (unsigned long)SWAP_CLUSTER_MAX);
@@ -1060,98 +873,116 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
                        if (sc->nr_to_reclaim <= 0)
                                break;
                }
+               ckrm_teardown_reclamation();
        }
-#endif
 }
 
-#ifdef CONFIG_CKRM_RES_MEM
+#if defined(CONFIG_CKRM_RES_MEM) && defined(AT_LIMIT_SUPPORT)
 // This function needs to be given more thought.
-// Shrink the class to be at shrink_to%" of its limit
+// Shrink the class to be at 90% of its limit
 static void
-ckrm_shrink_class(struct ckrm_mem_res *cls)
+ckrm_shrink_class(ckrm_mem_res_t *cls)
 {
        struct scan_control sc;
        struct zone *zone;
-       int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
-       int shrink_to = ckrm_mem_get_shrink_to();
+       int zindex = 0, active_credit = 0, inactive_credit = 0;
 
+       if (ckrm_test_set_shrink(cls)) { // set the SHRINK bit atomically
+               // bit was already set, so somebody else is working on this class: leave
+               return;
+       }
        sc.nr_mapped = read_page_state(nr_mapped);
        sc.nr_scanned = 0;
+       sc.ckrm_flags = ckrm_get_reclaim_flags(cls);
        sc.nr_reclaimed = 0;
        sc.priority = 0; // always very high priority
 
-       check_memclass(cls, "bef_shnk_cls");
        for_each_zone(zone) {
-               int zone_total, zone_limit, active_limit,
-                                       inactive_limit, clszone_limit;
-               struct ckrm_zone *czone;
+               int zone_total, zone_limit, active_limit, inactive_limit;
+               int active_over, inactive_over;
+               unsigned long nr_active, nr_inactive;
                u64 temp;
 
-               czone = &cls->ckrm_zone[zindex];
-               if (ckrm_test_set_shrink(czone))
-                       continue;
-
                zone->temp_priority = zone->prev_priority;
                zone->prev_priority = sc.priority;
 
-               zone_total = zone->nr_active + zone->nr_inactive 
-                                               + zone->free_pages;
+               zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
 
                temp = (u64) cls->pg_limit * zone_total;
                do_div(temp, ckrm_tot_lru_pages);
                zone_limit = (int) temp;
-               clszone_limit = (shrink_to * zone_limit) / 100;
-               active_limit = (2 * clszone_limit) / 3; // 2/3rd in active list
-               inactive_limit = clszone_limit / 3; // 1/3rd in inactive list
-
-               czone->shrink_active = 0;
-               cnt = czone->nr_active + act_credit - active_limit;
-               if (cnt > 0) {
-                       czone->shrink_active = (unsigned long) cnt;
+               active_limit = (6 * zone_limit) / 10; // 60% of limit: 2/3rd of the 90% target
+               inactive_limit = (3 * zone_limit) / 10; // 30% of limit: 1/3rd of the 90% target
+
+               active_over = cls->nr_active[zindex] - active_limit + active_credit;
+               inactive_over = active_over +
+                               (cls->nr_inactive[zindex] - inactive_limit) + inactive_credit;
+
+               if (active_over > 0) {
+                       zone->nr_scan_active += active_over + 1;
+                       nr_active = zone->nr_scan_active;
+                       active_credit = 0;
                } else {
-                       act_credit += cnt;
+                       active_credit += active_over;
+                       nr_active = 0;
                }
 
-               czone->shrink_inactive = 0;
-               cnt = czone->shrink_active + inact_credit +
-                                       (czone->nr_inactive - inactive_limit);
-               if (cnt > 0) {
-                       czone->shrink_inactive = (unsigned long) cnt;
+               if (inactive_over > 0) {
+                       zone->nr_scan_inactive += inactive_over;
+                       nr_inactive = zone->nr_scan_inactive;
+                       inactive_credit = 0;
                } else {
-                       inact_credit += cnt;
+                       inactive_credit += inactive_over;
+                       nr_inactive = 0;
                }
-
-
-               if (czone->shrink_active || czone->shrink_inactive) {
-                       sc.nr_to_reclaim = czone->shrink_inactive;
-                       shrink_ckrmzone(czone, &sc);
+               while (nr_active || nr_inactive) {
+                       if (nr_active) {
+                               sc.nr_to_scan = min(nr_active,
+                                               (unsigned long)SWAP_CLUSTER_MAX);
+                               nr_active -= sc.nr_to_scan;
+                               refill_inactive_zone(zone, &sc);
+                       }
+       
+                       if (nr_inactive) {
+                               sc.nr_to_scan = min(nr_inactive,
+                                               (unsigned long)SWAP_CLUSTER_MAX);
+                               nr_inactive -= sc.nr_to_scan;
+                               shrink_cache(zone, &sc);
+                               if (sc.nr_to_reclaim <= 0)
+                                       break;
+                       }
                }
                zone->prev_priority = zone->temp_priority;
                zindex++;
-               ckrm_clear_shrink(czone);
        }
-       check_memclass(cls, "aft_shnk_cls");
+       ckrm_clear_shrink(cls);
 }
 
 static void
 ckrm_shrink_classes(void)
 {
-       struct ckrm_mem_res *cls;
+       ckrm_mem_res_t *cls;
 
-       spin_lock_irq(&ckrm_mem_lock);
+       spin_lock(&ckrm_mem_lock);
        while (!ckrm_shrink_list_empty()) {
-               cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+               cls =  list_entry(ckrm_shrink_list.next, ckrm_mem_res_t,
                                shrink_list);
+               spin_unlock(&ckrm_mem_lock);
+               ckrm_shrink_class(cls);
+               spin_lock(&ckrm_mem_lock);
                list_del(&cls->shrink_list);
                cls->flags &= ~MEM_AT_LIMIT;
-               spin_unlock_irq(&ckrm_mem_lock);
-               ckrm_shrink_class(cls);
-               spin_lock_irq(&ckrm_mem_lock);
        }
-       spin_unlock_irq(&ckrm_mem_lock);
+       spin_unlock(&ckrm_mem_lock);
+       throttle_vm_writeout();
 }
 
 #else
+
+#if defined(CONFIG_CKRM_RES_MEM) && !defined(AT_LIMIT_SUPPORT)
+#warning "disabling ckrm_at_limit -- setting ckrm_shrink_classes to noop "
+#endif
+
 #define ckrm_shrink_classes()  do { } while(0)
 #endif
 
@@ -1391,7 +1222,7 @@ scan:
                        shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages);
                        sc.nr_reclaimed += reclaim_state->reclaimed_slab;
                        total_reclaimed += sc.nr_reclaimed;
-                       total_scanned += sc.nr_scanned;
+                       total_scanned += sc.nr_scanned;
                        if (zone->all_unreclaimable)
                                continue;
                        if (zone->pages_scanned >= (zone->nr_active +
@@ -1493,7 +1324,7 @@ static int kswapd(void *p)
                if (!ckrm_shrink_list_empty())
                        ckrm_shrink_classes();
                else
-                       balance_pgdat(pgdat, 0);
+               balance_pgdat(pgdat, 0);
        }
        return 0;
 }
@@ -1570,7 +1401,7 @@ static int __init kswapd_init(void)
        swap_setup();
        for_each_pgdat(pgdat)
                pgdat->kswapd
-               = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+               = find_task_by_real_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
        total_memory = nr_free_pagecache_pages();
        hotcpu_notifier(cpu_callback, 0);
        return 0;
index 65aedf8..9227745 100644 (file)
 #include <linux/wireless.h>            /* Note : will define WIRELESS_EXT */
 #include <net/iw_handler.h>
 #endif /* CONFIG_NET_RADIO */
-#include <linux/vs_network.h>
 #include <asm/current.h>
 #include <linux/vs_network.h>
 
@@ -1895,9 +1894,6 @@ static int dev_ifconf(char __user *arg)
 
        total = 0;
        for (dev = dev_base; dev; dev = dev->next) {
-               if (vx_flags(VXF_HIDE_NETIF, 0) &&
-                       !dev_in_nx_info(dev, current->nx_info))
-                       continue;
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
                                int done;
@@ -1958,10 +1954,6 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-       struct nx_info *nxi = current->nx_info;
-
-       if (vx_flags(VXF_HIDE_NETIF, 0) && !dev_in_nx_info(dev, nxi))
-               return;
        if (dev->get_stats) {
                struct net_device_stats *stats = dev->get_stats(dev);
 
index 2a8e289..52641b0 100644 (file)
@@ -251,9 +251,6 @@ int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
        for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
                if (idx < s_idx)
                        continue;
-               if (vx_info_flags(skb->sk->sk_vx_info, VXF_HIDE_NETIF, 0) &&
-                       !dev_in_nx_info(dev, skb->sk->sk_nx_info))
-                       continue;
                if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0) <= 0)
                        break;
        }
@@ -419,9 +416,6 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
                               sizeof(struct rtnl_link_ifmap) +
                               sizeof(struct rtnl_link_stats) + 128);
 
-       if (vx_flags(VXF_HIDE_NETIF, 0) &&
-               !dev_in_nx_info(dev, current->nx_info))
-               return;
        skb = alloc_skb(size, GFP_KERNEL);
        if (!skb)
                return;
index 758ee11..b21c874 100644 (file)
@@ -847,13 +847,7 @@ struct proto_ops inet_dgram_ops = {
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
  * udp_poll
  */
-#if defined(CONFIG_VNET) || defined(CONFIG_VNET_MODULE)
-struct proto_ops inet_sockraw_ops;
-EXPORT_SYMBOL(inet_sockraw_ops);
-#else
-static
-#endif
-struct proto_ops inet_sockraw_ops = {
+static struct proto_ops inet_sockraw_ops = {
        .family =       PF_INET,
        .owner =        THIS_MODULE,
        .release =      inet_release,
index 281099f..a0a259e 100644 (file)
 #include <linux/random.h>
 
 #ifdef CONFIG_CKRM
-#include <linux/ckrm_events.h>
+#include <linux/ckrm.h>
 #endif
 
 #include <net/icmp.h>
index b5d42a2..f030e0f 100644 (file)
@@ -94,6 +94,7 @@
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
+#include <linux/vs_base.h>
 #include <linux/vs_socket.h>
 
 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
index 697cdb1..2fd2975 100644 (file)
@@ -262,7 +262,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int taskflags)
        get_group_info(current->group_info);
        acred.uid = current->fsuid;
        acred.gid = current->fsgid;
-       acred.xid = vx_current_xid();
+       acred.xid = current->xid;
        acred.group_info = current->group_info;
 
        dprintk("RPC:     looking up %s cred\n",
@@ -282,7 +282,7 @@ rpcauth_bindcred(struct rpc_task *task)
        get_group_info(current->group_info);
        acred.uid = current->fsuid;
        acred.gid = current->fsgid;
-       acred.xid = vx_current_xid();
+       acred.xid = current->xid;
        acred.group_info = current->group_info;
 
        dprintk("RPC: %4d looking up %s cred\n",
index 19f17f7..294875e 100644 (file)
@@ -83,7 +83,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
        if (flags & RPC_TASK_ROOTCREDS) {
                cred->uc_uid = cred->uc_puid = 0;
                cred->uc_gid = cred->uc_pgid = 0;
-               cred->uc_xid = cred->uc_pxid = vx_current_xid();
+               cred->uc_xid = cred->uc_pxid = current->xid;
                cred->uc_gids[0] = NOGROUP;
        } else {
                int groups = acred->group_info->ngroups;
@@ -95,7 +95,7 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
                cred->uc_xid = acred->xid;
                cred->uc_puid = current->uid;
                cred->uc_pgid = current->gid;
-               cred->uc_pxid = vx_current_xid();
+               cred->uc_pxid = current->xid;
                for (i = 0; i < groups; i++)
                        cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
                if (i < NFS_NGROUPS)
@@ -131,7 +131,7 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int taskflags)
                 || cred->uc_xid != acred->xid
                 || cred->uc_puid != current->uid
                 || cred->uc_pgid != current->gid
-                || cred->uc_pxid != vx_current_xid())
+                || cred->uc_pxid != current->xid)
                        return 0;
 
                groups = acred->group_info->ngroups;
index e516b27..1bf15fc 100644 (file)
@@ -22,7 +22,7 @@ Summary: The Linux kernel (the core of the Linux operating system)
 %define kversion 2.6.%{sublevel}
 %define rpmversion 2.6.%{sublevel}
 %define rhbsys  %([ -r /etc/beehive-root ] && echo  || echo .`whoami`)
-%define release 1.14_FC2.2.planetlab%{?date:.%{date}}
+%define release 1.12_FC2.1.planetlab%{?date:.%{date}}
 %define signmodules 0
 
 %define KVERREL %{PACKAGE_VERSION}-%{PACKAGE_RELEASE}
@@ -517,9 +517,6 @@ fi
 # no files
 
 %changelog
-* Thu Feb 17 2005 Marc E. Fiuczynski <mef@cs.princeton.edu>
-- merge to Fedora Core 2 2.6.10-1.14_FC2
-
 * Tue Feb 8 2005 Marc E. Fiuczynski <mef@cs.princeton.edu>
 - merge to Fedora Core 2 2.6.10-1.12_FC2
 
index 4e9c198..e8e79c3 100644 (file)
@@ -185,8 +185,6 @@ int mod_unreg_security(const char *name, struct security_operations *ops)
  */
 int capable(int cap)
 {
-       if (vx_check_bit(VXC_CAP_MASK, cap) && !vx_mcaps(1L << cap))
-               return 0;
        if (security_ops->capable(current, cap)) {
                /* capability denied */
                return 0;
@@ -197,24 +195,9 @@ int capable(int cap)
        return 1;
 }
 
-int vx_capable(int cap, int ccap)
-{
-       if (security_ops->capable(current, cap)) {
-               /* capability denied */
-               return 0;
-       }
-       if (!vx_ccaps(ccap))
-               return 0;
-
-       /* capability granted */
-       current->flags |= PF_SUPERPRIV;
-       return 1;
-}
-
 EXPORT_SYMBOL_GPL(register_security);
 EXPORT_SYMBOL_GPL(unregister_security);
 EXPORT_SYMBOL_GPL(mod_reg_security);
 EXPORT_SYMBOL_GPL(mod_unreg_security);
 EXPORT_SYMBOL(capable);
-EXPORT_SYMBOL(vx_capable);
 EXPORT_SYMBOL(security_ops);