From: Planet-Lab Support Date: Tue, 13 Jul 2004 17:57:18 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create branch 'vserver'. X-Git-Tag: vserver-2_6_7-vs1_9_1_12~2 X-Git-Url: http://git.onelab.eu/?p=linux-2.6.git;a=commitdiff_plain;h=c449269f45c2cdf53af08c8d0af37472f66539d9 This commit was manufactured by cvs2svn to create branch 'vserver'. --- diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h new file mode 100644 index 000000000..d875d0435 --- /dev/null +++ b/arch/um/drivers/cow.h @@ -0,0 +1,41 @@ +#ifndef __COW_H__ +#define __COW_H__ + +#include + +#if __BYTE_ORDER == __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +#else +#error "__BYTE_ORDER not defined" +#endif + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, time_t *mtime_out, + __u64 *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c new file mode 100644 index 000000000..014c2c853 --- /dev/null +++ b/arch/um/drivers/cow_user.c @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "os.h" + +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +struct cow_header_v1 { + int magic; + int version; + char backing_file[PATH_LEN_V1]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +#define PATH_LEN_V2 MAXPATHLEN + +struct cow_header_v2 { + unsigned long magic; + unsigned long version; + char backing_file[PATH_LEN_V2]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN + */ +#define PATH_LEN_V3 4096 + +/* Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. 
This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + time_t mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if(version < 3){ + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return(-1); + } + slash = strrchr(from, '/'); + if(slash != NULL){ + *slash = '\0'; + if(chdir(from)){ + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + *slash = '/'; + if(getcwd(to, size) == NULL){ + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + remaining = size - strlen(to); + if(strlen(slash) + 1 > remaining){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcat(to, slash); + } + else { + if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return(0); +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size) +{ + struct cow_header_v3 *header; + unsigned long modtime; + int err; + + err = cow_seek_file(fd, 0); + if(err < 0){ + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("Failed to allocate COW V3 header\n"); + goto out; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if(strlen(backing_file) > sizeof(header->backing_file) - 1){ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %d characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if(absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = 
os_file_modtime(header->backing_file, &modtime); + if(err < 0){ + cow_printf("Backing file '%s' mtime request failed, " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if(err < 0){ + cow_printf("Couldn't get size of backing file '%s', " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + header->mtime = htonl(modtime); + header->size = htonll(*size); + header->sectorsize = htonl(sectorsize); + header->alignment = htonl(alignment); + header->cow_format = COW_BITMAP; + + err = os_write_file(fd, header, sizeof(*header)); + if(err != sizeof(*header)){ + cow_printf("Write of header to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return(err); +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return(pread(fd, buf, len, offset)); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + time_t *mtime_out, __u64 *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("read_cow_header - Failed to allocate header\n"); + return(-ENOMEM); + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if(n < offsetof(typeof(header->v1), backing_file)){ + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if(magic == COW_MAGIC) { + version = header->v1.version; + } + else if(magic == ntohl(COW_MAGIC)){ + version = ntohl(header->v1.version); + } + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if(version == 1){ + if(n < sizeof(header->v1)){ + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if(version == 2){ + if(n < sizeof(header->v2)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + else if(version == 3){ + if(n < sizeof(header->v3)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v3.mtime); + *size_out = ntohll(header->v3.size); + *sectorsize_out = ntohl(header->v3.sectorsize); + *align_out = ntohl(header->v3.alignment); + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if(*backing_file_out == NULL){ + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return(err); +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int 
*bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + __u64 size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if(err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if(err < 0){ + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if(err != sizeof(zero)){ + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + err = -EINVAL; + goto out; + } + + return(0); + + out: + return(err); +} + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h new file mode 100644 index 000000000..4bcb829d7 --- /dev/null +++ b/arch/um/include/irq_kern.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include "linux/interrupt.h" + +extern int um_request_irq(unsigned int irq, int fd, int type, + irqreturn_t (*handler)(int, void *, + struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/mem_kern.h b/arch/um/include/mem_kern.h new file mode 100644 index 000000000..b39f03d94 --- /dev/null +++ b/arch/um/include/mem_kern.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __MEM_KERN_H__ +#define __MEM_KERN_H__ + +#include "linux/list.h" +#include "linux/types.h" + +struct remapper { + struct list_head list; + int (*proc)(int, unsigned long, int, __u64); +}; + +extern void register_remapper(struct remapper *info); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c new file mode 100644 index 000000000..d0e0f50dc --- /dev/null +++ b/arch/um/kernel/physmem.c @@ -0,0 +1,468 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/ghash.h" +#include "linux/slab.h" +#include "linux/vmalloc.h" +#include "linux/bootmem.h" +#include "asm/types.h" +#include "asm/pgtable.h" +#include "kern_util.h" +#include "user_util.h" +#include "mode_kern.h" +#include "mem.h" +#include "mem_user.h" +#include "os.h" +#include "kern.h" +#include "init.h" + +#if 0 +static pgd_t physmem_pgd[PTRS_PER_PGD]; + +static struct phys_desc *lookup_mapping(void *addr) +{ + pgd = &physmem_pgd[pgd_index(addr)]; + if(pgd_none(pgd)) + return(NULL); + + pmd = pmd_offset(pgd, addr); + if(pmd_none(pmd)) + return(NULL); + + pte = pte_offset_kernel(pmd, addr); + return((struct phys_desc *) pte_val(pte)); +} + +static struct add_mapping(void *addr, struct phys_desc *new) +{ +} +#endif + +#define PHYS_HASHSIZE (8192) + +struct phys_desc; + +DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); + +struct phys_desc { + struct virtmem_ptrs virt_ptrs; + int fd; + __u64 offset; + void *virt; + unsigned long phys; + struct list_head list; +}; + +struct virtmem_table virtmem_hash; + +static int virt_cmp(void *virt1, void *virt2) +{ + return(virt1 != virt2); +} + +static int virt_hash(void *virt) +{ + unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; + return(addr % PHYS_HASHSIZE); +} + +DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, + virt_hash); + +LIST_HEAD(descriptor_mappings); + +struct desc_mapping { + int fd; + struct list_head list; + struct list_head pages; +}; + +static struct desc_mapping *find_mapping(int fd) +{ + struct desc_mapping *desc; + struct list_head *ele; + + list_for_each(ele, &descriptor_mappings){ + desc = list_entry(ele, struct desc_mapping, list); + if(desc->fd == fd) + return(desc); + } + + return(NULL); +} + +static struct desc_mapping *descriptor_mapping(int fd) +{ + struct desc_mapping *desc; + + desc = find_mapping(fd); + if(desc != NULL) + return(desc); + + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + return(NULL); + + *desc = ((struct desc_mapping) + { .fd = fd, + .list = LIST_HEAD_INIT(desc->list), + .pages = LIST_HEAD_INIT(desc->pages) }); + list_add(&desc->list, &descriptor_mappings); + + return(desc); +} + +int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) +{ + struct desc_mapping *fd_maps; + struct phys_desc *desc; + unsigned long phys; + int err; + + fd_maps = descriptor_mapping(fd); + if(fd_maps == NULL) + return(-ENOMEM); + + phys = __pa(virt); + if(find_virtmem_hash(&virtmem_hash, virt) != NULL) + panic("Address 0x%p is already substituted\n", virt); + + err = -ENOMEM; + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + goto out; + + *desc = ((struct phys_desc) + { .virt_ptrs = { NULL, NULL }, + .fd = fd, + .offset = offset, + .virt = virt, + .phys = __pa(virt), + .list = LIST_HEAD_INIT(desc->list) }); + insert_virtmem_hash(&virtmem_hash, desc); + + list_add(&desc->list, &fd_maps->pages); + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); + if(!err) + goto out; + + remove_virtmem_hash(&virtmem_hash, desc); + 
kfree(desc); + out: + return(err); +} + +static int physmem_fd = -1; + +static void remove_mapping(struct phys_desc *desc) +{ + void *virt = desc->virt; + int err; + + remove_virtmem_hash(&virtmem_hash, desc); + list_del(&desc->list); + kfree(desc); + + err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); + if(err) + panic("Failed to unmap block device page from physical memory, " + "errno = %d", -err); +} + +int physmem_remove_mapping(void *virt) +{ + struct phys_desc *desc; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + desc = find_virtmem_hash(&virtmem_hash, virt); + if(desc == NULL) + return(0); + + remove_mapping(desc); + return(1); +} + +void physmem_forget_descriptor(int fd) +{ + struct desc_mapping *desc; + struct phys_desc *page; + struct list_head *ele, *next; + __u64 offset; + void *addr; + int err; + + desc = find_mapping(fd); + if(desc == NULL) + return; + + list_for_each_safe(ele, next, &desc->pages){ + page = list_entry(ele, struct phys_desc, list); + offset = page->offset; + addr = page->virt; + remove_mapping(page); + err = os_seek_file(fd, offset); + if(err) + panic("physmem_forget_descriptor - failed to seek " + "to %lld in fd %d, error = %d\n", + offset, fd, -err); + err = os_read_file(fd, addr, PAGE_SIZE); + if(err < 0) + panic("physmem_forget_descriptor - failed to read " + "from fd %d to 0x%p, error = %d\n", + fd, addr, -err); + } + + list_del(&desc->list); + kfree(desc); +} + +void arch_free_page(struct page *page, int order) +{ + void *virt; + int i; + + for(i = 0; i < (1 << order); i++){ + virt = __va(page_to_phys(page + i)); + physmem_remove_mapping(virt); + } +} + +int is_remapped(void *virt) +{ + return(find_virtmem_hash(&virtmem_hash, virt) != NULL); +} + +/* Changed during early boot */ +unsigned long high_physmem; + +extern unsigned long physmem_size; + +void *to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +unsigned long to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) +{ + struct page *p, *map; + unsigned long phys_len, phys_pages, highmem_len, highmem_pages; + unsigned long iomem_len, iomem_pages, total_len, total_pages; + int i; + + phys_pages = physmem >> PAGE_SHIFT; + phys_len = phys_pages * sizeof(struct page); + + iomem_pages = iomem >> PAGE_SHIFT; + iomem_len = iomem_pages * sizeof(struct page); + + highmem_pages = highmem >> PAGE_SHIFT; + highmem_len = highmem_pages * sizeof(struct page); + + total_pages = phys_pages + iomem_pages + highmem_pages; + total_len = phys_len + iomem_pages + highmem_len; + + if(kmalloc_ok){ + map = kmalloc(total_len, GFP_KERNEL); + if(map == NULL) + map = vmalloc(total_len); + } + else map = alloc_bootmem_low_pages(total_len); + + if(map == NULL) + return(-ENOMEM); + + for(i = 0; i < total_pages; i++){ + p = &map[i]; + set_page_count(p, 0); + SetPageReserved(p); + INIT_LIST_HEAD(&p->lru); + } + + mem_map = map; + max_mapnr = total_pages; + return(0); +} + +struct page *phys_to_page(const unsigned long phys) +{ + return(&mem_map[phys >> PAGE_SHIFT]); +} + +struct page *__virt_to_page(const unsigned long virt) +{ + return(&mem_map[__pa(virt) >> PAGE_SHIFT]); +} + +unsigned long page_to_phys(struct page *page) +{ + return((page - mem_map) << PAGE_SHIFT); +} + +pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + + pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); + if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); + return(pte); +} + 
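[Editor's illustration, not part of the patch] The to_virt()/to_phys() helpers above work because UML's "physical" memory is a single contiguous region that begins at the host virtual address uml_physmem, so physical-to-virtual translation is plain offset arithmetic rather than a page-table walk. A minimal, self-contained sketch of that arithmetic (the base address below is made up for the example):

#include <assert.h>
#include <stdio.h>

/* Hypothetical base of the "physical" memory region, standing in for
 * UML's uml_physmem. */
static unsigned long uml_physmem = 0x60000000UL;

static void *to_virt(unsigned long phys)
{
	/* Virtual address = base of the region + physical offset. */
	return (void *) (uml_physmem + phys);
}

static unsigned long to_phys(void *virt)
{
	/* Physical offset = virtual address - base of the region. */
	return ((unsigned long) virt) - uml_physmem;
}

int main(void)
{
	unsigned long phys = 0x1000;
	void *virt = to_virt(phys);

	/* The two translations are exact inverses of each other. */
	assert(to_phys(virt) == phys);
	printf("phys 0x%lx maps to virt %p\n", phys, virt);
	return 0;
}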
+/* Changed during early boot */ +static unsigned long kmem_top = 0; + +unsigned long get_kmem_end(void) +{ + if(kmem_top == 0) + kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); + return(kmem_top); +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if(err) + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); +} + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +void setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long highmem) +{ + unsigned long reserve = reserve_end - start; + int pfn = PFN_UP(__pa(reserve_end)); + int delta = (len - reserve) >> PAGE_SHIFT; + int err, offset, bootmap_size; + + physmem_fd = create_mem_file(len + highmem); + + offset = uml_reserved - uml_physmem; + err = os_map_memory((void *) uml_reserved, physmem_fd, offset, + len - offset, 1, 1, 0); + if(err < 0){ + os_print_error(err, "Mapping memory"); + exit(1); + } + + bootmap_size = init_bootmem(pfn, pfn + delta); + free_bootmem(__pa(reserve_end) + bootmap_size, + len - bootmap_size - reserve); +} + +int phys_mapping(unsigned long phys, __u64 *offset_out) +{ + struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, + __va(phys & PAGE_MASK)); + int fd = -1; + + if(desc != NULL){ + fd = desc->fd; + *offset_out = desc->offset; + } + else if(phys < physmem_size){ + fd = physmem_fd; + *offset_out = phys; + } + else if(phys < __pa(end_iomem)){ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if((phys >= region->phys) && + (phys < region->phys + region->size)){ + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if(phys < __pa(end_iomem) + highmem){ + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return(fd); +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if(!strcmp(region->driver, driver)){ + *len_out = region->size; + return(region->virt); + } + } + + return(0); +} + +int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while(region != NULL){ + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if(err) + printk("Mapping iomem region for driver '%s' failed, " + "errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return(0); +} + +__initcall(setup_iomem); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c new file mode 100644 index 000000000..ea82f19b2 --- /dev/null +++ b/arch/um/kernel/skas/uaccess.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/kernel.h" +#include "linux/string.h" +#include "linux/fs.h" +#include "linux/highmem.h" +#include "asm/page.h" +#include "asm/pgtable.h" +#include "asm/uaccess.h" +#include "kern_util.h" + +extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, + pte_t *pte_out); + +static unsigned long maybe_map(unsigned long virt, int is_write) +{ + pte_t pte; + int err; + + void *phys = um_virt_to_phys(current, virt, &pte); + int dummy_code; + + if(IS_ERR(phys) || (is_write && !pte_write(pte))){ + err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); + if(err) + return(0); + phys = um_virt_to_phys(current, virt, NULL); + } + return((unsigned long) phys); +} + +static int do_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + struct page *page; + int n; + + addr = maybe_map(addr, is_write); + if(addr == -1) + return(-1); + + page = phys_to_page(addr); + addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + n = (*op)(addr, len, arg); + kunmap(page); + + return(n); +} + +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), + void *arg) +{ + int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + int remain = len, n; + + n = do_op(addr, size, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += size; + remain -= size; + if(remain == 0) + return(0); + + while(addr < ((addr + remain) & PAGE_MASK)){ + n = do_op(addr, PAGE_SIZE, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if(remain == 0) + return(0); + + n = do_op(addr, remain, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + return(0); +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return(0); +} + +int copy_from_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_READ, from, n) ? + buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): + n); +} + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return(0); +} + +int copy_to_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, to, n) ? 
+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : + n); +} + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strncpy_from_user_skas(char *dst, const char *src, int count) +{ + int n; + char *ptr = dst; + + if(segment_eq(get_fs(), KERNEL_DS)){ + strncpy(dst, src, count); + return(strnlen(dst, count)); + } + + if(!access_ok_skas(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if(n != 0) + return(-EFAULT); + return(strnlen(dst, count)); +} + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return(0); +} + +int __clear_user_skas(void *mem, int len) +{ + return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); +} + +int clear_user_skas(void *mem, int len) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memset(mem, 0, len); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, mem, len) ? + buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); +} + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strnlen_user_skas(const void *str, int len) +{ + int count = 0, n; + + if(segment_eq(get_fs(), KERNEL_DS)) + return(strnlen(str, len) + 1); + + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if(n == 0) + return(count + 1); + return(-EFAULT); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c
new file mode 100644
index 000000000..9c8401120
--- /dev/null
+++ b/arch/um/kernel/tt/uaccess.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+
+int copy_from_user_tt(void *to, const void *from, int n)
+{
+	if(!access_ok_tt(VERIFY_READ, from, n))
+		return(n);
+
+	return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
+				   &current->thread.fault_catcher));
+}
+
+int copy_to_user_tt(void *to, const void *from, int n)
+{
+	if(!access_ok_tt(VERIFY_WRITE, to, n))
+		return(n);
+
+	return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
+				 &current->thread.fault_catcher));
+}
+
+int strncpy_from_user_tt(char *dst, const char *src, int count)
+{
+	int n;
+
+	if(!access_ok_tt(VERIFY_READ, src, 1))
+		return(-EFAULT);
+
+	n = __do_strncpy_from_user(dst, src, count,
+				   &current->thread.fault_addr,
+				   &current->thread.fault_catcher);
+	if(n < 0) return(-EFAULT);
+	return(n);
+}
+
+int __clear_user_tt(void *mem, int len)
+{
+	return(__do_clear_user(mem, len,
+			       &current->thread.fault_addr,
+			       &current->thread.fault_catcher));
+}
+
+int clear_user_tt(void *mem, int len)
+{
+	if(!access_ok_tt(VERIFY_WRITE, mem, len))
+		return(len);
+
+	return(__do_clear_user(mem, len, &current->thread.fault_addr,
+			       &current->thread.fault_catcher));
+}
+
+int strnlen_user_tt(const void *str, int len)
+{
+	return(__do_strnlen_user(str, len,
+				 &current->thread.fault_addr,
+				 &current->thread.fault_catcher));
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
new file mode 100644
index 000000000..ef0fb71e4
--- /dev/null
+++ b/arch/um/os-Linux/user_syms.c
@@ -0,0 +1,88 @@
+#include "linux/types.h"
+#include "linux/module.h"
+
+/* Some of this are builtin function (some are not but could in the future),
+ * so I *must* declare good prototypes for them and then EXPORT them.
+ * The kernel code uses the macro defined by include/linux/string.h,
+ * so I undef macros; the userspace code does not include that and I
+ * add an EXPORT for the glibc one.*/
+
+#undef strlen
+#undef strstr
+#undef memcpy
+#undef memset
+
+extern size_t strlen(const char *);
+extern void *memcpy(void *, const void *, size_t);
+extern void *memset(void *, int, size_t);
+extern int printf(const char *, ...);
+
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(printf);
+
+EXPORT_SYMBOL(strstr);
+
+/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
+ * However, the modules will use the CRC defined *here*, no matter if it is + * good; so the versions of these symbols will always match + */ +#define EXPORT_SYMBOL_PROTO(sym) \ + int sym(void); \ + EXPORT_SYMBOL(sym); + +EXPORT_SYMBOL_PROTO(__errno_location); + +EXPORT_SYMBOL_PROTO(access); +EXPORT_SYMBOL_PROTO(open); +EXPORT_SYMBOL_PROTO(open64); +EXPORT_SYMBOL_PROTO(close); +EXPORT_SYMBOL_PROTO(read); +EXPORT_SYMBOL_PROTO(write); +EXPORT_SYMBOL_PROTO(dup2); +EXPORT_SYMBOL_PROTO(__xstat); +EXPORT_SYMBOL_PROTO(__lxstat); +EXPORT_SYMBOL_PROTO(__lxstat64); +EXPORT_SYMBOL_PROTO(lseek); +EXPORT_SYMBOL_PROTO(lseek64); +EXPORT_SYMBOL_PROTO(chown); +EXPORT_SYMBOL_PROTO(truncate); +EXPORT_SYMBOL_PROTO(utime); +EXPORT_SYMBOL_PROTO(chmod); +EXPORT_SYMBOL_PROTO(rename); +EXPORT_SYMBOL_PROTO(__xmknod); + +EXPORT_SYMBOL_PROTO(symlink); +EXPORT_SYMBOL_PROTO(link); +EXPORT_SYMBOL_PROTO(unlink); +EXPORT_SYMBOL_PROTO(readlink); + +EXPORT_SYMBOL_PROTO(mkdir); +EXPORT_SYMBOL_PROTO(rmdir); +EXPORT_SYMBOL_PROTO(opendir); +EXPORT_SYMBOL_PROTO(readdir); +EXPORT_SYMBOL_PROTO(closedir); +EXPORT_SYMBOL_PROTO(seekdir); +EXPORT_SYMBOL_PROTO(telldir); + +EXPORT_SYMBOL_PROTO(ioctl); + +EXPORT_SYMBOL_PROTO(pread64); +EXPORT_SYMBOL_PROTO(pwrite64); + +EXPORT_SYMBOL_PROTO(statfs); +EXPORT_SYMBOL_PROTO(statfs64); + +EXPORT_SYMBOL_PROTO(getuid); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/Makefile b/fs/hostfs/Makefile new file mode 100644 index 000000000..794292e0a --- /dev/null +++ b/fs/hostfs/Makefile @@ -0,0 +1,26 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino +# to __st_ino. It stayed in the same place, so as long as the correct name +# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. + +STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ + echo __)st_ino + +hostfs-objs := hostfs_kern.o hostfs_user.o + +obj-y = +obj-$(CONFIG_HOSTFS) += hostfs.o + +SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) + +USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + +USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) + +$(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h new file mode 100644 index 000000000..d1f6c339f --- /dev/null +++ b/fs/hostfs/hostfs.h @@ -0,0 +1,79 @@ +#ifndef __UM_FS_HOSTFS +#define __UM_FS_HOSTFS + +#include "os.h" + +/* These are exactly the same definitions as in fs.h, but the names are + * changed so that this file can be included in both kernel and user files. 
+ */ + +#define HOSTFS_ATTR_MODE 1 +#define HOSTFS_ATTR_UID 2 +#define HOSTFS_ATTR_GID 4 +#define HOSTFS_ATTR_SIZE 8 +#define HOSTFS_ATTR_ATIME 16 +#define HOSTFS_ATTR_MTIME 32 +#define HOSTFS_ATTR_CTIME 64 +#define HOSTFS_ATTR_ATIME_SET 128 +#define HOSTFS_ATTR_MTIME_SET 256 +#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ +#define HOSTFS_ATTR_ATTR_FLAG 1024 + +struct hostfs_iattr { + unsigned int ia_valid; + mode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + struct timespec ia_atime; + struct timespec ia_mtime; + struct timespec ia_ctime; + unsigned int ia_attr_flags; +}; + +extern int stat_file(const char *path, unsigned long long *inode_out, + int *mode_out, int *nlink_out, int *uid_out, int *gid_out, + unsigned long long *size_out, struct timespec *atime_out, + struct timespec *mtime_out, struct timespec *ctime_out, + int *blksize_out, unsigned long long *blocks_out); +extern int access_file(char *path, int r, int w, int x); +extern int open_file(char *path, int r, int w, int append); +extern int file_type(const char *path, int *rdev); +extern void *open_dir(char *path, int *err_out); +extern char *read_dir(void *stream, unsigned long long *pos, + unsigned long long *ino_out, int *len_out); +extern void close_file(void *stream); +extern void close_dir(void *stream); +extern int read_file(int fd, unsigned long long *offset, char *buf, int len); +extern int write_file(int fd, unsigned long long *offset, const char *buf, + int len); +extern int lseek_file(int fd, long long offset, int whence); +extern int file_create(char *name, int ur, int uw, int ux, int gr, + int gw, int gx, int or, int ow, int ox); +extern int set_attr(const char *file, struct hostfs_iattr *attrs); +extern int make_symlink(const char *from, const char *to); +extern int unlink_file(const char *file); +extern int do_mkdir(const char *file, int mode); +extern int do_rmdir(const char *file); +extern int do_mknod(const char *file, int mode, int dev); +extern int link_file(const char *from, const char *to); +extern int do_readlink(char *file, char *buf, int size); +extern int rename_file(char *from, char *to); +extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, + long long *bfree_out, long long *bavail_out, + long long *files_out, long long *ffree_out, + void *fsid_out, int fsid_size, long *namelen_out, + long *spare_out); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c new file mode 100644 index 000000000..ef5d5d1bf --- /dev/null +++ b/fs/hostfs/hostfs_kern.c @@ -0,0 +1,1008 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + * + * Ported the filesystem routines to 2.5. 
+ * 2003-02-10 Petr Baudis + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hostfs.h" +#include "kern_util.h" +#include "kern.h" +#include "user_util.h" +#include "2_5compat.h" +#include "init.h" + +struct hostfs_inode_info { + char *host_filename; + int fd; + int mode; + struct inode vfs_inode; +}; + +static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) +{ + return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); +} + +#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode) + +int hostfs_d_delete(struct dentry *dentry) +{ + return(1); +} + +struct dentry_operations hostfs_dentry_ops = { + .d_delete = hostfs_d_delete, +}; + +/* Changed in hostfs_args before the kernel starts running */ +static char *root_ino = "/"; +static int append = 0; + +#define HOSTFS_SUPER_MAGIC 0x00c0ffee + +static struct inode_operations hostfs_iops; +static struct inode_operations hostfs_dir_iops; +static struct address_space_operations hostfs_link_aops; + +static int __init hostfs_args(char *options, int *add) +{ + char *ptr; + + ptr = strchr(options, ','); + if(ptr != NULL) + *ptr++ = '\0'; + if(*options != '\0') + root_ino = options; + + options = ptr; + while(options){ + ptr = strchr(options, ','); + if(ptr != NULL) + *ptr++ = '\0'; + if(*options != '\0'){ + if(!strcmp(options, "append")) + append = 1; + else printf("hostfs_args - unsupported option - %s\n", + options); + } + options = ptr; + } + return(0); +} + +__uml_setup("hostfs=", hostfs_args, +"hostfs=,,...\n" +" This is used to set hostfs parameters. The root directory argument\n" +" is used to confine all hostfs mounts to within the specified directory\n" +" tree on the host. If this isn't specified, then a user inside UML can\n" +" mount anything on the host that's accessible to the user that's running\n" +" it.\n" +" The only flag currently supported is 'append', which specifies that all\n" +" files opened by hostfs will be opened in append mode.\n\n" +); + +static char *dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + int len; + + len = 0; + parent = dentry; + while(parent->d_parent != parent){ + len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = HOSTFS_I(parent->d_inode)->host_filename; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if(name == NULL) return(NULL); + + name[len] = '\0'; + parent = dentry; + while(parent->d_parent != parent){ + len -= parent->d_name.len + 1; + name[len] = '/'; + strncpy(&name[len + 1], parent->d_name.name, + parent->d_name.len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return(name); +} + +static char *inode_name(struct inode *ino, int extra) +{ + struct dentry *dentry; + + dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); + return(dentry_name(dentry, extra)); +} + +static int read_name(struct inode *ino, char *name) +{ + /* The non-int inode fields are copied into ints by stat_file and + * then copied into the inode because passing the actual pointers + * in and having them treated as int * breaks on big-endian machines + */ + int err; + int i_mode, i_nlink, i_blksize; + unsigned long long i_size; + unsigned long long i_ino; + unsigned long long i_blocks; + + err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, + &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, + &ino->i_ctime, &i_blksize, &i_blocks); + if(err) + return(err); + + 
ino->i_ino = i_ino; + ino->i_mode = i_mode; + ino->i_nlink = i_nlink; + ino->i_size = i_size; + ino->i_blksize = i_blksize; + ino->i_blocks = i_blocks; + if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid())) + ino->i_uid = 0; + return(0); +} + +static char *follow_link(char *link) +{ + int len, n; + char *name, *resolved, *end; + + len = 64; + while(1){ + n = -ENOMEM; + name = kmalloc(len, GFP_KERNEL); + if(name == NULL) + goto out; + + n = do_readlink(link, name, len); + if(n < len) + break; + len *= 2; + kfree(name); + } + if(n < 0) + goto out_free; + + if(*name == '/') + return(name); + + end = strrchr(link, '/'); + if(end == NULL) + return(name); + + *(end + 1) = '\0'; + len = strlen(link) + strlen(name) + 1; + + resolved = kmalloc(len, GFP_KERNEL); + if(resolved == NULL){ + n = -ENOMEM; + goto out_free; + } + + sprintf(resolved, "%s%s", link, name); + kfree(name); + kfree(link); + return(resolved); + + out_free: + kfree(name); + out: + return(ERR_PTR(n)); +} + +static int read_inode(struct inode *ino) +{ + char *name; + int err = 0; + + /* Unfortunately, we are called from iget() when we don't have a dentry + * allocated yet. + */ + if(list_empty(&ino->i_dentry)) + goto out; + + err = -ENOMEM; + name = inode_name(ino, 0); + if(name == NULL) + goto out; + + if(file_type(name, NULL) == OS_TYPE_SYMLINK){ + name = follow_link(name); + if(IS_ERR(name)){ + err = PTR_ERR(name); + goto out; + } + } + + err = read_name(ino, name); + kfree(name); + out: + return(err); +} + +int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) +{ + /* do_statfs uses struct statfs64 internally, but the linux kernel + * struct statfs still has 32-bit versions for most of these fields, + * so we convert them here + */ + int err; + long long f_blocks; + long long f_bfree; + long long f_bavail; + long long f_files; + long long f_ffree; + + err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, + &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, + &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), + &sf->f_namelen, sf->f_spare); + if(err) return(err); + sf->f_blocks = f_blocks; + sf->f_bfree = f_bfree; + sf->f_bavail = f_bavail; + sf->f_files = f_files; + sf->f_ffree = f_ffree; + sf->f_type = HOSTFS_SUPER_MAGIC; + return(0); +} + +static struct inode *hostfs_alloc_inode(struct super_block *sb) +{ + struct hostfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if(hi == NULL) + return(NULL); + + *hi = ((struct hostfs_inode_info) { .host_filename = NULL, + .fd = -1, + .mode = 0 }); + inode_init_once(&hi->vfs_inode); + return(&hi->vfs_inode); +} + +static void hostfs_destroy_inode(struct inode *inode) +{ + if(HOSTFS_I(inode)->host_filename) + kfree(HOSTFS_I(inode)->host_filename); + + if(HOSTFS_I(inode)->fd != -1) + close_file(&HOSTFS_I(inode)->fd); + + kfree(HOSTFS_I(inode)); +} + +static void hostfs_read_inode(struct inode *inode) +{ + read_inode(inode); +} + +static struct super_operations hostfs_sbops = { + .alloc_inode = hostfs_alloc_inode, + .destroy_inode = hostfs_destroy_inode, + .read_inode = hostfs_read_inode, + .statfs = hostfs_statfs, +}; + +int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + void *dir; + char *name; + unsigned long long next, ino; + int error, len; + + name = dentry_name(file->f_dentry, 0); + if(name == NULL) return(-ENOMEM); + dir = open_dir(name, &error); + kfree(name); + if(dir == NULL) return(-error); + next = file->f_pos; + while((name = read_dir(dir, &next, &ino, &len)) != NULL){ + error = (*filldir)(ent, name, len, file->f_pos, + 
ino, DT_UNKNOWN); + if(error) break; + file->f_pos = next; + } + close_dir(dir); + return(0); +} + +int hostfs_file_open(struct inode *ino, struct file *file) +{ + char *name; + int mode = 0, r = 0, w = 0, fd; + + mode = file->f_mode & (FMODE_READ | FMODE_WRITE); + if((mode & HOSTFS_I(ino)->mode) == mode) + return(0); + + /* The file may already have been opened, but with the wrong access, + * so this resets things and reopens the file with the new access. + */ + if(HOSTFS_I(ino)->fd != -1){ + close_file(&HOSTFS_I(ino)->fd); + HOSTFS_I(ino)->fd = -1; + } + + HOSTFS_I(ino)->mode |= mode; + if(HOSTFS_I(ino)->mode & FMODE_READ) + r = 1; + if(HOSTFS_I(ino)->mode & FMODE_WRITE) + w = 1; + if(w) + r = 1; + + name = dentry_name(file->f_dentry, 0); + if(name == NULL) + return(-ENOMEM); + + fd = open_file(name, r, w, append); + kfree(name); + if(fd < 0) return(fd); + FILE_HOSTFS_I(file)->fd = fd; + + return(0); +} + +int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return(0); +} + +static struct file_operations hostfs_file_fops = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .open = hostfs_file_open, + .release = NULL, + .fsync = hostfs_fsync, +}; + +static struct file_operations hostfs_dir_fops = { + .readdir = hostfs_readdir, + .read = generic_read_dir, +}; + +int hostfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + unsigned long long base; + int count = PAGE_CACHE_SIZE; + int end_index = inode->i_size >> PAGE_CACHE_SHIFT; + int err; + + if (page->index >= end_index) + count = inode->i_size & (PAGE_CACHE_SIZE-1); + + buffer = kmap(page); + base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; + + err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); + if(err != count){ + ClearPageUptodate(page); + goto out; + } + + if (base > inode->i_size) + inode->i_size = base; + + if (PageError(page)) + ClearPageError(page); + err = 0; + + out: + kunmap(page); + + unlock_page(page); + return err; +} + +int hostfs_readpage(struct file *file, struct page *page) +{ + char *buffer; + long long start; + int err = 0; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, + PAGE_CACHE_SIZE); + if(err < 0) goto out; + + memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); + + flush_dcache_page(page); + SetPageUptodate(page); + if (PageError(page)) ClearPageError(page); + err = 0; + out: + kunmap(page); + unlock_page(page); + return(err); +} + +int hostfs_prepare_write(struct file *file, struct page *page, + unsigned int from, unsigned int to) +{ + char *buffer; + long long start, tmp; + int err; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + if(from != 0){ + tmp = start; + err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, + from); + if(err < 0) goto out; + } + if(to != PAGE_CACHE_SIZE){ + start += to; + err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, + PAGE_CACHE_SIZE - to); + if(err < 0) goto out; + } + err = 0; + out: + kunmap(page); + return(err); +} + +int hostfs_commit_write(struct file *file, struct page *page, unsigned from, + unsigned to) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + long long start; + int err = 0; + + start = (long long) (page->index << PAGE_CACHE_SHIFT) + 
from; + buffer = kmap(page); + err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, + to - from); + if(err > 0) err = 0; + if(!err && (start > inode->i_size)) + inode->i_size = start; + + kunmap(page); + return(err); +} + +static struct address_space_operations hostfs_aops = { + .writepage = hostfs_writepage, + .readpage = hostfs_readpage, +/* .set_page_dirty = __set_page_dirty_nobuffers, */ + .prepare_write = hostfs_prepare_write, + .commit_write = hostfs_commit_write +}; + +static int init_inode(struct inode *inode, struct dentry *dentry) +{ + char *name; + int type, err = -ENOMEM, rdev; + + if(dentry){ + name = dentry_name(dentry, 0); + if(name == NULL) + goto out; + type = file_type(name, &rdev); + kfree(name); + } + else type = OS_TYPE_DIR; + + err = 0; + if(type == OS_TYPE_SYMLINK) + inode->i_op = &page_symlink_inode_operations; + else if(type == OS_TYPE_DIR) + inode->i_op = &hostfs_dir_iops; + else inode->i_op = &hostfs_iops; + + if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; + else inode->i_fop = &hostfs_file_fops; + + if(type == OS_TYPE_SYMLINK) + inode->i_mapping->a_ops = &hostfs_link_aops; + else inode->i_mapping->a_ops = &hostfs_aops; + + switch (type) { + case OS_TYPE_CHARDEV: + init_special_inode(inode, S_IFCHR, rdev); + break; + case OS_TYPE_BLOCKDEV: + init_special_inode(inode, S_IFBLK, rdev); + break; + case OS_TYPE_FIFO: + init_special_inode(inode, S_IFIFO, 0); + break; + case OS_TYPE_SOCK: + init_special_inode(inode, S_IFSOCK, 0); + break; + } + out: + return(err); +} + +int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + char *name; + int error, fd; + + error = -ENOMEM; + inode = iget(dir->i_sb, 0); + if(inode == NULL) goto out; + + error = init_inode(inode, dentry); + if(error) + goto out_put; + + error = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + fd = file_create(name, + mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, + mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, + mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); + if(fd < 0) + error = fd; + else error = read_name(inode, name); + + kfree(name); + if(error) + goto out_put; + + HOSTFS_I(inode)->fd = fd; + HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; + d_instantiate(dentry, inode); + return(0); + + out_put: + iput(inode); + out: + return(error); +} + +struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + char *name; + int err; + + err = -ENOMEM; + inode = iget(ino->i_sb, 0); + if(inode == NULL) + goto out; + + err = init_inode(inode, dentry); + if(err) + goto out_put; + + err = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + err = read_name(inode, name); + kfree(name); + if(err == -ENOENT){ + iput(inode); + inode = NULL; + } + else if(err) + goto out_put; + + d_add(dentry, inode); + dentry->d_op = &hostfs_dentry_ops; + return(NULL); + + out_put: + iput(inode); + out: + return(ERR_PTR(err)); +} + +static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) +{ + char *file; + int len; + + file = inode_name(ino, dentry->d_name.len + 1); + if(file == NULL) return(NULL); + strcat(file, "/"); + len = strlen(file); + strncat(file, dentry->d_name.name, dentry->d_name.len); + file[len + dentry->d_name.len] = '\0'; + return(file); +} + +int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) +{ + char *from_name, *to_name; + int err; + + if((from_name = 
inode_dentry_name(ino, from)) == NULL) + return(-ENOMEM); + to_name = dentry_name(to, 0); + if(to_name == NULL){ + kfree(from_name); + return(-ENOMEM); + } + err = link_file(to_name, from_name); + kfree(from_name); + kfree(to_name); + return(err); +} + +int hostfs_unlink(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if(append) + return(-EPERM); + + err = unlink_file(file); + kfree(file); + return(err); +} + +int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = make_symlink(file, to); + kfree(file); + return(err); +} + +int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_mkdir(file, mode); + kfree(file); + return(err); +} + +int hostfs_rmdir(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_rmdir(file); + kfree(file); + return(err); +} + +int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode *inode; + char *name; + int err = -ENOMEM; + + inode = iget(dir->i_sb, 0); + if(inode == NULL) + goto out; + + err = init_inode(inode, dentry); + if(err) + goto out_put; + + err = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + init_special_inode(inode, mode, dev); + err = do_mknod(name, mode, dev); + if(err) + goto out_free; + + err = read_name(inode, name); + kfree(name); + if(err) + goto out_put; + + d_instantiate(dentry, inode); + return(0); + + out_free: + kfree(name); + out_put: + iput(inode); + out: + return(err); +} + +int hostfs_rename(struct inode *from_ino, struct dentry *from, + struct inode *to_ino, struct dentry *to) +{ + char *from_name, *to_name; + int err; + + if((from_name = inode_dentry_name(from_ino, from)) == NULL) + return(-ENOMEM); + if((to_name = inode_dentry_name(to_ino, to)) == NULL){ + kfree(from_name); + return(-ENOMEM); + } + err = rename_file(from_name, to_name); + kfree(from_name); + kfree(to_name); + return(err); +} + +void hostfs_truncate(struct inode *ino) +{ + not_implemented(); +} + +int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) +{ + char *name; + int r = 0, w = 0, x = 0, err; + + if(desired & MAY_READ) r = 1; + if(desired & MAY_WRITE) w = 1; + if(desired & MAY_EXEC) x = 1; + name = inode_name(ino, 0); + if(name == NULL) return(-ENOMEM); + err = access_file(name, r, w, x); + kfree(name); + if(!err) err = vfs_permission(ino, desired); + return(err); +} + +int hostfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct hostfs_iattr attrs; + char *name; + int err; + + if(append) + attr->ia_valid &= ~ATTR_SIZE; + + attrs.ia_valid = 0; + if(attr->ia_valid & ATTR_MODE){ + attrs.ia_valid |= HOSTFS_ATTR_MODE; + attrs.ia_mode = attr->ia_mode; + } + if(attr->ia_valid & ATTR_UID){ + if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && + (attr->ia_uid == 0)) + attr->ia_uid = getuid(); + attrs.ia_valid |= HOSTFS_ATTR_UID; + attrs.ia_uid = attr->ia_uid; + } + if(attr->ia_valid & ATTR_GID){ + if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && + (attr->ia_gid == 0)) + attr->ia_gid = getuid(); + attrs.ia_valid |= HOSTFS_ATTR_GID; + attrs.ia_gid = attr->ia_gid; + } + if(attr->ia_valid & ATTR_SIZE){ + attrs.ia_valid |= 
HOSTFS_ATTR_SIZE; + attrs.ia_size = attr->ia_size; + } + if(attr->ia_valid & ATTR_ATIME){ + attrs.ia_valid |= HOSTFS_ATTR_ATIME; + attrs.ia_atime = attr->ia_atime; + } + if(attr->ia_valid & ATTR_MTIME){ + attrs.ia_valid |= HOSTFS_ATTR_MTIME; + attrs.ia_mtime = attr->ia_mtime; + } + if(attr->ia_valid & ATTR_CTIME){ + attrs.ia_valid |= HOSTFS_ATTR_CTIME; + attrs.ia_ctime = attr->ia_ctime; + } + if(attr->ia_valid & ATTR_ATIME_SET){ + attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; + } + if(attr->ia_valid & ATTR_MTIME_SET){ + attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; + } + name = dentry_name(dentry, 0); + if(name == NULL) return(-ENOMEM); + err = set_attr(name, &attrs); + kfree(name); + if(err) + return(err); + + return(inode_setattr(dentry->d_inode, attr)); +} + +int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + generic_fillattr(dentry->d_inode, stat); + return(0); +} + +static struct inode_operations hostfs_iops = { + .create = hostfs_create, + .link = hostfs_link, + .unlink = hostfs_unlink, + .symlink = hostfs_symlink, + .mkdir = hostfs_mkdir, + .rmdir = hostfs_rmdir, + .mknod = hostfs_mknod, + .rename = hostfs_rename, + .truncate = hostfs_truncate, + .permission = hostfs_permission, + .setattr = hostfs_setattr, + .getattr = hostfs_getattr, +}; + +static struct inode_operations hostfs_dir_iops = { + .create = hostfs_create, + .lookup = hostfs_lookup, + .link = hostfs_link, + .unlink = hostfs_unlink, + .symlink = hostfs_symlink, + .mkdir = hostfs_mkdir, + .rmdir = hostfs_rmdir, + .mknod = hostfs_mknod, + .rename = hostfs_rename, + .truncate = hostfs_truncate, + .permission = hostfs_permission, + .setattr = hostfs_setattr, + .getattr = hostfs_getattr, +}; + +int hostfs_link_readpage(struct file *file, struct page *page) +{ + char *buffer, *name; + long long start; + int err; + + start = page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + name = inode_name(page->mapping->host, 0); + if(name == NULL) return(-ENOMEM); + err = do_readlink(name, buffer, PAGE_CACHE_SIZE); + kfree(name); + if(err == PAGE_CACHE_SIZE) + err = -E2BIG; + else if(err > 0){ + flush_dcache_page(page); + SetPageUptodate(page); + if (PageError(page)) ClearPageError(page); + err = 0; + } + kunmap(page); + unlock_page(page); + return(err); +} + +static struct address_space_operations hostfs_link_aops = { + .readpage = hostfs_link_readpage, +}; + +static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + char *name, *data = d; + int err; + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HOSTFS_SUPER_MAGIC; + sb->s_op = &hostfs_sbops; + + if((data == NULL) || (*data == '\0')) + data = root_ino; + + err = -ENOMEM; + name = kmalloc(strlen(data) + 1, GFP_KERNEL); + if(name == NULL) + goto out; + + strcpy(name, data); + + root_inode = iget(sb, 0); + if(root_inode == NULL) + goto out_free; + + err = init_inode(root_inode, NULL); + if(err) + goto out_put; + + HOSTFS_I(root_inode)->host_filename = name; + + err = -ENOMEM; + sb->s_root = d_alloc_root(root_inode); + if(sb->s_root == NULL) + goto out_put; + + err = read_inode(root_inode); + if(err) + goto out_put; + + return(0); + + out_put: + iput(root_inode); + out_free: + kfree(name); + out: + return(err); +} + +static struct super_block *hostfs_read_sb(struct file_system_type *type, + int flags, const char *dev_name, + void *data) +{ + return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); +} + +static struct file_system_type hostfs_type = { + .owner = 
THIS_MODULE, + .name = "hostfs", + .get_sb = hostfs_read_sb, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hostfs(void) +{ + return(register_filesystem(&hostfs_type)); +} + +static void __exit exit_hostfs(void) +{ + unregister_filesystem(&hostfs_type); +} + +module_init(init_hostfs) +module_exit(exit_hostfs) +MODULE_LICENSE("GPL"); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c new file mode 100644 index 000000000..c40626609 --- /dev/null +++ b/fs/hostfs/hostfs_user.c @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hostfs.h" +#include "kern_util.h" +#include "user.h" + +int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, + int *nlink_out, int *uid_out, int *gid_out, + unsigned long long *size_out, struct timespec *atime_out, + struct timespec *mtime_out, struct timespec *ctime_out, + int *blksize_out, unsigned long long *blocks_out) +{ + struct stat64 buf; + + if(lstat64(path, &buf) < 0) + return(-errno); + + /* See the Makefile for why STAT64_INO_FIELD is passed in + * by the build + */ + if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; + if(mode_out != NULL) *mode_out = buf.st_mode; + if(nlink_out != NULL) *nlink_out = buf.st_nlink; + if(uid_out != NULL) *uid_out = buf.st_uid; + if(gid_out != NULL) *gid_out = buf.st_gid; + if(size_out != NULL) *size_out = buf.st_size; + if(atime_out != NULL) { + atime_out->tv_sec = buf.st_atime; + atime_out->tv_nsec = 0; + } + if(mtime_out != NULL) { + mtime_out->tv_sec = buf.st_mtime; + mtime_out->tv_nsec = 0; + } + if(ctime_out != NULL) { + ctime_out->tv_sec = buf.st_ctime; + ctime_out->tv_nsec = 0; + } + if(blksize_out != NULL) *blksize_out = buf.st_blksize; + if(blocks_out != NULL) *blocks_out = buf.st_blocks; + return(0); +} + +int file_type(const char *path, int *rdev) +{ + struct stat64 buf; + + if(lstat64(path, &buf) < 0) + return(-errno); + if(rdev != NULL) + *rdev = buf.st_rdev; + + if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); + else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); + else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); + else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); + else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); + else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); + else return(OS_TYPE_FILE); +} + +int access_file(char *path, int r, int w, int x) +{ + int mode = 0; + + if(r) mode = R_OK; + if(w) mode |= W_OK; + if(x) mode |= X_OK; + if(access(path, mode) != 0) return(-errno); + else return(0); +} + +int open_file(char *path, int r, int w, int append) +{ + int mode = 0, fd; + + if(r && !w) + mode = O_RDONLY; + else if(!r && w) + mode = O_WRONLY; + else if(r && w) + mode = O_RDWR; + else panic("Impossible mode in open_file"); + + if(append) + mode |= O_APPEND; + fd = open64(path, mode); + if(fd < 0) return(-errno); + else return(fd); +} + +void *open_dir(char *path, int *err_out) +{ + DIR *dir; + + dir = opendir(path); + *err_out = errno; + if(dir == NULL) return(NULL); + return(dir); +} + 
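+/*
+ * The open_dir()/read_dir()/close_dir() helpers in this file wrap the
+ * host's opendir()/readdir()/telldir()/closedir() so the kernel side of
+ * hostfs can walk a host directory.  A minimal caller, sketched here only
+ * to show the intended calling convention (the path and consume() are
+ * illustrative, not part of this file):
+ *
+ *	unsigned long long pos = 0, ino;
+ *	int err, len;
+ *	char *name;
+ *	void *dir = open_dir("/some/host/path", &err);
+ *
+ *	if(dir == NULL)
+ *		return(-err);
+ *	while((name = read_dir(dir, &pos, &ino, &len)) != NULL)
+ *		consume(name, ino, len);
+ *	close_dir(dir);
+ */
+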
+char *read_dir(void *stream, unsigned long long *pos, + unsigned long long *ino_out, int *len_out) +{ + DIR *dir = stream; + struct dirent *ent; + + seekdir(dir, *pos); + ent = readdir(dir); + if(ent == NULL) return(NULL); + *len_out = strlen(ent->d_name); + *ino_out = ent->d_ino; + *pos = telldir(dir); + return(ent->d_name); +} + +int read_file(int fd, unsigned long long *offset, char *buf, int len) +{ + int n; + + n = pread64(fd, buf, len, *offset); + if(n < 0) return(-errno); + *offset += n; + return(n); +} + +int write_file(int fd, unsigned long long *offset, const char *buf, int len) +{ + int n; + + n = pwrite64(fd, buf, len, *offset); + if(n < 0) return(-errno); + *offset += n; + return(n); +} + +int lseek_file(int fd, long long offset, int whence) +{ + int ret; + + ret = lseek64(fd, offset, whence); + if(ret < 0) return(-errno); + return(0); +} + +void close_file(void *stream) +{ + close(*((int *) stream)); +} + +void close_dir(void *stream) +{ + closedir(stream); +} + +int file_create(char *name, int ur, int uw, int ux, int gr, + int gw, int gx, int or, int ow, int ox) +{ + int mode, fd; + + mode = 0; + mode |= ur ? S_IRUSR : 0; + mode |= uw ? S_IWUSR : 0; + mode |= ux ? S_IXUSR : 0; + mode |= gr ? S_IRGRP : 0; + mode |= gw ? S_IWGRP : 0; + mode |= gx ? S_IXGRP : 0; + mode |= or ? S_IROTH : 0; + mode |= ow ? S_IWOTH : 0; + mode |= ox ? S_IXOTH : 0; + fd = open64(name, O_CREAT | O_RDWR, mode); + if(fd < 0) + return(-errno); + return(fd); +} + +int set_attr(const char *file, struct hostfs_iattr *attrs) +{ + struct utimbuf buf; + int err, ma; + + if(attrs->ia_valid & HOSTFS_ATTR_MODE){ + if(chmod(file, attrs->ia_mode) != 0) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_UID){ + if(chown(file, attrs->ia_uid, -1)) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_GID){ + if(chown(file, -1, attrs->ia_gid)) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ + if(truncate(file, attrs->ia_size)) return(-errno); + } + ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; + if((attrs->ia_valid & ma) == ma){ + buf.actime = attrs->ia_atime.tv_sec; + buf.modtime = attrs->ia_mtime.tv_sec; + if(utime(file, &buf) != 0) return(-errno); + } + else { + struct timespec ts; + + if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, &ts, NULL, NULL, NULL); + if(err != 0) + return(err); + buf.actime = attrs->ia_atime.tv_sec; + buf.modtime = ts.tv_sec; + if(utime(file, &buf) != 0) + return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, + NULL, &ts, NULL, NULL, NULL, NULL); + if(err != 0) + return(err); + buf.actime = ts.tv_sec; + buf.modtime = attrs->ia_mtime.tv_sec; + if(utime(file, &buf) != 0) + return(-errno); + } + } + if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; + if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, + &attrs->ia_atime, &attrs->ia_mtime, NULL, + NULL, NULL); + if(err != 0) return(err); + } + return(0); +} + +int make_symlink(const char *from, const char *to) +{ + int err; + + err = symlink(to, from); + if(err) return(-errno); + return(0); +} + +int unlink_file(const char *file) +{ + int err; + + err = unlink(file); + if(err) return(-errno); + return(0); +} + +int do_mkdir(const char *file, int mode) +{ + int err; + + err = mkdir(file, mode); + if(err) return(-errno); + return(0); +} + +int do_rmdir(const char *file) +{ + int err; + + err = rmdir(file); + if(err) 
return(-errno); + return(0); +} + +int do_mknod(const char *file, int mode, int dev) +{ + int err; + + err = mknod(file, mode, dev); + if(err) return(-errno); + return(0); +} + +int link_file(const char *to, const char *from) +{ + int err; + + err = link(to, from); + if(err) return(-errno); + return(0); +} + +int do_readlink(char *file, char *buf, int size) +{ + int n; + + n = readlink(file, buf, size); + if(n < 0) + return(-errno); + if(n < size) + buf[n] = '\0'; + return(n); +} + +int rename_file(char *from, char *to) +{ + int err; + + err = rename(from, to); + if(err < 0) return(-errno); + return(0); +} + +int do_statfs(char *root, long *bsize_out, long long *blocks_out, + long long *bfree_out, long long *bavail_out, + long long *files_out, long long *ffree_out, + void *fsid_out, int fsid_size, long *namelen_out, + long *spare_out) +{ + struct statfs64 buf; + int err; + + err = statfs64(root, &buf); + if(err < 0) return(-errno); + *bsize_out = buf.f_bsize; + *blocks_out = buf.f_blocks; + *bfree_out = buf.f_bfree; + *bavail_out = buf.f_bavail; + *files_out = buf.f_files; + *ffree_out = buf.f_ffree; + memcpy(fsid_out, &buf.f_fsid, + sizeof(buf.f_fsid) > fsid_size ? fsid_size : + sizeof(buf.f_fsid)); + *namelen_out = buf.f_namelen; + spare_out[0] = buf.f_spare[0]; + spare_out[1] = buf.f_spare[1]; + spare_out[2] = buf.f_spare[2]; + spare_out[3] = buf.f_spare[3]; + spare_out[4] = buf.f_spare[4]; + spare_out[5] = buf.f_spare[5]; + return(0); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile new file mode 100644 index 000000000..e67f03848 --- /dev/null +++ b/fs/hppfs/Makefile @@ -0,0 +1,19 @@ +# +# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +hppfs-objs := hppfs_kern.o + +obj-y = +obj-$(CONFIG_HPPFS) += hppfs.o + +clean: + +modules: + +fastdep: + +dep: + +archmrproper: clean diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c new file mode 100644 index 000000000..ebf08cb55 --- /dev/null +++ b/fs/hppfs/hppfs_kern.c @@ -0,0 +1,811 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + +static int init_inode(struct inode *inode, struct dentry *dentry); + +struct hppfs_data { + struct list_head list; + char contents[PAGE_SIZE - sizeof(struct list_head)]; +}; + +struct hppfs_private { + struct file proc_file; + int host_fd; + loff_t len; + struct hppfs_data *contents; +}; + +struct hppfs_inode_info { + struct dentry *proc_dentry; + struct inode vfs_inode; +}; + +static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) +{ + return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); +} + +#define HPPFS_SUPER_MAGIC 0xb00000ee + +static struct super_operations hppfs_sbops; + +static int is_pid(struct dentry *dentry) +{ + struct super_block *sb; + int i; + + sb = dentry->d_sb; + if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) + return(0); + + for(i = 0; i < dentry->d_name.len; i++){ + if(!isdigit(dentry->d_name.name[i])) + return(0); + } + return(1); +} + +static char 
*dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + const char *seg_name; + int len, seg_len; + + len = 0; + parent = dentry; + while(parent->d_parent != parent){ + if(is_pid(parent)) + len += strlen("pid") + 1; + else len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = "proc"; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if(name == NULL) return(NULL); + + name[len] = '\0'; + parent = dentry; + while(parent->d_parent != parent){ + if(is_pid(parent)){ + seg_name = "pid"; + seg_len = strlen("pid"); + } + else { + seg_name = parent->d_name.name; + seg_len = parent->d_name.len; + } + + len -= seg_len + 1; + name[len] = '/'; + strncpy(&name[len + 1], seg_name, seg_len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return(name); +} + +struct dentry_operations hppfs_dentry_ops = { +}; + +static int file_removed(struct dentry *dentry, const char *file) +{ + char *host_file; + int extra, fd; + + extra = 0; + if(file != NULL) extra += strlen(file) + 1; + + host_file = dentry_name(dentry, extra + strlen("/remove")); + if(host_file == NULL){ + printk("file_removed : allocation failed\n"); + return(-ENOMEM); + } + + if(file != NULL){ + strcat(host_file, "/"); + strcat(host_file, file); + } + strcat(host_file, "/remove"); + + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + kfree(host_file); + if(fd > 0){ + os_close_file(fd); + return(1); + } + return(0); +} + +static void hppfs_read_inode(struct inode *ino) +{ + struct inode *proc_ino; + + if(HPPFS_I(ino)->proc_dentry == NULL) + return; + + proc_ino = HPPFS_I(ino)->proc_dentry->d_inode; + ino->i_uid = proc_ino->i_uid; + ino->i_gid = proc_ino->i_gid; + ino->i_atime = proc_ino->i_atime; + ino->i_mtime = proc_ino->i_mtime; + ino->i_ctime = proc_ino->i_ctime; + ino->i_ino = proc_ino->i_ino; + ino->i_mode = proc_ino->i_mode; + ino->i_nlink = proc_ino->i_nlink; + ino->i_size = proc_ino->i_size; + ino->i_blksize = proc_ino->i_blksize; + ino->i_blocks = proc_ino->i_blocks; +} + +static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *proc_dentry, *new, *parent; + struct inode *inode; + int err, deleted; + + deleted = file_removed(dentry, NULL); + if(deleted < 0) + return(ERR_PTR(deleted)); + else if(deleted) + return(ERR_PTR(-ENOENT)); + + err = -ENOMEM; + parent = HPPFS_I(ino)->proc_dentry; + down(&parent->d_inode->i_sem); + proc_dentry = d_lookup(parent, &dentry->d_name); + if(proc_dentry == NULL){ + proc_dentry = d_alloc(parent, &dentry->d_name); + if(proc_dentry == NULL){ + up(&parent->d_inode->i_sem); + goto out; + } + new = (*parent->d_inode->i_op->lookup)(parent->d_inode, + proc_dentry, NULL); + if(new){ + dput(proc_dentry); + proc_dentry = new; + } + } + up(&parent->d_inode->i_sem); + + if(IS_ERR(proc_dentry)) + return(proc_dentry); + + inode = iget(ino->i_sb, 0); + if(inode == NULL) + goto out_dput; + + err = init_inode(inode, proc_dentry); + if(err) + goto out_put; + + hppfs_read_inode(inode); + + d_add(dentry, inode); + dentry->d_op = &hppfs_dentry_ops; + return(NULL); + + out_put: + iput(inode); + out_dput: + dput(proc_dentry); + out: + return(ERR_PTR(err)); +} + +static struct inode_operations hppfs_file_iops = { +}; + +static ssize_t read_proc(struct file *file, char *buf, ssize_t count, + loff_t *ppos, int is_user) +{ + ssize_t (*read)(struct file *, char *, size_t, loff_t *); + ssize_t n; + + read = file->f_dentry->d_inode->i_fop->read; + + 
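+	/*
+	 * When the caller passes a kernel buffer (is_user == 0), widen the
+	 * address limit so the underlying /proc read's copy_to_user()
+	 * accepts the kernel pointer; USER_DS is restored below once the
+	 * read has completed.
+	 */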
if(!is_user) + set_fs(KERNEL_DS); + + n = (*read)(file, buf, count, &file->f_pos); + + if(!is_user) + set_fs(USER_DS); + + if(ppos) *ppos = file->f_pos; + return(n); +} + +static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) +{ + ssize_t n; + int cur, err; + char *new_buf; + + n = -ENOMEM; + new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if(new_buf == NULL){ + printk("hppfs_read_file : kmalloc failed\n"); + goto out; + } + n = 0; + while(count > 0){ + cur = min_t(ssize_t, count, PAGE_SIZE); + err = os_read_file(fd, new_buf, cur); + if(err < 0){ + printk("hppfs_read : read failed, errno = %d\n", + count); + n = err; + goto out_free; + } + else if(err == 0) + break; + + if(copy_to_user(buf, new_buf, err)){ + n = -EFAULT; + goto out_free; + } + n += err; + count -= err; + } + out_free: + kfree(new_buf); + out: + return(n); +} + +static ssize_t hppfs_read(struct file *file, char *buf, size_t count, + loff_t *ppos) +{ + struct hppfs_private *hppfs = file->private_data; + struct hppfs_data *data; + loff_t off; + int err; + + if(hppfs->contents != NULL){ + if(*ppos >= hppfs->len) return(0); + + data = hppfs->contents; + off = *ppos; + while(off >= sizeof(data->contents)){ + data = list_entry(data->list.next, struct hppfs_data, + list); + off -= sizeof(data->contents); + } + + if(off + count > hppfs->len) + count = hppfs->len - off; + copy_to_user(buf, &data->contents[off], count); + *ppos += count; + } + else if(hppfs->host_fd != -1){ + err = os_seek_file(hppfs->host_fd, *ppos); + if(err){ + printk("hppfs_read : seek failed, errno = %d\n", err); + return(err); + } + count = hppfs_read_file(hppfs->host_fd, buf, count); + if(count > 0) + *ppos += count; + } + else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); + + return(count); +} + +static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, + loff_t *ppos) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + ssize_t (*write)(struct file *, const char *, size_t, loff_t *); + int err; + + write = proc_file->f_dentry->d_inode->i_fop->write; + + proc_file->f_pos = file->f_pos; + err = (*write)(proc_file, buf, len, &proc_file->f_pos); + file->f_pos = proc_file->f_pos; + + return(err); +} + +static int open_host_sock(char *host_file, int *filter_out) +{ + char *end; + int fd; + + end = &host_file[strlen(host_file)]; + strcpy(end, "/rw"); + *filter_out = 1; + fd = os_connect_socket(host_file); + if(fd > 0) + return(fd); + + strcpy(end, "/r"); + *filter_out = 0; + fd = os_connect_socket(host_file); + return(fd); +} + +static void free_contents(struct hppfs_data *head) +{ + struct hppfs_data *data; + struct list_head *ele, *next; + + if(head == NULL) return; + + list_for_each_safe(ele, next, &head->list){ + data = list_entry(ele, struct hppfs_data, list); + kfree(data); + } + kfree(head); +} + +static struct hppfs_data *hppfs_get_data(int fd, int filter, + struct file *proc_file, + struct file *hppfs_file, + loff_t *size_out) +{ + struct hppfs_data *data, *new, *head; + int n, err; + + err = -ENOMEM; + data = kmalloc(sizeof(*data), GFP_KERNEL); + if(data == NULL){ + printk("hppfs_get_data : head allocation failed\n"); + goto failed; + } + + INIT_LIST_HEAD(&data->list); + + head = data; + *size_out = 0; + + if(filter){ + while((n = read_proc(proc_file, data->contents, + sizeof(data->contents), NULL, 0)) > 0) + os_write_file(fd, data->contents, n); + err = os_shutdown_socket(fd, 0, 1); + if(err){ + printk("hppfs_get_data : failed to shut down " + "socket\n"); + goto 
failed_free; + } + } + while(1){ + n = os_read_file(fd, data->contents, sizeof(data->contents)); + if(n < 0){ + err = n; + printk("hppfs_get_data : read failed, errno = %d\n", + err); + goto failed_free; + } + else if(n == 0) + break; + + *size_out += n; + + if(n < sizeof(data->contents)) + break; + + new = kmalloc(sizeof(*data), GFP_KERNEL); + if(new == 0){ + printk("hppfs_get_data : data allocation failed\n"); + err = -ENOMEM; + goto failed_free; + } + + INIT_LIST_HEAD(&new->list); + list_add(&new->list, &data->list); + data = new; + } + return(head); + + failed_free: + free_contents(head); + failed: + return(ERR_PTR(err)); +} + +static struct hppfs_private *hppfs_data(void) +{ + struct hppfs_private *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if(data == NULL) + return(data); + + *data = ((struct hppfs_private ) { .host_fd = -1, + .len = -1, + .contents = NULL } ); + return(data); +} + +static int file_mode(int fmode) +{ + if(fmode == (FMODE_READ | FMODE_WRITE)) + return(O_RDWR); + if(fmode == FMODE_READ) + return(O_RDONLY); + if(fmode == FMODE_WRITE) + return(O_WRONLY); + return(0); +} + +static int hppfs_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct dentry *proc_dentry; + char *host_file; + int err, fd, type, filter; + + err = -ENOMEM; + data = hppfs_data(); + if(data == NULL) + goto out; + + host_file = dentry_name(file->f_dentry, strlen("/rw")); + if(host_file == NULL) + goto out_free2; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + + /* XXX This isn't closed anywhere */ + err = open_private_file(&data->proc_file, proc_dentry, + file_mode(file->f_mode)); + if(err) + goto out_free1; + + type = os_file_type(host_file); + if(type == OS_TYPE_FILE){ + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + if(fd >= 0) + data->host_fd = fd; + else printk("hppfs_open : failed to open '%s', errno = %d\n", + host_file, -fd); + + data->contents = NULL; + } + else if(type == OS_TYPE_DIR){ + fd = open_host_sock(host_file, &filter); + if(fd > 0){ + data->contents = hppfs_get_data(fd, filter, + &data->proc_file, + file, &data->len); + if(!IS_ERR(data->contents)) + data->host_fd = fd; + } + else printk("hppfs_open : failed to open a socket in " + "'%s', errno = %d\n", host_file, -fd); + } + kfree(host_file); + + file->private_data = data; + return(0); + + out_free1: + kfree(host_file); + out_free2: + free_contents(data->contents); + kfree(data); + out: + return(err); +} + +static int hppfs_dir_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct dentry *proc_dentry; + int err; + + err = -ENOMEM; + data = hppfs_data(); + if(data == NULL) + goto out; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + err = open_private_file(&data->proc_file, proc_dentry, + file_mode(file->f_mode)); + if(err) + goto out_free; + + file->private_data = data; + return(0); + + out_free: + kfree(data); + out: + return(err); +} + +static loff_t hppfs_llseek(struct file *file, loff_t off, int where) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + loff_t (*llseek)(struct file *, loff_t, int); + loff_t ret; + + llseek = proc_file->f_dentry->d_inode->i_fop->llseek; + if(llseek != NULL){ + ret = (*llseek)(proc_file, off, where); + if(ret < 0) + return(ret); + } + + return(default_llseek(file, off, where)); +} + +static struct file_operations hppfs_file_fops = { + .owner = NULL, + .llseek = hppfs_llseek, + .read = hppfs_read, + .write = hppfs_write, + .open = hppfs_open, +}; + +struct 
hppfs_dirent { + void *vfs_dirent; + filldir_t filldir; + struct dentry *dentry; +}; + +static int hppfs_filldir(void *d, const char *name, int size, + loff_t offset, ino_t inode, unsigned int type) +{ + struct hppfs_dirent *dirent = d; + + if(file_removed(dirent->dentry, name)) + return(0); + + return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, + inode, type)); +} + +static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + int (*readdir)(struct file *, void *, filldir_t); + struct hppfs_dirent dirent = ((struct hppfs_dirent) + { .vfs_dirent = ent, + .filldir = filldir, + .dentry = file->f_dentry } ); + int err; + + readdir = proc_file->f_dentry->d_inode->i_fop->readdir; + + proc_file->f_pos = file->f_pos; + err = (*readdir)(proc_file, &dirent, hppfs_filldir); + file->f_pos = proc_file->f_pos; + + return(err); +} + +static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return(0); +} + +static struct file_operations hppfs_dir_fops = { + .owner = NULL, + .readdir = hppfs_readdir, + .open = hppfs_dir_open, + .fsync = hppfs_fsync, +}; + +static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) +{ + sf->f_blocks = 0; + sf->f_bfree = 0; + sf->f_bavail = 0; + sf->f_files = 0; + sf->f_ffree = 0; + sf->f_type = HPPFS_SUPER_MAGIC; + return(0); +} + +static struct inode *hppfs_alloc_inode(struct super_block *sb) +{ + struct hppfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if(hi == NULL) + return(NULL); + + *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL }); + inode_init_once(&hi->vfs_inode); + return(&hi->vfs_inode); +} + +void hppfs_delete_inode(struct inode *ino) +{ + clear_inode(ino); +} + +static void hppfs_destroy_inode(struct inode *inode) +{ + kfree(HPPFS_I(inode)); +} + +static struct super_operations hppfs_sbops = { + .alloc_inode = hppfs_alloc_inode, + .destroy_inode = hppfs_destroy_inode, + .read_inode = hppfs_read_inode, + .delete_inode = hppfs_delete_inode, + .statfs = hppfs_statfs, +}; + +static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + struct file proc_file; + struct dentry *proc_dentry; + int (*readlink)(struct dentry *, char *, int); + int err, n; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + err = open_private_file(&proc_file, proc_dentry, O_RDONLY); + if(err) + return(err); + + readlink = proc_dentry->d_inode->i_op->readlink; + n = (*readlink)(proc_dentry, buffer, buflen); + + close_private_file(&proc_file); + + return(n); +} + +static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct file proc_file; + struct dentry *proc_dentry; + int (*follow_link)(struct dentry *, struct nameidata *); + int err, n; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + err = open_private_file(&proc_file, proc_dentry, O_RDONLY); + if(err) + return(err); + + follow_link = proc_dentry->d_inode->i_op->follow_link; + n = (*follow_link)(proc_dentry, nd); + + close_private_file(&proc_file); + + return(n); +} + +static struct inode_operations hppfs_dir_iops = { + .lookup = hppfs_lookup, +}; + +static struct inode_operations hppfs_link_iops = { + .readlink = hppfs_readlink, + .follow_link = hppfs_follow_link, +}; + +static int init_inode(struct inode *inode, struct dentry *dentry) +{ + if(S_ISDIR(dentry->d_inode->i_mode)){ + inode->i_op = &hppfs_dir_iops; + inode->i_fop = &hppfs_dir_fops; + } + else 
if(S_ISLNK(dentry->d_inode->i_mode)){ + inode->i_op = &hppfs_link_iops; + inode->i_fop = &hppfs_file_fops; + } + else { + inode->i_op = &hppfs_file_iops; + inode->i_fop = &hppfs_file_fops; + } + + HPPFS_I(inode)->proc_dentry = dentry; + + return(0); +} + +static int hppfs_fill_super(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + struct file_system_type *procfs; + struct super_block *proc_sb; + int err; + + err = -ENOENT; + procfs = get_fs_type("proc"); + if(procfs == NULL) + goto out; + + if(list_empty(&procfs->fs_supers)) + goto out; + + proc_sb = list_entry(procfs->fs_supers.next, struct super_block, + s_instances); + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HPPFS_SUPER_MAGIC; + sb->s_op = &hppfs_sbops; + + root_inode = iget(sb, 0); + if(root_inode == NULL) + goto out; + + err = init_inode(root_inode, proc_sb->s_root); + if(err) + goto out_put; + + err = -ENOMEM; + sb->s_root = d_alloc_root(root_inode); + if(sb->s_root == NULL) + goto out_put; + + hppfs_read_inode(root_inode); + + return(0); + + out_put: + iput(root_inode); + out: + return(err); +} + +static struct super_block *hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data) +{ + return(get_sb_nodev(type, flags, data, hppfs_fill_super)); +} + +static struct file_system_type hppfs_type = { + .owner = THIS_MODULE, + .name = "hppfs", + .get_sb = hppfs_read_super, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hppfs(void) +{ + return(register_filesystem(&hppfs_type)); +} + +static void __exit exit_hppfs(void) +{ + unregister_filesystem(&hppfs_type); +} + +module_init(init_hppfs) +module_exit(exit_hppfs) +MODULE_LICENSE("GPL"); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile new file mode 100644 index 000000000..09f098a10 --- /dev/null +++ b/fs/relayfs/Makefile @@ -0,0 +1,8 @@ +# +# relayfs Makefile +# + +obj-$(CONFIG_RELAYFS_FS) += relayfs.o + +relayfs-y := relay.o relay_lockless.o relay_locking.o inode.o resize.o +relayfs-$(CONFIG_KLOG_CHANNEL) += klog.o diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c new file mode 100644 index 000000000..6e8736015 --- /dev/null +++ b/fs/relayfs/inode.c @@ -0,0 +1,629 @@ +/* + * VFS-related code for RelayFS, a high-speed data relay filesystem. + * + * Copyright (C) 2003 - Tom Zanussi , IBM Corp + * Copyright (C) 2003 - Karim Yaghmour + * + * Based on ramfs, Copyright (C) 2002 - Linus Torvalds + * + * This file is released under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RELAYFS_MAGIC 0x26F82121 + +static struct super_operations relayfs_ops; +static struct address_space_operations relayfs_aops; +static struct inode_operations relayfs_file_inode_operations; +static struct file_operations relayfs_file_operations; +static struct inode_operations relayfs_dir_inode_operations; + +static struct vfsmount * relayfs_mount; +static int relayfs_mount_count; + +static struct backing_dev_info relayfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .memory_backed = 1, /* Does not contribute to dirty memory */ +}; + +static struct inode * +relayfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode; + + inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &relayfs_aops; + inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &relayfs_file_inode_operations; + inode->i_fop = &relayfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &relayfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int +relayfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode * inode; + int error = -ENOSPC; + + inode = relayfs_get_inode(dir->i_sb, mode, dev); + + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int +relayfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval; + + retval = relayfs_mknod(dir, dentry, mode | S_IFDIR, 0); + + if (!retval) + dir->i_nlink++; + return retval; +} + +static int +relayfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + return relayfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int +relayfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; + + inode = relayfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + d_instantiate(dentry, inode); + dget(dentry); + } else + iput(inode); + } + return error; +} + +/** + * relayfs_create_entry - create a relayfs directory or file + * @name: the name of the file to create + * @parent: parent directory + * @dentry: result dentry + * @entry_type: type of file to create (S_IFREG, S_IFDIR) + * @mode: mode + * @data: data to associate with the file + * + * Creates a file or directory with the specifed permissions. 
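+ *
+ * Returns 0 on success, a negative error code otherwise (for example
+ * -EEXIST if the entry already exists, or -EINVAL if no parent
+ * directory could be determined).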
+ */ +static int +relayfs_create_entry(const char * name, struct dentry * parent, struct dentry **dentry, int entry_type, int mode, void * data) +{ + struct qstr qname; + struct dentry * d; + + int error = 0; + + error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); + if (error) { + printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); + return error; + } + + qname.name = name; + qname.len = strlen(name); + qname.hash = full_name_hash(name, qname.len); + + if (parent == NULL) + if (relayfs_mount && relayfs_mount->mnt_sb) + parent = relayfs_mount->mnt_sb->s_root; + + if (parent == NULL) { + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + return -EINVAL; + } + + parent = dget(parent); + down(&parent->d_inode->i_sem); + d = lookup_hash(&qname, parent); + if (IS_ERR(d)) { + error = PTR_ERR(d); + goto release_mount; + } + + if (d->d_inode) { + error = -EEXIST; + goto release_mount; + } + + if (entry_type == S_IFREG) + error = relayfs_create(parent->d_inode, d, entry_type | mode, NULL); + else + error = relayfs_mkdir(parent->d_inode, d, entry_type | mode); + if (error) + goto release_mount; + + if ((entry_type == S_IFREG) && data) { + d->d_inode->u.generic_ip = data; + goto exit; /* don't release mount for regular files */ + } + +release_mount: + simple_release_fs(&relayfs_mount, &relayfs_mount_count); +exit: + *dentry = d; + up(&parent->d_inode->i_sem); + dput(parent); + + return error; +} + +/** + * relayfs_create_file - create a file in the relay filesystem + * @name: the name of the file to create + * @parent: parent directory + * @dentry: result dentry + * @data: data to associate with the file + * @mode: mode, if not specied the default perms are used + * + * The file will be created user rw on behalf of current user. + */ +int +relayfs_create_file(const char * name, struct dentry * parent, struct dentry **dentry, void * data, int mode) +{ + if (!mode) + mode = S_IRUSR | S_IWUSR; + + return relayfs_create_entry(name, parent, dentry, S_IFREG, + mode, data); +} + +/** + * relayfs_create_dir - create a directory in the relay filesystem + * @name: the name of the directory to create + * @parent: parent directory + * @dentry: result dentry + * + * The directory will be created world rwx on behalf of current user. + */ +int +relayfs_create_dir(const char * name, struct dentry * parent, struct dentry **dentry) +{ + return relayfs_create_entry(name, parent, dentry, S_IFDIR, + S_IRWXU | S_IRUGO | S_IXUGO, NULL); +} + +/** + * relayfs_remove_file - remove a file in the relay filesystem + * @dentry: file dentry + * + * Remove a file previously created by relayfs_create_file. + */ +int +relayfs_remove_file(struct dentry *dentry) +{ + struct dentry *parent; + int is_reg; + + parent = dentry->d_parent; + if (parent == NULL) + return -EINVAL; + + is_reg = S_ISREG(dentry->d_inode->i_mode); + + parent = dget(parent); + down(&parent->d_inode->i_sem); + if (dentry->d_inode) { + simple_unlink(parent->d_inode, dentry); + d_delete(dentry); + } + dput(dentry); + up(&parent->d_inode->i_sem); + dput(parent); + + if(is_reg) + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + + return 0; +} + +/** + * relayfs_open - open file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Associates the channel with the file, and increments the + * channel refcount. Reads will be 'auto-consuming'. 
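+ *
+ * Returns 0 on success, -EACCES if the inode carries no usable channel,
+ * -ENOMEM if a reader cannot be allocated, or -EPERM if the channel's
+ * fileop_notify() callback rejects the open.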
+ */ +int +relayfs_open(struct inode *inode, struct file *filp) +{ + struct rchan *rchan; + struct rchan_reader *reader; + int retval = 0; + + if (inode->u.generic_ip) { + rchan = (struct rchan *)inode->u.generic_ip; + if (rchan == NULL) + return -EACCES; + reader = __add_rchan_reader(rchan, filp, 1, 0); + if (reader == NULL) + return -ENOMEM; + filp->private_data = reader; + retval = rchan->callbacks->fileop_notify(rchan->id, filp, + RELAY_FILE_OPEN); + if (retval == 0) + /* Inc relay channel refcount for file */ + rchan_get(rchan->id); + else { + __remove_rchan_reader(reader); + retval = -EPERM; + } + } + + return retval; +} + +/** + * relayfs_mmap - mmap file op for relayfs files + * @filp: the file + * @vma: the vma describing what to map + * + * Calls upon relay_mmap_buffer to map the file into user space. + */ +int +relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct rchan *rchan; + + rchan = ((struct rchan_reader *)filp->private_data)->rchan; + + return __relay_mmap_buffer(rchan, vma); +} + +/** + * relayfs_file_read - read file op for relayfs files + * @filp: the file + * @buf: user buf to read into + * @count: bytes requested + * @offset: offset into file + * + * Reads count bytes from the channel, or as much as is available within + * the sub-buffer currently being read. Reads are 'auto-consuming'. + * See relay_read() for details. + * + * Returns bytes read on success, 0 or -EAGAIN if nothing available, + * negative otherwise. + */ +ssize_t +relayfs_file_read(struct file *filp, char * buf, size_t count, loff_t *offset) +{ + size_t read_count; + struct rchan_reader *reader; + u32 dummy; /* all VFS readers are auto-consuming */ + + if (offset != &filp->f_pos) /* pread, seeking not supported */ + return -ESPIPE; + + if (count == 0) + return 0; + + reader = (struct rchan_reader *)filp->private_data; + read_count = relay_read(reader, buf, count, + filp->f_flags & (O_NDELAY | O_NONBLOCK) ? 0 : 1, &dummy); + + return read_count; +} + +/** + * relayfs_file_write - write file op for relayfs files + * @filp: the file + * @buf: user buf to write from + * @count: bytes to write + * @offset: offset into file + * + * Reserves a slot in the relay buffer and writes count bytes + * into it. The current limit for a single write is 2 pages + * worth. The user_deliver() channel callback will be invoked on + * + * Returns bytes written on success, 0 or -EAGAIN if nothing available, + * negative otherwise. 
+ */ +ssize_t +relayfs_file_write(struct file *filp, const char *buf, size_t count, loff_t *offset) +{ + int write_count; + char * write_buf; + struct rchan *rchan; + int err = 0; + void *wrote_pos; + struct rchan_reader *reader; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL) + return -EPERM; + + rchan = reader->rchan; + if (rchan == NULL) + return -EPERM; + + if (count == 0) + return 0; + + /* Change this if need to write more than 2 pages at once */ + if (count > 2 * PAGE_SIZE) + return -EINVAL; + + write_buf = (char *)__get_free_pages(GFP_KERNEL, 1); + if (write_buf == NULL) + return -ENOMEM; + + if (copy_from_user(write_buf, buf, count)) + return -EFAULT; + + if (filp->f_flags & (O_NDELAY | O_NONBLOCK)) { + write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos); + if (write_count == 0) + return -EAGAIN; + } else { + err = wait_event_interruptible(rchan->write_wait, + (write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos))); + if (err) + return err; + } + + free_pages((unsigned long)write_buf, 1); + + rchan->callbacks->user_deliver(rchan->id, wrote_pos, write_count); + + return write_count; +} + +/** + * relayfs_ioctl - ioctl file op for relayfs files + * @inode: the inode + * @filp: the file + * @cmd: the command + * @arg: command arg + * + * Passes the specified cmd/arg to the kernel client. arg may be a + * pointer to user-space data, in which case the kernel client is + * responsible for copying the data to/from user space appropriately. + * The kernel client is also responsible for returning a meaningful + * return value for ioctl calls. + * + * Returns result of relay channel callback, -EPERM if unsuccessful. + */ +int +relayfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct rchan *rchan; + struct rchan_reader *reader; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL) + return -EPERM; + + rchan = reader->rchan; + if (rchan == NULL) + return -EPERM; + + return rchan->callbacks->ioctl(rchan->id, cmd, arg); +} + +/** + * relayfs_poll - poll file op for relayfs files + * @filp: the file + * @wait: poll table + * + * Poll implemention. + */ +static unsigned int +relayfs_poll(struct file *filp, poll_table *wait) +{ + struct rchan_reader *reader; + unsigned int mask = 0; + + reader = (struct rchan_reader *)filp->private_data; + + if (reader->rchan->finalized) + return POLLERR; + + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &reader->rchan->read_wait, wait); + if (!rchan_empty(reader)) + mask |= POLLIN | POLLRDNORM; + } + + if (filp->f_mode & FMODE_WRITE) { + poll_wait(filp, &reader->rchan->write_wait, wait); + if (!rchan_full(reader)) + mask |= POLLOUT | POLLWRNORM; + } + + return mask; +} + +/** + * relayfs_release - release file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Decrements the channel refcount, as the filesystem is + * no longer using it. 
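+ *
+ * Always returns 0; a file that never had a reader attached is simply
+ * ignored.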
+ */ +int +relayfs_release(struct inode *inode, struct file *filp) +{ + struct rchan_reader *reader; + struct rchan *rchan; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL || reader->rchan == NULL) + return 0; + rchan = reader->rchan; + + rchan->callbacks->fileop_notify(reader->rchan->id, filp, + RELAY_FILE_CLOSE); + __remove_rchan_reader(reader); + /* The channel is no longer in use as far as this file is concerned */ + rchan_put(rchan); + + return 0; +} + +static struct address_space_operations relayfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +static struct file_operations relayfs_file_operations = { + .open = relayfs_open, + .read = relayfs_file_read, + .write = relayfs_file_write, + .ioctl = relayfs_ioctl, + .poll = relayfs_poll, + .mmap = relayfs_mmap, + .fsync = simple_sync_file, + .release = relayfs_release, +}; + +static struct inode_operations relayfs_file_inode_operations = { + .getattr = simple_getattr, +}; + +static struct inode_operations relayfs_dir_inode_operations = { + .create = relayfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .symlink = relayfs_symlink, + .mkdir = relayfs_mkdir, + .rmdir = simple_rmdir, + .mknod = relayfs_mknod, + .rename = simple_rename, +}; + +static struct super_operations relayfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int +relayfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RELAYFS_MAGIC; + sb->s_op = &relayfs_ops; + inode = relayfs_get_inode(sb, S_IFDIR | 0755, 0); + + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + + return 0; +} + +static struct super_block * +relayfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, relayfs_fill_super); +} + +static struct file_system_type relayfs_fs_type = { + .owner = THIS_MODULE, + .name = "relayfs", + .get_sb = relayfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init +init_relayfs_fs(void) +{ + int err = register_filesystem(&relayfs_fs_type); +#ifdef CONFIG_KLOG_CHANNEL + if (!err) + create_klog_channel(); +#endif + return err; +} + +static void __exit +exit_relayfs_fs(void) +{ +#ifdef CONFIG_KLOG_CHANNEL + remove_klog_channel(); +#endif + unregister_filesystem(&relayfs_fs_type); +} + +module_init(init_relayfs_fs) +module_exit(exit_relayfs_fs) + +MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); +MODULE_DESCRIPTION("Relay Filesystem"); +MODULE_LICENSE("GPL"); + diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c new file mode 100644 index 000000000..11f4636ce --- /dev/null +++ b/fs/relayfs/relay.c @@ -0,0 +1,1911 @@ +/* + * Public API and common code for RelayFS. + * + * Please see Documentation/filesystems/relayfs.txt for API description. + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "relay_lockless.h" +#include "relay_locking.h" +#include "resize.h" + +/* Relay channel table, indexed by channel id */ +static struct rchan * rchan_table[RELAY_MAX_CHANNELS]; +static rwlock_t rchan_table_lock = RW_LOCK_UNLOCKED; + +/* Relay operation structs, one per scheme */ +static struct relay_ops lockless_ops = { + .reserve = lockless_reserve, + .commit = lockless_commit, + .get_offset = lockless_get_offset, + .finalize = lockless_finalize, + .reset = lockless_reset, + .reset_index = lockless_reset_index +}; + +static struct relay_ops locking_ops = { + .reserve = locking_reserve, + .commit = locking_commit, + .get_offset = locking_get_offset, + .finalize = locking_finalize, + .reset = locking_reset, + .reset_index = locking_reset_index +}; + +/* + * Low-level relayfs kernel API. These functions should not normally be + * used by clients. See high-level kernel API below. + */ + +/** + * rchan_get - get channel associated with id, incrementing refcount + * @rchan_id: the channel id + * + * Returns channel if successful, NULL otherwise. + */ +struct rchan * +rchan_get(int rchan_id) +{ + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return NULL; + + read_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + if (rchan) + atomic_inc(&rchan->refcount); + read_unlock(&rchan_table_lock); + + return rchan; +} + +/** + * clear_readers - clear non-VFS readers + * @rchan: the channel + * + * Clear the channel pointers of all non-VFS readers open on the channel. + */ +static inline void +clear_readers(struct rchan *rchan) +{ + struct list_head *p; + struct rchan_reader *reader; + + read_lock(&rchan->open_readers_lock); + list_for_each(p, &rchan->open_readers) { + reader = list_entry(p, struct rchan_reader, list); + if (!reader->vfs_reader) + reader->rchan = NULL; + } + read_unlock(&rchan->open_readers_lock); +} + +/** + * rchan_alloc_id - reserve a channel id and store associated channel + * @rchan: the channel + * + * Returns channel id if successful, -1 otherwise. + */ +static inline int +rchan_alloc_id(struct rchan *rchan) +{ + int i; + int rchan_id = -1; + + if (rchan == NULL) + return -1; + + write_lock(&rchan_table_lock); + for (i = 0; i < RELAY_MAX_CHANNELS; i++) { + if (rchan_table[i] == NULL) { + rchan_table[i] = rchan; + rchan_id = rchan->id = i; + break; + } + } + if (rchan_id != -1) + atomic_inc(&rchan->refcount); + write_unlock(&rchan_table_lock); + + return rchan_id; +} + +/** + * rchan_free_id - revoke a channel id and remove associated channel + * @rchan_id: the channel id + */ +static inline void +rchan_free_id(int rchan_id) +{ + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return; + + write_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + rchan_table[rchan_id] = NULL; + write_unlock(&rchan_table_lock); +} + +/** + * rchan_destroy_buf - destroy the current channel buffer + * @rchan: the channel + */ +static inline void +rchan_destroy_buf(struct rchan *rchan) +{ + if (rchan->buf && !rchan->init_buf) + free_rchan_buf(rchan->buf, + rchan->buf_page_array, + rchan->buf_page_count); +} + +/** + * relay_release - perform end-of-buffer processing for last buffer + * @rchan: the channel + * + * Returns 0 if successful, negative otherwise. 
+ * + * Releases the channel buffer, destroys the channel, and removes the + * relay file from the relayfs filesystem. Should only be called from + * rchan_put(). If we're here, it means by definition refcount is 0. + */ +static int +relay_release(struct rchan *rchan) +{ + if (rchan == NULL) + return -EBADF; + + rchan_destroy_buf(rchan); + rchan_free_id(rchan->id); + relayfs_remove_file(rchan->dentry); + clear_readers(rchan); + kfree(rchan); + + return 0; +} + +/** + * rchan_get - decrement channel refcount, releasing it if 0 + * @rchan: the channel + * + * If the refcount reaches 0, the channel will be destroyed. + */ +void +rchan_put(struct rchan *rchan) +{ + if (atomic_dec_and_test(&rchan->refcount)) + relay_release(rchan); +} + +/** + * relay_reserve - reserve a slot in the channel buffer + * @rchan: the channel + * @len: the length of the slot to reserve + * @td: the time delta between buffer start and current write, or TSC + * @err: receives the result flags + * @interrupting: 1 if interrupting previous, used only in locking scheme + * + * Returns pointer to the beginning of the reserved slot, NULL if error. + * + * The errcode value contains the result flags and is an ORed combination + * of the following: + * + * RELAY_BUFFER_SWITCH_NONE - no buffer switch occurred + * RELAY_EVENT_DISCARD_NONE - event should not be discarded + * RELAY_BUFFER_SWITCH - buffer switch occurred + * RELAY_EVENT_DISCARD - event should be discarded (all buffers are full) + * RELAY_EVENT_TOO_LONG - event won't fit into even an empty buffer + * + * buffer_start and buffer_end callbacks are triggered at this point + * if applicable. + */ +char * +relay_reserve(struct rchan *rchan, + u32 len, + struct timeval *ts, + u32 *td, + int *err, + int *interrupting) +{ + if (rchan == NULL) + return NULL; + + *interrupting = 0; + + return rchan->relay_ops->reserve(rchan, len, ts, td, err, interrupting); +} + + +/** + * wakeup_readers - wake up VFS readers waiting on a channel + * @private: the channel + * + * This is the work function used to defer reader waking. The + * reason waking is deferred is that calling directly from commit + * causes problems if you're writing from say the scheduler. + */ +static void +wakeup_readers(void *private) +{ + struct rchan *rchan = (struct rchan *)private; + + wake_up_interruptible(&rchan->read_wait); +} + + +/** + * relay_commit - commit a reserved slot in the buffer + * @rchan: the channel + * @from: commit the length starting here + * @len: length committed + * @interrupting: 1 if interrupting previous, used only in locking scheme + * + * After the write into the reserved buffer has been complted, this + * function must be called in order for the relay to determine whether + * buffers are complete and to wake up VFS readers. + * + * delivery callback is triggered at this point if applicable. 
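+ *
+ * A low-level client normally pairs this with relay_reserve(), roughly
+ * as follows (sketch only; declarations, discard handling and names
+ * such as 'event' are illustrative):
+ *
+ *	from = relay_reserve(rchan, len, &ts, &td, &err, &interrupting);
+ *	if (from != NULL) {
+ *		memcpy(from, event, len);
+ *		relay_commit(rchan, from, len, err, interrupting);
+ *	}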
+ */ +void +relay_commit(struct rchan *rchan, + char *from, + u32 len, + int reserve_code, + int interrupting) +{ + int deliver; + + if (rchan == NULL) + return; + + deliver = packet_delivery(rchan) || + (reserve_code & RELAY_BUFFER_SWITCH); + + rchan->relay_ops->commit(rchan, from, len, deliver, interrupting); + + /* The params are always the same, so no worry about re-queuing */ + if (deliver && waitqueue_active(&rchan->read_wait)) { + PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan); + schedule_delayed_work(&rchan->wake_readers, 1); + } +} + +/** + * relay_get_offset - get current and max channel buffer offsets + * @rchan: the channel + * @max_offset: maximum channel offset + * + * Returns the current and maximum channel buffer offsets. + */ +u32 +relay_get_offset(struct rchan *rchan, u32 *max_offset) +{ + return rchan->relay_ops->get_offset(rchan, max_offset); +} + +/** + * reset_index - try once to reset the current channel index + * @rchan: the channel + * @old_index: the index read before reset + * + * Attempts to reset the channel index to 0. It tries once, and + * if it fails, returns negative, 0 otherwise. + */ +int +reset_index(struct rchan *rchan, u32 old_index) +{ + return rchan->relay_ops->reset_index(rchan, old_index); +} + +/* + * close() vm_op implementation for relayfs file mapping. + */ +static void +relay_file_mmap_close(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct rchan_reader *reader; + struct rchan *rchan; + + reader = (struct rchan_reader *)filp->private_data; + rchan = reader->rchan; + + atomic_dec(&rchan->mapped); + + rchan->callbacks->fileop_notify(reader->rchan->id, filp, + RELAY_FILE_UNMAP); +} + +/* + * vm_ops for relay file mappings. + */ +static struct vm_operations_struct relay_file_mmap_ops = { + .close = relay_file_mmap_close +}; + +/* \begin{Code inspired from BTTV driver} */ +static inline unsigned long +kvirt_to_pa(unsigned long adr) +{ + unsigned long kva, ret; + + kva = (unsigned long) page_address(vmalloc_to_page((void *) adr)); + kva |= adr & (PAGE_SIZE - 1); + ret = __pa(kva); + return ret; +} + +static int +relay_mmap_region(struct vm_area_struct *vma, + const char *adr, + const char *start_pos, + unsigned long size) +{ + unsigned long start = (unsigned long) adr; + unsigned long page, pos; + + pos = (unsigned long) start_pos; + + while (size > 0) { + page = kvirt_to_pa(pos); + if (remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) + return -EAGAIN; + start += PAGE_SIZE; + pos += PAGE_SIZE; + size -= PAGE_SIZE; + } + + return 0; +} +/* \end{Code inspired from BTTV driver} */ + +/** + * relay_mmap_buffer: - mmap buffer to process address space + * @rchan_id: relay channel id + * @vma: vm_area_struct describing memory to be mapped + * + * Returns: + * 0 if ok + * -EAGAIN, when remap failed + * -EINVAL, invalid requested length + * + * Caller should already have grabbed mmap_sem. 
+ */ +int +__relay_mmap_buffer(struct rchan *rchan, + struct vm_area_struct *vma) +{ + int err = 0; + unsigned long length = vma->vm_end - vma->vm_start; + struct file *filp = vma->vm_file; + + if (rchan == NULL) { + err = -EBADF; + goto exit; + } + + if (rchan->init_buf) { + err = -EPERM; + goto exit; + } + + if (length != (unsigned long)rchan->alloc_size) { + err = -EINVAL; + goto exit; + } + + err = relay_mmap_region(vma, + (char *)vma->vm_start, + rchan->buf, + rchan->alloc_size); + + if (err == 0) { + vma->vm_ops = &relay_file_mmap_ops; + err = rchan->callbacks->fileop_notify(rchan->id, filp, + RELAY_FILE_MAP); + if (err == 0) + atomic_inc(&rchan->mapped); + } +exit: + return err; +} + +/* + * High-level relayfs kernel API. See Documentation/filesystems/relafys.txt. + */ + +/* + * rchan_callback implementations defining default channel behavior. Used + * in place of corresponding NULL values in client callback struct. + */ + +/* + * buffer_end() default callback. Does nothing. + */ +static int +buffer_end_default_callback(int rchan_id, + char *current_write_pos, + char *end_of_buffer, + struct timeval end_time, + u32 end_tsc, + int using_tsc) +{ + return 0; +} + +/* + * buffer_start() default callback. Does nothing. + */ +static int +buffer_start_default_callback(int rchan_id, + char *current_write_pos, + u32 buffer_id, + struct timeval start_time, + u32 start_tsc, + int using_tsc) +{ + return 0; +} + +/* + * deliver() default callback. Does nothing. + */ +static void +deliver_default_callback(int rchan_id, char *from, u32 len) +{ +} + +/* + * user_deliver() default callback. Does nothing. + */ +static void +user_deliver_default_callback(int rchan_id, char *from, u32 len) +{ +} + +/* + * needs_resize() default callback. Does nothing. + */ +static void +needs_resize_default_callback(int rchan_id, + int resize_type, + u32 suggested_buf_size, + u32 suggested_n_bufs) +{ +} + +/* + * fileop_notify() default callback. Does nothing. + */ +static int +fileop_notify_default_callback(int rchan_id, + struct file *filp, + enum relay_fileop fileop) +{ + return 0; +} + +/* + * ioctl() default callback. Does nothing. + */ +static int +ioctl_default_callback(int rchan_id, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + +/* relay channel default callbacks */ +static struct rchan_callbacks default_channel_callbacks = { + .buffer_start = buffer_start_default_callback, + .buffer_end = buffer_end_default_callback, + .deliver = deliver_default_callback, + .user_deliver = user_deliver_default_callback, + .needs_resize = needs_resize_default_callback, + .fileop_notify = fileop_notify_default_callback, + .ioctl = ioctl_default_callback, +}; + +/** + * check_attribute_flags - check sanity of channel attributes + * @flags: channel attributes + * @resizeable: 1 if true + * + * Returns 0 if successful, negative otherwise. 
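+ *
+ * For example, RELAY_DELIVERY_PACKET | RELAY_USAGE_GLOBAL |
+ * RELAY_SCHEME_LOCKING | RELAY_TIMESTAMP_GETTIMEOFDAY should satisfy
+ * these checks; the buffer mode defaults to RELAY_MODE_CONTINUOUS when
+ * neither mode flag is given.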
+ */ +static int +check_attribute_flags(u32 *attribute_flags, int resizeable) +{ + u32 flags = *attribute_flags; + + if (!(flags & RELAY_DELIVERY_BULK) && !(flags & RELAY_DELIVERY_PACKET)) + return -EINVAL; /* Delivery mode must be specified */ + + if (!(flags & RELAY_USAGE_SMP) && !(flags & RELAY_USAGE_GLOBAL)) + return -EINVAL; /* Usage must be specified */ + + if (resizeable) { /* Resizeable can never be continuous */ + *attribute_flags &= ~RELAY_MODE_CONTINUOUS; + *attribute_flags |= RELAY_MODE_NO_OVERWRITE; + } + + if ((flags & RELAY_MODE_CONTINUOUS) && + (flags & RELAY_MODE_NO_OVERWRITE)) + return -EINVAL; /* Can't have it both ways */ + + if (!(flags & RELAY_MODE_CONTINUOUS) && + !(flags & RELAY_MODE_NO_OVERWRITE)) + *attribute_flags |= RELAY_MODE_CONTINUOUS; /* Default to continuous */ + + if (!(flags & RELAY_SCHEME_ANY)) + return -EINVAL; /* One or both must be specified */ + else if (flags & RELAY_SCHEME_LOCKLESS) { + if (have_cmpxchg()) + *attribute_flags &= ~RELAY_SCHEME_LOCKING; + else if (flags & RELAY_SCHEME_LOCKING) + *attribute_flags &= ~RELAY_SCHEME_LOCKLESS; + else + return -EINVAL; /* Locking scheme not an alternative */ + } + + if (!(flags & RELAY_TIMESTAMP_ANY)) + return -EINVAL; /* One or both must be specified */ + else if (flags & RELAY_TIMESTAMP_TSC) { + if (have_tsc()) + *attribute_flags &= ~RELAY_TIMESTAMP_GETTIMEOFDAY; + else if (flags & RELAY_TIMESTAMP_GETTIMEOFDAY) + *attribute_flags &= ~RELAY_TIMESTAMP_TSC; + else + return -EINVAL; /* gettimeofday not an alternative */ + } + + return 0; +} + +/* + * High-level API functions. + */ + +/** + * __relay_reset - internal reset function + * @rchan: the channel + * @init: 1 if this is a first-time channel initialization + * + * See relay_reset for description of effect. + */ +void +__relay_reset(struct rchan *rchan, int init) +{ + int i; + + if (init) { + rchan->version = RELAYFS_CHANNEL_VERSION; + init_MUTEX(&rchan->resize_sem); + init_waitqueue_head(&rchan->read_wait); + init_waitqueue_head(&rchan->write_wait); + atomic_set(&rchan->refcount, 0); + INIT_LIST_HEAD(&rchan->open_readers); + rchan->open_readers_lock = RW_LOCK_UNLOCKED; + } + + rchan->buf_id = rchan->buf_idx = 0; + atomic_set(&rchan->suspended, 0); + atomic_set(&rchan->mapped, 0); + rchan->half_switch = 0; + rchan->bufs_produced = 0; + rchan->bufs_consumed = 0; + rchan->bytes_consumed = 0; + rchan->initialized = 0; + rchan->finalized = 0; + rchan->resize_min = rchan->resize_max = 0; + rchan->resizing = 0; + rchan->replace_buffer = 0; + rchan->resize_buf = NULL; + rchan->resize_buf_size = 0; + rchan->resize_alloc_size = 0; + rchan->resize_n_bufs = 0; + rchan->resize_err = 0; + rchan->resize_failures = 0; + rchan->resize_order = 0; + + rchan->expand_page_array = NULL; + rchan->expand_page_count = 0; + rchan->shrink_page_array = NULL; + rchan->shrink_page_count = 0; + rchan->resize_page_array = NULL; + rchan->resize_page_count = 0; + rchan->old_buf_page_array = NULL; + rchan->expand_buf_id = 0; + + INIT_WORK(&rchan->wake_readers, NULL, NULL); + INIT_WORK(&rchan->wake_writers, NULL, NULL); + + for (i = 0; i < RELAY_MAX_BUFS; i++) + rchan->unused_bytes[i] = 0; + + rchan->relay_ops->reset(rchan, init); +} + +/** + * relay_reset - reset the channel + * @rchan: the channel + * + * Returns 0 if successful, negative if not. + * + * This has the effect of erasing all data from the buffer and + * restarting the channel in its initial state. The buffer itself + * is not freed, so any mappings are still in effect. 
+ * + * NOTE: Care should be taken that the channnel isn't actually + * being used by anything when this call is made. + */ +int +relay_reset(int rchan_id) +{ + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + __relay_reset(rchan, 0); + update_readers_consumed(rchan, 0, 0); + + rchan_put(rchan); + + return 0; +} + +/** + * check_init_buf - check the sanity of init_buf, if present + * @init_buf: the initbuf + * @init_buf_size: the total initbuf size + * @bufsize: the channel's sub-buffer size + * @nbufs: the number of sub-buffers in the channel + * + * Returns 0 if ok, negative otherwise. + */ +static int +check_init_buf(char *init_buf, u32 init_buf_size, u32 bufsize, u32 nbufs) +{ + int err = 0; + + if (init_buf && nbufs == 1) /* 1 sub-buffer makes no sense */ + err = -EINVAL; + + if (init_buf && (bufsize * nbufs != init_buf_size)) + err = -EINVAL; + + return err; +} + +/** + * rchan_create_buf - allocate the initial channel buffer + * @rchan: the channel + * @size_alloc: the total size of the channel buffer + * + * Returns 0 if successful, negative otherwise. + */ +static inline int +rchan_create_buf(struct rchan *rchan, int size_alloc) +{ + struct page **page_array; + int page_count; + + if ((rchan->buf = (char *)alloc_rchan_buf(size_alloc, &page_array, &page_count)) == NULL) { + rchan->buf_page_array = NULL; + rchan->buf_page_count = 0; + return -ENOMEM; + } + + rchan->buf_page_array = page_array; + rchan->buf_page_count = page_count; + + return 0; +} + +/** + * rchan_create - allocate and initialize a channel, including buffer + * @chanpath: path specifying the relayfs channel file to create + * @bufsize: the size of the sub-buffers within the channel buffer + * @nbufs: the number of sub-buffers within the channel buffer + * @rchan_flags: flags specifying buffer attributes + * @err: err code + * + * Returns channel if successful, NULL otherwise, err receives errcode. + * + * Allocates a struct rchan representing a relay channel, according + * to the attributes passed in via rchan_flags. Does some basic sanity + * checking but doesn't try to do anything smart. In particular, the + * number of buffers must be a power of 2, and if the lockless scheme + * is being used, the sub-buffer size must also be a power of 2. The + * locking scheme can use buffers of any size. 
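+ *
+ * These constraints are visible to relay_open() callers.  A sketch of
+ * a hypothetical client (file name, callbacks and event variable are
+ * invented for illustration):
+ *
+ *	id = relay_open("mydir/mychan", 32768, 8,
+ *			RELAY_DELIVERY_PACKET | RELAY_USAGE_GLOBAL |
+ *			RELAY_SCHEME_ANY | RELAY_TIMESTAMP_ANY,
+ *			&my_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0);
+ *
+ * Here 8 sub-buffers of 32768 bytes each are valid for either scheme,
+ * since both values are powers of 2.  Events could then be logged
+ * with e.g. relay_write(id, &ev, sizeof(ev), -1, NULL).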
+ */ +static struct rchan * +rchan_create(const char *chanpath, + int bufsize, + int nbufs, + u32 rchan_flags, + char *init_buf, + u32 init_buf_size, + int *err) +{ + int size_alloc; + struct rchan *rchan = NULL; + + *err = 0; + + rchan = (struct rchan *)kmalloc(sizeof(struct rchan), GFP_KERNEL); + if (rchan == NULL) { + *err = -ENOMEM; + return NULL; + } + rchan->buf = rchan->init_buf = NULL; + + *err = check_init_buf(init_buf, init_buf_size, bufsize, nbufs); + if (*err) + goto exit; + + if (nbufs == 1 && bufsize) { + rchan->n_bufs = nbufs; + rchan->buf_size = bufsize; + size_alloc = bufsize; + goto alloc; + } + + if (bufsize <= 0 || + (rchan_flags & RELAY_SCHEME_LOCKLESS && hweight32(bufsize) != 1) || + hweight32(nbufs) != 1 || + nbufs < RELAY_MIN_BUFS || + nbufs > RELAY_MAX_BUFS) { + *err = -EINVAL; + goto exit; + } + + size_alloc = FIX_SIZE(bufsize * nbufs); + if (size_alloc > RELAY_MAX_BUF_SIZE) { + *err = -EINVAL; + goto exit; + } + rchan->n_bufs = nbufs; + rchan->buf_size = bufsize; + + if (rchan_flags & RELAY_SCHEME_LOCKLESS) { + offset_bits(rchan) = ffs(bufsize) - 1; + offset_mask(rchan) = RELAY_BUF_OFFSET_MASK(offset_bits(rchan)); + bufno_bits(rchan) = ffs(nbufs) - 1; + } +alloc: + if (rchan_alloc_id(rchan) == -1) { + *err = -ENOMEM; + goto exit; + } + + if (init_buf == NULL) { + *err = rchan_create_buf(rchan, size_alloc); + if (*err) { + rchan_free_id(rchan->id); + goto exit; + } + } else + rchan->buf = rchan->init_buf = init_buf; + + rchan->alloc_size = size_alloc; + + if (rchan_flags & RELAY_SCHEME_LOCKLESS) + rchan->relay_ops = &lockless_ops; + else + rchan->relay_ops = &locking_ops; + +exit: + if (*err) { + kfree(rchan); + rchan = NULL; + } + + return rchan; +} + + +static char tmpname[NAME_MAX]; + +/** + * rchan_create_dir - create directory for file + * @chanpath: path to file, including filename + * @residual: filename remaining after parse + * @topdir: the directory filename should be created in + * + * Returns 0 if successful, negative otherwise. + * + * Inspired by xlate_proc_name() in procfs. Given a file path which + * includes the filename, creates any and all directories necessary + * to create the file. + */ +static int +rchan_create_dir(const char * chanpath, + const char **residual, + struct dentry **topdir) +{ + const char *cp = chanpath, *next; + struct dentry *parent = NULL; + int len, err = 0; + + while (1) { + next = strchr(cp, '/'); + if (!next) + break; + + len = next - cp; + + strncpy(tmpname, cp, len); + tmpname[len] = '\0'; + err = relayfs_create_dir(tmpname, parent, &parent); + if (err && (err != -EEXIST)) + return err; + cp += len + 1; + } + + *residual = cp; + *topdir = parent; + + return err; +} + +/** + * rchan_create_file - create file, including parent directories + * @chanpath: path to file, including filename + * @dentry: result dentry + * @data: data to associate with the file + * + * Returns 0 if successful, negative otherwise. 
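+ *
+ * For example (illustrative only):
+ *
+ *	err = rchan_create_file("mydir/sub/mychan", &dentry, rchan, 0);
+ *
+ * creates the relayfs directories "mydir" and "sub" if they don't
+ * already exist, then creates the file "mychan" inside "sub"; a mode
+ * of 0 selects the default permissions described in relay_open().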
+ */ +static int +rchan_create_file(const char * chanpath, + struct dentry **dentry, + struct rchan * data, + int mode) +{ + int err; + const char * fname; + struct dentry *topdir; + + err = rchan_create_dir(chanpath, &fname, &topdir); + if (err && (err != -EEXIST)) + return err; + + err = relayfs_create_file(fname, topdir, dentry, (void *)data, mode); + + return err; +} + +/** + * relay_open - create a new file/channel buffer in relayfs + * @chanpath: name of file to create, including path + * @bufsize: size of sub-buffers + * @nbufs: number of sub-buffers + * @flags: channel attributes + * @callbacks: client callback functions + * @start_reserve: number of bytes to reserve at start of each sub-buffer + * @end_reserve: number of bytes to reserve at end of each sub-buffer + * @rchan_start_reserve: additional reserve at start of first sub-buffer + * @resize_min: minimum total buffer size, if set + * @resize_max: maximum total buffer size, if set + * @mode: the perms to be given to the relayfs file, 0 to accept defaults + * @init_buf: initial memory buffer to start out with, NULL if N/A + * @init_buf_size: initial memory buffer size to start out with, 0 if N/A + * + * Returns channel id if successful, negative otherwise. + * + * Creates a relay channel using the sizes and attributes specified. + * The default permissions, used if mode == 0 are S_IRUSR | S_IWUSR. See + * Documentation/filesystems/relayfs.txt for details. + */ +int +relay_open(const char *chanpath, + int bufsize, + int nbufs, + u32 flags, + struct rchan_callbacks *channel_callbacks, + u32 start_reserve, + u32 end_reserve, + u32 rchan_start_reserve, + u32 resize_min, + u32 resize_max, + int mode, + char *init_buf, + u32 init_buf_size) +{ + int err; + struct rchan *rchan; + struct dentry *dentry; + struct rchan_callbacks *callbacks = NULL; + + if (chanpath == NULL) + return -EINVAL; + + if (nbufs != 1) { + err = check_attribute_flags(&flags, resize_min ? 
1 : 0); + if (err) + return err; + } + + rchan = rchan_create(chanpath, bufsize, nbufs, flags, init_buf, init_buf_size, &err); + + if (err < 0) + return err; + + /* Create file in fs */ + if ((err = rchan_create_file(chanpath, &dentry, rchan, mode)) < 0) { + rchan_destroy_buf(rchan); + rchan_free_id(rchan->id); + kfree(rchan); + return err; + } + + rchan->dentry = dentry; + + if (channel_callbacks == NULL) + callbacks = &default_channel_callbacks; + else + callbacks = channel_callbacks; + + if (callbacks->buffer_end == NULL) + callbacks->buffer_end = buffer_end_default_callback; + if (callbacks->buffer_start == NULL) + callbacks->buffer_start = buffer_start_default_callback; + if (callbacks->deliver == NULL) + callbacks->deliver = deliver_default_callback; + if (callbacks->user_deliver == NULL) + callbacks->user_deliver = user_deliver_default_callback; + if (callbacks->needs_resize == NULL) + callbacks->needs_resize = needs_resize_default_callback; + if (callbacks->fileop_notify == NULL) + callbacks->fileop_notify = fileop_notify_default_callback; + if (callbacks->ioctl == NULL) + callbacks->ioctl = ioctl_default_callback; + rchan->callbacks = callbacks; + + /* Just to let the client know the sizes used */ + rchan->callbacks->needs_resize(rchan->id, + RELAY_RESIZE_REPLACED, + rchan->buf_size, + rchan->n_bufs); + + rchan->flags = flags; + rchan->start_reserve = start_reserve; + rchan->end_reserve = end_reserve; + rchan->rchan_start_reserve = rchan_start_reserve; + + __relay_reset(rchan, 1); + + if (resize_min > 0 && resize_max > 0 && + resize_max < RELAY_MAX_TOTAL_BUF_SIZE) { + rchan->resize_min = resize_min; + rchan->resize_max = resize_max; + init_shrink_timer(rchan); + } + + rchan_get(rchan->id); + + return rchan->id; +} + +/** + * relay_discard_init_buf - alloc channel buffer and copy init_buf into it + * @rchan_id: the channel id + * + * Returns 0 if successful, negative otherwise. + * + * NOTE: May sleep. Should also be called only when the channel isn't + * actively being written into. + */ +int +relay_discard_init_buf(int rchan_id) +{ + struct rchan *rchan; + int err = 0; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + if (rchan->init_buf == NULL) { + err = -EINVAL; + goto out; + } + + err = rchan_create_buf(rchan, rchan->alloc_size); + if (err) + goto out; + + memcpy(rchan->buf, rchan->init_buf, rchan->n_bufs * rchan->buf_size); + rchan->init_buf = NULL; +out: + rchan_put(rchan); + + return err; +} + +/** + * relay_finalize - perform end-of-buffer processing for last buffer + * @rchan_id: the channel id + * @releasing: true if called when releasing file + * + * Returns 0 if successful, negative otherwise. + */ +static int +relay_finalize(int rchan_id) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + if (rchan->finalized == 0) { + rchan->relay_ops->finalize(rchan); + rchan->finalized = 1; + } + + if (waitqueue_active(&rchan->read_wait)) { + PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan); + schedule_delayed_work(&rchan->wake_readers, 1); + } + + rchan_put(rchan); + + return 0; +} + +/** + * restore_callbacks - restore default channel callbacks + * @rchan: the channel + * + * Restore callbacks to the default versions. 
+ */ +static inline void +restore_callbacks(struct rchan *rchan) +{ + if (rchan->callbacks != &default_channel_callbacks) + rchan->callbacks = &default_channel_callbacks; +} + +/** + * relay_close - close the channel + * @rchan_id: relay channel id + * + * Finalizes the last sub-buffer and marks the channel as finalized. + * The channel buffer and channel data structure are then freed + * automatically when the last reference to the channel is given up. + */ +int +relay_close(int rchan_id) +{ + int err; + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return -EBADF; + + err = relay_finalize(rchan_id); + + if (!err) { + read_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + read_unlock(&rchan_table_lock); + + if (rchan) { + restore_callbacks(rchan); + if (rchan->resize_min) + del_timer(&rchan->shrink_timer); + rchan_put(rchan); + } + } + + return err; +} + +/** + * relay_write - reserve a slot in the channel and write data into it + * @rchan_id: relay channel id + * @data_ptr: data to be written into reserved slot + * @count: number of bytes to write + * @td_offset: optional offset where time delta should be written + * @wrote_pos: optional ptr returning buf pos written to, ignored if NULL + * + * Returns the number of bytes written, 0 or negative on failure. + * + * Reserves space in the channel and writes count bytes of data_ptr + * to it. Automatically performs any necessary locking, depending + * on the scheme and SMP usage in effect (no locking is done for the + * lockless scheme regardless of usage). + * + * If td_offset is >= 0, the internal time delta calculated when + * slot was reserved will be written at that offset. + * + * If wrote_pos is non-NULL, it will receive the location the data + * was written to, which may be needed for some applications but is not + * normally interesting. + */ +int +relay_write(int rchan_id, + const void *data_ptr, + size_t count, + int td_offset, + void **wrote_pos) +{ + unsigned long flags; + char *reserved, *write_pos; + int bytes_written = 0; + int reserve_code, interrupting; + struct timeval ts; + u32 td; + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + relay_lock_channel(rchan, flags); /* nop for lockless */ + + write_pos = reserved = relay_reserve(rchan, count, &ts, &td, + &reserve_code, &interrupting); + + if (reserved != NULL) { + relay_write_direct(write_pos, data_ptr, count); + if ((td_offset >= 0) && (td_offset < count - sizeof(td))) + *((u32 *)(reserved + td_offset)) = td; + bytes_written = count; + } else if (reserve_code == RELAY_WRITE_TOO_LONG) + bytes_written = -EINVAL; + + if (bytes_written > 0) + relay_commit(rchan, reserved, bytes_written, reserve_code, interrupting); + + relay_unlock_channel(rchan, flags); /* nop for lockless */ + + rchan_put(rchan); + + if (wrote_pos) + *wrote_pos = reserved; + + return bytes_written; +} + +/** + * wakeup_writers - wake up VFS writers waiting on a channel + * @private: the channel + * + * This is the work function used to defer writer waking. The + * reason waking is deferred is that calling directly from + * buffers_consumed causes problems if you're writing from say + * the scheduler. 
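+ *
+ * The deferral itself is a work-queue submission, exactly as issued
+ * by __relay_buffers_consumed() below:
+ *
+ *	PREPARE_WORK(&rchan->wake_writers, wakeup_writers, rchan);
+ *	schedule_delayed_work(&rchan->wake_writers, 1);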
+ */ +static void +wakeup_writers(void *private) +{ + struct rchan *rchan = (struct rchan *)private; + + wake_up_interruptible(&rchan->write_wait); +} + + +/** + * __relay_buffers_consumed - internal version of relay_buffers_consumed + * @rchan: the relay channel + * @bufs_consumed: number of buffers to add to current count for channel + * + * Internal - updates the channel's consumed buffer count. + */ +static void +__relay_buffers_consumed(struct rchan *rchan, u32 bufs_consumed) +{ + rchan->bufs_consumed += bufs_consumed; + + if (rchan->bufs_consumed > rchan->bufs_produced) + rchan->bufs_consumed = rchan->bufs_produced; + + atomic_set(&rchan->suspended, 0); + + PREPARE_WORK(&rchan->wake_writers, wakeup_writers, rchan); + schedule_delayed_work(&rchan->wake_writers, 1); +} + +/** + * __reader_buffers_consumed - update reader/channel consumed buffer count + * @reader: channel reader + * @bufs_consumed: number of buffers to add to current count for channel + * + * Internal - updates the reader's consumed buffer count. If the reader's + * resulting total is greater than the channel's, update the channel's. +*/ +static void +__reader_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed) +{ + reader->bufs_consumed += bufs_consumed; + + if (reader->bufs_consumed > reader->rchan->bufs_consumed) + __relay_buffers_consumed(reader->rchan, bufs_consumed); +} + +/** + * relay_buffers_consumed - add to the # buffers consumed for the channel + * @reader: channel reader + * @bufs_consumed: number of buffers to add to current count for channel + * + * Adds to the channel's consumed buffer count. buffers_consumed should + * be the number of buffers newly consumed, not the total number consumed. + * + * NOTE: kernel clients don't need to call this function if the reader + * is auto-consuming or the channel is MODE_CONTINUOUS. + */ +void +relay_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed) +{ + if (reader && reader->rchan) + __reader_buffers_consumed(reader, bufs_consumed); +} + +/** + * __relay_bytes_consumed - internal version of relay_bytes_consumed + * @rchan: the relay channel + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Internal - updates the channel's consumed count. +*/ +static void +__relay_bytes_consumed(struct rchan *rchan, u32 bytes_consumed, u32 read_offset) +{ + u32 consuming_idx; + u32 unused; + + consuming_idx = read_offset / rchan->buf_size; + + if (consuming_idx >= rchan->n_bufs) + consuming_idx = rchan->n_bufs - 1; + rchan->bytes_consumed += bytes_consumed; + + unused = rchan->unused_bytes[consuming_idx]; + + if (rchan->bytes_consumed + unused >= rchan->buf_size) { + __relay_buffers_consumed(rchan, 1); + rchan->bytes_consumed = 0; + } +} + +/** + * __reader_bytes_consumed - update reader/channel consumed count + * @reader: channel reader + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Internal - updates the reader's consumed count. If the reader's + * resulting total is greater than the channel's, update the channel's. 
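+ *
+ * For example (numbers invented for illustration): with a sub-buffer
+ * size of 4096 and 96 unused bytes recorded for the sub-buffer being
+ * read, a reader whose running byte count reaches 4000 is credited
+ * with one more consumed sub-buffer and its byte count restarts at 0.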
+*/ +static void +__reader_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset) +{ + u32 consuming_idx; + u32 unused; + + consuming_idx = read_offset / reader->rchan->buf_size; + + if (consuming_idx >= reader->rchan->n_bufs) + consuming_idx = reader->rchan->n_bufs - 1; + + reader->bytes_consumed += bytes_consumed; + + unused = reader->rchan->unused_bytes[consuming_idx]; + + if (reader->bytes_consumed + unused >= reader->rchan->buf_size) { + reader->bufs_consumed++; + reader->bytes_consumed = 0; + } + + if ((reader->bufs_consumed > reader->rchan->bufs_consumed) || + ((reader->bufs_consumed == reader->rchan->bufs_consumed) && + (reader->bytes_consumed > reader->rchan->bytes_consumed))) + __relay_bytes_consumed(reader->rchan, bytes_consumed, read_offset); +} + +/** + * relay_bytes_consumed - add to the # bytes consumed for the channel + * @reader: channel reader + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Adds to the channel's consumed count. bytes_consumed should be the + * number of bytes actually read e.g. return value of relay_read() and + * the read_offset should be the actual offset the bytes were read from + * e.g. the actual_read_offset set by relay_read(). See + * Documentation/filesystems/relayfs.txt for more details. + * + * NOTE: kernel clients don't need to call this function if the reader + * is auto-consuming or the channel is MODE_CONTINUOUS. + */ +void +relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset) +{ + if (reader && reader->rchan) + __reader_bytes_consumed(reader, bytes_consumed, read_offset); +} + +/** + * update_readers_consumed - apply offset change to reader + * @rchan: the channel + * + * Apply the consumed counts to all readers open on the channel. + */ +void +update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed) +{ + struct list_head *p; + struct rchan_reader *reader; + + read_lock(&rchan->open_readers_lock); + list_for_each(p, &rchan->open_readers) { + reader = list_entry(p, struct rchan_reader, list); + reader->bufs_consumed = bufs_consumed; + reader->bytes_consumed = bytes_consumed; + if (reader->vfs_reader) + reader->pos.file->f_pos = 0; + else + reader->pos.f_pos = 0; + reader->offset_changed = 1; + } + read_unlock(&rchan->open_readers_lock); +} + +/** + * do_read - utility function to do the actual read to user + * @rchan: the channel + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @read_offset: offset into channel + * @new_offset: new offset into channel after read + * @actual_read_offset: read offset actually used + * + * Returns the number of bytes read, 0 if none. 
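+ *
+ * For example (figures invented for illustration): with a sub-buffer
+ * size of 4096, a read_offset of 0, 512 bytes currently written into
+ * that sub-buffer, no unused bytes recorded for it, and a count of
+ * 4096, only 512 bytes are copied and *new_offset advances to 512.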
+ */ +static ssize_t +do_read(struct rchan *rchan, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset) +{ + u32 read_bufno, cur_bufno; + u32 avail_offset, cur_idx, max_offset, buf_end_offset; + u32 avail_count, buf_size; + int unused_bytes = 0; + size_t read_count = 0; + u32 last_buf_byte_offset; + + *actual_read_offset = read_offset; + + buf_size = rchan->buf_size; + if (unlikely(!buf_size)) BUG(); + + read_bufno = read_offset / buf_size; + if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG(); + unused_bytes = rchan->unused_bytes[read_bufno]; + + avail_offset = cur_idx = relay_get_offset(rchan, &max_offset); + + if (cur_idx == read_offset) { + if (atomic_read(&rchan->suspended) == 1) { + read_offset += 1; + if (read_offset >= max_offset) + read_offset = 0; + *actual_read_offset = read_offset; + } else { + *new_offset = read_offset; + return 0; + } + } else { + last_buf_byte_offset = (read_bufno + 1) * buf_size - 1; + if (read_offset == last_buf_byte_offset) { + if (unused_bytes != 1) { + read_offset += 1; + if (read_offset >= max_offset) + read_offset = 0; + *actual_read_offset = read_offset; + } + } + } + + read_bufno = read_offset / buf_size; + if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG(); + unused_bytes = rchan->unused_bytes[read_bufno]; + + cur_bufno = cur_idx / buf_size; + + buf_end_offset = (read_bufno + 1) * buf_size - unused_bytes; + if (avail_offset > buf_end_offset) + avail_offset = buf_end_offset; + else if (avail_offset < read_offset) + avail_offset = buf_end_offset; + avail_count = avail_offset - read_offset; + read_count = avail_count >= count ? count : avail_count; + + if (read_count && buf != NULL) + if (copy_to_user(buf, rchan->buf + read_offset, read_count)) + return -EFAULT; + + if (read_bufno == cur_bufno) + if (read_count && (read_offset + read_count >= buf_end_offset) && (read_offset + read_count <= cur_idx)) { + *new_offset = cur_idx; + return read_count; + } + + if (read_offset + read_count + unused_bytes > max_offset) + *new_offset = 0; + else if (read_offset + read_count >= buf_end_offset) + *new_offset = read_offset + read_count + unused_bytes; + else + *new_offset = read_offset + read_count; + + return read_count; +} + +/** + * __relay_read - read bytes from channel, relative to current reader pos + * @reader: channel reader + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @read_offset: offset into channel + * @new_offset: new offset into channel after read + * @actual_read_offset: read offset actually used + * @wait: if non-zero, wait for something to read + * + * Internal - see relay_read() for details. + * + * Returns the number of bytes read, 0 if none, negative on failure. 
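+ *
+ * relay_read() below is essentially a wrapper around this call:
+ *
+ *	read_count = __relay_read(reader, buf, count, read_offset,
+ *				  &new_offset, actual_read_offset, wait);
+ *
+ * With wait set, 0 is returned if the channel has been finalized and
+ * -EINTR if the read offsets were changed while the caller slept.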
+ */ +static ssize_t +__relay_read(struct rchan_reader *reader, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset, int wait) +{ + int err = 0; + size_t read_count = 0; + struct rchan *rchan = reader->rchan; + + if (!wait && !rchan->initialized) + return -EAGAIN; + + if (using_lockless(rchan)) + read_offset &= idx_mask(rchan); + + if (read_offset >= rchan->n_bufs * rchan->buf_size) { + *new_offset = 0; + if (!wait) + return -EAGAIN; + else + return -EINTR; + } + + if (buf != NULL && wait) { + err = wait_event_interruptible(rchan->read_wait, + ((rchan->finalized == 1) || + (atomic_read(&rchan->suspended) == 1) || + (relay_get_offset(rchan, NULL) != read_offset))); + + if (rchan->finalized) + return 0; + + if (reader->offset_changed) { + reader->offset_changed = 0; + return -EINTR; + } + + if (err) + return err; + } + + read_count = do_read(rchan, buf, count, read_offset, new_offset, actual_read_offset); + + if (read_count < 0) + err = read_count; + + if (err) + return err; + else + return read_count; +} + +/** + * relay_read - read bytes from channel, relative to current reader pos + * @reader: channel reader + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @wait: if non-zero, wait for something to read + * @actual_read_offset: set read offset actually used, must not be NULL + * + * Reads count bytes from the channel, or as much as is available within + * the sub-buffer currently being read. The read offset that will be + * read from is the position contained within the reader object. If the + * wait flag is set, buf is non-NULL, and there is nothing available, + * it will wait until there is. If the wait flag is 0 and there is + * nothing available, -EAGAIN is returned. If buf is NULL, the value + * returned is the number of bytes that would have been read. + * actual_read_offset is the value that should be passed as the read + * offset to relay_bytes_consumed, needed only if the reader is not + * auto-consuming and the channel is MODE_NO_OVERWRITE, but in any case, + * it must not be NULL. See Documentation/filesystems/relayfs.txt for + * more details. + */ +ssize_t +relay_read(struct rchan_reader *reader, char *buf, size_t count, int wait, u32 *actual_read_offset) +{ + u32 new_offset; + u32 read_offset; + ssize_t read_count; + + if (reader == NULL || reader->rchan == NULL) + return -EBADF; + + if (actual_read_offset == NULL) + return -EINVAL; + + if (reader->vfs_reader) + read_offset = (u32)(reader->pos.file->f_pos); + else + read_offset = reader->pos.f_pos; + *actual_read_offset = read_offset; + + read_count = __relay_read(reader, buf, count, read_offset, + &new_offset, actual_read_offset, wait); + + if (read_count < 0) + return read_count; + + if (reader->vfs_reader) + reader->pos.file->f_pos = new_offset; + else + reader->pos.f_pos = new_offset; + + if (reader->auto_consume && ((read_count) || (new_offset != read_offset))) + __reader_bytes_consumed(reader, read_count, *actual_read_offset); + + if (read_count == 0 && !wait) + return -EAGAIN; + + return read_count; +} + +/** + * relay_bytes_avail - number of bytes available in current sub-buffer + * @reader: channel reader + * + * Returns the number of bytes available relative to the reader's + * current read position within the corresponding sub-buffer, 0 if + * there is nothing available. See Documentation/filesystems/relayfs.txt + * for more details. 
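+ *
+ * A non-VFS kernel consumer might poll the channel like this (sketch
+ * only; reader, buf, count and read_offset are the client's own, and
+ * the reader is assumed not to be auto-consuming):
+ *
+ *	if (relay_bytes_avail(reader) > 0) {
+ *		n = relay_read(reader, buf, count, 0, &read_offset);
+ *		if (n > 0)
+ *			relay_bytes_consumed(reader, n, read_offset);
+ *	}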
+ */ +ssize_t +relay_bytes_avail(struct rchan_reader *reader) +{ + u32 f_pos; + u32 new_offset; + u32 actual_read_offset; + ssize_t bytes_read; + + if (reader == NULL || reader->rchan == NULL) + return -EBADF; + + if (reader->vfs_reader) + f_pos = (u32)reader->pos.file->f_pos; + else + f_pos = reader->pos.f_pos; + new_offset = f_pos; + + bytes_read = __relay_read(reader, NULL, reader->rchan->buf_size, + f_pos, &new_offset, &actual_read_offset, 0); + + if ((new_offset != f_pos) && + ((bytes_read == -EINTR) || (bytes_read == 0))) + bytes_read = -EAGAIN; + else if ((bytes_read < 0) && (bytes_read != -EAGAIN)) + bytes_read = 0; + + return bytes_read; +} + +/** + * rchan_empty - boolean, is the channel empty wrt reader? + * @reader: channel reader + * + * Returns 1 if the channel is empty, 0 otherwise. + */ +int +rchan_empty(struct rchan_reader *reader) +{ + ssize_t avail_count; + u32 buffers_ready; + struct rchan *rchan = reader->rchan; + u32 cur_idx, curbuf_bytes; + int mapped; + + if (atomic_read(&rchan->suspended) == 1) + return 0; + + mapped = atomic_read(&rchan->mapped); + + if (mapped && bulk_delivery(rchan)) { + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + return buffers_ready ? 0 : 1; + } + + if (mapped && packet_delivery(rchan)) { + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + if (buffers_ready) + return 0; + else { + cur_idx = relay_get_offset(rchan, NULL); + curbuf_bytes = cur_idx % rchan->buf_size; + return curbuf_bytes == rchan->bytes_consumed ? 1 : 0; + } + } + + avail_count = relay_bytes_avail(reader); + + return avail_count ? 0 : 1; +} + +/** + * rchan_full - boolean, is the channel full wrt consuming reader? + * @reader: channel reader + * + * Returns 1 if the channel is full, 0 otherwise. + */ +int +rchan_full(struct rchan_reader *reader) +{ + u32 buffers_ready; + struct rchan *rchan = reader->rchan; + + if (mode_continuous(rchan)) + return 0; + + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + + return buffers_ready > reader->rchan->n_bufs - 1 ? 1 : 0; +} + +/** + * relay_info - get status and other information about a relay channel + * @rchan_id: relay channel id + * @rchan_info: pointer to the rchan_info struct to be filled in + * + * Fills in an rchan_info struct with channel status and attribute + * information. See Documentation/filesystems/relayfs.txt for details. + * + * Returns 0 if successful, negative otherwise. 
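+ *
+ * For example (sketch), a client can check how far consumers lag
+ * behind the writer:
+ *
+ *	struct rchan_info info;
+ *	u32 pending;
+ *
+ *	if (!relay_info(rchan_id, &info))
+ *		pending = info.bufs_produced - info.bufs_consumed;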
+ */ +int +relay_info(int rchan_id, struct rchan_info *rchan_info) +{ + int i; + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + rchan_info->flags = rchan->flags; + rchan_info->buf_size = rchan->buf_size; + rchan_info->buf_addr = rchan->buf; + rchan_info->alloc_size = rchan->alloc_size; + rchan_info->n_bufs = rchan->n_bufs; + rchan_info->cur_idx = relay_get_offset(rchan, NULL); + rchan_info->bufs_produced = rchan->bufs_produced; + rchan_info->bufs_consumed = rchan->bufs_consumed; + rchan_info->buf_id = rchan->buf_id; + + for (i = 0; i < rchan->n_bufs; i++) { + rchan_info->unused_bytes[i] = rchan->unused_bytes[i]; + if (using_lockless(rchan)) + rchan_info->buffer_complete[i] = (atomic_read(&fill_count(rchan, i)) == rchan->buf_size); + else + rchan_info->buffer_complete[i] = 0; + } + + rchan_put(rchan); + + return 0; +} + +/** + * __add_rchan_reader - creates and adds a reader to a channel + * @rchan: relay channel + * @filp: the file associated with rchan, if applicable + * @auto_consume: boolean, whether reader's reads automatically consume + * @map_reader: boolean, whether reader's reading via a channel mapping + * + * Returns a pointer to the reader object create, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * If filp is non-NULL, the reader is a VFS reader, otherwise not. + * + * If the reader is a map reader, it isn't considered a VFS reader for + * our purposes. Also, map_readers can't be auto-consuming. + */ +struct rchan_reader * +__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader) +{ + struct rchan_reader *reader; + u32 will_read; + + reader = kmalloc(sizeof(struct rchan_reader), GFP_KERNEL); + + if (reader) { + write_lock(&rchan->open_readers_lock); + reader->rchan = rchan; + if (filp) { + reader->vfs_reader = 1; + reader->pos.file = filp; + } else { + reader->vfs_reader = 0; + reader->pos.f_pos = 0; + } + reader->map_reader = map_reader; + reader->auto_consume = auto_consume; + + if (!map_reader) { + will_read = rchan->bufs_produced % rchan->n_bufs; + if (!will_read && atomic_read(&rchan->suspended)) + will_read = rchan->n_bufs; + reader->bufs_consumed = rchan->bufs_produced - will_read; + rchan->bufs_consumed = reader->bufs_consumed; + rchan->bytes_consumed = reader->bytes_consumed = 0; + reader->offset_changed = 0; + } + + list_add(&reader->list, &rchan->open_readers); + write_unlock(&rchan->open_readers_lock); + } + + return reader; +} + +/** + * add_rchan_reader - create a reader for a channel + * @rchan_id: relay channel handle + * @auto_consume: boolean, whether reader's reads automatically consume + * + * Returns a pointer to the reader object created, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * This function is useful only for non-VFS readers. + */ +struct rchan_reader * +add_rchan_reader(int rchan_id, int auto_consume) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return NULL; + + return __add_rchan_reader(rchan, NULL, auto_consume, 0); +} + +/** + * add_map_reader - create a map reader for a channel + * @rchan_id: relay channel handle + * + * Returns a pointer to the reader object created, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * This function is useful only for map readers. 
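+ *
+ * Typical pairing (sketch): a reader created here is later released
+ * with remove_map_reader():
+ *
+ *	reader = add_map_reader(rchan_id);
+ *	...
+ *	remove_map_reader(reader);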
+ */ +struct rchan_reader * +add_map_reader(int rchan_id) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return NULL; + + return __add_rchan_reader(rchan, NULL, 0, 1); +} + +/** + * __remove_rchan_reader - destroy a channel reader + * @reader: channel reader + * + * Internal - removes reader from the open readers list, and frees it. + */ +void +__remove_rchan_reader(struct rchan_reader *reader) +{ + struct list_head *p; + struct rchan_reader *found_reader = NULL; + + write_lock(&reader->rchan->open_readers_lock); + list_for_each(p, &reader->rchan->open_readers) { + found_reader = list_entry(p, struct rchan_reader, list); + if (found_reader == reader) { + list_del(&found_reader->list); + break; + } + } + write_unlock(&reader->rchan->open_readers_lock); + + if (found_reader) + kfree(found_reader); +} + +/** + * remove_rchan_reader - destroy a channel reader + * @reader: channel reader + * + * Finds and removes the given reader from the channel. This function + * is useful only for non-VFS readers. + * + * Returns 0 if successful, negative otherwise. + */ +int +remove_rchan_reader(struct rchan_reader *reader) +{ + int err = 0; + + if (reader) { + rchan_put(reader->rchan); + __remove_rchan_reader(reader); + } else + err = -EINVAL; + + return err; +} + +/** + * remove_map_reader - destroy a map reader + * @reader: channel reader + * + * Finds and removes the given map reader from the channel. This function + * is useful only for map readers. + * + * Returns 0 if successful, negative otherwise. + */ +int +remove_map_reader(struct rchan_reader *reader) +{ + return remove_rchan_reader(reader); +} + +EXPORT_SYMBOL(relay_open); +EXPORT_SYMBOL(relay_close); +EXPORT_SYMBOL(relay_reset); +EXPORT_SYMBOL(relay_reserve); +EXPORT_SYMBOL(relay_commit); +EXPORT_SYMBOL(relay_read); +EXPORT_SYMBOL(relay_write); +EXPORT_SYMBOL(relay_bytes_avail); +EXPORT_SYMBOL(relay_buffers_consumed); +EXPORT_SYMBOL(relay_bytes_consumed); +EXPORT_SYMBOL(relay_info); +EXPORT_SYMBOL(relay_discard_init_buf); + + diff --git a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h new file mode 100644 index 000000000..fb7bd42a4 --- /dev/null +++ b/include/asm-um/cpufeature.h @@ -0,0 +1,6 @@ +#ifndef __UM_CPUFEATURE_H +#define __UM_CPUFEATURE_H + +#include "asm/arch/cpufeature.h" + +#endif diff --git a/include/asm-um/local.h b/include/asm-um/local.h new file mode 100644 index 000000000..9a280c5bb --- /dev/null +++ b/include/asm-um/local.h @@ -0,0 +1,6 @@ +#ifndef __UM_LOCAL_H +#define __UM_LOCAL_H + +#include "asm/arch/local.h" + +#endif diff --git a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h new file mode 100644 index 000000000..5a265f56b --- /dev/null +++ b/include/asm-um/module-generic.h @@ -0,0 +1,6 @@ +#ifndef __UM_MODULE_GENERIC_H +#define __UM_MODULE_GENERIC_H + +#include "asm/arch/module.h" + +#endif diff --git a/include/asm-um/sections.h b/include/asm-um/sections.h new file mode 100644 index 000000000..6b0231eef --- /dev/null +++ b/include/asm-um/sections.h @@ -0,0 +1,7 @@ +#ifndef _UM_SECTIONS_H +#define _UM_SECTIONS_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h new file mode 100644 index 000000000..2c52874ab --- /dev/null +++ b/include/linux/relayfs_fs.h @@ -0,0 +1,686 @@ +/* + * linux/include/linux/relayfs_fs.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * RelayFS definitions 
and declarations + * + * Please see Documentation/filesystems/relayfs.txt for more info. + */ + +#ifndef _LINUX_RELAYFS_FS_H +#define _LINUX_RELAYFS_FS_H + +#include +#include +#include +#include +#include +#include + +/* + * Tracks changes to rchan struct + */ +#define RELAYFS_CHANNEL_VERSION 1 + +/* + * Maximum number of simultaneously open channels + */ +#define RELAY_MAX_CHANNELS 256 + +/* + * Relay properties + */ +#define RELAY_MIN_BUFS 2 +#define RELAY_MIN_BUFSIZE 4096 +#define RELAY_MAX_BUFS 256 +#define RELAY_MAX_BUF_SIZE 0x1000000 +#define RELAY_MAX_TOTAL_BUF_SIZE 0x8000000 + +/* + * Lockless scheme utility macros + */ +#define RELAY_MAX_BUFNO(bufno_bits) (1UL << (bufno_bits)) +#define RELAY_BUF_SIZE(offset_bits) (1UL << (offset_bits)) +#define RELAY_BUF_OFFSET_MASK(offset_bits) (RELAY_BUF_SIZE(offset_bits) - 1) +#define RELAY_BUFNO_GET(index, offset_bits) ((index) >> (offset_bits)) +#define RELAY_BUF_OFFSET_GET(index, mask) ((index) & (mask)) +#define RELAY_BUF_OFFSET_CLEAR(index, mask) ((index) & ~(mask)) + +/* + * Flags returned by relay_reserve() + */ +#define RELAY_BUFFER_SWITCH_NONE 0x0 +#define RELAY_WRITE_DISCARD_NONE 0x0 +#define RELAY_BUFFER_SWITCH 0x1 +#define RELAY_WRITE_DISCARD 0x2 +#define RELAY_WRITE_TOO_LONG 0x4 + +/* + * Relay attribute flags + */ +#define RELAY_DELIVERY_BULK 0x1 +#define RELAY_DELIVERY_PACKET 0x2 +#define RELAY_SCHEME_LOCKLESS 0x4 +#define RELAY_SCHEME_LOCKING 0x8 +#define RELAY_SCHEME_ANY 0xC +#define RELAY_TIMESTAMP_TSC 0x10 +#define RELAY_TIMESTAMP_GETTIMEOFDAY 0x20 +#define RELAY_TIMESTAMP_ANY 0x30 +#define RELAY_USAGE_SMP 0x40 +#define RELAY_USAGE_GLOBAL 0x80 +#define RELAY_MODE_CONTINUOUS 0x100 +#define RELAY_MODE_NO_OVERWRITE 0x200 + +/* + * Flags for needs_resize() callback + */ +#define RELAY_RESIZE_NONE 0x0 +#define RELAY_RESIZE_EXPAND 0x1 +#define RELAY_RESIZE_SHRINK 0x2 +#define RELAY_RESIZE_REPLACE 0x4 +#define RELAY_RESIZE_REPLACED 0x8 + +/* + * Values for fileop_notify() callback + */ +enum relay_fileop +{ + RELAY_FILE_OPEN, + RELAY_FILE_CLOSE, + RELAY_FILE_MAP, + RELAY_FILE_UNMAP +}; + +/* + * Data structure returned by relay_info() + */ +struct rchan_info +{ + u32 flags; /* relay attribute flags for channel */ + u32 buf_size; /* channel's sub-buffer size */ + char *buf_addr; /* address of channel start */ + u32 alloc_size; /* total buffer size actually allocated */ + u32 n_bufs; /* number of sub-buffers in channel */ + u32 cur_idx; /* current write index into channel */ + u32 bufs_produced; /* current count of sub-buffers produced */ + u32 bufs_consumed; /* current count of sub-buffers consumed */ + u32 buf_id; /* buf_id of current sub-buffer */ + int buffer_complete[RELAY_MAX_BUFS]; /* boolean per sub-buffer */ + int unused_bytes[RELAY_MAX_BUFS]; /* count per sub-buffer */ +}; + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * buffer_start - called at the beginning of a new sub-buffer + * @rchan_id: the channel id + * @current_write_pos: position in sub-buffer client should write to + * @buffer_id: the id of the new sub-buffer + * @start_time: the timestamp associated with the start of sub-buffer + * @start_tsc: the TSC associated with the timestamp, if using_tsc + * @using_tsc: boolean, indicates whether start_tsc is valid + * + * Return value should be the number of bytes written by the client. + * + * See Documentation/filesystems/relayfs.txt for details. 
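+	 *
+	 * A do-nothing client implementation might look like (sketch):
+	 *
+	 *	static int my_buffer_start(int rchan_id, char *pos,
+	 *				   u32 buffer_id, struct timeval t,
+	 *				   u32 tsc, int using_tsc)
+	 *	{
+	 *		return 0;
+	 *	}
+	 *
+	 * returning 0 to indicate that no bytes were written.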
+ */ + int (*buffer_start) (int rchan_id, + char *current_write_pos, + u32 buffer_id, + struct timeval start_time, + u32 start_tsc, + int using_tsc); + + /* + * buffer_end - called at the end of a sub-buffer + * @rchan_id: the channel id + * @current_write_pos: position in sub-buffer of end of data + * @end_of_buffer: the position of the end of the sub-buffer + * @end_time: the timestamp associated with the end of the sub-buffer + * @end_tsc: the TSC associated with the end_time, if using_tsc + * @using_tsc: boolean, indicates whether end_tsc is valid + * + * Return value should be the number of bytes written by the client. + * + * See Documentation/filesystems/relayfs.txt for details. + */ + int (*buffer_end) (int rchan_id, + char *current_write_pos, + char *end_of_buffer, + struct timeval end_time, + u32 end_tsc, + int using_tsc); + + /* + * deliver - called when data is ready for the client + * @rchan_id: the channel id + * @from: the start of the delivered data + * @len: the length of the delivered data + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*deliver) (int rchan_id, char *from, u32 len); + + /* + * user_deliver - called when data has been written from userspace + * @rchan_id: the channel id + * @from: the start of the delivered data + * @len: the length of the delivered data + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*user_deliver) (int rchan_id, char *from, u32 len); + + /* + * needs_resize - called when a resizing event occurs + * @rchan_id: the channel id + * @resize_type: the type of resizing event + * @suggested_buf_size: the suggested new sub-buffer size + * @suggested_buf_size: the suggested new number of sub-buffers + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*needs_resize)(int rchan_id, + int resize_type, + u32 suggested_buf_size, + u32 suggested_n_bufs); + + /* + * fileop_notify - called on open/close/mmap/munmap of a relayfs file + * @rchan_id: the channel id + * @filp: relayfs file pointer + * @fileop: which file operation is in progress + * + * The return value can direct the outcome of the operation. + * + * See Documentation/filesystems/relayfs.txt for details. + */ + int (*fileop_notify)(int rchan_id, + struct file *filp, + enum relay_fileop fileop); + + /* + * ioctl - called in ioctl context from userspace + * @rchan_id: the channel id + * @cmd: ioctl cmd + * @arg: ioctl cmd arg + * + * The return value is returned as the value from the ioctl call. + * + * See Documentation/filesystems/relayfs.txt for details. 
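+	 *
+	 * A minimal client implementation (sketch) could simply reject
+	 * every command:
+	 *
+	 *	static int my_ioctl(int rchan_id, unsigned int cmd,
+	 *			    unsigned long arg)
+	 *	{
+	 *		return -ENOTTY;
+	 *	}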
+ */ + int (*ioctl) (int rchan_id, unsigned int cmd, unsigned long arg); +}; + +/* + * Lockless scheme-specific data + */ +struct lockless_rchan +{ + u8 bufno_bits; /* # bits used for sub-buffer id */ + u8 offset_bits; /* # bits used for offset within sub-buffer */ + u32 index; /* current index = sub-buffer id and offset */ + u32 offset_mask; /* used to obtain offset portion of index */ + u32 index_mask; /* used to mask off unused bits index */ + atomic_t fill_count[RELAY_MAX_BUFS]; /* fill count per sub-buffer */ +}; + +/* + * Locking scheme-specific data + */ +struct locking_rchan +{ + char *write_buf; /* start of write sub-buffer */ + char *write_buf_end; /* end of write sub-buffer */ + char *current_write_pos; /* current write pointer */ + char *write_limit; /* takes reserves into account */ + char *in_progress_event_pos; /* used for interrupted writes */ + u16 in_progress_event_size; /* used for interrupted writes */ + char *interrupted_pos; /* used for interrupted writes */ + u16 interrupting_size; /* used for interrupted writes */ + spinlock_t lock; /* channel lock for locking scheme */ +}; + +struct relay_ops; + +/* + * Offset resizing data structure + */ +struct resize_offset +{ + u32 ge; + u32 le; + int delta; +}; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + char *buf; /* the channel buffer */ + union + { + struct lockless_rchan lockless; + struct locking_rchan locking; + } scheme; /* scheme-specific channel data */ + + int id; /* the channel id */ + struct rchan_callbacks *callbacks; /* client callbacks */ + u32 flags; /* relay channel attributes */ + u32 buf_id; /* current sub-buffer id */ + u32 buf_idx; /* current sub-buffer index */ + + atomic_t mapped; /* map count */ + + atomic_t suspended; /* channel suspended i.e full? */ + int half_switch; /* used internally for suspend */ + + struct timeval buf_start_time; /* current sub-buffer start time */ + u32 buf_start_tsc; /* current sub-buffer start TSC */ + + u32 buf_size; /* sub-buffer size */ + u32 alloc_size; /* total buffer size allocated */ + u32 n_bufs; /* number of sub-buffers */ + + u32 bufs_produced; /* count of sub-buffers produced */ + u32 bufs_consumed; /* count of sub-buffers consumed */ + u32 bytes_consumed; /* bytes consumed in cur sub-buffer */ + + int initialized; /* first buffer initialized? */ + int finalized; /* channel finalized? 
*/ + + u32 start_reserve; /* reserve at start of sub-buffers */ + u32 end_reserve; /* reserve at end of sub-buffers */ + u32 rchan_start_reserve; /* additional reserve sub-buffer 0 */ + + struct dentry *dentry; /* channel file dentry */ + + wait_queue_head_t read_wait; /* VFS read wait queue */ + wait_queue_head_t write_wait; /* VFS write wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct work_struct wake_writers; /* reader wake-up work struct */ + atomic_t refcount; /* channel refcount */ + + struct relay_ops *relay_ops; /* scheme-specific channel ops */ + + int unused_bytes[RELAY_MAX_BUFS]; /* unused count per sub-buffer */ + + struct semaphore resize_sem; /* serializes alloc/repace */ + struct work_struct work; /* resize allocation work struct */ + + struct list_head open_readers; /* open readers for this channel */ + rwlock_t open_readers_lock; /* protection for open_readers list */ + + char *init_buf; /* init channel buffer, if non-NULL */ + + u32 resize_min; /* minimum resized total buffer size */ + u32 resize_max; /* maximum resized total buffer size */ + char *resize_buf; /* for autosize alloc/free */ + u32 resize_buf_size; /* resized sub-buffer size */ + u32 resize_n_bufs; /* resized number of sub-buffers */ + u32 resize_alloc_size; /* resized actual total size */ + int resizing; /* is resizing in progress? */ + int resize_err; /* resizing err code */ + int resize_failures; /* number of resize failures */ + int replace_buffer; /* is the alloced buffer ready? */ + struct resize_offset resize_offset; /* offset change */ + struct timer_list shrink_timer; /* timer used for shrinking */ + int resize_order; /* size of last resize */ + u32 expand_buf_id; /* subbuf id expand will occur at */ + + struct page **buf_page_array; /* array of current buffer pages */ + int buf_page_count; /* number of current buffer pages */ + struct page **expand_page_array;/* new pages to be inserted */ + int expand_page_count; /* number of new pages */ + struct page **shrink_page_array;/* old pages to be freed */ + int shrink_page_count; /* number of old pages */ + struct page **resize_page_array;/* will become current pages */ + int resize_page_count; /* number of resize pages */ + struct page **old_buf_page_array; /* hold for freeing */ +} ____cacheline_aligned; + +/* + * Relay channel reader struct + */ +struct rchan_reader +{ + struct list_head list; /* for list inclusion */ + struct rchan *rchan; /* the channel we're reading from */ + int auto_consume; /* does this reader auto-consume? */ + u32 bufs_consumed; /* buffers this reader has consumed */ + u32 bytes_consumed; /* bytes consumed in cur sub-buffer */ + int offset_changed; /* have channel offsets changed? */ + int vfs_reader; /* are we a VFS reader? */ + int map_reader; /* are we an mmap reader? */ + + union + { + struct file *file; + u32 f_pos; + } pos; /* current read offset */ +}; + +/* + * These help make union member access less tedious + */ +#define channel_buffer(rchan) ((rchan)->buf) +#define idx(rchan) ((rchan)->scheme.lockless.index) +#define bufno_bits(rchan) ((rchan)->scheme.lockless.bufno_bits) +#define offset_bits(rchan) ((rchan)->scheme.lockless.offset_bits) +#define offset_mask(rchan) ((rchan)->scheme.lockless.offset_mask) +#define idx_mask(rchan) ((rchan)->scheme.lockless.index_mask) +#define bulk_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_BULK) ? 1 : 0) +#define packet_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_PACKET) ? 
1 : 0) +#define using_lockless(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKLESS) ? 1 : 0) +#define using_locking(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKING) ? 1 : 0) +#define using_tsc(rchan) (((rchan)->flags & RELAY_TIMESTAMP_TSC) ? 1 : 0) +#define using_gettimeofday(rchan) (((rchan)->flags & RELAY_TIMESTAMP_GETTIMEOFDAY) ? 1 : 0) +#define usage_smp(rchan) (((rchan)->flags & RELAY_USAGE_SMP) ? 1 : 0) +#define usage_global(rchan) (((rchan)->flags & RELAY_USAGE_GLOBAL) ? 1 : 0) +#define mode_continuous(rchan) (((rchan)->flags & RELAY_MODE_CONTINUOUS) ? 1 : 0) +#define fill_count(rchan, i) ((rchan)->scheme.lockless.fill_count[(i)]) +#define write_buf(rchan) ((rchan)->scheme.locking.write_buf) +#define read_buf(rchan) ((rchan)->scheme.locking.read_buf) +#define write_buf_end(rchan) ((rchan)->scheme.locking.write_buf_end) +#define read_buf_end(rchan) ((rchan)->scheme.locking.read_buf_end) +#define cur_write_pos(rchan) ((rchan)->scheme.locking.current_write_pos) +#define read_limit(rchan) ((rchan)->scheme.locking.read_limit) +#define write_limit(rchan) ((rchan)->scheme.locking.write_limit) +#define in_progress_event_pos(rchan) ((rchan)->scheme.locking.in_progress_event_pos) +#define in_progress_event_size(rchan) ((rchan)->scheme.locking.in_progress_event_size) +#define interrupted_pos(rchan) ((rchan)->scheme.locking.interrupted_pos) +#define interrupting_size(rchan) ((rchan)->scheme.locking.interrupting_size) +#define channel_lock(rchan) ((rchan)->scheme.locking.lock) + + +/** + * calc_time_delta - utility function for time delta calculation + * @now: current time + * @start: start time + * + * Returns the time delta produced by subtracting start time from now. + */ +static inline u32 +calc_time_delta(struct timeval *now, + struct timeval *start) +{ + return (now->tv_sec - start->tv_sec) * 1000000 + + (now->tv_usec - start->tv_usec); +} + +/** + * recalc_time_delta - utility function for time delta recalculation + * @now: current time + * @new_delta: the new time delta calculated + * @cpu: the associated CPU id + */ +static inline void +recalc_time_delta(struct timeval *now, + u32 *new_delta, + struct rchan *rchan) +{ + if (using_tsc(rchan) == 0) + *new_delta = calc_time_delta(now, &rchan->buf_start_time); +} + +/** + * have_cmpxchg - does this architecture have a cmpxchg? + * + * Returns 1 if this architecture has a cmpxchg useable by + * the lockless scheme, 0 otherwise. + */ +static inline int +have_cmpxchg(void) +{ +#if defined(__HAVE_ARCH_CMPXCHG) + return 1; +#else + return 0; +#endif +} + +/** + * relay_write_direct - write data directly into destination buffer + */ +#define relay_write_direct(DEST, SRC, SIZE) \ +do\ +{\ + memcpy(DEST, SRC, SIZE);\ + DEST += SIZE;\ +} while (0); + +/** + * relay_lock_channel - lock the relay channel if applicable + * + * This macro only affects the locking scheme. If the locking scheme + * is in use and the channel usage is SMP, does a local_irq_save. If the + * locking sheme is in use and the channel usage is GLOBAL, uses + * spin_lock_irqsave. FLAGS is initialized to 0 since we know that + * it is being initialized prior to use and we avoid the compiler warning. + */ +#define relay_lock_channel(RCHAN, FLAGS) \ +do\ +{\ + FLAGS = 0;\ + if (using_locking(RCHAN)) {\ + if (usage_smp(RCHAN)) {\ + local_irq_save(FLAGS); \ + } else {\ + spin_lock_irqsave(&(RCHAN)->scheme.locking.lock, FLAGS); \ + }\ + }\ +} while (0); + +/** + * relay_unlock_channel - unlock the relay channel if applicable + * + * This macro only affects the locking scheme. 
See relay_lock_channel. + */ +#define relay_unlock_channel(RCHAN, FLAGS) \ +do\ +{\ + if (using_locking(RCHAN)) {\ + if (usage_smp(RCHAN)) {\ + local_irq_restore(FLAGS); \ + } else {\ + spin_unlock_irqrestore(&(RCHAN)->scheme.locking.lock, FLAGS); \ + }\ + }\ +} while (0); + +/* + * Define cmpxchg if we don't have it + */ +#ifndef __HAVE_ARCH_CMPXCHG +#define cmpxchg(p,o,n) 0 +#endif + +/* + * High-level relayfs kernel API, fs/relayfs/relay.c + */ +extern int +relay_open(const char *chanpath, + int bufsize, + int nbufs, + u32 flags, + struct rchan_callbacks *channel_callbacks, + u32 start_reserve, + u32 end_reserve, + u32 rchan_start_reserve, + u32 resize_min, + u32 resize_max, + int mode, + char *init_buf, + u32 init_buf_size); + +extern int +relay_close(int rchan_id); + +extern int +relay_write(int rchan_id, + const void *data_ptr, + size_t count, + int td_offset, + void **wrote_pos); + +extern ssize_t +relay_read(struct rchan_reader *reader, + char *buf, + size_t count, + int wait, + u32 *actual_read_offset); + +extern int +relay_discard_init_buf(int rchan_id); + +extern struct rchan_reader * +add_rchan_reader(int rchan_id, int autoconsume); + +extern int +remove_rchan_reader(struct rchan_reader *reader); + +extern struct rchan_reader * +add_map_reader(int rchan_id); + +extern int +remove_map_reader(struct rchan_reader *reader); + +extern int +relay_info(int rchan_id, struct rchan_info *rchan_info); + +extern void +relay_buffers_consumed(struct rchan_reader *reader, u32 buffers_consumed); + +extern void +relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset); + +extern ssize_t +relay_bytes_avail(struct rchan_reader *reader); + +extern int +relay_realloc_buffer(int rchan_id, u32 new_nbufs, int in_background); + +extern int +relay_replace_buffer(int rchan_id); + +extern int +rchan_empty(struct rchan_reader *reader); + +extern int +rchan_full(struct rchan_reader *reader); + +extern void +update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed); + +extern int +__relay_mmap_buffer(struct rchan *rchan, struct vm_area_struct *vma); + +extern struct rchan_reader * +__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader); + +extern void +__remove_rchan_reader(struct rchan_reader *reader); + +/* + * Low-level relayfs kernel API, fs/relayfs/relay.c + */ +extern struct rchan * +rchan_get(int rchan_id); + +extern void +rchan_put(struct rchan *rchan); + +extern char * +relay_reserve(struct rchan *rchan, + u32 data_len, + struct timeval *time_stamp, + u32 *time_delta, + int *errcode, + int *interrupting); + +extern void +relay_commit(struct rchan *rchan, + char *from, + u32 len, + int reserve_code, + int interrupting); + +extern u32 +relay_get_offset(struct rchan *rchan, u32 *max_offset); + +extern int +relay_reset(int rchan_id); + +/* + * VFS functions, fs/relayfs/inode.c + */ +extern int +relayfs_create_dir(const char *name, + struct dentry *parent, + struct dentry **dentry); + +extern int +relayfs_create_file(const char * name, + struct dentry *parent, + struct dentry **dentry, + void * data, + int mode); + +extern int +relayfs_remove_file(struct dentry *dentry); + +extern int +reset_index(struct rchan *rchan, u32 old_index); + + +/* + * klog functions, fs/relayfs/klog.c + */ +extern int +create_klog_channel(void); + +extern int +remove_klog_channel(void); + +/* + * Scheme-specific channel ops + */ +struct relay_ops +{ + char * (*reserve) (struct rchan *rchan, + u32 slot_len, + struct timeval 
*time_stamp, + u32 *tsc, + int * errcode, + int * interrupting); + + void (*commit) (struct rchan *rchan, + char *from, + u32 len, + int deliver, + int interrupting); + + u32 (*get_offset) (struct rchan *rchan, + u32 *max_offset); + + void (*resume) (struct rchan *rchan); + void (*finalize) (struct rchan *rchan); + void (*reset) (struct rchan *rchan, + int init); + int (*reset_index) (struct rchan *rchan, + u32 old_index); +}; + +#endif /* _LINUX_RELAYFS_FS_H */ + + + + + diff --git a/include/linux/vs_base.h b/include/linux/vs_base.h new file mode 100644 index 000000000..4f04513ff --- /dev/null +++ b/include/linux/vs_base.h @@ -0,0 +1,78 @@ +#ifndef _VX_VS_BASE_H +#define _VX_VS_BASE_H + +#include "vserver/context.h" + +// #define VX_DEBUG + + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally agains supplied argument + */ +static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1))); +} + + +#define __vx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __vx_task_flags(t,m,f) \ + (((t) && ((t)->vx_info)) ? \ + __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0) + +#define vx_current_flags() \ + ((current->vx_info) ? current->vx_info->vx_flags : 0) + +#define vx_flags(m,f) __vx_flags(vx_current_flags(),(m),(f)) + + +#define vx_current_ccaps() \ + ((current->vx_info) ? current->vx_info->vx_ccaps : 0) + +#define vx_ccaps(c) (vx_current_ccaps() & (c)) + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \ + current->vx_info->vx_bcaps : cap_bset) + + +/* generic flag merging */ + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + +#endif diff --git a/include/linux/vs_context.h b/include/linux/vs_context.h new file mode 100644 index 000000000..727a16cdc --- /dev/null +++ b/include/linux/vs_context.h @@ -0,0 +1,128 @@ +#ifndef _VX_VS_CONTEXT_H +#define _VX_VS_CONTEXT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" + +#undef vxdprintk +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + + +extern int proc_pid_vx_info(struct task_struct *, char *); + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line) +{ + if (!vxi) + return NULL; + vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +#define free_vx_info(i) \ + call_rcu(&i->vx_rcu, rcu_free_vx_info, i); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) + +static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + if (!vxi) + return; + vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line) +{ + BUG_ON(*vxp); + if (!vxi) + return; + vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_refcnt):0, + _file, _line); + atomic_inc(&vxi->vx_refcnt); + *vxp = __get_vx_info(vxi, _file, _line); +} + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line) +{ + struct vx_info *vxo = *vxp; + + if (!vxo) + return; + vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + vxo?atomic_read(&vxo->vx_refcnt):0, + _file, _line); + *vxp = NULL; + wmb(); + if (vxo && atomic_dec_and_test(&vxo->vx_refcnt)) + unhash_vx_info(vxo); + __put_vx_info(vxo, _file, _line); +} + + +#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct vx_info *vxi; + + task_lock(p); + vxi = __get_vx_info(p->vx_info, _file, _line); + task_unlock(p); + return vxi; +} + + +#define vx_verify_info(p,i) \ + __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__) + +static __inline__ void __vx_verify_info( + struct vx_info *vxa, struct vx_info *vxb, + const char *_file, int _line) +{ + if (vxa == vxb) + return; + printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n", + vxa, vxb, _file, _line); +} + + +#undef vxdprintk +#define vxdprintk(x...) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_cvirt.h b/include/linux/vs_cvirt.h new file mode 100644 index 000000000..65f430362 --- /dev/null +++ b/include/linux/vs_cvirt.h @@ -0,0 +1,71 @@ +#ifndef _VX_VS_CVIRT_H +#define _VX_VS_CVIRT_H + + +// #define VX_DEBUG + +#include "vserver/cvirt.h" +#include "vs_base.h" + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + +/* utsname virtualization */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return ¤t->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + + +/* pid faking stuff */ + + +#define vx_map_tgid(v,p) \ + __vx_map_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_map_tgid(struct vx_info *vxi, int pid, + char *file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == vxi->vx_initpid)?1:pid, + file, line); + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_rmap_tgid(v,p) \ + __vx_rmap_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid, + char *file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + } + return pid; +} + +#undef vxdprintk +#define vxdprintk(x...) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_dlimit.h b/include/linux/vs_dlimit.h new file mode 100644 index 000000000..d80c563f6 --- /dev/null +++ b/include/linux/vs_dlimit.h @@ -0,0 +1,169 @@ +#ifndef _VX_VS_DLIMIT_H +#define _VX_VS_DLIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/dlimit.h" + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxdprintk("get_dl_info(%p[#%d.%d])\t%s:%d\n", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info, i); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _line) +{ + if (!dli) + return; + vxdprintk("put_dl_info(%p[#%d.%d])\t%s:%d\n", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +extern int vx_debug_dlimit; + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("ALLOC (%p,#%d)%c %lld bytes (%d)@ %s:%d\n", + sb, xid, __dlimit_char(dli), nr, ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + dli->dl_space_used -= nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("FREE 
(%p,#%d)%c %lld bytes @ %s:%d\n", + sb, xid, __dlimit_char(dli), nr, file, line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + xid_t xid, const char *file, int line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("ALLOC (%p,#%d)%c inode (%d)@ %s:%d\n", + sb, xid, __dlimit_char(dli), ret, file, line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + xid_t xid, const char *file, int line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + dli->dl_inodes_used--; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("FREE (%p,#%d)%c inode @ %s:%d\n", + sb, xid, __dlimit_char(dli), file, line); +} + + + +#define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \ + __dl_alloc_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(sb, xid, nr) \ + __dl_free_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_INODE(sb, xid) \ + __dl_alloc_inode(sb, xid, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(sb, xid) \ + __dl_free_inode(sb, xid, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_limit.h b/include/linux/vs_limit.h new file mode 100644 index 000000000..82e8de4ec --- /dev/null +++ b/include/linux/vs_limit.h @@ -0,0 +1,119 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" + + +/* file limits */ + +#define VX_DEBUG_ACC_FILE 0 +#define VX_DEBUG_ACC_OPENFD 0 + +#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + +#define vx_acc_cres(v,d,r) \ + __vx_acc_cres((v), (r), (d), __FILE__, __LINE__) + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (vxi) { + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + (dir>0)?"++":"--", file, line); + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + +#define vx_nproc_inc(p) vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC) +#define vx_nproc_dec(p) vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC) + +#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE) +#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE) + +#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD) +#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD) + +/* +#define vx_openfd_inc(f) do { \ + vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD); \ + printk("vx_openfd_inc: %d[#%d] in %s:%d\n", \ + f, current->xid, __FILE__, __LINE__); \ + } while (0) + +#define vx_openfd_dec(f) do { \ + vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD); \ + printk("vx_openfd_dec: %d[#%d] in %s:%d\n", \ + f, current->xid, __FILE__, __LINE__); \ + } while (0) +*/ + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail((v), (r), (n), __FILE__, __LINE__) + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *file, int line) +{ + unsigned long value; + + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + num, file, line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + num <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD) + + +/* socket limits */ + +#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK) +#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_memory.h b/include/linux/vs_memory.h new file mode 100644 index 000000000..2fe9c0809 --- /dev/null +++ b/include/linux/vs_memory.h @@ -0,0 +1,132 @@ +#ifndef _VX_VS_MEMORY_H +#define _VX_VS_MEMORY_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" + + +#define VX_DEBUG_ACC_RSS 0 +#define VX_DEBUG_ACC_VM 0 +#define VX_DEBUG_ACC_VML 0 + +#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + +#define vx_acc_page(m, d, v, r) \ + __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__) + +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + + +#define vx_acc_pages(m, p, v, r) \ + __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.rcur[res]); +} + + + +#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS) + +#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) __vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + + + +#define vx_pages_avail(m, p, r) \ + __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + unsigned long value; + + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, file, line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + pages <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_network.h b/include/linux/vs_network.h new file mode 100644 index 000000000..0a3349c09 --- /dev/null +++ b/include/linux/vs_network.h @@ -0,0 +1,154 @@ +#ifndef _NX_VS_NETWORK_H +#define 
_NX_VS_NETWORK_H + + +// #define NX_DEBUG + +#include +#include +#include + +#include "vserver/network.h" + +#if defined(NX_DEBUG) +#define nxdprintk(x...) printk("nxd: " x) +#else +#define nxdprintk(x...) +#endif + + +extern int proc_pid_nx_info(struct task_struct *, char *); + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +#define free_nx_info(nxi) \ + call_rcu(&nxi->nx_rcu, rcu_free_nx_info, nxi); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + BUG_ON(*nxp); + if (!nxi) + return; + nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_refcnt):0, + _file, _line); + atomic_inc(&nxi->nx_refcnt); + *nxp = __get_nx_info(nxi, _file, _line); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo = *nxp; + + if (!nxo) + return; + nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + nxo?atomic_read(&nxo->nx_refcnt):0, + _file, _line); + *nxp = NULL; + wmb(); + if (nxo && atomic_dec_and_test(&nxo->nx_refcnt)) + unhash_nx_info(nxo); + __put_nx_info(nxo, _file, _line); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + +#define nx_verify_info(p,i) \ + __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__) + +static __inline__ void __nx_verify_info( + struct nx_info *ipa, struct nx_info *ipb, + const char *_file, int _line) +{ + if (ipa == ipb) + return; + printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n", + ipa, ipb, _file, _line); +} + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + +#undef nxdprintk +#define nxdprintk(x...) + + +#define __nx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __nx_task_flags(t,m,f) \ + (((t) && ((t)->nx_info)) ? \ + __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0) + +#define nx_current_flags() \ + ((current->nx_info) ? current->nx_info->nx_flags : 0) + +#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f)) + + +#define nx_current_ncaps() \ + ((current->nx_info) ? 
current->nx_info->nx_ncaps : 0) + +#define nx_ncaps(c) (nx_current_ncaps() & (c)) + + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_socket.h b/include/linux/vs_socket.h new file mode 100644 index 000000000..499245822 --- /dev/null +++ b/include/linux/vs_socket.h @@ -0,0 +1,65 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/network.h" + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vserver/dlimit.h b/include/linux/vserver/dlimit.h new file mode 100644 index 000000000..74872ed74 --- /dev/null +++ b/include/linux/vserver/dlimit.h @@ -0,0 +1,83 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" +#include + +/* inode vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#define CDLIM_UNSET (0ULL) +#define CDLIM_INFINITY (~0ULL) +#define CDLIM_KEEP (~1ULL) + + +#ifdef __KERNEL__ + +struct super_block; + +struct dl_info { + struct hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the rcu head */ + xid_t dl_xid; /* context id */ + atomic_t dl_usecnt; /* usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + +// struct rw_semaphore dl_sem; /* protect the values */ + spinlock_t dl_lock; /* protect the values */ + + uint64_t dl_space_used; /* used space in bytes */ + uint64_t dl_space_total; /* maximum space in bytes */ + uint32_t dl_inodes_used; /* used inodes */ + uint32_t dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +extern void rcu_free_dl_info(void *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, xid_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + + +extern int 
vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + + +typedef uint64_t dlsize_t; + + +#endif /* __KERNEL__ */ + +#endif /* _VX_DLIMIT_H */ diff --git a/kernel/vserver/dlimit.c b/kernel/vserver/dlimit.c new file mode 100644 index 000000000..eb9282f9f --- /dev/null +++ b/kernel/vserver/dlimit.c @@ -0,0 +1,439 @@ +/* + * linux/kernel/vserver/dlimit.c + * + * Virtual Server: Context Disk Limits + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 initial version + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* __alloc_dl_info() + + * allocate an initialized dl_info struct + * doesn't make it visible (hash) */ + +static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *new = NULL; + + vxdprintk("alloc_dl_info(%p,%d)\n", sb, xid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_xid = xid; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk("alloc_dl_info(%p,%d) = %p\n", sb, xid, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk("dealloc_dl_info(%p)\n", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_xid = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, xid_t xid) +{ + return ((xid ^ (unsigned int)sb) % DL_HASH_SIZE); +} + + + +/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk("__hash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk("__unhash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +#define hlist_for_each_rcu(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ + pos = pos->next, ({ smp_read_barrier_depends(); 0;})) + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct dl_info *dli = + hlist_entry(pos, struct dl_info, dl_hlist); + + if (dli->dl_xid == xid && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) +{ + struct 
dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, xid)); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(void *obj) +{ + struct dl_info *dli = obj; + int usecnt, refcnt; + + BUG_ON(!dli); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! rcu didn't free\n"); +} + + + + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + + ret = -EEXIST; + if (__lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + + __unhash_dl_info(dli); + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if (vc_data.reserved > 100 || + vc_data.inodes_used > vc_data.inodes_total || + vc_data.space_used > vc_data.space_total) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (vc_data.inodes_used != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_used = vc_data.inodes_used; + if (vc_data.inodes_total != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_total = vc_data.inodes_total; + if (vc_data.space_used != (uint32_t)CDLIM_KEEP) { + dli->dl_space_used = vc_data.space_used; + dli->dl_space_used <<= 10; + } + if (vc_data.space_total == (uint32_t)CDLIM_INFINITY) + dli->dl_space_total = (uint64_t)CDLIM_INFINITY; + else if (vc_data.space_total != (uint32_t)CDLIM_KEEP) { + dli->dl_space_total = vc_data.space_total; + dli->dl_space_total <<= 10; + } + if (vc_data.reserved != (uint32_t)CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - vc_data.reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + 
path_release(&nd); + } + return ret; +} + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if (vc_data.reserved > 100 || + vc_data.inodes_used > vc_data.inodes_total || + vc_data.space_used > vc_data.space_total) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + vc_data.inodes_used = dli->dl_inodes_used; + vc_data.inodes_total = dli->dl_inodes_total; + vc_data.space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + vc_data.space_total = (uint32_t)CDLIM_INFINITY; + else + vc_data.space_total = dli->dl_space_total >> 10; + + vc_data.reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + goto out_release; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, current->xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to min */ + if (bavail < buf->f_bavail) + buf->f_bavail = bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + diff --git a/kernel/vserver/helper.c b/kernel/vserver/helper.c new file mode 100644 index 000000000..880b84335 --- /dev/null +++ b/kernel/vserver/helper.c @@ -0,0 +1,92 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... 
+ * argv [2] = context identifier
+ * argv [3] = additional argument (restart2)
+ *
+ * envp [*] = type-specific parameters
+ */
+
+long vs_reboot(unsigned int cmd, void * arg)
+{
+	char id_buf[8], cmd_buf[32];
+	char uid_buf[32], pid_buf[32];
+	char buffer[256] = "";	/* zero-filled so a max-length restart2 arg stays NUL terminated */
+
+	char *argv[] = {vshelper_path, NULL, id_buf, NULL, NULL};
+	char *envp[] = {"HOME=/", "TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			uid_buf, pid_buf, cmd_buf, NULL};
+
+	snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+
+	snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+	snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
+	snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
+
+	switch (cmd) {
+	case LINUX_REBOOT_CMD_RESTART:
+		argv[1] = "restart";
+		break;
+
+	case LINUX_REBOOT_CMD_HALT:
+		argv[1] = "halt";
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+		argv[1] = "poweroff";
+		break;
+
+	case LINUX_REBOOT_CMD_SW_SUSPEND:
+		argv[1] = "swsusp";
+		break;
+
+	case LINUX_REBOOT_CMD_RESTART2:
+		if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
+			return -EFAULT;
+		argv[3] = buffer;	/* fall through: action is reported as "restart2" */
+	default:
+		argv[1] = "restart2";
+		break;
+	}
+
+	/* maybe we should wait ? */
+	if (call_usermodehelper(*argv, argv, envp, 0)) {
+		printk(KERN_WARNING
+			"vs_reboot(): failed to exec (%s %s %s %s)\n",
+			vshelper_path, argv[1], argv[2], argv[3]);
+		return -EPERM;
+	}
+	return 0;
+}
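A minimal userspace sketch of the receiving end of this convention, i.e. what a /sbin/vshelper binary sees when vs_reboot() above invokes it. Only the argv layout (action, context id, optional restart2 argument) and the VS_CMD/VS_UID/VS_PID environment variables come from the code above; the handler body itself is an illustrative assumption, not part of this patch.

/* hypothetical vshelper: demonstrates the kernel -> helper calling convention only */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* argv[1] = action ("restart", "halt", "poweroff", "swsusp", "restart2"),
	 * argv[2] = context id (xid), argv[3] = optional restart2 argument */
	const char *action = (argc > 1) ? argv[1] : "?";
	const char *xid    = (argc > 2) ? argv[2] : "?";
	const char *extra  = (argc > 3) ? argv[3] : "";

	/* environment set up by vs_reboot(): reboot command code, caller uid/pid */
	const char *cmd = getenv("VS_CMD");
	const char *uid = getenv("VS_UID");
	const char *pid = getenv("VS_PID");

	/* a real helper would stop or restart the guest context here;
	 * this sketch only records what the kernel handed over */
	fprintf(stderr, "vshelper: action=%s xid=%s arg=%s cmd=%s uid=%s pid=%s\n",
		action, xid, extra,
		cmd ? cmd : "-", uid ? uid : "-", pid ? pid : "-");
	return 0;
}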