--- /dev/null
#ifndef __COW_H__
#define __COW_H__

#include <asm/types.h>

/* 64-bit byte-order conversion for on-disk COW header fields.
 * NOTE(review): bswap_64() comes from <byteswap.h> and __BYTE_ORDER from
 * the libc endian headers; this header includes neither, so every user
 * must include them first — confirm all includers do. */
#if __BYTE_ORDER == __BIG_ENDIAN
# define ntohll(x) (x)
# define htonll(x) (x)
#elif __BYTE_ORDER == __LITTLE_ENDIAN
# define ntohll(x) bswap_64(x)
# define htonll(x) bswap_64(x)
#else
#error "__BYTE_ORDER not defined"
#endif

/* Initialize a new COW file on fd: write a V3 header describing
 * backing_file and extend the file to cover bitmap + data.  The bitmap
 * and data layout are reported through the *_out parameters. */
extern int init_cow_file(int fd, char *cow_file, char *backing_file,
			 int sectorsize, int alignment, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out);

/* reader callback usable with read_cow_header: pread()-style read from
 * the file descriptor passed (by pointer) in arg. */
extern int file_reader(__u64 offset, char *buf, int len, void *arg);

/* Parse a V1/V2/V3 COW header through the reader callback.
 * *backing_file_out is freshly allocated; the caller owns it. */
extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
			   void *arg, __u32 *version_out,
			   char **backing_file_out, time_t *mtime_out,
			   __u64 *size_out, int *sectorsize_out,
			   __u32 *align_out, int *bitmap_offset_out);

/* Write a V3 header for backing_file at the start of fd; *size is an
 * output reporting the backing file's current size. */
extern int write_cow_header(char *cow_file, int fd, char *backing_file,
			    int sectorsize, int alignment, long long *size);

/* Compute bitmap length and data offset for the given header version;
 * versions < 3 align data to the sector size, V3 uses the align field. */
extern void cow_sizes(int version, __u64 size, int sectorsize, int align,
		      int bitmap_offset, unsigned long *bitmap_len_out,
		      int *data_offset_out);

#endif
+
+/*
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+#include <stddef.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <byteswap.h>
+#include <sys/time.h>
+#include <sys/param.h>
+#include <sys/user.h>
+#include <netinet/in.h>
+
+#include "os.h"
+
+#include "cow.h"
+#include "cow_sys.h"
+
#define PATH_LEN_V1 256

/* On-disk COW header, version 1.  Fields are stored in host byte order
 * with host-sized types, which is why this layout was superseded. */
struct cow_header_v1 {
	int magic;			/* COW_MAGIC */
	int version;			/* 1 */
	char backing_file[PATH_LEN_V1];
	time_t mtime;			/* backing file mtime at creation */
	__u64 size;			/* backing file size in bytes */
	int sectorsize;			/* bytes covered by one bitmap bit */
};

#define PATH_LEN_V2 MAXPATHLEN

/* Version 2 — same shape as V1 except for the longer path; the scalar
 * fields are stored in network (big-endian) byte order. */
struct cow_header_v2 {
	unsigned long magic;
	unsigned long version;
	char backing_file[PATH_LEN_V2];
	time_t mtime;
	__u64 size;
	int sectorsize;
};

/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in
 * case other systems have different values for MAXPATHLEN
 */
#define PATH_LEN_V3 4096

/* Changes from V2 -
 *	PATH_LEN_V3 as described above
 *	Explicitly specify field bit lengths for systems with different
 *		lengths for the usual C types.  Not sure whether char or
 *		time_t should be changed, this can be changed later without
 *		breaking compatibility
 *	Add alignment field so that different alignments can be used for the
 *		bitmap and data
 * 	Add cow_format field to allow for the possibility of different ways
 *		of specifying the COW blocks.  For now, the only value is 0,
 * 		for the traditional COW bitmap.
 *	Move the backing_file field to the end of the header.  This allows
 *		for the possibility of expanding it into the padding required
 *		by the bitmap alignment.
 * 	The bitmap and data portions of the file will be aligned as specified
 * 		by the alignment field.  This is to allow COW files to be
 *		put on devices with restrictions on access alignments, such as
 *		/dev/raw, with a 512 byte alignment restriction.  This also
 *		allows the data to be more aligned more strictly than on
 *		sector boundaries.  This is needed for ubd-mmap, which needs
 *		the data to be page aligned.
 *	Fixed (finally!) the rounding bug
 */

struct cow_header_v3 {
	__u32 magic;
	__u32 version;
	time_t mtime;
	__u64 size;
	__u32 sectorsize;
	__u32 alignment;		/* alignment of bitmap and data */
	__u32 cow_format;		/* COW_BITMAP is the only format */
	char backing_file[PATH_LEN_V3];
};

/* COW format definitions - for now, we have only the usual COW bitmap */
#define COW_BITMAP 0

/* Scratch space large enough to read any header version into. */
union cow_header {
	struct cow_header_v1 v1;
	struct cow_header_v2 v2;
	struct cow_header_v3 v3;
};

#define COW_MAGIC 0x4f4f4f4d /* MOOO */
#define COW_VERSION 3

/* Integer division rounding up, and rounding up to a multiple of align.
 * NOTE(review): ROUND_UP's expansion is not parenthesized as a whole,
 * so it is only safe in the simple assignment contexts used below. */
#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len))
#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align)
+
/* Report, for a COW file of the given header version, the length in
 * bytes of the sector bitmap (*bitmap_len_out) and the file offset at
 * which the data area starts (*data_offset_out), given the backing file
 * size, the sector size, the data alignment and the bitmap's offset. */
void cow_sizes(int version, __u64 size, int sectorsize, int align,
	       int bitmap_offset, unsigned long *bitmap_len_out,
	       int *data_offset_out)
{
	if(version < 3){
		/* Pre-V3 layout: bitmap length keeps the historical
		 * rounding bug for compatibility; data is aligned to the
		 * sector size. */
		unsigned long bits = (size + sectorsize - 1) /
			(8 * sectorsize);
		int data = bitmap_offset + bits;

		data = ((data + sectorsize - 1) / sectorsize) * sectorsize;

		*bitmap_len_out = bits;
		*data_offset_out = data;
	}
	else {
		/* V3 layout: one bit per sector, rounded up to whole
		 * bytes; data starts at the next 'align' boundary past
		 * the bitmap. */
		unsigned long sectors = (size + sectorsize - 1) / sectorsize;
		int data;

		*bitmap_len_out = (sectors + 7) / 8;

		data = bitmap_offset + *bitmap_len_out;
		data = ((data + align - 1) / align) * align;
		*data_offset_out = data;
	}
}
+
/* Convert the possibly-relative path 'from' into an absolute path in
 * 'to' (at most 'size' bytes, including the NUL).  Works by temporarily
 * chdir()ing into the directory part and reading the cwd back, so the
 * current directory is saved first and restored on every exit path.
 * (Bug fix: the original returned from the post-chdir error paths
 * without restoring the saved cwd, leaking the directory change.)
 * Returns 0 on success, -1 on error. */
static int absolutize(char *to, int size, char *from)
{
	char save_cwd[256], *slash;
	int remaining;

	if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
		cow_printf("absolutize : unable to get cwd - errno = %d\n",
			   errno);
		return(-1);
	}
	slash = strrchr(from, '/');
	if(slash != NULL){
		/* Temporarily NUL-terminate the directory part for chdir. */
		*slash = '\0';
		if(chdir(from)){
			*slash = '/';
			cow_printf("absolutize : Can't cd to '%s' - "
				   "errno = %d\n", from, errno);
			return(-1);
		}
		*slash = '/';
		if(getcwd(to, size) == NULL){
			cow_printf("absolutize : unable to get cwd of '%s' - "
				   "errno = %d\n", from, errno);
			goto out_restore;
		}
		/* getcwd succeeded, so strlen(to) < size and remaining >= 1;
		 * need room for the "/file" tail plus the NUL. */
		remaining = size - strlen(to);
		if(strlen(slash) + 1 > remaining){
			cow_printf("absolutize : unable to fit '%s' into %d "
				   "chars\n", from, size);
			goto out_restore;
		}
		strcat(to, slash);
	}
	else {
		if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
			cow_printf("absolutize : unable to fit '%s' into %d "
				   "chars\n", from, size);
			return(-1);
		}
		strcpy(to, save_cwd);
		strcat(to, "/");
		strcat(to, from);
	}
	/* Restore the caller's working directory (a no-op in the
	 * no-slash branch) and check the result instead of ignoring it. */
	if(chdir(save_cwd)){
		cow_printf("absolutize : unable to restore cwd to '%s' - "
			   "errno = %d\n", save_cwd, errno);
		return(-1);
	}
	return(0);

 out_restore:
	if(chdir(save_cwd))
		cow_printf("absolutize : unable to restore cwd to '%s' - "
			   "errno = %d\n", save_cwd, errno);
	return(-1);
}
+
/* Write a V3 COW header describing backing_file to the start of fd.
 * The backing file's current size is reported through *size (the caller
 * uses it to lay out the bitmap).  Returns 0 or a negative errno.
 * NOTE(review): mtime is a time_t but is stored through htonl(), so it
 * is truncated to 32 bits; cow_format is stored without byte-swapping,
 * which is harmless only because COW_BITMAP == 0 — confirm intended. */
int write_cow_header(char *cow_file, int fd, char *backing_file,
		     int sectorsize, int alignment, long long *size)
{
	struct cow_header_v3 *header;
	unsigned long modtime;
	int err;

	err = cow_seek_file(fd, 0);
	if(err < 0){
		cow_printf("write_cow_header - lseek failed, err = %d\n", -err);
		goto out;
	}

	err = -ENOMEM;
	header = cow_malloc(sizeof(*header));
	if(header == NULL){
		cow_printf("Failed to allocate COW V3 header\n");
		goto out;
	}
	header->magic = htonl(COW_MAGIC);
	header->version = htonl(COW_VERSION);

	err = -EINVAL;
	if(strlen(backing_file) > sizeof(header->backing_file) - 1){
		cow_printf("Backing file name \"%s\" is too long - names are "
			   "limited to %d characters\n", backing_file,
			   sizeof(header->backing_file) - 1);
		goto out_free;
	}

	/* Store the backing file path in absolute form so the COW file
	 * still finds it regardless of the cwd at open time. */
	if(absolutize(header->backing_file, sizeof(header->backing_file),
		      backing_file))
		goto out_free;

	err = os_file_modtime(header->backing_file, &modtime);
	if(err < 0){
		cow_printf("Backing file '%s' mtime request failed, "
			   "err = %d\n", header->backing_file, -err);
		goto out_free;
	}

	err = cow_file_size(header->backing_file, size);
	if(err < 0){
		cow_printf("Couldn't get size of backing file '%s', "
			   "err = %d\n", header->backing_file, -err);
		goto out_free;
	}

	/* Scalar fields go to disk big-endian. */
	header->mtime = htonl(modtime);
	header->size = htonll(*size);
	header->sectorsize = htonl(sectorsize);
	header->alignment = htonl(alignment);
	header->cow_format = COW_BITMAP;

	err = os_write_file(fd, header, sizeof(*header));
	if(err != sizeof(*header)){
		cow_printf("Write of header to new COW file '%s' failed, "
			   "err = %d\n", cow_file, -err);
		goto out_free;
	}
	err = 0;
 out_free:
	cow_free(header);
 out:
	return(err);
}
+
/* Default reader for read_cow_header: arg points at the file descriptor
 * to pread() from.  Returns the pread() result (bytes read or -1). */
int file_reader(__u64 offset, char *buf, int len, void *arg)
{
	int fd = *((int *) arg);

	return(pread(fd, buf, len, offset));
}
+
+/* XXX Need to sanity-check the values read from the header */
+
+int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
+ __u32 *version_out, char **backing_file_out,
+ time_t *mtime_out, __u64 *size_out,
+ int *sectorsize_out, __u32 *align_out,
+ int *bitmap_offset_out)
+{
+ union cow_header *header;
+ char *file;
+ int err, n;
+ unsigned long version, magic;
+
+ header = cow_malloc(sizeof(*header));
+ if(header == NULL){
+ cow_printf("read_cow_header - Failed to allocate header\n");
+ return(-ENOMEM);
+ }
+ err = -EINVAL;
+ n = (*reader)(0, (char *) header, sizeof(*header), arg);
+ if(n < offsetof(typeof(header->v1), backing_file)){
+ cow_printf("read_cow_header - short header\n");
+ goto out;
+ }
+
+ magic = header->v1.magic;
+ if(magic == COW_MAGIC) {
+ version = header->v1.version;
+ }
+ else if(magic == ntohl(COW_MAGIC)){
+ version = ntohl(header->v1.version);
+ }
+ /* No error printed because the non-COW case comes through here */
+ else goto out;
+
+ *version_out = version;
+
+ if(version == 1){
+ if(n < sizeof(header->v1)){
+ cow_printf("read_cow_header - failed to read V1 "
+ "header\n");
+ goto out;
+ }
+ *mtime_out = header->v1.mtime;
+ *size_out = header->v1.size;
+ *sectorsize_out = header->v1.sectorsize;
+ *bitmap_offset_out = sizeof(header->v1);
+ *align_out = *sectorsize_out;
+ file = header->v1.backing_file;
+ }
+ else if(version == 2){
+ if(n < sizeof(header->v2)){
+ cow_printf("read_cow_header - failed to read V2 "
+ "header\n");
+ goto out;
+ }
+ *mtime_out = ntohl(header->v2.mtime);
+ *size_out = ntohll(header->v2.size);
+ *sectorsize_out = ntohl(header->v2.sectorsize);
+ *bitmap_offset_out = sizeof(header->v2);
+ *align_out = *sectorsize_out;
+ file = header->v2.backing_file;
+ }
+ else if(version == 3){
+ if(n < sizeof(header->v3)){
+ cow_printf("read_cow_header - failed to read V2 "
+ "header\n");
+ goto out;
+ }
+ *mtime_out = ntohl(header->v3.mtime);
+ *size_out = ntohll(header->v3.size);
+ *sectorsize_out = ntohl(header->v3.sectorsize);
+ *align_out = ntohl(header->v3.alignment);
+ *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out);
+ file = header->v3.backing_file;
+ }
+ else {
+ cow_printf("read_cow_header - invalid COW version\n");
+ goto out;
+ }
+ err = -ENOMEM;
+ *backing_file_out = cow_strdup(file);
+ if(*backing_file_out == NULL){
+ cow_printf("read_cow_header - failed to allocate backing "
+ "file\n");
+ goto out;
+ }
+ err = 0;
+ out:
+ cow_free(header);
+ return(err);
+}
+
/* Create a new COW file on fd over backing_file: write the V3 header,
 * compute the bitmap/data layout, and extend the file to its final
 * length.  Returns 0 or a negative errno. */
int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
		  int alignment, int *bitmap_offset_out,
		  unsigned long *bitmap_len_out, int *data_offset_out)
{
	__u64 size, offset;
	char zero = 0;
	int err;

	err = write_cow_header(cow_file, fd, backing_file, sectorsize,
			       alignment, &size);
	if(err)
		goto out;

	*bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment);
	cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out,
		  bitmap_len_out, data_offset_out);

	/* Seek to the last byte of the data area so the single write
	 * below sets the file length. */
	offset = *data_offset_out + size - sizeof(zero);
	err = cow_seek_file(fd, offset);
	if(err < 0){
		cow_printf("cow bitmap lseek failed : err = %d\n", -err);
		goto out;
	}

	/* does not really matter how much we write it is just to set EOF
	 * this also sets the entire COW bitmap
	 * to zero without having to allocate it
	 */
	err = cow_write_file(fd, &zero, sizeof(zero));
	if(err != sizeof(zero)){
		cow_printf("Write of bitmap to new COW file '%s' failed, "
			   "err = %d\n", cow_file, -err);
		err = -EINVAL;
		goto out;
	}

	return(0);

 out:
	return(err);
}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
#ifndef __IRQ_KERN_H__
#define __IRQ_KERN_H__

#include "linux/interrupt.h"

/* Register handler for interrupt irq, driven by activity on host file
 * descriptor fd.  handler is a normal Linux interrupt handler; devname
 * and dev_id are passed through as for request_irq().
 * NOTE(review): the semantics of 'type' are defined by the IRQ
 * implementation, not visible here — confirm against um_request_irq(). */
extern int um_request_irq(unsigned int irq, int fd, int type,
			  irqreturn_t (*handler)(int, void *,
						 struct pt_regs *),
			  unsigned long irqflags, const char * devname,
			  void *dev_id);

#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
#ifndef __MEM_KERN_H__
#define __MEM_KERN_H__

#include "linux/list.h"
#include "linux/types.h"

/* A registered handler that may remap pages; proc is called with
 * (fd, address, is_write, offset) by the memory fault code.
 * NOTE(review): exact proc argument semantics are defined by the
 * callers of register_remapper(), not visible here. */
struct remapper {
	struct list_head list;		/* link in the remappers list */
	int (*proc)(int, unsigned long, int, __u64);
};

/* Add info to the list of remappers consulted on faults. */
extern void register_remapper(struct remapper *info);

#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/mm.h"
+#include "linux/ghash.h"
+#include "linux/slab.h"
+#include "linux/vmalloc.h"
+#include "linux/bootmem.h"
+#include "asm/types.h"
+#include "asm/pgtable.h"
+#include "kern_util.h"
+#include "user_util.h"
+#include "mode_kern.h"
+#include "mem.h"
+#include "mem_user.h"
+#include "os.h"
+#include "kern.h"
+#include "init.h"
+
#if 0
/* Dead experimental code: a page-table-backed replacement for the
 * virtmem hash below.  It does not compile as written (missing local
 * declarations, bogus add_mapping signature) and is kept disabled. */
static pgd_t physmem_pgd[PTRS_PER_PGD];

static struct phys_desc *lookup_mapping(void *addr)
{
	pgd = &physmem_pgd[pgd_index(addr)];
	if(pgd_none(pgd))
		return(NULL);

	pmd = pmd_offset(pgd, addr);
	if(pmd_none(pmd))
		return(NULL);

	pte = pte_offset_kernel(pmd, addr);
	return((struct phys_desc *) pte_val(pte));
}

static struct add_mapping(void *addr, struct phys_desc *new)
{
}
#endif

/* Number of buckets in the virtual-address -> phys_desc hash. */
#define PHYS_HASHSIZE (8192)

struct phys_desc;

DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc);

/* One page whose backing has been substituted with a mapping of some
 * host file descriptor (see physmem_subst_mapping). */
struct phys_desc {
	struct virtmem_ptrs virt_ptrs;	/* hash chain links */
	int fd;				/* host fd backing the page */
	__u64 offset;			/* offset of the page within fd */
	void *virt;			/* the substituted virtual address */
	unsigned long phys;		/* its physical address */
	struct list_head list;		/* link in desc_mapping.pages */
};

/* Hash of all currently substituted pages, keyed by virtual address. */
struct virtmem_table virtmem_hash;

/* Hash comparison: exact pointer equality (0 means equal). */
static int virt_cmp(void *virt1, void *virt2)
{
	return(virt1 != virt2);
}

/* Hash function: page frame number modulo the table size. */
static int virt_hash(void *virt)
{
	unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT;
	return(addr % PHYS_HASHSIZE);
}

DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp,
	 virt_hash);

/* All host descriptors that currently back substituted pages. */
LIST_HEAD(descriptor_mappings);

/* Per-host-fd record: the list of pages substituted from that fd. */
struct desc_mapping {
	int fd;				/* the host descriptor */
	struct list_head list;		/* link in descriptor_mappings */
	struct list_head pages;		/* phys_desc.list entries */
};
+
+static struct desc_mapping *find_mapping(int fd)
+{
+ struct desc_mapping *desc;
+ struct list_head *ele;
+
+ list_for_each(ele, &descriptor_mappings){
+ desc = list_entry(ele, struct desc_mapping, list);
+ if(desc->fd == fd)
+ return(desc);
+ }
+
+ return(NULL);
+}
+
/* Return the desc_mapping for fd, creating and registering a fresh one
 * on first use.  Returns NULL on allocation failure.  GFP_ATOMIC since
 * this can be called from non-sleepable context. */
static struct desc_mapping *descriptor_mapping(int fd)
{
	struct desc_mapping *desc;

	desc = find_mapping(fd);
	if(desc != NULL)
		return(desc);

	desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
	if(desc == NULL)
		return(NULL);

	*desc = ((struct desc_mapping)
		{ .fd =		fd,
		  .list =	LIST_HEAD_INIT(desc->list),
		  .pages =	LIST_HEAD_INIT(desc->pages) });
	list_add(&desc->list, &descriptor_mappings);

	return(desc);
}
+
+int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w)
+{
+ struct desc_mapping *fd_maps;
+ struct phys_desc *desc;
+ unsigned long phys;
+ int err;
+
+ fd_maps = descriptor_mapping(fd);
+ if(fd_maps == NULL)
+ return(-ENOMEM);
+
+ phys = __pa(virt);
+ if(find_virtmem_hash(&virtmem_hash, virt) != NULL)
+ panic("Address 0x%p is already substituted\n", virt);
+
+ err = -ENOMEM;
+ desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
+ if(desc == NULL)
+ goto out;
+
+ *desc = ((struct phys_desc)
+ { .virt_ptrs = { NULL, NULL },
+ .fd = fd,
+ .offset = offset,
+ .virt = virt,
+ .phys = __pa(virt),
+ .list = LIST_HEAD_INIT(desc->list) });
+ insert_virtmem_hash(&virtmem_hash, desc);
+
+ list_add(&desc->list, &fd_maps->pages);
+
+ virt = (void *) ((unsigned long) virt & PAGE_MASK);
+ err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0);
+ if(!err)
+ goto out;
+
+ remove_virtmem_hash(&virtmem_hash, desc);
+ kfree(desc);
+ out:
+ return(err);
+}
+
/* Host fd backing "physical" memory; set in setup_physmem(). */
static int physmem_fd = -1;

/* Tear down one substitution: unlink desc from the hash and the per-fd
 * page list, free it, and remap the page back onto the physmem file
 * (read-write).  Panics if the remap fails. */
static void remove_mapping(struct phys_desc *desc)
{
	void *virt = desc->virt;
	int err;

	remove_virtmem_hash(&virtmem_hash, desc);
	list_del(&desc->list);
	kfree(desc);

	err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0);
	if(err)
		panic("Failed to unmap block device page from physical memory, "
		      "errno = %d", -err);
}

/* Undo a physmem_subst_mapping() for the page containing virt.
 * Returns 1 if a substitution was removed, 0 if there was none. */
int physmem_remove_mapping(void *virt)
{
	struct phys_desc *desc;

	virt = (void *) ((unsigned long) virt & PAGE_MASK);
	desc = find_virtmem_hash(&virtmem_hash, virt);
	if(desc == NULL)
		return(0);

	remove_mapping(desc);
	return(1);
}
+
/* Drop every substitution backed by fd (called when fd is going away):
 * each page is remapped onto physmem and its current contents are read
 * back from fd so no data is lost.  Panics on seek/read failure. */
void physmem_forget_descriptor(int fd)
{
	struct desc_mapping *desc;
	struct phys_desc *page;
	struct list_head *ele, *next;
	__u64 offset;
	void *addr;
	int err;

	desc = find_mapping(fd);
	if(desc == NULL)
		return;

	/* _safe variant: remove_mapping() deletes entries as we walk. */
	list_for_each_safe(ele, next, &desc->pages){
		page = list_entry(ele, struct phys_desc, list);
		offset = page->offset;
		addr = page->virt;
		remove_mapping(page);
		err = os_seek_file(fd, offset);
		if(err)
			panic("physmem_forget_descriptor - failed to seek "
			      "to %lld in fd %d, error = %d\n",
			      offset, fd, -err);
		/* Preserve the page contents now that fd no longer backs
		 * the page. */
		err = os_read_file(fd, addr, PAGE_SIZE);
		if(err < 0)
			panic("physmem_forget_descriptor - failed to read "
			      "from fd %d to 0x%p, error = %d\n",
			      fd, addr, -err);
	}

	list_del(&desc->list);
	kfree(desc);
}
+
/* Arch hook on page free: tear down any substituted mapping for each of
 * the 2^order pages being released. */
void arch_free_page(struct page *page, int order)
{
	void *virt;
	int i;

	for(i = 0; i < (1 << order); i++){
		virt = __va(page_to_phys(page + i));
		physmem_remove_mapping(virt);
	}
}

/* Nonzero if the page containing virt currently has a substituted
 * mapping. */
int is_remapped(void *virt)
{
	return(find_virtmem_hash(&virtmem_hash, virt) != NULL);
}

/* Changed during early boot */
unsigned long high_physmem;

extern unsigned long physmem_size;

/* Physical <-> virtual conversion: UML "physical" addresses are just
 * offsets from the uml_physmem base of the process address space. */
void *to_virt(unsigned long phys)
{
	return((void *) uml_physmem + phys);
}

unsigned long to_phys(void *virt)
{
	return(((unsigned long) virt) - uml_physmem);
}
+
+int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
+{
+ struct page *p, *map;
+ unsigned long phys_len, phys_pages, highmem_len, highmem_pages;
+ unsigned long iomem_len, iomem_pages, total_len, total_pages;
+ int i;
+
+ phys_pages = physmem >> PAGE_SHIFT;
+ phys_len = phys_pages * sizeof(struct page);
+
+ iomem_pages = iomem >> PAGE_SHIFT;
+ iomem_len = iomem_pages * sizeof(struct page);
+
+ highmem_pages = highmem >> PAGE_SHIFT;
+ highmem_len = highmem_pages * sizeof(struct page);
+
+ total_pages = phys_pages + iomem_pages + highmem_pages;
+ total_len = phys_len + iomem_pages + highmem_len;
+
+ if(kmalloc_ok){
+ map = kmalloc(total_len, GFP_KERNEL);
+ if(map == NULL)
+ map = vmalloc(total_len);
+ }
+ else map = alloc_bootmem_low_pages(total_len);
+
+ if(map == NULL)
+ return(-ENOMEM);
+
+ for(i = 0; i < total_pages; i++){
+ p = &map[i];
+ set_page_count(p, 0);
+ SetPageReserved(p);
+ INIT_LIST_HEAD(&p->lru);
+ }
+
+ mem_map = map;
+ max_mapnr = total_pages;
+ return(0);
+}
+
/* mem_map index math: one struct page per PAGE_SIZE of "physical"
 * memory, starting at physical address 0. */
struct page *phys_to_page(const unsigned long phys)
{
	return(&mem_map[phys >> PAGE_SHIFT]);
}

struct page *__virt_to_page(const unsigned long virt)
{
	return(&mem_map[__pa(virt) >> PAGE_SHIFT]);
}

unsigned long page_to_phys(struct page *page)
{
	return((page - mem_map) << PAGE_SHIFT);
}

/* Build a pte for page with protection pgprot; newly-made present ptes
 * get the UML new-page/new-prot marker bits. */
pte_t mk_pte(struct page *page, pgprot_t pgprot)
{
	pte_t pte;

	pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot);
	if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte));
	return(pte);
}

/* Changed during early boot */
static unsigned long kmem_top = 0;

/* End of kernel memory; resolved lazily since it depends on the mode
 * (tt vs skas) chosen at boot. */
unsigned long get_kmem_end(void)
{
	if(kmem_top == 0)
		kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas);
	return(kmem_top);
}
+
/* Map [phys, phys+len) at virtual address virt with the given r/w/x
 * permissions, using whatever host fd currently backs phys.  Panics on
 * failure — callers rely on the mapping existing afterwards. */
void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
		int r, int w, int x)
{
	__u64 offset;
	int fd, err;

	fd = phys_mapping(phys, &offset);
	err = os_map_memory((void *) virt, fd, offset, len, r, w, x);
	if(err)
		panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, "
		      "err = %d\n", virt, fd, offset, len, r, w, x, err);
}

/* Round a byte address up to the next page frame number. */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)

/* Create the host file backing "physical" memory, map the non-reserved
 * part of it, and hand the free range to the bootmem allocator.
 * NOTE(review): the create_mem_file() result is not checked here —
 * presumably it panics internally on failure; confirm. */
void setup_physmem(unsigned long start, unsigned long reserve_end,
		   unsigned long len, unsigned long highmem)
{
	unsigned long reserve = reserve_end - start;
	int pfn = PFN_UP(__pa(reserve_end));
	int delta = (len - reserve) >> PAGE_SHIFT;
	int err, offset, bootmap_size;

	physmem_fd = create_mem_file(len + highmem);

	offset = uml_reserved - uml_physmem;
	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
			    len - offset, 1, 1, 0);
	if(err < 0){
		os_print_error(err, "Mapping memory");
		exit(1);
	}

	bootmap_size = init_bootmem(pfn, pfn + delta);
	free_bootmem(__pa(reserve_end) + bootmap_size,
		     len - bootmap_size - reserve);
}
+
/* Find which host descriptor backs physical address phys and at what
 * offset: a substituted page's fd, the physmem file, an iomem region,
 * or the physmem file again for highmem.  Returns the fd (-1 if phys
 * is out of range) and sets *offset_out.
 * NOTE(review): the highmem branch computes offset as phys - iomem_size,
 * which assumes the physmem | iomem | highmem layout within the backing
 * file — confirm against create_mem_file()'s layout. */
int phys_mapping(unsigned long phys, __u64 *offset_out)
{
	struct phys_desc *desc = find_virtmem_hash(&virtmem_hash,
						   __va(phys & PAGE_MASK));
	int fd = -1;

	if(desc != NULL){
		fd = desc->fd;
		*offset_out = desc->offset;
	}
	else if(phys < physmem_size){
		fd = physmem_fd;
		*offset_out = phys;
	}
	else if(phys < __pa(end_iomem)){
		struct iomem_region *region = iomem_regions;

		while(region != NULL){
			if((phys >= region->phys) &&
			   (phys < region->phys + region->size)){
				fd = region->fd;
				*offset_out = phys - region->phys;
				break;
			}
			region = region->next;
		}
	}
	else if(phys < __pa(end_iomem) + highmem){
		fd = physmem_fd;
		*offset_out = phys - iomem_size;
	}

	return(fd);
}

/* "mem=" kernel command line handler: parse the requested size into
 * physmem_size (memparse handles the k/K/m/M suffixes). */
static int __init uml_mem_setup(char *line, int *add)
{
	char *retptr;
	physmem_size = memparse(line,&retptr);
	return 0;
}
__uml_setup("mem=", uml_mem_setup,
"mem=<Amount of desired ram>\n"
"    This controls how much \"physical\" memory the kernel allocates\n"
"    for the system. The size is specified as a number followed by\n"
"    one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n"
"    This is not related to the amount of memory in the host.  It can\n"
"    be more, and the excess, if it's ever used, will just be swapped out.\n"
"	Example: mem=64M\n\n"
);
+
+unsigned long find_iomem(char *driver, unsigned long *len_out)
+{
+ struct iomem_region *region = iomem_regions;
+
+ while(region != NULL){
+ if(!strcmp(region->driver, driver)){
+ *len_out = region->size;
+ return(region->virt);
+ }
+ }
+
+ return(0);
+}
+
/* Map every registered iomem region into the address space just above
 * high_physmem, with an unmapped guard page between regions.  Always
 * returns 0; a failed region is reported and left with virt/phys unset
 * (its slot in the address range is still skipped). */
int setup_iomem(void)
{
	struct iomem_region *region = iomem_regions;
	unsigned long iomem_start = high_physmem + PAGE_SIZE;
	int err;

	while(region != NULL){
		err = os_map_memory((void *) iomem_start, region->fd, 0,
				    region->size, 1, 1, 0);
		if(err)
			printk("Mapping iomem region for driver '%s' failed, "
			       "errno = %d\n", region->driver, -err);
		else {
			region->virt = iomem_start;
			region->phys = __pa(region->virt);
		}

		/* Advance past the region plus a guard page. */
		iomem_start += region->size + PAGE_SIZE;
		region = region->next;
	}

	return(0);
}

__initcall(setup_iomem);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/stddef.h"
+#include "linux/kernel.h"
+#include "linux/string.h"
+#include "linux/fs.h"
+#include "linux/highmem.h"
+#include "asm/page.h"
+#include "asm/pgtable.h"
+#include "asm/uaccess.h"
+#include "kern_util.h"
+
/* Declared here for lack of a shared header. */
extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
			     pte_t *pte_out);

/* Translate a user virtual address to a physical address, faulting the
 * page in (and upgrading it to writable when is_write) as needed.
 * Returns 0 when the fault cannot be fixed up.
 * NOTE(review): do_op() below checks the result against -1, not 0, so
 * a failed fix-up appears to slip through — confirm intended. */
static unsigned long maybe_map(unsigned long virt, int is_write)
{
	pte_t pte;
	int err;

	void *phys = um_virt_to_phys(current, virt, &pte);
	int dummy_code;

	if(IS_ERR(phys) || (is_write && !pte_write(pte))){
		err = handle_page_fault(virt, 0, is_write, 0, &dummy_code);
		if(err)
			return(0);
		phys = um_virt_to_phys(current, virt, NULL);
	}
	return((unsigned long) phys);
}

/* Apply op to one chunk that lies within a single page: translate the
 * address, kmap the page, run op on the kernel mapping, unmap.
 * Returns op's result, or -1 when translation fails. */
static int do_op(unsigned long addr, int len, int is_write,
		 int (*op)(unsigned long addr, int len, void *arg), void *arg)
{
	struct page *page;
	int n;

	addr = maybe_map(addr, is_write);
	if(addr == -1)
		return(-1);

	page = phys_to_page(addr);
	addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
	n = (*op)(addr, len, arg);
	kunmap(page);

	return(n);
}

/* Walk [addr, addr+len) page by page, applying op to each piece: the
 * partial leading page, the whole middle pages, then the partial tail.
 * Returns 0 on success or the number of bytes left unprocessed when op
 * fails or stops early (op returning > 0 means a deliberate stop). */
static int buffer_op(unsigned long addr, int len, int is_write,
		     int (*op)(unsigned long addr, int len, void *arg),
		     void *arg)
{
	/* Bytes up to the end of addr's page (0 if already aligned). */
	int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);
	int remain = len, n;

	n = do_op(addr, size, is_write, op, arg);
	if(n != 0)
		return(n < 0 ? remain : 0);

	addr += size;
	remain -= size;
	if(remain == 0)
		return(0);

	while(addr < ((addr + remain) & PAGE_MASK)){
		n = do_op(addr, PAGE_SIZE, is_write, op, arg);
		if(n != 0)
			return(n < 0 ? remain : 0);

		addr += PAGE_SIZE;
		remain -= PAGE_SIZE;
	}
	if(remain == 0)
		return(0);

	n = do_op(addr, remain, is_write, op, arg);
	if(n != 0)
		return(n < 0 ? remain : 0);
	return(0);
}
+
/* buffer_op callback for copy_from_user: arg points at the running
 * destination address, which is advanced past the bytes copied so the
 * next chunk lands immediately after.  Always succeeds. */
static int copy_chunk_from_user(unsigned long from, int len, void *arg)
{
	unsigned long *dst_ptr = arg;

	memcpy((void *) *dst_ptr, (void *) from, len);
	*dst_ptr += len;
	return(0);
}
+
/* skas-mode copy_from_user: kernel-space accesses go straight through
 * memcpy; user addresses are range-checked then copied page by page via
 * buffer_op.  Returns the number of bytes NOT copied (0 on success). */
int copy_from_user_skas(void *to, const void *from, int n)
{
	if(segment_eq(get_fs(), KERNEL_DS)){
		memcpy(to, from, n);
		return(0);
	}

	return(access_ok_skas(VERIFY_READ, from, n) ?
	       buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
	       n);
}
+
/* buffer_op callback for copy_to_user: arg points at the running source
 * address, which is bumped past the bytes consumed by this chunk.
 * Always succeeds. */
static int copy_chunk_to_user(unsigned long to, int len, void *arg)
{
	unsigned long *src_ptr = arg;

	memcpy((void *) to, (void *) *src_ptr, len);
	*src_ptr += len;
	return(0);
}
+
/* skas-mode copy_to_user: mirror of copy_from_user_skas with the write
 * permission check.  Returns bytes NOT copied (0 on success). */
int copy_to_user_skas(void *to, const void *from, int n)
{
	if(segment_eq(get_fs(), KERNEL_DS)){
		memcpy(to, from, n);
		return(0);
	}

	return(access_ok_skas(VERIFY_WRITE, to, n) ?
	       buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
	       n);
}
+
/* buffer_op callback for strncpy_from_user: copy at most len bytes of
 * string from this chunk (strncpy zero-pads, matching the original),
 * then advance the caller's destination pointer past the copied string.
 * Returns 1 (stop the walk) once a NUL terminator was seen. */
static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
{
	char **dst_ptr = arg;
	char *dst = *dst_ptr;
	int copied;

	strncpy(dst, (char *) from, len);
	/* Equivalent of strnlen(dst, len): length of the copied string,
	 * capped at the chunk length. */
	for(copied = 0; copied < len && dst[copied] != '\0'; copied++)
		;
	*dst_ptr += copied;

	return(copied < len);
}
+
/* skas-mode strncpy_from_user: copy up to count bytes of string from
 * src, stopping at the terminator.  Returns the length of the copied
 * string (not counting the NUL) or -EFAULT.
 * Note the inverted buffer_op convention here: the chunk callback
 * returns 1 on SUCCESS (terminator found), so buffer_op returning 0
 * means the string filled the whole count without terminating. */
int strncpy_from_user_skas(char *dst, const char *src, int count)
{
	int n;
	char *ptr = dst;

	if(segment_eq(get_fs(), KERNEL_DS)){
		strncpy(dst, src, count);
		return(strnlen(dst, count));
	}

	if(!access_ok_skas(VERIFY_READ, src, 1))
		return(-EFAULT);

	n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user,
		      &ptr);
	if(n != 0)
		return(-EFAULT);
	return(strnlen(dst, count));
}
+
/* buffer_op callback for clear_user: zero one chunk.  Always succeeds,
 * so the page walk continues to the end of the range. */
static int clear_chunk(unsigned long addr, int len, void *unused)
{
	memset((void *) addr, 0, len);
	return(0);
}
+
/* Unchecked variant: zero len bytes of user memory via the page walk.
 * Returns the number of bytes NOT cleared (0 on success). */
int __clear_user_skas(void *mem, int len)
{
	return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL));
}

/* Checked clear_user: kernel addresses are zeroed directly; user
 * addresses are verified first.  Returns bytes NOT cleared. */
int clear_user_skas(void *mem, int len)
{
	if(segment_eq(get_fs(), KERNEL_DS)){
		memset(mem, 0, len);
		return(0);
	}

	return(access_ok_skas(VERIFY_WRITE, mem, len) ?
	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len);
}
+
/* buffer_op callback for strnlen_user: measure the string within this
 * chunk and accumulate into the caller's counter (arg).  Returns 1
 * (stop the walk) once the terminator was found inside the chunk. */
static int strnlen_chunk(unsigned long str, int len, void *arg)
{
	const char *s = (const char *) str;
	int *total = arg;
	int measured = 0;

	/* Equivalent of strnlen(s, len). */
	while(measured < len && s[measured] != '\0')
		measured++;
	*total += measured;

	return(measured < len);
}
+
/* skas-mode strnlen_user: length of the user string INCLUDING the
 * terminator, or -EFAULT.  buffer_op returning 0 here means the walk
 * hit a fault or ran out of len before finding a terminator
 * (strnlen_chunk returns 1 on success — inverted convention). */
int strnlen_user_skas(const void *str, int len)
{
	int count = 0, n;

	if(segment_eq(get_fs(), KERNEL_DS))
		return(strnlen(str, len) + 1);

	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
	if(n == 0)
		return(count + 1);
	return(-EFAULT);
}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+
+int copy_from_user_tt(void *to, const void *from, int n)
+{
+ if(!access_ok_tt(VERIFY_READ, from, n))
+ return(n);
+
+ return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher));
+}
+
+int copy_to_user_tt(void *to, const void *from, int n)
+{
+ if(!access_ok_tt(VERIFY_WRITE, to, n))
+ return(n);
+
+ return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher));
+}
+
+int strncpy_from_user_tt(char *dst, const char *src, int count)
+{
+ int n;
+
+ if(!access_ok_tt(VERIFY_READ, src, 1))
+ return(-EFAULT);
+
+ n = __do_strncpy_from_user(dst, src, count,
+ ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher);
+ if(n < 0) return(-EFAULT);
+ return(n);
+}
+
+int __clear_user_tt(void *mem, int len)
+{
+ return(__do_clear_user(mem, len,
+ ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher));
+}
+
+int clear_user_tt(void *mem, int len)
+{
+ if(!access_ok_tt(VERIFY_WRITE, mem, len))
+ return(len);
+
+ return(__do_clear_user(mem, len, ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher));
+}
+
+int strnlen_user_tt(const void *str, int len)
+{
+ return(__do_strnlen_user(str, len,
+ ¤t->thread.fault_addr,
+ ¤t->thread.fault_catcher));
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+#include "linux/types.h"
+#include "linux/module.h"
+
/* Some of these are builtin functions (others could become builtin in
 * the future), so good prototypes must be declared before EXPORTing.
 * Kernel code uses the macros defined by include/linux/string.h, so the
 * macros are #undef'd here; userspace code does not include that header,
 * and for it the glibc symbol is exported instead. */

#undef strlen
#undef strstr
#undef memcpy
#undef memset

extern size_t strlen(const char *);
extern void *memcpy(void *, const void *, size_t);
extern void *memset(void *, int, size_t);
extern int printf(const char *, ...);

EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(printf);

EXPORT_SYMBOL(strstr);

/* Here, instead, I can provide a fake prototype.  Yes, someone cares:
 * genksyms.  However, the modules will use the CRC defined *here*, no
 * matter if it is good; so the versions of these symbols will always
 * match.  The dummy int(void) signature below exists only to satisfy
 * EXPORT_SYMBOL; the real definitions come from the host libc. */
#define EXPORT_SYMBOL_PROTO(sym)       \
	int sym(void);                 \
	EXPORT_SYMBOL(sym);

EXPORT_SYMBOL_PROTO(__errno_location);

/* Host file-descriptor and file-metadata calls used by UML drivers. */
EXPORT_SYMBOL_PROTO(access);
EXPORT_SYMBOL_PROTO(open);
EXPORT_SYMBOL_PROTO(open64);
EXPORT_SYMBOL_PROTO(close);
EXPORT_SYMBOL_PROTO(read);
EXPORT_SYMBOL_PROTO(write);
EXPORT_SYMBOL_PROTO(dup2);
EXPORT_SYMBOL_PROTO(__xstat);
EXPORT_SYMBOL_PROTO(__lxstat);
EXPORT_SYMBOL_PROTO(__lxstat64);
EXPORT_SYMBOL_PROTO(lseek);
EXPORT_SYMBOL_PROTO(lseek64);
EXPORT_SYMBOL_PROTO(chown);
EXPORT_SYMBOL_PROTO(truncate);
EXPORT_SYMBOL_PROTO(utime);
EXPORT_SYMBOL_PROTO(chmod);
EXPORT_SYMBOL_PROTO(rename);
EXPORT_SYMBOL_PROTO(__xmknod);

/* Links. */
EXPORT_SYMBOL_PROTO(symlink);
EXPORT_SYMBOL_PROTO(link);
EXPORT_SYMBOL_PROTO(unlink);
EXPORT_SYMBOL_PROTO(readlink);

/* Directories. */
EXPORT_SYMBOL_PROTO(mkdir);
EXPORT_SYMBOL_PROTO(rmdir);
EXPORT_SYMBOL_PROTO(opendir);
EXPORT_SYMBOL_PROTO(readdir);
EXPORT_SYMBOL_PROTO(closedir);
EXPORT_SYMBOL_PROTO(seekdir);
EXPORT_SYMBOL_PROTO(telldir);

EXPORT_SYMBOL_PROTO(ioctl);

EXPORT_SYMBOL_PROTO(pread64);
EXPORT_SYMBOL_PROTO(pwrite64);

EXPORT_SYMBOL_PROTO(statfs);
EXPORT_SYMBOL_PROTO(statfs64);

EXPORT_SYMBOL_PROTO(getuid);
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+#
+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+# Licensed under the GPL
+#
+
+# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino
+# to __st_ino. It stayed in the same place, so as long as the correct name
+# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa.
+
# Probe the host's glibc headers: on hosts whose struct stat64 renamed the
# field, prepend "__" so the result is either st_ino or __st_ino.
STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \
	echo __)st_ino

hostfs-objs := hostfs_kern.o hostfs_user.o

obj-y =
obj-$(CONFIG_HOSTFS) += hostfs.o

# Expand multi-object modules (foo.o -> $(foo-objs)) so the *_user.o filter
# below sees the constituent objects, not just the composite module name.
SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs))

# *_user.o files are compiled against host headers with USER_CFLAGS rather
# than the kernel's flags.
USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS))
USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))

# Tell hostfs_user.c which inode field name the host's stat64 uses.
USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD)

$(USER_OBJS) : %.o: %.c
	$(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
--- /dev/null
+#ifndef __UM_FS_HOSTFS
+#define __UM_FS_HOSTFS
+
+#include "os.h"
+
+/* These are exactly the same definitions as in fs.h, but the names are
+ * changed so that this file can be included in both kernel and user files.
+ */
+
/* Bitmask for hostfs_iattr.ia_valid: which attributes set_attr() should
 * apply.  These mirror the kernel's ATTR_* flags (fs.h) bit-for-bit so
 * hostfs_setattr can translate by simple assignment.
 */
#define HOSTFS_ATTR_MODE	1
#define HOSTFS_ATTR_UID 	2
#define HOSTFS_ATTR_GID 	4
#define HOSTFS_ATTR_SIZE	8
#define HOSTFS_ATTR_ATIME	16
#define HOSTFS_ATTR_MTIME	32
#define HOSTFS_ATTR_CTIME	64
#define HOSTFS_ATTR_ATIME_SET	128
#define HOSTFS_ATTR_MTIME_SET	256
#define HOSTFS_ATTR_FORCE	512	/* Not a change, but a change it */
#define HOSTFS_ATTR_ATTR_FLAG	1024
+
/* Userspace-safe clone of the kernel's struct iattr: attribute values to
 * apply to a host file, gated by the HOSTFS_ATTR_* bits in ia_valid.
 */
struct hostfs_iattr {
	unsigned int	ia_valid;	/* HOSTFS_ATTR_* bitmask */
	mode_t		ia_mode;
	uid_t		ia_uid;
	gid_t		ia_gid;
	loff_t		ia_size;
	struct timespec	ia_atime;
	struct timespec	ia_mtime;
	struct timespec	ia_ctime;	/* never settable on the host */
	unsigned int	ia_attr_flags;
};
+
/* Interface to the userspace half (hostfs_user.c).  All functions return
 * 0 or a positive result on success and -errno on failure; out-parameters
 * may be NULL where the caller doesn't care.
 */
extern int stat_file(const char *path, unsigned long long *inode_out,
		     int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
		     unsigned long long *size_out, struct timespec *atime_out,
		     struct timespec *mtime_out, struct timespec *ctime_out,
		     int *blksize_out, unsigned long long *blocks_out);
extern int access_file(char *path, int r, int w, int x);
extern int open_file(char *path, int r, int w, int append);
extern int file_type(const char *path, int *rdev);
extern void *open_dir(char *path, int *err_out);
extern char *read_dir(void *stream, unsigned long long *pos,
		      unsigned long long *ino_out, int *len_out);
extern void close_file(void *stream);
extern void close_dir(void *stream);
extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
extern int write_file(int fd, unsigned long long *offset, const char *buf,
		      int len);
extern int lseek_file(int fd, long long offset, int whence);
extern int file_create(char *name, int ur, int uw, int ux, int gr,
		       int gw, int gx, int or, int ow, int ox);
extern int set_attr(const char *file, struct hostfs_iattr *attrs);
extern int make_symlink(const char *from, const char *to);
extern int unlink_file(const char *file);
extern int do_mkdir(const char *file, int mode);
extern int do_rmdir(const char *file);
extern int do_mknod(const char *file, int mode, int dev);
extern int link_file(const char *from, const char *to);
extern int do_readlink(char *file, char *buf, int size);
extern int rename_file(char *from, char *to);
extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
		     long long *bfree_out, long long *bavail_out,
		     long long *files_out, long long *ffree_out,
		     void *fsid_out, int fsid_size, long *namelen_out,
		     long *spare_out);
+
+#endif
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ *
+ * Ported the filesystem routines to 2.5.
+ * 2003-02-10 Petr Baudis <pasky@ucw.cz>
+ */
+
+#include <linux/stddef.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/buffer_head.h>
+#include <linux/root_dev.h>
+#include <linux/statfs.h>
+#include <asm/uaccess.h>
+#include "hostfs.h"
+#include "kern_util.h"
+#include "kern.h"
+#include "user_util.h"
+#include "2_5compat.h"
+#include "init.h"
+
/* Per-inode hostfs state, embedded around the VFS inode (see HOSTFS_I).
 * host_filename is only set on the root inode; other paths are rebuilt by
 * walking dentries (dentry_name).
 */
struct hostfs_inode_info {
	char *host_filename;	/* host path of the mount root, else NULL */
	int fd;			/* open host fd, -1 when not open */
	int mode;		/* FMODE_READ/FMODE_WRITE the fd was opened with */
	struct inode vfs_inode;
};
+
/* Map a VFS inode back to its containing hostfs_inode_info
 * (list_entry is container_of here).
 */
static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
{
	return(list_entry(inode, struct hostfs_inode_info, vfs_inode));
}
+
+#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode)
+
/* Drop dentries as soon as their refcount hits zero: the host filesystem
 * can change underneath us, so cached dentries must not be kept.
 */
int hostfs_d_delete(struct dentry *dentry)
{
	return 1;
}
+
/* Only d_delete is overridden - see hostfs_d_delete above. */
struct dentry_operations hostfs_dentry_ops = {
	.d_delete		= hostfs_d_delete,
};
+
/* Changed in hostfs_args before the kernel starts running */
static char *root_ino = "/";		/* host directory confining all mounts */
static int append = 0;			/* open everything O_APPEND if set */

#define HOSTFS_SUPER_MAGIC 0x00c0ffee

/* Filled in at the bottom of this file */
static struct inode_operations hostfs_iops;
static struct inode_operations hostfs_dir_iops;
static struct address_space_operations hostfs_link_aops;
+
+static int __init hostfs_args(char *options, int *add)
+{
+ char *ptr;
+
+ ptr = strchr(options, ',');
+ if(ptr != NULL)
+ *ptr++ = '\0';
+ if(*options != '\0')
+ root_ino = options;
+
+ options = ptr;
+ while(options){
+ ptr = strchr(options, ',');
+ if(ptr != NULL)
+ *ptr++ = '\0';
+ if(*options != '\0'){
+ if(!strcmp(options, "append"))
+ append = 1;
+ else printf("hostfs_args - unsupported option - %s\n",
+ options);
+ }
+ options = ptr;
+ }
+ return(0);
+}
+
/* Register the "hostfs=" switch on UML's kernel command line; the help text
 * below is shown by UML's built-in option help.
 */
__uml_setup("hostfs=", hostfs_args,
"hostfs=<root dir>,<flags>,...\n"
"    This is used to set hostfs parameters. The root directory argument\n"
"    is used to confine all hostfs mounts to within the specified directory\n"
"    tree on the host. If this isn't specified, then a user inside UML can\n"
"    mount anything on the host that's accessible to the user that's running\n"
"    it.\n"
"    The only flag currently supported is 'append', which specifies that all\n"
"    files opened by hostfs will be opened in append mode.\n\n"
);
+
/* Build the full host path for a dentry: the mount root's host_filename
 * followed by each dentry component down from the root.  The caller may ask
 * for `extra` spare bytes at the end (for appending another component) and
 * must kfree the result.  Returns NULL on allocation failure.
 */
static char *dentry_name(struct dentry *dentry, int extra)
{
	struct dentry *parent;
	char *root, *name;
	int len;

	/* First pass: measure "/<component>" for every level above us */
	len = 0;
	parent = dentry;
	while(parent->d_parent != parent){
		len += parent->d_name.len + 1;
		parent = parent->d_parent;
	}

	/* parent is now the mount root, which carries the host prefix */
	root = HOSTFS_I(parent->d_inode)->host_filename;
	len += strlen(root);
	name = kmalloc(len + extra + 1, GFP_KERNEL);
	if(name == NULL) return(NULL);

	/* Second pass: fill the buffer backwards, deepest component last.
	 * The strncpy calls copy exact component lengths with no NUL; the
	 * terminator was placed up front and each '/' separates components.
	 */
	name[len] = '\0';
	parent = dentry;
	while(parent->d_parent != parent){
		len -= parent->d_name.len + 1;
		name[len] = '/';
		strncpy(&name[len + 1], parent->d_name.name,
			parent->d_name.len);
		parent = parent->d_parent;
	}
	strncpy(name, root, strlen(root));
	return(name);
}
+
/* Build the host path for an inode by borrowing its first aliased dentry.
 * Assumes i_dentry is non-empty - callers must ensure a dentry exists
 * (read_inode checks this explicitly).
 */
static char *inode_name(struct inode *ino, int extra)
{
	struct dentry *dentry;

	dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
	return(dentry_name(dentry, extra));
}
+
/* Refresh an inode's metadata from the host file `name`.
 * Returns 0 or -errno from stat_file.
 */
static int read_name(struct inode *ino, char *name)
{
	/* The non-int inode fields are copied into ints by stat_file and
	 * then copied into the inode because passing the actual pointers
	 * in and having them treated as int * breaks on big-endian machines
	 */
	int err;
	int i_mode, i_nlink, i_blksize;
	unsigned long long i_size;
	unsigned long long i_ino;
	unsigned long long i_blocks;

	err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
			&ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
			&ino->i_ctime, &i_blksize, &i_blocks);
	if(err)
		return(err);

	ino->i_ino = i_ino;
	ino->i_mode = i_mode;
	ino->i_nlink = i_nlink;
	ino->i_size = i_size;
	ino->i_blksize = i_blksize;
	ino->i_blocks = i_blocks;
	/* On the root device, files owned by the UML user appear as root's */
	if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
		ino->i_uid = 0;
	return(0);
}
+
+static char *follow_link(char *link)
+{
+ int len, n;
+ char *name, *resolved, *end;
+
+ len = 64;
+ while(1){
+ n = -ENOMEM;
+ name = kmalloc(len, GFP_KERNEL);
+ if(name == NULL)
+ goto out;
+
+ n = do_readlink(link, name, len);
+ if(n < len)
+ break;
+ len *= 2;
+ kfree(name);
+ }
+ if(n < 0)
+ goto out_free;
+
+ if(*name == '/')
+ return(name);
+
+ end = strrchr(link, '/');
+ if(end == NULL)
+ return(name);
+
+ *(end + 1) = '\0';
+ len = strlen(link) + strlen(name) + 1;
+
+ resolved = kmalloc(len, GFP_KERNEL);
+ if(resolved == NULL){
+ n = -ENOMEM;
+ goto out_free;
+ }
+
+ sprintf(resolved, "%s%s", link, name);
+ kfree(name);
+ kfree(link);
+ return(resolved);
+
+ out_free:
+ kfree(name);
+ out:
+ return(ERR_PTR(n));
+}
+
/* Fill in an inode from the host, following a host-side symlink one level
 * if necessary.  Returns 0 or -errno; silently succeeds when no dentry is
 * attached yet (nothing to stat).
 */
static int read_inode(struct inode *ino)
{
	char *name;
	int err = 0;

	/* Unfortunately, we are called from iget() when we don't have a dentry
	 * allocated yet.
	 */
	if(list_empty(&ino->i_dentry))
		goto out;

	err = -ENOMEM;
	name = inode_name(ino, 0);
	if(name == NULL)
		goto out;

	/* follow_link takes ownership of name and returns a fresh buffer */
	if(file_type(name, NULL) == OS_TYPE_SYMLINK){
		name = follow_link(name);
		if(IS_ERR(name)){
			err = PTR_ERR(name);
			goto out;
		}
	}

	err = read_name(ino, name);
	kfree(name);
 out:
	return(err);
}
+
/* super_operations.statfs: report the host filesystem's stats for the
 * directory backing this mount.
 */
int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
{
	/* do_statfs uses struct statfs64 internally, but the linux kernel
	 * struct statfs still has 32-bit versions for most of these fields,
	 * so we convert them here
	 */
	int err;
	long long f_blocks;
	long long f_bfree;
	long long f_bavail;
	long long f_files;
	long long f_ffree;

	err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename,
			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
			&sf->f_namelen, sf->f_spare);
	if(err) return(err);
	sf->f_blocks = f_blocks;
	sf->f_bfree = f_bfree;
	sf->f_bavail = f_bavail;
	sf->f_files = f_files;
	sf->f_ffree = f_ffree;
	sf->f_type = HOSTFS_SUPER_MAGIC;
	return(0);
}
+
/* super_operations.alloc_inode: allocate the combined
 * hostfs_inode_info + inode object with no host file attached yet.
 */
static struct inode *hostfs_alloc_inode(struct super_block *sb)
{
	struct hostfs_inode_info *hi;

	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
	if(hi == NULL)
		return(NULL);

	*hi = ((struct hostfs_inode_info) { .host_filename	= NULL,
					    .fd			= -1,
					    .mode		= 0 });
	inode_init_once(&hi->vfs_inode);
	return(&hi->vfs_inode);
}
+
/* super_operations.destroy_inode: release the host fd and path (if any)
 * and free the containing hostfs_inode_info.
 */
static void hostfs_destroy_inode(struct inode *inode)
{
	if(HOSTFS_I(inode)->host_filename)
		kfree(HOSTFS_I(inode)->host_filename);

	/* close_file takes a pointer to the fd (see hostfs_user.c) */
	if(HOSTFS_I(inode)->fd != -1)
		close_file(&HOSTFS_I(inode)->fd);

	kfree(HOSTFS_I(inode));
}
+
/* super_operations.read_inode wrapper; errors from read_inode are
 * intentionally dropped here (the VFS hook returns void).
 */
static void hostfs_read_inode(struct inode *inode)
{
	read_inode(inode);
}
+
/* Superblock operations - unlisted hooks use VFS defaults. */
static struct super_operations hostfs_sbops = {
	.alloc_inode	= hostfs_alloc_inode,
	.destroy_inode	= hostfs_destroy_inode,
	.read_inode	= hostfs_read_inode,
	.statfs		= hostfs_statfs,
};
+
/* file_operations.readdir: iterate the host directory from f_pos, feeding
 * entries to filldir until it refuses one.  The host directory stream is
 * reopened for each call; f_pos holds the host telldir cookie.
 */
int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
{
	void *dir;
	char *name;
	unsigned long long next, ino;
	int error, len;

	name = dentry_name(file->f_dentry, 0);
	if(name == NULL) return(-ENOMEM);
	dir = open_dir(name, &error);
	kfree(name);
	if(dir == NULL) return(-error);
	next = file->f_pos;
	while((name = read_dir(dir, &next, &ino, &len)) != NULL){
		error = (*filldir)(ent, name, len, file->f_pos,
				   ino, DT_UNKNOWN);
		if(error) break;
		/* only advance f_pos once the entry was accepted */
		file->f_pos = next;
	}
	close_dir(dir);
	return(0);
}
+
/* file_operations.open: make sure the per-inode host fd is open with at
 * least the access this struct file needs.  One fd is shared by all opens
 * of the inode; access modes accumulate and the fd is reopened wider when
 * a new open needs more than the current fd allows.
 */
int hostfs_file_open(struct inode *ino, struct file *file)
{
	char *name;
	int mode = 0, r = 0, w = 0, fd;

	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
	/* Already open with sufficient access - nothing to do */
	if((mode & HOSTFS_I(ino)->mode) == mode)
		return(0);

	/* The file may already have been opened, but with the wrong access,
	 * so this resets things and reopens the file with the new access.
	 */
	if(HOSTFS_I(ino)->fd != -1){
		close_file(&HOSTFS_I(ino)->fd);
		HOSTFS_I(ino)->fd = -1;
	}

	HOSTFS_I(ino)->mode |= mode;
	if(HOSTFS_I(ino)->mode & FMODE_READ)
		r = 1;
	if(HOSTFS_I(ino)->mode & FMODE_WRITE)
		w = 1;
	/* Writers also get read access so pages can be brought in for
	 * partial-page writes.
	 */
	if(w)
		r = 1;

	name = dentry_name(file->f_dentry, 0);
	if(name == NULL)
		return(-ENOMEM);

	fd = open_file(name, r, w, append);
	kfree(name);
	if(fd < 0) return(fd);
	FILE_HOSTFS_I(file)->fd = fd;

	return(0);
}
+
/* file_operations.fsync: a no-op - data is pushed to the host by the
 * address_space operations, and host durability is left to the host.
 */
int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	return 0;
}
+
/* Regular-file operations: generic page-cache I/O over the hostfs
 * address_space ops, plus our open hook that manages the host fd.
 */
static struct file_operations hostfs_file_fops = {
	.llseek		= generic_file_llseek,
	.read		= generic_file_read,
	.write		= generic_file_write,
	.mmap		= generic_file_mmap,
	.open		= hostfs_file_open,
	.release	= NULL,
	.fsync		= hostfs_fsync,
};
+
/* Directory operations: readdir against the host, read returns -EISDIR. */
static struct file_operations hostfs_dir_fops = {
	.readdir	= hostfs_readdir,
	.read		= generic_read_dir,
};
+
/* address_space_operations.writepage: push one page-cache page to the host
 * file with pwrite.  The final (possibly partial) page only writes the
 * bytes inside i_size.
 */
int hostfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	char *buffer;
	unsigned long long base;
	int count = PAGE_CACHE_SIZE;
	int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
	int err;

	/* last page: trim the write to the tail inside i_size */
	if (page->index >= end_index)
		count = inode->i_size & (PAGE_CACHE_SIZE-1);

	buffer = kmap(page);
	base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;

	err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
	if(err != count){
		ClearPageUptodate(page);
		goto out;
	}

	/* write_file advanced base past the bytes written */
	if (base > inode->i_size)
		inode->i_size = base;

	if (PageError(page))
		ClearPageError(page);
	err = 0;

 out:
	kunmap(page);

	unlock_page(page);
	return err;
}
+
/* address_space_operations.readpage: fill one page from the host file,
 * zero-padding whatever pread couldn't supply (EOF).
 */
int hostfs_readpage(struct file *file, struct page *page)
{
	char *buffer;
	long long start;
	int err = 0;

	start = (long long) page->index << PAGE_CACHE_SHIFT;
	buffer = kmap(page);
	err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
			PAGE_CACHE_SIZE);
	if(err < 0) goto out;

	/* zero the part of the page beyond what the host returned */
	memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);

	flush_dcache_page(page);
	SetPageUptodate(page);
	if (PageError(page)) ClearPageError(page);
	err = 0;
 out:
	kunmap(page);
	unlock_page(page);
	return(err);
}
+
/* address_space_operations.prepare_write: before a partial-page write,
 * read in the regions of the page outside [from, to) so commit_write
 * doesn't clobber them with stale data.
 */
int hostfs_prepare_write(struct file *file, struct page *page,
			 unsigned int from, unsigned int to)
{
	char *buffer;
	long long start, tmp;
	int err;

	start = (long long) page->index << PAGE_CACHE_SHIFT;
	buffer = kmap(page);
	/* head of the page, before the write region */
	if(from != 0){
		tmp = start;
		err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer,
				from);
		if(err < 0) goto out;
	}
	/* tail of the page, after the write region */
	if(to != PAGE_CACHE_SIZE){
		start += to;
		err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to,
				PAGE_CACHE_SIZE - to);
		if(err < 0) goto out;
	}
	err = 0;
 out:
	kunmap(page);
	return(err);
}
+
+int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
+ unsigned to)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ char *buffer;
+ long long start;
+ int err = 0;
+
+ start = (long long) (page->index << PAGE_CACHE_SHIFT) + from;
+ buffer = kmap(page);
+ err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from,
+ to - from);
+ if(err > 0) err = 0;
+ if(!err && (start > inode->i_size))
+ inode->i_size = start;
+
+ kunmap(page);
+ return(err);
+}
+
/* Page-cache operations for regular files */
static struct address_space_operations hostfs_aops = {
	.writepage 	= hostfs_writepage,
	.readpage	= hostfs_readpage,
/*	.set_page_dirty = __set_page_dirty_nobuffers, */
	.prepare_write	= hostfs_prepare_write,
	.commit_write	= hostfs_commit_write
};
+
/* Set up a freshly iget'd inode's operation tables (and device numbers for
 * special files) according to the type of the host object behind `dentry`.
 * A NULL dentry means the mount root, which is always a directory.
 * Returns 0 or -ENOMEM.
 */
static int init_inode(struct inode *inode, struct dentry *dentry)
{
	char *name;
	int type, err = -ENOMEM, rdev;

	if(dentry){
		name = dentry_name(dentry, 0);
		if(name == NULL)
			goto out;
		type = file_type(name, &rdev);
		kfree(name);
	}
	else type = OS_TYPE_DIR;

	err = 0;
	if(type == OS_TYPE_SYMLINK)
		inode->i_op = &page_symlink_inode_operations;
	else if(type == OS_TYPE_DIR)
		inode->i_op = &hostfs_dir_iops;
	else inode->i_op = &hostfs_iops;

	if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
	else inode->i_fop = &hostfs_file_fops;

	if(type == OS_TYPE_SYMLINK)
		inode->i_mapping->a_ops = &hostfs_link_aops;
	else inode->i_mapping->a_ops = &hostfs_aops;

	/* rdev was filled in by file_type for the device cases below */
	switch (type) {
	case OS_TYPE_CHARDEV:
		init_special_inode(inode, S_IFCHR, rdev);
		break;
	case OS_TYPE_BLOCKDEV:
		init_special_inode(inode, S_IFBLK, rdev);
		break;
	case OS_TYPE_FIFO:
		init_special_inode(inode, S_IFIFO, 0);
		break;
	case OS_TYPE_SOCK:
		init_special_inode(inode, S_IFSOCK, 0);
		break;
	}
 out:
	return(err);
}
+
/* inode_operations.create: create a regular file on the host with the
 * requested permission bits, attach a new inode for it, and keep the host
 * fd open for immediate use with read/write access.
 */
int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
		  struct nameidata *nd)
{
	struct inode *inode;
	char *name;
	int error, fd;

	error = -ENOMEM;
	inode = iget(dir->i_sb, 0);
	if(inode == NULL) goto out;

	error = init_inode(inode, dentry);
	if(error)
		goto out_put;

	error = -ENOMEM;
	name = dentry_name(dentry, 0);
	if(name == NULL)
		goto out_put;

	/* file_create decomposes mode into individual permission flags */
	fd = file_create(name,
			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
	if(fd < 0)
		error = fd;
	else error = read_name(inode, name);

	kfree(name);
	if(error)
		goto out_put;

	HOSTFS_I(inode)->fd = fd;
	HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
	d_instantiate(dentry, inode);
	return(0);

 out_put:
	iput(inode);
 out:
	return(error);
}
+
/* inode_operations.lookup: resolve one name in a hostfs directory.  A host
 * ENOENT becomes a negative dentry (d_add with NULL inode), matching VFS
 * convention; other errors propagate as ERR_PTR.
 */
struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
			     struct nameidata *nd)
{
	struct inode *inode;
	char *name;
	int err;

	err = -ENOMEM;
	inode = iget(ino->i_sb, 0);
	if(inode == NULL)
		goto out;

	err = init_inode(inode, dentry);
	if(err)
		goto out_put;

	err = -ENOMEM;
	name = dentry_name(dentry, 0);
	if(name == NULL)
		goto out_put;

	err = read_name(inode, name);
	kfree(name);
	if(err == -ENOENT){
		/* no such host file: cache a negative dentry instead */
		iput(inode);
		inode = NULL;
	}
	else if(err)
		goto out_put;

	d_add(dentry, inode);
	dentry->d_op = &hostfs_dentry_ops;
	return(NULL);

 out_put:
	iput(inode);
 out:
	return(ERR_PTR(err));
}
+
/* Build "<dir path>/<dentry component>" for an entry inside directory
 * inode `ino`.  The extra space for "/<name>" was reserved by inode_name's
 * `extra` argument; caller kfrees the result.  Returns NULL on -ENOMEM.
 */
static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
{
	char *file;
	int len;

	file = inode_name(ino, dentry->d_name.len + 1);
	if(file == NULL) return(NULL);
	strcat(file, "/");
	len = strlen(file);
	/* d_name.name is not NUL-terminated at d_name.len; copy exactly
	 * that many bytes and terminate by hand.
	 */
	strncat(file, dentry->d_name.name, dentry->d_name.len);
	file[len + dentry->d_name.len] = '\0';
	return(file);
}
+
/* inode_operations.link: create a hard link on the host.
 * `to` is the existing file's dentry, `from` is the new name in `ino`.
 */
int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
{
	char *from_name, *to_name;
	int err;

	if((from_name = inode_dentry_name(ino, from)) == NULL)
		return(-ENOMEM);
	to_name = dentry_name(to, 0);
	if(to_name == NULL){
		kfree(from_name);
		return(-ENOMEM);
	}
	err = link_file(to_name, from_name);
	kfree(from_name);
	kfree(to_name);
	return(err);
}
+
+int hostfs_unlink(struct inode *ino, struct dentry *dentry)
+{
+ char *file;
+ int err;
+
+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
+ if(append)
+ return(-EPERM);
+
+ err = unlink_file(file);
+ kfree(file);
+ return(err);
+}
+
+int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
+{
+ char *file;
+ int err;
+
+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
+ err = make_symlink(file, to);
+ kfree(file);
+ return(err);
+}
+
+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
+{
+ char *file;
+ int err;
+
+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
+ err = do_mkdir(file, mode);
+ kfree(file);
+ return(err);
+}
+
+int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
+{
+ char *file;
+ int err;
+
+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
+ err = do_rmdir(file);
+ kfree(file);
+ return(err);
+}
+
/* inode_operations.mknod: create a device node / fifo / socket on the host
 * and attach an inode for it.
 */
int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *inode;
	char *name;
	int err = -ENOMEM;

	inode = iget(dir->i_sb, 0);
	if(inode == NULL)
		goto out;

	err = init_inode(inode, dentry);
	if(err)
		goto out_put;

	err = -ENOMEM;
	name = dentry_name(dentry, 0);
	if(name == NULL)
		goto out_put;

	init_special_inode(inode, mode, dev);
	err = do_mknod(name, mode, dev);
	if(err)
		goto out_free;

	/* pick up the inode number etc. from the newly created host node */
	err = read_name(inode, name);
	kfree(name);
	if(err)
		goto out_put;

	d_instantiate(dentry, inode);
	return(0);

 out_free:
	kfree(name);
 out_put:
	iput(inode);
 out:
	return(err);
}
+
/* inode_operations.rename: rename on the host via rename(2), which
 * atomically replaces the target if it exists.
 */
int hostfs_rename(struct inode *from_ino, struct dentry *from,
		  struct inode *to_ino, struct dentry *to)
{
	char *from_name, *to_name;
	int err;

	if((from_name = inode_dentry_name(from_ino, from)) == NULL)
		return(-ENOMEM);
	if((to_name = inode_dentry_name(to_ino, to)) == NULL){
		kfree(from_name);
		return(-ENOMEM);
	}
	err = rename_file(from_name, to_name);
	kfree(from_name);
	kfree(to_name);
	return(err);
}
+
/* inode_operations.truncate: intentionally unimplemented - size changes
 * reach the host through hostfs_setattr/set_attr's HOSTFS_ATTR_SIZE path.
 */
void hostfs_truncate(struct inode *ino)
{
	not_implemented();
}
+
/* inode_operations.permission: ask the host (access(2)) first, then apply
 * the normal in-kernel mode-bit check on top.
 */
int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
{
	char *name;
	int r = 0, w = 0, x = 0, err;

	if(desired & MAY_READ) r = 1;
	if(desired & MAY_WRITE) w = 1;
	if(desired & MAY_EXEC) x = 1;
	name = inode_name(ino, 0);
	if(name == NULL) return(-ENOMEM);
	err = access_file(name, r, w, x);
	kfree(name);
	if(!err) err = vfs_permission(ino, desired);
	return(err);
}
+
/* inode_operations.setattr: translate a VFS iattr into a hostfs_iattr,
 * apply it on the host via set_attr, then update the in-core inode.
 * Size changes are suppressed entirely in append mode.
 */
int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct hostfs_iattr attrs;
	char *name;
	int err;

	if(append)
		attr->ia_valid &= ~ATTR_SIZE;

	attrs.ia_valid = 0;
	if(attr->ia_valid & ATTR_MODE){
		attrs.ia_valid |= HOSTFS_ATTR_MODE;
		attrs.ia_mode = attr->ia_mode;
	}
	if(attr->ia_valid & ATTR_UID){
		/* Undo the root<->UML-user mapping done by read_name on the
		 * root device: a chown to root becomes a chown to the user
		 * actually running UML.
		 */
		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
		   (attr->ia_uid == 0))
			attr->ia_uid = getuid();
		attrs.ia_valid |= HOSTFS_ATTR_UID;
		attrs.ia_uid = attr->ia_uid;
	}
	if(attr->ia_valid & ATTR_GID){
		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
		   (attr->ia_gid == 0))
			attr->ia_gid = getuid();
		attrs.ia_valid |= HOSTFS_ATTR_GID;
		attrs.ia_gid = attr->ia_gid;
	}
	if(attr->ia_valid & ATTR_SIZE){
		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
		attrs.ia_size = attr->ia_size;
	}
	if(attr->ia_valid & ATTR_ATIME){
		attrs.ia_valid |= HOSTFS_ATTR_ATIME;
		attrs.ia_atime = attr->ia_atime;
	}
	if(attr->ia_valid & ATTR_MTIME){
		attrs.ia_valid |= HOSTFS_ATTR_MTIME;
		attrs.ia_mtime = attr->ia_mtime;
	}
	if(attr->ia_valid & ATTR_CTIME){
		attrs.ia_valid |= HOSTFS_ATTR_CTIME;
		attrs.ia_ctime = attr->ia_ctime;
	}
	if(attr->ia_valid & ATTR_ATIME_SET){
		attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
	}
	if(attr->ia_valid & ATTR_MTIME_SET){
		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
	}
	name = dentry_name(dentry, 0);
	if(name == NULL) return(-ENOMEM);
	err = set_attr(name, &attrs);
	kfree(name);
	if(err)
		return(err);

	return(inode_setattr(dentry->d_inode, attr));
}
+
/* inode_operations.getattr: the in-core inode is kept in sync with the
 * host (read_inode/read_name), so the generic fill is sufficient.
 */
int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
		   struct kstat *stat)
{
	generic_fillattr(dentry->d_inode, stat);
	return(0);
}
+
/* Inode operations for non-directories (no .lookup) */
static struct inode_operations hostfs_iops = {
	.create		= hostfs_create,
	.link		= hostfs_link,
	.unlink		= hostfs_unlink,
	.symlink	= hostfs_symlink,
	.mkdir		= hostfs_mkdir,
	.rmdir		= hostfs_rmdir,
	.mknod		= hostfs_mknod,
	.rename		= hostfs_rename,
	.truncate	= hostfs_truncate,
	.permission	= hostfs_permission,
	.setattr	= hostfs_setattr,
	.getattr	= hostfs_getattr,
};
+
/* Inode operations for directories - same as hostfs_iops plus .lookup */
static struct inode_operations hostfs_dir_iops = {
	.create		= hostfs_create,
	.lookup		= hostfs_lookup,
	.link		= hostfs_link,
	.unlink		= hostfs_unlink,
	.symlink	= hostfs_symlink,
	.mkdir		= hostfs_mkdir,
	.rmdir		= hostfs_rmdir,
	.mknod		= hostfs_mknod,
	.rename		= hostfs_rename,
	.truncate	= hostfs_truncate,
	.permission	= hostfs_permission,
	.setattr	= hostfs_setattr,
	.getattr	= hostfs_getattr,
};
+
+int hostfs_link_readpage(struct file *file, struct page *page)
+{
+ char *buffer, *name;
+ long long start;
+ int err;
+
+ start = page->index << PAGE_CACHE_SHIFT;
+ buffer = kmap(page);
+ name = inode_name(page->mapping->host, 0);
+ if(name == NULL) return(-ENOMEM);
+ err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
+ kfree(name);
+ if(err == PAGE_CACHE_SIZE)
+ err = -E2BIG;
+ else if(err > 0){
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ if (PageError(page)) ClearPageError(page);
+ err = 0;
+ }
+ kunmap(page);
+ unlock_page(page);
+ return(err);
+}
+
/* Page-cache operations for symlink inodes - read-only */
static struct address_space_operations hostfs_link_aops = {
	.readpage	= hostfs_link_readpage,
};
+
+static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
+{
+ struct inode *root_inode;
+ char *name, *data = d;
+ int err;
+
+ sb->s_blocksize = 1024;
+ sb->s_blocksize_bits = 10;
+ sb->s_magic = HOSTFS_SUPER_MAGIC;
+ sb->s_op = &hostfs_sbops;
+
+ if((data == NULL) || (*data == '\0'))
+ data = root_ino;
+
+ err = -ENOMEM;
+ name = kmalloc(strlen(data) + 1, GFP_KERNEL);
+ if(name == NULL)
+ goto out;
+
+ strcpy(name, data);
+
+ root_inode = iget(sb, 0);
+ if(root_inode == NULL)
+ goto out_free;
+
+ err = init_inode(root_inode, NULL);
+ if(err)
+ goto out_put;
+
+ HOSTFS_I(root_inode)->host_filename = name;
+
+ err = -ENOMEM;
+ sb->s_root = d_alloc_root(root_inode);
+ if(sb->s_root == NULL)
+ goto out_put;
+
+ err = read_inode(root_inode);
+ if(err)
+ goto out_put;
+
+ return(0);
+
+ out_put:
+ iput(root_inode);
+ out_free:
+ kfree(name);
+ out:
+ return(err);
+}
+
/* file_system_type.get_sb: hostfs has no backing block device, so mount
 * via get_sb_nodev with our common fill routine.
 */
static struct super_block *hostfs_read_sb(struct file_system_type *type,
					  int flags, const char *dev_name,
					  void *data)
{
	return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common));
}
+
/* Filesystem registration record for "mount -t hostfs" */
static struct file_system_type hostfs_type = {
	.owner 		= THIS_MODULE,
	.name 		= "hostfs",
	.get_sb 	= hostfs_read_sb,
	.kill_sb	= kill_anon_super,
	.fs_flags 	= 0,
};
+
/* Module entry point: register the filesystem type */
static int __init init_hostfs(void)
{
	return(register_filesystem(&hostfs_type));
}

/* Module exit point: unregister on rmmod */
static void __exit exit_hostfs(void)
{
	unregister_filesystem(&hostfs_type);
}

module_init(init_hostfs)
module_exit(exit_hostfs)
MODULE_LICENSE("GPL");
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+/*
+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <utime.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include "hostfs.h"
+#include "kern_util.h"
+#include "user.h"
+
/* lstat64 the host path and copy the requested fields out through the
 * non-NULL out-parameters.  Host stat64 only has second-resolution times,
 * so tv_nsec is always reported as 0.  Returns 0 or -errno.
 */
int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
	      int *nlink_out, int *uid_out, int *gid_out,
	      unsigned long long *size_out, struct timespec *atime_out,
	      struct timespec *mtime_out, struct timespec *ctime_out,
	      int *blksize_out, unsigned long long *blocks_out)
{
	struct stat64 buf;

	if(lstat64(path, &buf) < 0)
		return(-errno);

	/* See the Makefile for why STAT64_INO_FIELD is passed in
	 * by the build
	 */
	if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD;
	if(mode_out != NULL) *mode_out = buf.st_mode;
	if(nlink_out != NULL) *nlink_out = buf.st_nlink;
	if(uid_out != NULL) *uid_out = buf.st_uid;
	if(gid_out != NULL) *gid_out = buf.st_gid;
	if(size_out != NULL) *size_out = buf.st_size;
	if(atime_out != NULL) {
		atime_out->tv_sec = buf.st_atime;
		atime_out->tv_nsec = 0;
	}
	if(mtime_out != NULL) {
		mtime_out->tv_sec = buf.st_mtime;
		mtime_out->tv_nsec = 0;
	}
	if(ctime_out != NULL) {
		ctime_out->tv_sec = buf.st_ctime;
		ctime_out->tv_nsec = 0;
	}
	if(blksize_out != NULL) *blksize_out = buf.st_blksize;
	if(blocks_out != NULL) *blocks_out = buf.st_blocks;
	return(0);
}
+
/* Classify the host object at `path` (without following symlinks) into one
 * of the OS_TYPE_* constants from os.h, optionally returning st_rdev for
 * device nodes.  Returns the type or -errno.
 */
int file_type(const char *path, int *rdev)
{
	struct stat64 buf;

	if(lstat64(path, &buf) < 0)
		return(-errno);
	if(rdev != NULL)
		*rdev = buf.st_rdev;

	if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR);
	else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK);
	else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV);
	else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV);
	else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO);
	else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK);
	else return(OS_TYPE_FILE);
}
+
/* Check r/w/x permission on a host path with access(2).
 * Returns 0 when every requested kind of access is allowed, -errno if not.
 */
int access_file(char *path, int r, int w, int x)
{
	int amode = 0;

	if(r)
		amode |= R_OK;
	if(w)
		amode |= W_OK;
	if(x)
		amode |= X_OK;
	return(access(path, amode) != 0 ? -errno : 0);
}
+
/* Open a host file with large-file support.  At least one of r/w must be
 * set - the kernel-side caller guarantees this, hence the panic.  The
 * append flag maps to O_APPEND.  Returns the fd or -errno.
 */
int open_file(char *path, int r, int w, int append)
{
	int mode = 0, fd;

	if(r && !w)
		mode = O_RDONLY;
	else if(!r && w)
		mode = O_WRONLY;
	else if(r && w)
		mode = O_RDWR;
	else panic("Impossible mode in open_file");

	if(append)
		mode |= O_APPEND;
	fd = open64(path, mode);
	if(fd < 0) return(-errno);
	else return(fd);
}
+
/* Open a host directory stream.  On failure NULL is returned and *err_out
 * holds the errno; on success *err_out is still written (with whatever
 * errno happened to be) but is meaningless.
 */
void *open_dir(char *path, int *err_out)
{
	DIR *stream = opendir(path);

	*err_out = errno;
	return(stream);
}
+
/* Read one directory entry at position *pos, returning its name (which
 * points into readdir's static/stream storage - valid only until the next
 * call) or NULL at end of directory.  *pos is advanced to the next telldir
 * cookie; the entry's inode number and name length are returned through
 * ino_out and len_out.
 */
char *read_dir(void *stream, unsigned long long *pos,
	       unsigned long long *ino_out, int *len_out)
{
	DIR *dir = stream;
	struct dirent *ent;

	seekdir(dir, *pos);
	ent = readdir(dir);
	if(ent == NULL) return(NULL);
	*len_out = strlen(ent->d_name);
	*ino_out = ent->d_ino;
	*pos = telldir(dir);
	return(ent->d_name);
}
+
/* pread up to len bytes at *offset, advancing *offset by the amount
 * actually read.  Returns the byte count (possibly short, 0 at EOF)
 * or -errno.
 */
int read_file(int fd, unsigned long long *offset, char *buf, int len)
{
	int got = pread64(fd, buf, len, *offset);

	if(got < 0)
		return(-errno);
	*offset += got;
	return(got);
}
+
/* pwrite up to len bytes at *offset, advancing *offset by the amount
 * actually written.  Returns the byte count (possibly short) or -errno.
 */
int write_file(int fd, unsigned long long *offset, const char *buf, int len)
{
	int put = pwrite64(fd, buf, len, *offset);

	if(put < 0)
		return(-errno);
	*offset += put;
	return(put);
}
+
/* Seek the host fd.  Returns 0 or -errno.
 *
 * The result of lseek64 must be kept in a 64-bit variable: the original
 * stored it in an int, so a successful seek past 2GB could look negative
 * and be reported as a spurious failure (with a stale errno), or a real
 * failure could be masked by truncation.
 */
int lseek_file(int fd, long long offset, int whence)
{
	long long ret;

	ret = lseek64(fd, offset, whence);
	if(ret < 0)
		return(-errno);
	return(0);
}
+
/* Close a host fd passed by address (the kernel side hands in a pointer
 * to the int fd stored in hostfs_inode_info).
 */
void close_file(void *stream)
{
	int fd = *((int *) stream);

	close(fd);
}
+
/* Close a directory stream opened by open_dir. */
void close_dir(void *stream)
{
	DIR *dir = stream;

	closedir(dir);
}
+
/* Create (or open, if it already exists) a host file read-write, building
 * the mode_t from the nine individual permission flags the kernel side
 * passes in.  Returns the open fd or -errno.
 */
int file_create(char *name, int ur, int uw, int ux, int gr,
		int gw, int gx, int or, int ow, int ox)
{
	int mode, fd;

	mode = 0;
	mode |= ur ? S_IRUSR : 0;
	mode |= uw ? S_IWUSR : 0;
	mode |= ux ? S_IXUSR : 0;
	mode |= gr ? S_IRGRP : 0;
	mode |= gw ? S_IWGRP : 0;
	mode |= gx ? S_IXGRP : 0;
	mode |= or ? S_IROTH : 0;
	mode |= ow ? S_IWOTH : 0;
	mode |= ox ? S_IXOTH : 0;
	fd = open64(name, O_CREAT | O_RDWR, mode);
	if(fd < 0)
		return(-errno);
	return(fd);
}
+
/* Apply the attributes flagged in attrs->ia_valid to the host file, using
 * chmod/chown/truncate/utime.  utime(2) sets both timestamps at once, so
 * when only one of atime/mtime is being set the other is first re-read
 * from the host and written back unchanged.  Returns 0 or -errno.
 */
int set_attr(const char *file, struct hostfs_iattr *attrs)
{
	struct utimbuf buf;
	int err, ma;

	if(attrs->ia_valid & HOSTFS_ATTR_MODE){
		if(chmod(file, attrs->ia_mode) != 0) return(-errno);
	}
	if(attrs->ia_valid & HOSTFS_ATTR_UID){
		if(chown(file, attrs->ia_uid, -1)) return(-errno);
	}
	if(attrs->ia_valid & HOSTFS_ATTR_GID){
		if(chown(file, -1, attrs->ia_gid)) return(-errno);
	}
	if(attrs->ia_valid & HOSTFS_ATTR_SIZE){
		if(truncate(file, attrs->ia_size)) return(-errno);
	}
	/* both timestamps requested: one utime call does it */
	ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET;
	if((attrs->ia_valid & ma) == ma){
		buf.actime = attrs->ia_atime.tv_sec;
		buf.modtime = attrs->ia_mtime.tv_sec;
		if(utime(file, &buf) != 0) return(-errno);
	}
	else {
		struct timespec ts;

		/* setting atime only: preserve the host's current mtime */
		if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){
			err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
					NULL, NULL, &ts, NULL, NULL, NULL);
			if(err != 0)
				return(err);
			buf.actime = attrs->ia_atime.tv_sec;
			buf.modtime = ts.tv_sec;
			if(utime(file, &buf) != 0)
				return(-errno);
		}
		/* setting mtime only: preserve the host's current atime */
		if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){
			err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
					NULL, &ts, NULL, NULL, NULL, NULL);
			if(err != 0)
				return(err);
			buf.actime = ts.tv_sec;
			buf.modtime = attrs->ia_mtime.tv_sec;
			if(utime(file, &buf) != 0)
				return(-errno);
		}
	}
	/* ctime cannot be set on the host - deliberately ignored */
	if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ;
	/* read the resulting times back so the caller's iattr matches */
	if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
				&attrs->ia_atime, &attrs->ia_mtime, NULL,
				NULL, NULL);
		if(err != 0) return(err);
	}
	return(0);
}
+
+/* Create a symlink named "from" whose target is "to".  0 or -errno. */
+int make_symlink(const char *from, const char *to)
+{
+ if(symlink(to, from))
+ return(-errno);
+ return(0);
+}
+
+/* Remove a name from the host filesystem.  Returns 0 or -errno. */
+int unlink_file(const char *file)
+{
+ if(unlink(file))
+ return(-errno);
+ return(0);
+}
+
+/* Create a host directory with the given mode.  Returns 0 or -errno. */
+int do_mkdir(const char *file, int mode)
+{
+ if(mkdir(file, mode))
+ return(-errno);
+ return(0);
+}
+
+/* Remove an empty host directory.  Returns 0 or -errno. */
+int do_rmdir(const char *file)
+{
+ if(rmdir(file))
+ return(-errno);
+ return(0);
+}
+
+/* Create a host device node / special file.  Returns 0 or -errno. */
+int do_mknod(const char *file, int mode, int dev)
+{
+ if(mknod(file, mode, dev))
+ return(-errno);
+ return(0);
+}
+
+/* Create a hard link "from" referring to "to".  Returns 0 or -errno. */
+int link_file(const char *to, const char *from)
+{
+ if(link(to, from))
+ return(-errno);
+ return(0);
+}
+
+/* Read a symlink target into buf (at most size bytes).  Returns the
+ * target length or -errno.  Matching readlink() semantics, the result
+ * is NUL-terminated only when it fits in fewer than size bytes. */
+int do_readlink(char *file, char *buf, int size)
+{
+ int n;
+
+ n = readlink(file, buf, size);
+ if(n < 0)
+ return(-errno);
+ if(n < size)
+ buf[n] = '\0';
+ return(n);
+}
+
+/* Rename a host file or directory.  Returns 0 or -errno. */
+int rename_file(char *from, char *to)
+{
+ if(rename(from, to) < 0)
+ return(-errno);
+ return(0);
+}
+
+/* statfs64() the host filesystem at root and scatter the results into
+ * the caller's output parameters.  Returns 0 or -errno.
+ * NOTE(review): fsid_size is assumed non-negative; a negative value
+ * would compare as a huge unsigned size against sizeof(buf.f_fsid) -
+ * confirm against callers. */
+int do_statfs(char *root, long *bsize_out, long long *blocks_out,
+ long long *bfree_out, long long *bavail_out,
+ long long *files_out, long long *ffree_out,
+ void *fsid_out, int fsid_size, long *namelen_out,
+ long *spare_out)
+{
+ struct statfs64 buf;
+ int err;
+
+ err = statfs64(root, &buf);
+ if(err < 0) return(-errno);
+ *bsize_out = buf.f_bsize;
+ *blocks_out = buf.f_blocks;
+ *bfree_out = buf.f_bfree;
+ *bavail_out = buf.f_bavail;
+ *files_out = buf.f_files;
+ *ffree_out = buf.f_ffree;
+ /* copy at most fsid_size bytes of the fsid */
+ memcpy(fsid_out, &buf.f_fsid,
+ sizeof(buf.f_fsid) > fsid_size ? fsid_size :
+ sizeof(buf.f_fsid));
+ *namelen_out = buf.f_namelen;
+ spare_out[0] = buf.f_spare[0];
+ spare_out[1] = buf.f_spare[1];
+ spare_out[2] = buf.f_spare[2];
+ spare_out[3] = buf.f_spare[3];
+ spare_out[4] = buf.f_spare[4];
+ spare_out[5] = buf.f_spare[5];
+ return(0);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+#
+# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com)
+# Licensed under the GPL
+#
+
+# The hppfs module is built from a single object file.
+hppfs-objs := hppfs_kern.o
+
+obj-y =
+obj-$(CONFIG_HPPFS) += hppfs.o
+
+# Empty stub targets kept for the UML build system.
+clean:
+
+modules:
+
+fastdep:
+
+dep:
+
+archmrproper: clean
--- /dev/null
+/*
+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/dcache.h>
+#include <linux/statfs.h>
+#include <asm/uaccess.h>
+#include <asm/fcntl.h>
+#include "os.h"
+
+static int init_inode(struct inode *inode, struct dentry *dentry);
+
+/* One page worth of buffered output; pages are chained via "list". */
+struct hppfs_data {
+ struct list_head list;
+ char contents[PAGE_SIZE - sizeof(struct list_head)];
+};
+
+/* Per-open-file state: the underlying /proc file, an optional host fd
+ * overriding it, and optionally prefiltered contents plus its length. */
+struct hppfs_private {
+ struct file proc_file;
+ int host_fd;
+ loff_t len;
+ struct hppfs_data *contents;
+};
+
+/* hppfs inode together with the /proc dentry it shadows. */
+struct hppfs_inode_info {
+ struct dentry *proc_dentry;
+ struct inode vfs_inode;
+};
+
+/* Map a VFS inode back to its containing hppfs_inode_info. */
+static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
+{
+ return(list_entry(inode, struct hppfs_inode_info, vfs_inode));
+}
+
+#define HPPFS_SUPER_MAGIC 0xb00000ee
+
+static struct super_operations hppfs_sbops;
+
+/* Return 1 if dentry is a top-level, all-digit entry of an hppfs
+ * mount, i.e. a /proc-style pid directory; 0 otherwise. */
+static int is_pid(struct dentry *dentry)
+{
+ struct super_block *sb;
+ int i;
+
+ sb = dentry->d_sb;
+ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root))
+ return(0);
+
+ for(i = 0; i < dentry->d_name.len; i++){
+ if(!isdigit(dentry->d_name.name[i]))
+ return(0);
+ }
+ return(1);
+}
+
+/* Build the path of dentry under the host "proc" tree as a kmalloced
+ * string with "extra" spare bytes at the end.  Pid components are
+ * replaced with the literal "pid".  Returns NULL on allocation
+ * failure; the caller frees the result. */
+static char *dentry_name(struct dentry *dentry, int extra)
+{
+ struct dentry *parent;
+ char *root, *name;
+ const char *seg_name;
+ int len, seg_len;
+
+ /* first pass: measure the total path length */
+ len = 0;
+ parent = dentry;
+ while(parent->d_parent != parent){
+ if(is_pid(parent))
+ len += strlen("pid") + 1;
+ else len += parent->d_name.len + 1;
+ parent = parent->d_parent;
+ }
+
+ root = "proc";
+ len += strlen(root);
+ name = kmalloc(len + extra + 1, GFP_KERNEL);
+ if(name == NULL) return(NULL);
+
+ /* second pass: fill the buffer backwards, component by component */
+ name[len] = '\0';
+ parent = dentry;
+ while(parent->d_parent != parent){
+ if(is_pid(parent)){
+ seg_name = "pid";
+ seg_len = strlen("pid");
+ }
+ else {
+ seg_name = parent->d_name.name;
+ seg_len = parent->d_name.len;
+ }
+
+ len -= seg_len + 1;
+ name[len] = '/';
+ strncpy(&name[len + 1], seg_name, seg_len);
+ parent = parent->d_parent;
+ }
+ strncpy(name, root, strlen(root));
+ return(name);
+}
+
+/* hppfs adds no dentry operations of its own. */
+struct dentry_operations hppfs_dentry_ops = {
+};
+
+/* Check for a "<entry>[/file]/remove" marker file on the host, which
+ * flags the /proc entry as deleted.  Returns 1 if present, 0 if not,
+ * -ENOMEM on allocation failure.
+ * NOTE(review): "fd > 0" treats a (valid) fd of 0 as absent, and the
+ * -ENOMEM return is truthy to callers that don't check - confirm. */
+static int file_removed(struct dentry *dentry, const char *file)
+{
+ char *host_file;
+ int extra, fd;
+
+ extra = 0;
+ if(file != NULL) extra += strlen(file) + 1;
+
+ host_file = dentry_name(dentry, extra + strlen("/remove"));
+ if(host_file == NULL){
+ printk("file_removed : allocation failed\n");
+ return(-ENOMEM);
+ }
+
+ if(file != NULL){
+ strcat(host_file, "/");
+ strcat(host_file, file);
+ }
+ strcat(host_file, "/remove");
+
+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
+ kfree(host_file);
+ if(fd > 0){
+ os_close_file(fd);
+ return(1);
+ }
+ return(0);
+}
+
+/* Mirror ownership, timestamps, size and mode from the underlying
+ * /proc inode into the hppfs inode so stat() reflects /proc. */
+static void hppfs_read_inode(struct inode *ino)
+{
+ struct inode *proc_ino;
+
+ if(HPPFS_I(ino)->proc_dentry == NULL)
+ return;
+
+ proc_ino = HPPFS_I(ino)->proc_dentry->d_inode;
+ ino->i_uid = proc_ino->i_uid;
+ ino->i_gid = proc_ino->i_gid;
+ ino->i_atime = proc_ino->i_atime;
+ ino->i_mtime = proc_ino->i_mtime;
+ ino->i_ctime = proc_ino->i_ctime;
+ ino->i_ino = proc_ino->i_ino;
+ ino->i_mode = proc_ino->i_mode;
+ ino->i_nlink = proc_ino->i_nlink;
+ ino->i_size = proc_ino->i_size;
+ ino->i_blksize = proc_ino->i_blksize;
+ ino->i_blocks = proc_ino->i_blocks;
+}
+
+/* ->lookup for hppfs directories: honor host-side removals, resolve
+ * the name in the underlying /proc dentry tree, then wrap the /proc
+ * dentry in a new hppfs inode. */
+static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct dentry *proc_dentry, *new, *parent;
+ struct inode *inode;
+ int err, deleted;
+
+ deleted = file_removed(dentry, NULL);
+ if(deleted < 0)
+ return(ERR_PTR(deleted));
+ else if(deleted)
+ return(ERR_PTR(-ENOENT));
+
+ err = -ENOMEM;
+ parent = HPPFS_I(ino)->proc_dentry;
+ /* look up (or allocate and instantiate) the /proc dentry under the
+ * parent's i_sem, as procfs' own lookup path would */
+ down(&parent->d_inode->i_sem);
+ proc_dentry = d_lookup(parent, &dentry->d_name);
+ if(proc_dentry == NULL){
+ proc_dentry = d_alloc(parent, &dentry->d_name);
+ if(proc_dentry == NULL){
+ up(&parent->d_inode->i_sem);
+ goto out;
+ }
+ new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
+ proc_dentry, NULL);
+ if(new){
+ dput(proc_dentry);
+ proc_dentry = new;
+ }
+ }
+ up(&parent->d_inode->i_sem);
+
+ if(IS_ERR(proc_dentry))
+ return(proc_dentry);
+
+ inode = iget(ino->i_sb, 0);
+ if(inode == NULL)
+ goto out_dput;
+
+ err = init_inode(inode, proc_dentry);
+ if(err)
+ goto out_put;
+
+ hppfs_read_inode(inode);
+
+ d_add(dentry, inode);
+ dentry->d_op = &hppfs_dentry_ops;
+ return(NULL);
+
+ out_put:
+ iput(inode);
+ out_dput:
+ dput(proc_dentry);
+ out:
+ return(ERR_PTR(err));
+}
+
+/* hppfs files add no inode operations of their own. */
+static struct inode_operations hppfs_file_iops = {
+};
+
+/* Read from the underlying /proc file.  When is_user is 0 the buffer
+ * is a kernel pointer, so the fs segment is widened to KERNEL_DS
+ * around the call.  The /proc file position is advanced and, if ppos
+ * is non-NULL, copied back out. */
+static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
+ loff_t *ppos, int is_user)
+{
+ ssize_t (*read)(struct file *, char *, size_t, loff_t *);
+ ssize_t n;
+
+ read = file->f_dentry->d_inode->i_fop->read;
+
+ if(!is_user)
+ set_fs(KERNEL_DS);
+
+ n = (*read)(file, buf, count, &file->f_pos);
+
+ if(!is_user)
+ set_fs(USER_DS);
+
+ if(ppos) *ppos = file->f_pos;
+ return(n);
+}
+
+/* Read up to count bytes from host fd into the user buffer buf, one
+ * page-sized chunk at a time.  Returns the number of bytes copied or
+ * a negative error.
+ *
+ * Fixes: the failure printk reported "count" instead of the actual
+ * error, and buf was never advanced between chunks, so reads larger
+ * than one page overwrote the start of the user buffer. */
+static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
+{
+ ssize_t n;
+ int cur, err;
+ char *new_buf;
+
+ n = -ENOMEM;
+ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if(new_buf == NULL){
+ printk("hppfs_read_file : kmalloc failed\n");
+ goto out;
+ }
+ n = 0;
+ while(count > 0){
+ cur = min_t(ssize_t, count, PAGE_SIZE);
+ err = os_read_file(fd, new_buf, cur);
+ if(err < 0){
+ printk("hppfs_read : read failed, errno = %d\n",
+ err);
+ n = err;
+ goto out_free;
+ }
+ else if(err == 0)
+ break;
+
+ if(copy_to_user(buf, new_buf, err)){
+ n = -EFAULT;
+ goto out_free;
+ }
+ buf += err;
+ n += err;
+ count -= err;
+ }
+ out_free:
+ kfree(new_buf);
+ out:
+ return(n);
+}
+
+/* read() for hppfs files: serve from prefiltered contents if present,
+ * else from the host override file, else fall through to /proc.
+ *
+ * Fixes: the copy_to_user() return value was ignored; a faulting user
+ * buffer must yield -EFAULT. */
+static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
+ loff_t *ppos)
+{
+ struct hppfs_private *hppfs = file->private_data;
+ struct hppfs_data *data;
+ loff_t off;
+ int err;
+
+ if(hppfs->contents != NULL){
+ if(*ppos >= hppfs->len) return(0);
+
+ data = hppfs->contents;
+ off = *ppos;
+ /* walk the page chain to the node containing *ppos */
+ while(off >= sizeof(data->contents)){
+ data = list_entry(data->list.next, struct hppfs_data,
+ list);
+ off -= sizeof(data->contents);
+ }
+
+ if(off + count > hppfs->len)
+ count = hppfs->len - off;
+ if(copy_to_user(buf, &data->contents[off], count))
+ return(-EFAULT);
+ *ppos += count;
+ }
+ else if(hppfs->host_fd != -1){
+ err = os_seek_file(hppfs->host_fd, *ppos);
+ if(err){
+ printk("hppfs_read : seek failed, errno = %d\n", err);
+ return(err);
+ }
+ count = hppfs_read_file(hppfs->host_fd, buf, count);
+ if(count > 0)
+ *ppos += count;
+ }
+ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1);
+
+ return(count);
+}
+
+/* write() for hppfs files: delegate straight to the underlying /proc
+ * file's write, keeping the two file positions in sync. */
+static ssize_t hppfs_write(struct file *file, const char *buf, size_t len,
+ loff_t *ppos)
+{
+ struct hppfs_private *data = file->private_data;
+ struct file *proc_file = &data->proc_file;
+ ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
+ int err;
+
+ write = proc_file->f_dentry->d_inode->i_fop->write;
+
+ proc_file->f_pos = file->f_pos;
+ err = (*write)(proc_file, buf, len, &proc_file->f_pos);
+ file->f_pos = proc_file->f_pos;
+
+ return(err);
+}
+
+/* Connect to the per-entry host socket: try "<dir>/rw" (a filtering
+ * socket, *filter_out = 1) first, then fall back to "<dir>/r"
+ * (read-only, *filter_out = 0).  host_file must have room for the
+ * suffix - callers allocate it with strlen("/rw") spare bytes.
+ * Returns the connected fd or a negative error. */
+static int open_host_sock(char *host_file, int *filter_out)
+{
+ char *end;
+ int fd;
+
+ end = &host_file[strlen(host_file)];
+ strcpy(end, "/rw");
+ *filter_out = 1;
+ fd = os_connect_socket(host_file);
+ if(fd > 0)
+ return(fd);
+
+ strcpy(end, "/r");
+ *filter_out = 0;
+ fd = os_connect_socket(host_file);
+ return(fd);
+}
+
+/* Free an entire chain of hppfs_data pages, head included. */
+static void free_contents(struct hppfs_data *head)
+{
+ struct hppfs_data *data;
+ struct list_head *ele, *next;
+
+ if(head == NULL) return;
+
+ list_for_each_safe(ele, next, &head->list){
+ data = list_entry(ele, struct hppfs_data, list);
+ kfree(data);
+ }
+ kfree(head);
+}
+
+/* Fetch the externally filtered contents of a /proc entry.  If filter
+ * is set, the raw /proc data is first pumped into the host socket fd;
+ * either way the (possibly filtered) output is then read back from fd
+ * into a chain of hppfs_data pages.  Returns the head of the chain
+ * with the total length in *size_out, or an ERR_PTR. */
+static struct hppfs_data *hppfs_get_data(int fd, int filter,
+ struct file *proc_file,
+ struct file *hppfs_file,
+ loff_t *size_out)
+{
+ struct hppfs_data *data, *new, *head;
+ int n, err;
+
+ err = -ENOMEM;
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if(data == NULL){
+ printk("hppfs_get_data : head allocation failed\n");
+ goto failed;
+ }
+
+ INIT_LIST_HEAD(&data->list);
+
+ head = data;
+ *size_out = 0;
+
+ if(filter){
+ /* feed the raw /proc contents to the filter socket, then
+ * shut down the write side so the filter sees EOF */
+ while((n = read_proc(proc_file, data->contents,
+ sizeof(data->contents), NULL, 0)) > 0)
+ os_write_file(fd, data->contents, n);
+ err = os_shutdown_socket(fd, 0, 1);
+ if(err){
+ printk("hppfs_get_data : failed to shut down "
+ "socket\n");
+ goto failed_free;
+ }
+ }
+ while(1){
+ n = os_read_file(fd, data->contents, sizeof(data->contents));
+ if(n < 0){
+ err = n;
+ printk("hppfs_get_data : read failed, errno = %d\n",
+ err);
+ goto failed_free;
+ }
+ else if(n == 0)
+ break;
+
+ *size_out += n;
+
+ /* a short read means end of data - no new page needed */
+ if(n < sizeof(data->contents))
+ break;
+
+ new = kmalloc(sizeof(*data), GFP_KERNEL);
+ if(new == 0){
+ printk("hppfs_get_data : data allocation failed\n");
+ err = -ENOMEM;
+ goto failed_free;
+ }
+
+ INIT_LIST_HEAD(&new->list);
+ list_add(&new->list, &data->list);
+ data = new;
+ }
+ return(head);
+
+ failed_free:
+ free_contents(head);
+ failed:
+ return(ERR_PTR(err));
+}
+
+/* Allocate and initialize the per-open-file private data.  The
+ * compound-literal assignment zero-fills the whole struct (including
+ * the embedded proc_file) besides setting the named fields. */
+static struct hppfs_private *hppfs_data(void)
+{
+ struct hppfs_private *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if(data == NULL)
+ return(data);
+
+ *data = ((struct hppfs_private ) { .host_fd = -1,
+ .len = -1,
+ .contents = NULL } );
+ return(data);
+}
+
+/* Translate VFS FMODE_* flags into host O_* open flags.  Any other
+ * flag combination (extra FMODE bits set) maps to 0, exactly as the
+ * original chain of equality tests did. */
+static int file_mode(int fmode)
+{
+ switch(fmode){
+ case FMODE_READ | FMODE_WRITE:
+ return(O_RDWR);
+ case FMODE_READ:
+ return(O_RDONLY);
+ case FMODE_WRITE:
+ return(O_WRONLY);
+ default:
+ return(0);
+ }
+}
+
+/* ->open for hppfs regular files: open the underlying /proc file and,
+ * if the host provides an override file or a filtering socket next to
+ * the entry, hook that up too. */
+static int hppfs_open(struct inode *inode, struct file *file)
+{
+ struct hppfs_private *data;
+ struct dentry *proc_dentry;
+ char *host_file;
+ int err, fd, type, filter;
+
+ err = -ENOMEM;
+ data = hppfs_data();
+ if(data == NULL)
+ goto out;
+
+ /* reserve room for open_host_sock's "/rw" suffix */
+ host_file = dentry_name(file->f_dentry, strlen("/rw"));
+ if(host_file == NULL)
+ goto out_free2;
+
+ proc_dentry = HPPFS_I(inode)->proc_dentry;
+
+ /* XXX This isn't closed anywhere */
+ err = open_private_file(&data->proc_file, proc_dentry,
+ file_mode(file->f_mode));
+ if(err)
+ goto out_free1;
+
+ type = os_file_type(host_file);
+ if(type == OS_TYPE_FILE){
+ /* a plain host file completely replaces the /proc contents */
+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
+ if(fd >= 0)
+ data->host_fd = fd;
+ else printk("hppfs_open : failed to open '%s', errno = %d\n",
+ host_file, -fd);
+
+ data->contents = NULL;
+ }
+ else if(type == OS_TYPE_DIR){
+ /* a host directory holds a filter socket - run the /proc
+ * data through it and buffer the result */
+ fd = open_host_sock(host_file, &filter);
+ if(fd > 0){
+ data->contents = hppfs_get_data(fd, filter,
+ &data->proc_file,
+ file, &data->len);
+ if(!IS_ERR(data->contents))
+ data->host_fd = fd;
+ }
+ else printk("hppfs_open : failed to open a socket in "
+ "'%s', errno = %d\n", host_file, -fd);
+ }
+ kfree(host_file);
+
+ file->private_data = data;
+ return(0);
+
+ out_free1:
+ kfree(host_file);
+ out_free2:
+ free_contents(data->contents);
+ kfree(data);
+ out:
+ return(err);
+}
+
+/* ->open for hppfs directories: just open the matching /proc dir. */
+static int hppfs_dir_open(struct inode *inode, struct file *file)
+{
+ struct hppfs_private *data;
+ struct dentry *proc_dentry;
+ int err;
+
+ err = -ENOMEM;
+ data = hppfs_data();
+ if(data == NULL)
+ goto out;
+
+ proc_dentry = HPPFS_I(inode)->proc_dentry;
+ err = open_private_file(&data->proc_file, proc_dentry,
+ file_mode(file->f_mode));
+ if(err)
+ goto out_free;
+
+ file->private_data = data;
+ return(0);
+
+ out_free:
+ kfree(data);
+ out:
+ return(err);
+}
+
+/* ->llseek: seek the underlying /proc file first (if it supports it),
+ * then seek the hppfs file itself so both positions stay in step. */
+static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
+{
+ struct hppfs_private *data = file->private_data;
+ struct file *proc_file = &data->proc_file;
+ loff_t (*llseek)(struct file *, loff_t, int);
+ loff_t ret;
+
+ llseek = proc_file->f_dentry->d_inode->i_fop->llseek;
+ if(llseek != NULL){
+ ret = (*llseek)(proc_file, off, where);
+ if(ret < 0)
+ return(ret);
+ }
+
+ return(default_llseek(file, off, where));
+}
+
+/* Operations for hppfs regular files. */
+static struct file_operations hppfs_file_fops = {
+ .owner = NULL,
+ .llseek = hppfs_llseek,
+ .read = hppfs_read,
+ .write = hppfs_write,
+ .open = hppfs_open,
+};
+
+/* Context threaded through readdir so hppfs_filldir can both forward
+ * entries to the real filldir and consult the directory's dentry. */
+struct hppfs_dirent {
+ void *vfs_dirent;
+ filldir_t filldir;
+ struct dentry *dentry;
+};
+
+/* filldir wrapper that suppresses entries flagged as removed on the
+ * host before forwarding to the real filldir. */
+static int hppfs_filldir(void *d, const char *name, int size,
+ loff_t offset, ino_t inode, unsigned int type)
+{
+ struct hppfs_dirent *dirent = d;
+
+ /* NOTE(review): a negative error from file_removed also skips the
+ * entry here - confirm that is intended */
+ if(file_removed(dirent->dentry, name))
+ return(0);
+
+ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset,
+ inode, type));
+}
+
+/* ->readdir: delegate to the /proc directory's readdir, filtering
+ * every entry through hppfs_filldir and mirroring the position. */
+static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
+{
+ struct hppfs_private *data = file->private_data;
+ struct file *proc_file = &data->proc_file;
+ int (*readdir)(struct file *, void *, filldir_t);
+ struct hppfs_dirent dirent = ((struct hppfs_dirent)
+ { .vfs_dirent = ent,
+ .filldir = filldir,
+ .dentry = file->f_dentry } );
+ int err;
+
+ readdir = proc_file->f_dentry->d_inode->i_fop->readdir;
+
+ proc_file->f_pos = file->f_pos;
+ err = (*readdir)(proc_file, &dirent, hppfs_filldir);
+ file->f_pos = proc_file->f_pos;
+
+ return(err);
+}
+
+/* hppfs has no dirty state of its own, so fsync is a no-op. */
+static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ return(0);
+}
+
+/* Operations for hppfs directories. */
+static struct file_operations hppfs_dir_fops = {
+ .owner = NULL,
+ .readdir = hppfs_readdir,
+ .open = hppfs_dir_open,
+ .fsync = hppfs_fsync,
+};
+
+/* ->statfs: report an empty, zero-sized filesystem carrying only the
+ * hppfs magic number. */
+static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
+{
+ sf->f_blocks = 0;
+ sf->f_bfree = 0;
+ sf->f_bavail = 0;
+ sf->f_files = 0;
+ sf->f_ffree = 0;
+ sf->f_type = HPPFS_SUPER_MAGIC;
+ return(0);
+}
+
+/* Allocate an hppfs inode: the VFS inode is embedded in the
+ * hppfs_inode_info wrapper alongside its /proc dentry pointer. */
+static struct inode *hppfs_alloc_inode(struct super_block *sb)
+{
+ struct hppfs_inode_info *hi;
+
+ hi = kmalloc(sizeof(*hi), GFP_KERNEL);
+ if(hi == NULL)
+ return(NULL);
+
+ *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL });
+ inode_init_once(&hi->vfs_inode);
+ return(&hi->vfs_inode);
+}
+
+/* hppfs keeps no on-disk state - just clear the inode. */
+void hppfs_delete_inode(struct inode *ino)
+{
+ clear_inode(ino);
+}
+
+/* Free the wrapper allocated by hppfs_alloc_inode. */
+static void hppfs_destroy_inode(struct inode *inode)
+{
+ kfree(HPPFS_I(inode));
+}
+
+static struct super_operations hppfs_sbops = {
+ .alloc_inode = hppfs_alloc_inode,
+ .destroy_inode = hppfs_destroy_inode,
+ .read_inode = hppfs_read_inode,
+ .delete_inode = hppfs_delete_inode,
+ .statfs = hppfs_statfs,
+};
+
+/* ->readlink: delegate to the underlying /proc dentry's readlink,
+ * opening a private /proc file just for the duration of the call. */
+static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct file proc_file;
+ struct dentry *proc_dentry;
+ int (*readlink)(struct dentry *, char *, int);
+ int err, n;
+
+ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+ err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
+ if(err)
+ return(err);
+
+ readlink = proc_dentry->d_inode->i_op->readlink;
+ n = (*readlink)(proc_dentry, buffer, buflen);
+
+ close_private_file(&proc_file);
+
+ return(n);
+}
+
+/* ->follow_link: delegate to the underlying /proc dentry's
+ * follow_link, again via a temporary private /proc file. */
+static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct file proc_file;
+ struct dentry *proc_dentry;
+ int (*follow_link)(struct dentry *, struct nameidata *);
+ int err, n;
+
+ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+ err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
+ if(err)
+ return(err);
+
+ follow_link = proc_dentry->d_inode->i_op->follow_link;
+ n = (*follow_link)(proc_dentry, nd);
+
+ close_private_file(&proc_file);
+
+ return(n);
+}
+
+/* Operations for hppfs directory inodes. */
+static struct inode_operations hppfs_dir_iops = {
+ .lookup = hppfs_lookup,
+};
+
+/* Operations for hppfs symlink inodes. */
+static struct inode_operations hppfs_link_iops = {
+ .readlink = hppfs_readlink,
+ .follow_link = hppfs_follow_link,
+};
+
+/* Pick the inode/file operations from the type of the underlying
+ * /proc dentry and remember that dentry in the hppfs inode. */
+static int init_inode(struct inode *inode, struct dentry *dentry)
+{
+ if(S_ISDIR(dentry->d_inode->i_mode)){
+ inode->i_op = &hppfs_dir_iops;
+ inode->i_fop = &hppfs_dir_fops;
+ }
+ else if(S_ISLNK(dentry->d_inode->i_mode)){
+ inode->i_op = &hppfs_link_iops;
+ inode->i_fop = &hppfs_file_fops;
+ }
+ else {
+ inode->i_op = &hppfs_file_iops;
+ inode->i_fop = &hppfs_file_fops;
+ }
+
+ HPPFS_I(inode)->proc_dentry = dentry;
+
+ return(0);
+}
+
+/* Fill the hppfs superblock: locate the already-mounted procfs
+ * instance and graft a root inode wrapping procfs' root dentry.
+ * Fails with -ENOENT if proc is not registered or not mounted. */
+static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
+{
+ struct inode *root_inode;
+ struct file_system_type *procfs;
+ struct super_block *proc_sb;
+ int err;
+
+ err = -ENOENT;
+ procfs = get_fs_type("proc");
+ if(procfs == NULL)
+ goto out;
+
+ /* hppfs piggybacks on the first existing proc superblock */
+ if(list_empty(&procfs->fs_supers))
+ goto out;
+
+ proc_sb = list_entry(procfs->fs_supers.next, struct super_block,
+ s_instances);
+
+ sb->s_blocksize = 1024;
+ sb->s_blocksize_bits = 10;
+ sb->s_magic = HPPFS_SUPER_MAGIC;
+ sb->s_op = &hppfs_sbops;
+
+ root_inode = iget(sb, 0);
+ if(root_inode == NULL)
+ goto out;
+
+ err = init_inode(root_inode, proc_sb->s_root);
+ if(err)
+ goto out_put;
+
+ err = -ENOMEM;
+ sb->s_root = d_alloc_root(root_inode);
+ if(sb->s_root == NULL)
+ goto out_put;
+
+ hppfs_read_inode(root_inode);
+
+ return(0);
+
+ out_put:
+ iput(root_inode);
+ out:
+ return(err);
+}
+
+/* get_sb handler: hppfs has no backing device. */
+static struct super_block *hppfs_read_super(struct file_system_type *type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return(get_sb_nodev(type, flags, data, hppfs_fill_super));
+}
+
+static struct file_system_type hppfs_type = {
+ .owner = THIS_MODULE,
+ .name = "hppfs",
+ .get_sb = hppfs_read_super,
+ .kill_sb = kill_anon_super,
+ .fs_flags = 0,
+};
+
+/* Module entry point: register the filesystem type. */
+static int __init init_hppfs(void)
+{
+ return(register_filesystem(&hppfs_type));
+}
+
+/* Module exit point: unregister the filesystem type. */
+static void __exit exit_hppfs(void)
+{
+ unregister_filesystem(&hppfs_type);
+}
+
+module_init(init_hppfs)
+module_exit(exit_hppfs)
+MODULE_LICENSE("GPL");
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
--- /dev/null
+#
+# relayfs Makefile
+#
+
+obj-$(CONFIG_RELAYFS_FS) += relayfs.o
+
+relayfs-y := relay.o relay_lockless.o relay_locking.o inode.o resize.o
+# klog.o is only built when the klog channel is configured in
+relayfs-$(CONFIG_KLOG_CHANNEL) += klog.o
--- /dev/null
+/*
+ * VFS-related code for RelayFS, a high-speed data relay filesystem.
+ *
+ * Copyright (C) 2003 - Tom Zanussi <zanussi@us.ibm.com>, IBM Corp
+ * Copyright (C) 2003 - Karim Yaghmour <karim@opersys.com>
+ *
+ * Based on ramfs, Copyright (C) 2002 - Linus Torvalds
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <asm/uaccess.h>
+#include <asm/relay.h>
+
+#define RELAYFS_MAGIC 0x26F82121
+
+/* Forward declarations for the operation tables defined further down. */
+static struct super_operations relayfs_ops;
+static struct address_space_operations relayfs_aops;
+static struct inode_operations relayfs_file_inode_operations;
+static struct file_operations relayfs_file_operations;
+static struct inode_operations relayfs_dir_inode_operations;
+
+/* Internal mount used when files are created without a parent. */
+static struct vfsmount * relayfs_mount;
+static int relayfs_mount_count;
+
+/* relayfs pages live purely in memory - no readahead, no writeback. */
+static struct backing_dev_info relayfs_backing_dev_info = {
+ .ra_pages = 0, /* No readahead */
+ .memory_backed = 1, /* Does not contribute to dirty memory */
+};
+
+/* Allocate and initialize a relayfs inode of the given mode.  Regular
+ * files, directories and symlinks get relayfs/libfs operations;
+ * anything else is set up as a special inode for dev. */
+static struct inode *
+relayfs_get_inode(struct super_block *sb, int mode, dev_t dev)
+{
+ struct inode * inode;
+
+ inode = new_inode(sb);
+
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = 0;
+ inode->i_mapping->a_ops = &relayfs_aops;
+ inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ switch (mode & S_IFMT) {
+ default:
+ init_special_inode(inode, mode, dev);
+ break;
+ case S_IFREG:
+ inode->i_op = &relayfs_file_inode_operations;
+ inode->i_fop = &relayfs_file_operations;
+ break;
+ case S_IFDIR:
+ inode->i_op = &relayfs_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inode->i_nlink++;
+ break;
+ case S_IFLNK:
+ inode->i_op = &page_symlink_inode_operations;
+ break;
+ }
+ }
+ return inode;
+}
+
+/*
+ * File creation. Allocate an inode, and we're done..
+ */
+/* SMP-safe */
+/* Allocate an inode of the given mode/dev and bind it to dentry,
+ * pinning the dentry in core.  Returns 0 or -ENOSPC. */
+static int
+relayfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ struct inode * inode;
+ int error = -ENOSPC;
+
+ inode = relayfs_get_inode(dir->i_sb, mode, dev);
+
+ if (inode) {
+ d_instantiate(dentry, inode);
+ dget(dentry); /* Extra count - pin the dentry in core */
+ error = 0;
+ }
+ return error;
+}
+
+/* Create a directory inode under dir, bumping the parent's link count
+ * for the new ".." reference. */
+static int
+relayfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+{
+ int retval;
+
+ retval = relayfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+
+ if (!retval)
+ dir->i_nlink++;
+ return retval;
+}
+
+/* ->create: a regular-file flavored mknod. */
+static int
+relayfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+{
+ return relayfs_mknod(dir, dentry, mode | S_IFREG, 0);
+}
+
+/* ->symlink: allocate a symlink inode, store the target in the page
+ * cache via page_symlink(), and instantiate the dentry. */
+static int
+relayfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+{
+ struct inode *inode;
+ int error = -ENOSPC;
+
+ inode = relayfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
+
+ if (inode) {
+ int l = strlen(symname)+1;
+ error = page_symlink(inode, symname, l);
+ if (!error) {
+ d_instantiate(dentry, inode);
+ dget(dentry);
+ } else
+ iput(inode);
+ }
+ return error;
+}
+
+/**
+ * relayfs_create_entry - create a relayfs directory or file
+ * @name: the name of the file to create
+ * @parent: parent directory
+ * @dentry: result dentry
+ * @entry_type: type of file to create (S_IFREG, S_IFDIR)
+ * @mode: mode
+ * @data: data to associate with the file
+ *
+ * Creates a file or directory with the specified permissions.
+ */
+static int
+relayfs_create_entry(const char * name, struct dentry * parent, struct dentry **dentry, int entry_type, int mode, void * data)
+{
+ struct qstr qname;
+ struct dentry * d;
+
+ int error = 0;
+
+ /* make sure relayfs is mounted internally before creating in it */
+ error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count);
+ if (error) {
+ printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error);
+ return error;
+ }
+
+ qname.name = name;
+ qname.len = strlen(name);
+ qname.hash = full_name_hash(name, qname.len);
+
+ /* no parent given - create directly under the relayfs root */
+ if (parent == NULL)
+ if (relayfs_mount && relayfs_mount->mnt_sb)
+ parent = relayfs_mount->mnt_sb->s_root;
+
+ if (parent == NULL) {
+ simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+ return -EINVAL;
+ }
+
+ parent = dget(parent);
+ down(&parent->d_inode->i_sem);
+ d = lookup_hash(&qname, parent);
+ if (IS_ERR(d)) {
+ error = PTR_ERR(d);
+ goto release_mount;
+ }
+
+ if (d->d_inode) {
+ error = -EEXIST;
+ goto release_mount;
+ }
+
+ if (entry_type == S_IFREG)
+ error = relayfs_create(parent->d_inode, d, entry_type | mode, NULL);
+ else
+ error = relayfs_mkdir(parent->d_inode, d, entry_type | mode);
+ if (error)
+ goto release_mount;
+
+ /* successful regular files keep the mount pinned until removal */
+ if ((entry_type == S_IFREG) && data) {
+ d->d_inode->u.generic_ip = data;
+ goto exit; /* don't release mount for regular files */
+ }
+
+release_mount:
+ simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+exit:
+ /* NOTE(review): *dentry is set even on error paths - callers
+ * apparently rely on getting the dentry back; confirm */
+ *dentry = d;
+ up(&parent->d_inode->i_sem);
+ dput(parent);
+
+ return error;
+}
+
+/**
+ * relayfs_create_file - create a file in the relay filesystem
+ * @name: the name of the file to create
+ * @parent: parent directory
+ * @dentry: result dentry
+ * @data: data to associate with the file
+ * @mode: mode, if not specified the default perms are used
+ *
+ * The file will be created user rw on behalf of current user.
+ */
+/* Public helper: create a regular relayfs file (default mode rw for
+ * the owner) carrying data in the inode's generic_ip. */
+int
+relayfs_create_file(const char * name, struct dentry * parent, struct dentry **dentry, void * data, int mode)
+{
+ if (!mode)
+ mode = S_IRUSR | S_IWUSR;
+
+ return relayfs_create_entry(name, parent, dentry, S_IFREG,
+ mode, data);
+}
+
+/**
+ * relayfs_create_dir - create a directory in the relay filesystem
+ * @name: the name of the directory to create
+ * @parent: parent directory
+ * @dentry: result dentry
+ *
+ * The directory will be created world rwx on behalf of current user.
+ */
+/* Public helper: create a relayfs directory (owner rwx, world rx). */
+int
+relayfs_create_dir(const char * name, struct dentry * parent, struct dentry **dentry)
+{
+ return relayfs_create_entry(name, parent, dentry, S_IFDIR,
+ S_IRWXU | S_IRUGO | S_IXUGO, NULL);
+}
+
+/**
+ * relayfs_remove_file - remove a file in the relay filesystem
+ * @dentry: file dentry
+ *
+ * Remove a file previously created by relayfs_create_file.
+ */
+int
+relayfs_remove_file(struct dentry *dentry)
+{
+ struct dentry *parent;
+ int is_reg;
+
+ parent = dentry->d_parent;
+ if (parent == NULL)
+ return -EINVAL;
+
+ /* remember the type before the inode goes away */
+ is_reg = S_ISREG(dentry->d_inode->i_mode);
+
+ parent = dget(parent);
+ down(&parent->d_inode->i_sem);
+ if (dentry->d_inode) {
+ simple_unlink(parent->d_inode, dentry);
+ d_delete(dentry);
+ }
+ /* drop the pin taken by relayfs_create_entry's dget */
+ dput(dentry);
+ up(&parent->d_inode->i_sem);
+ dput(parent);
+
+ /* regular files kept the internal mount pinned since creation */
+ if(is_reg)
+ simple_release_fs(&relayfs_mount, &relayfs_mount_count);
+
+ return 0;
+}
+
+/**
+ * relayfs_open - open file op for relayfs files
+ * @inode: the inode
+ * @filp: the file
+ *
+ * Associates the channel with the file, and increments the
+ * channel refcount. Reads will be 'auto-consuming'.
+ */
+int
+relayfs_open(struct inode *inode, struct file *filp)
+{
+ struct rchan *rchan;
+ struct rchan_reader *reader;
+ int retval = 0;
+
+ /* generic_ip holds the relay channel attached at file creation */
+ if (inode->u.generic_ip) {
+ rchan = (struct rchan *)inode->u.generic_ip;
+ if (rchan == NULL)
+ return -EACCES;
+ reader = __add_rchan_reader(rchan, filp, 1, 0);
+ if (reader == NULL)
+ return -ENOMEM;
+ filp->private_data = reader;
+ retval = rchan->callbacks->fileop_notify(rchan->id, filp,
+ RELAY_FILE_OPEN);
+ if (retval == 0)
+ /* Inc relay channel refcount for file */
+ rchan_get(rchan->id);
+ else {
+ /* client veto - undo the reader registration */
+ __remove_rchan_reader(reader);
+ retval = -EPERM;
+ }
+ }
+
+ return retval;
+}
+
+/**
+ * relayfs_mmap - mmap file op for relayfs files
+ * @filp: the file
+ * @vma: the vma describing what to map
+ *
+ * Calls upon relay_mmap_buffer to map the file into user space.
+ */
+int
+relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct rchan *rchan;
+
+ /* private_data was set to the rchan_reader in relayfs_open */
+ rchan = ((struct rchan_reader *)filp->private_data)->rchan;
+
+ return __relay_mmap_buffer(rchan, vma);
+}
+
+/**
+ * relayfs_file_read - read file op for relayfs files
+ * @filp: the file
+ * @buf: user buf to read into
+ * @count: bytes requested
+ * @offset: offset into file
+ *
+ * Reads count bytes from the channel, or as much as is available within
+ * the sub-buffer currently being read. Reads are 'auto-consuming'.
+ * See relay_read() for details.
+ *
+ * Returns bytes read on success, 0 or -EAGAIN if nothing available,
+ * negative otherwise.
+ */
+ssize_t
+relayfs_file_read(struct file *filp, char * buf, size_t count, loff_t *offset)
+{
+ size_t read_count;
+ struct rchan_reader *reader;
+ u32 dummy; /* all VFS readers are auto-consuming */
+
+ if (offset != &filp->f_pos) /* pread, seeking not supported */
+ return -ESPIPE;
+
+ if (count == 0)
+ return 0;
+
+ reader = (struct rchan_reader *)filp->private_data;
+ /* block for data unless the file is in non-blocking mode */
+ read_count = relay_read(reader, buf, count,
+ filp->f_flags & (O_NDELAY | O_NONBLOCK) ? 0 : 1, &dummy);
+
+ return read_count;
+}
+
+/**
+ * relayfs_file_write - write file op for relayfs files
+ * @filp: the file
+ * @buf: user buf to write from
+ * @count: bytes to write
+ * @offset: offset into file
+ *
+ * Reserves a slot in the relay buffer and writes count bytes
+ * into it. The current limit for a single write is 2 pages
+ * worth. The user_deliver() channel callback will be invoked on
+ * completion of the write.
+ *
+ * Returns bytes written on success, 0 or -EAGAIN if nothing available,
+ * negative otherwise.
+ */
+/* write file op for relayfs files: stage the user data in a temporary
+ * two-page kernel buffer, reserve a slot in the relay channel, and
+ * deliver it via the user_deliver() callback.
+ *
+ * Fixes: the temporary buffer was leaked on the -EFAULT, -EAGAIN and
+ * interrupted-wait error returns; every exit now frees it. */
+ssize_t
+relayfs_file_write(struct file *filp, const char *buf, size_t count, loff_t *offset)
+{
+ int write_count;
+ char * write_buf;
+ struct rchan *rchan;
+ int err = 0;
+ void *wrote_pos;
+ struct rchan_reader *reader;
+
+ reader = (struct rchan_reader *)filp->private_data;
+ if (reader == NULL)
+ return -EPERM;
+
+ rchan = reader->rchan;
+ if (rchan == NULL)
+ return -EPERM;
+
+ if (count == 0)
+ return 0;
+
+ /* Change this if need to write more than 2 pages at once */
+ if (count > 2 * PAGE_SIZE)
+ return -EINVAL;
+
+ write_buf = (char *)__get_free_pages(GFP_KERNEL, 1);
+ if (write_buf == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(write_buf, buf, count)) {
+ free_pages((unsigned long)write_buf, 1);
+ return -EFAULT;
+ }
+
+ if (filp->f_flags & (O_NDELAY | O_NONBLOCK)) {
+ write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos);
+ if (write_count == 0) {
+ free_pages((unsigned long)write_buf, 1);
+ return -EAGAIN;
+ }
+ } else {
+ err = wait_event_interruptible(rchan->write_wait,
+ (write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos)));
+ if (err) {
+ free_pages((unsigned long)write_buf, 1);
+ return err;
+ }
+ }
+
+ free_pages((unsigned long)write_buf, 1);
+
+ rchan->callbacks->user_deliver(rchan->id, wrote_pos, write_count);
+
+ return write_count;
+}
+
+/**
+ * relayfs_ioctl - ioctl file op for relayfs files
+ * @inode: the inode
+ * @filp: the file
+ * @cmd: the command
+ * @arg: command arg
+ *
+ * Passes the specified cmd/arg to the kernel client. arg may be a
+ * pointer to user-space data, in which case the kernel client is
+ * responsible for copying the data to/from user space appropriately.
+ * The kernel client is also responsible for returning a meaningful
+ * return value for ioctl calls.
+ *
+ * Returns result of relay channel callback, -EPERM if unsuccessful.
+ */
+int
+relayfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct rchan *rchan;
+ struct rchan_reader *reader;
+
+ reader = (struct rchan_reader *)filp->private_data;
+ if (reader == NULL)
+ return -EPERM;
+
+ rchan = reader->rchan;
+ if (rchan == NULL)
+ return -EPERM;
+
+ return rchan->callbacks->ioctl(rchan->id, cmd, arg);
+}
+
+/**
+ * relayfs_poll - poll file op for relayfs files
+ * @filp: the file
+ * @wait: poll table
+ *
+ * Poll implemention.
+ */
+static unsigned int
+relayfs_poll(struct file *filp, poll_table *wait)
+{
+ struct rchan_reader *reader;
+ unsigned int mask = 0;
+
+ reader = (struct rchan_reader *)filp->private_data;
+
+ if (reader->rchan->finalized)
+ return POLLERR;
+
+ if (filp->f_mode & FMODE_READ) {
+ poll_wait(filp, &reader->rchan->read_wait, wait);
+ if (!rchan_empty(reader))
+ mask |= POLLIN | POLLRDNORM;
+ }
+
+ if (filp->f_mode & FMODE_WRITE) {
+ poll_wait(filp, &reader->rchan->write_wait, wait);
+ if (!rchan_full(reader))
+ mask |= POLLOUT | POLLWRNORM;
+ }
+
+ return mask;
+}
+
+/**
+ * relayfs_release - release file op for relayfs files
+ * @inode: the inode
+ * @filp: the file
+ *
+ * Decrements the channel refcount, as the filesystem is
+ * no longer using it.
+ */
+int
+relayfs_release(struct inode *inode, struct file *filp)
+{
+ struct rchan_reader *reader;
+ struct rchan *rchan;
+
+ reader = (struct rchan_reader *)filp->private_data;
+ if (reader == NULL || reader->rchan == NULL)
+ return 0;
+ rchan = reader->rchan;
+
+ rchan->callbacks->fileop_notify(reader->rchan->id, filp,
+ RELAY_FILE_CLOSE);
+ __remove_rchan_reader(reader);
+ /* The channel is no longer in use as far as this file is concerned */
+ rchan_put(rchan);
+
+ return 0;
+}
+
/* Page-cache operations: relayfs files delegate to the generic simple_* helpers. */
static struct address_space_operations relayfs_aops = {
	.readpage = simple_readpage,
	.prepare_write = simple_prepare_write,
	.commit_write = simple_commit_write
};
+
/* VFS file operations for relayfs channel files. */
static struct file_operations relayfs_file_operations = {
	.open = relayfs_open,
	.read = relayfs_file_read,
	.write = relayfs_file_write,
	.ioctl = relayfs_ioctl,
	.poll = relayfs_poll,
	.mmap = relayfs_mmap,
	.fsync = simple_sync_file,
	.release = relayfs_release,
};
+
/* Inode operations for regular relayfs files; only getattr is needed. */
static struct inode_operations relayfs_file_inode_operations = {
	.getattr = simple_getattr,
};
+
/* Inode operations for relayfs directories; mostly the libfs simple_* helpers. */
static struct inode_operations relayfs_dir_inode_operations = {
	.create = relayfs_create,
	.lookup = simple_lookup,
	.link = simple_link,
	.unlink = simple_unlink,
	.symlink = relayfs_symlink,
	.mkdir = relayfs_mkdir,
	.rmdir = simple_rmdir,
	.mknod = relayfs_mknod,
	.rename = simple_rename,
};
+
/* Superblock operations; inodes are deleted as soon as unreferenced. */
static struct super_operations relayfs_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
};
+
+static int
+relayfs_fill_super(struct super_block * sb, void * data, int silent)
+{
+ struct inode * inode;
+ struct dentry * root;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = RELAYFS_MAGIC;
+ sb->s_op = &relayfs_ops;
+ inode = relayfs_get_inode(sb, S_IFDIR | 0755, 0);
+
+ if (!inode)
+ return -ENOMEM;
+
+ root = d_alloc_root(inode);
+ if (!root) {
+ iput(inode);
+ return -ENOMEM;
+ }
+ sb->s_root = root;
+
+ return 0;
+}
+
/* get_sb op: relayfs is a single shared instance across all mounts. */
static struct super_block *
relayfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	return get_sb_single(fs_type, flags, data, relayfs_fill_super);
}
+
/* Filesystem type registration record for "relayfs". */
static struct file_system_type relayfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "relayfs",
	.get_sb = relayfs_get_sb,
	.kill_sb = kill_litter_super,
};
+
+static int __init
+init_relayfs_fs(void)
+{
+ int err = register_filesystem(&relayfs_fs_type);
+#ifdef CONFIG_KLOG_CHANNEL
+ if (!err)
+ create_klog_channel();
+#endif
+ return err;
+}
+
/* Module unload: tear down the klog channel (if any), then unregister. */
static void __exit
exit_relayfs_fs(void)
{
#ifdef CONFIG_KLOG_CHANNEL
	remove_klog_channel();
#endif
	unregister_filesystem(&relayfs_fs_type);
}
+
/* Register relayfs at boot / module load; tear down on unload. */
module_init(init_relayfs_fs)
module_exit(exit_relayfs_fs)

MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
MODULE_DESCRIPTION("Relay Filesystem");
MODULE_LICENSE("GPL");
+
--- /dev/null
+/*
+ * Public API and common code for RelayFS.
+ *
+ * Please see Documentation/filesystems/relayfs.txt for API description.
+ *
+ * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com)
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/page-flags.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/delay.h>
+
+#include <asm/io.h>
+#include <asm/current.h>
+#include <asm/uaccess.h>
+#include <asm/bitops.h>
+#include <asm/pgtable.h>
+#include <asm/relay.h>
+#include <asm/hardirq.h>
+
+#include "relay_lockless.h"
+#include "relay_locking.h"
+#include "resize.h"
+
/* Relay channel table, indexed by channel id */
static struct rchan * rchan_table[RELAY_MAX_CHANNELS];
/* Protects rchan_table: read-locked for lookup, write-locked for alloc/free */
static rwlock_t rchan_table_lock = RW_LOCK_UNLOCKED;
+
/* Relay operation structs, one per scheme */

/* Buffer-management ops for the cmpxchg-based lockless scheme */
static struct relay_ops lockless_ops = {
	.reserve = lockless_reserve,
	.commit = lockless_commit,
	.get_offset = lockless_get_offset,
	.finalize = lockless_finalize,
	.reset = lockless_reset,
	.reset_index = lockless_reset_index
};
+
/* Buffer-management ops for the spinlock-based locking scheme */
static struct relay_ops locking_ops = {
	.reserve = locking_reserve,
	.commit = locking_commit,
	.get_offset = locking_get_offset,
	.finalize = locking_finalize,
	.reset = locking_reset,
	.reset_index = locking_reset_index
};
+
+/*
+ * Low-level relayfs kernel API. These functions should not normally be
+ * used by clients. See high-level kernel API below.
+ */
+
+/**
+ * rchan_get - get channel associated with id, incrementing refcount
+ * @rchan_id: the channel id
+ *
+ * Returns channel if successful, NULL otherwise.
+ */
+struct rchan *
+rchan_get(int rchan_id)
+{
+ struct rchan *rchan;
+
+ if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS))
+ return NULL;
+
+ read_lock(&rchan_table_lock);
+ rchan = rchan_table[rchan_id];
+ if (rchan)
+ atomic_inc(&rchan->refcount);
+ read_unlock(&rchan_table_lock);
+
+ return rchan;
+}
+
+/**
+ * clear_readers - clear non-VFS readers
+ * @rchan: the channel
+ *
+ * Clear the channel pointers of all non-VFS readers open on the channel.
+ */
+static inline void
+clear_readers(struct rchan *rchan)
+{
+ struct list_head *p;
+ struct rchan_reader *reader;
+
+ read_lock(&rchan->open_readers_lock);
+ list_for_each(p, &rchan->open_readers) {
+ reader = list_entry(p, struct rchan_reader, list);
+ if (!reader->vfs_reader)
+ reader->rchan = NULL;
+ }
+ read_unlock(&rchan->open_readers_lock);
+}
+
+/**
+ * rchan_alloc_id - reserve a channel id and store associated channel
+ * @rchan: the channel
+ *
+ * Returns channel id if successful, -1 otherwise.
+ */
+static inline int
+rchan_alloc_id(struct rchan *rchan)
+{
+ int i;
+ int rchan_id = -1;
+
+ if (rchan == NULL)
+ return -1;
+
+ write_lock(&rchan_table_lock);
+ for (i = 0; i < RELAY_MAX_CHANNELS; i++) {
+ if (rchan_table[i] == NULL) {
+ rchan_table[i] = rchan;
+ rchan_id = rchan->id = i;
+ break;
+ }
+ }
+ if (rchan_id != -1)
+ atomic_inc(&rchan->refcount);
+ write_unlock(&rchan_table_lock);
+
+ return rchan_id;
+}
+
+/**
+ * rchan_free_id - revoke a channel id and remove associated channel
+ * @rchan_id: the channel id
+ */
+static inline void
+rchan_free_id(int rchan_id)
+{
+ struct rchan *rchan;
+
+ if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS))
+ return;
+
+ write_lock(&rchan_table_lock);
+ rchan = rchan_table[rchan_id];
+ rchan_table[rchan_id] = NULL;
+ write_unlock(&rchan_table_lock);
+}
+
+/**
+ * rchan_destroy_buf - destroy the current channel buffer
+ * @rchan: the channel
+ */
+static inline void
+rchan_destroy_buf(struct rchan *rchan)
+{
+ if (rchan->buf && !rchan->init_buf)
+ free_rchan_buf(rchan->buf,
+ rchan->buf_page_array,
+ rchan->buf_page_count);
+}
+
+/**
+ * relay_release - perform end-of-buffer processing for last buffer
+ * @rchan: the channel
+ *
+ * Returns 0 if successful, negative otherwise.
+ *
+ * Releases the channel buffer, destroys the channel, and removes the
+ * relay file from the relayfs filesystem. Should only be called from
+ * rchan_put(). If we're here, it means by definition refcount is 0.
+ */
+static int
+relay_release(struct rchan *rchan)
+{
+ if (rchan == NULL)
+ return -EBADF;
+
+ rchan_destroy_buf(rchan);
+ rchan_free_id(rchan->id);
+ relayfs_remove_file(rchan->dentry);
+ clear_readers(rchan);
+ kfree(rchan);
+
+ return 0;
+}
+
+/**
+ * rchan_get - decrement channel refcount, releasing it if 0
+ * @rchan: the channel
+ *
+ * If the refcount reaches 0, the channel will be destroyed.
+ */
+void
+rchan_put(struct rchan *rchan)
+{
+ if (atomic_dec_and_test(&rchan->refcount))
+ relay_release(rchan);
+}
+
+/**
+ * relay_reserve - reserve a slot in the channel buffer
+ * @rchan: the channel
+ * @len: the length of the slot to reserve
+ * @td: the time delta between buffer start and current write, or TSC
+ * @err: receives the result flags
+ * @interrupting: 1 if interrupting previous, used only in locking scheme
+ *
+ * Returns pointer to the beginning of the reserved slot, NULL if error.
+ *
+ * The errcode value contains the result flags and is an ORed combination
+ * of the following:
+ *
+ * RELAY_BUFFER_SWITCH_NONE - no buffer switch occurred
+ * RELAY_EVENT_DISCARD_NONE - event should not be discarded
+ * RELAY_BUFFER_SWITCH - buffer switch occurred
+ * RELAY_EVENT_DISCARD - event should be discarded (all buffers are full)
+ * RELAY_EVENT_TOO_LONG - event won't fit into even an empty buffer
+ *
+ * buffer_start and buffer_end callbacks are triggered at this point
+ * if applicable.
+ */
+char *
+relay_reserve(struct rchan *rchan,
+ u32 len,
+ struct timeval *ts,
+ u32 *td,
+ int *err,
+ int *interrupting)
+{
+ if (rchan == NULL)
+ return NULL;
+
+ *interrupting = 0;
+
+ return rchan->relay_ops->reserve(rchan, len, ts, td, err, interrupting);
+}
+
+
+/**
+ * wakeup_readers - wake up VFS readers waiting on a channel
+ * @private: the channel
+ *
+ * This is the work function used to defer reader waking. The
+ * reason waking is deferred is that calling directly from commit
+ * causes problems if you're writing from say the scheduler.
+ */
+static void
+wakeup_readers(void *private)
+{
+ struct rchan *rchan = (struct rchan *)private;
+
+ wake_up_interruptible(&rchan->read_wait);
+}
+
+
+/**
+ * relay_commit - commit a reserved slot in the buffer
+ * @rchan: the channel
+ * @from: commit the length starting here
+ * @len: length committed
+ * @interrupting: 1 if interrupting previous, used only in locking scheme
+ *
+ * After the write into the reserved buffer has been complted, this
+ * function must be called in order for the relay to determine whether
+ * buffers are complete and to wake up VFS readers.
+ *
+ * delivery callback is triggered at this point if applicable.
+ */
+void
+relay_commit(struct rchan *rchan,
+ char *from,
+ u32 len,
+ int reserve_code,
+ int interrupting)
+{
+ int deliver;
+
+ if (rchan == NULL)
+ return;
+
+ deliver = packet_delivery(rchan) ||
+ (reserve_code & RELAY_BUFFER_SWITCH);
+
+ rchan->relay_ops->commit(rchan, from, len, deliver, interrupting);
+
+ /* The params are always the same, so no worry about re-queuing */
+ if (deliver && waitqueue_active(&rchan->read_wait)) {
+ PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan);
+ schedule_delayed_work(&rchan->wake_readers, 1);
+ }
+}
+
+/**
+ * relay_get_offset - get current and max channel buffer offsets
+ * @rchan: the channel
+ * @max_offset: maximum channel offset
+ *
+ * Returns the current and maximum channel buffer offsets.
+ */
+u32
+relay_get_offset(struct rchan *rchan, u32 *max_offset)
+{
+ return rchan->relay_ops->get_offset(rchan, max_offset);
+}
+
+/**
+ * reset_index - try once to reset the current channel index
+ * @rchan: the channel
+ * @old_index: the index read before reset
+ *
+ * Attempts to reset the channel index to 0. It tries once, and
+ * if it fails, returns negative, 0 otherwise.
+ */
+int
+reset_index(struct rchan *rchan, u32 old_index)
+{
+ return rchan->relay_ops->reset_index(rchan, old_index);
+}
+
+/*
+ * close() vm_op implementation for relayfs file mapping.
+ */
+static void
+relay_file_mmap_close(struct vm_area_struct *vma)
+{
+ struct file *filp = vma->vm_file;
+ struct rchan_reader *reader;
+ struct rchan *rchan;
+
+ reader = (struct rchan_reader *)filp->private_data;
+ rchan = reader->rchan;
+
+ atomic_dec(&rchan->mapped);
+
+ rchan->callbacks->fileop_notify(reader->rchan->id, filp,
+ RELAY_FILE_UNMAP);
+}
+
+/*
+ * vm_ops for relay file mappings.
+ */
+static struct vm_operations_struct relay_file_mmap_ops = {
+ .close = relay_file_mmap_close
+};
+
+/* \begin{Code inspired from BTTV driver} */
+static inline unsigned long
+kvirt_to_pa(unsigned long adr)
+{
+ unsigned long kva, ret;
+
+ kva = (unsigned long) page_address(vmalloc_to_page((void *) adr));
+ kva |= adr & (PAGE_SIZE - 1);
+ ret = __pa(kva);
+ return ret;
+}
+
+static int
+relay_mmap_region(struct vm_area_struct *vma,
+ const char *adr,
+ const char *start_pos,
+ unsigned long size)
+{
+ unsigned long start = (unsigned long) adr;
+ unsigned long page, pos;
+
+ pos = (unsigned long) start_pos;
+
+ while (size > 0) {
+ page = kvirt_to_pa(pos);
+ if (remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
+ return -EAGAIN;
+ start += PAGE_SIZE;
+ pos += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ return 0;
+}
+/* \end{Code inspired from BTTV driver} */
+
+/**
+ * relay_mmap_buffer: - mmap buffer to process address space
+ * @rchan_id: relay channel id
+ * @vma: vm_area_struct describing memory to be mapped
+ *
+ * Returns:
+ * 0 if ok
+ * -EAGAIN, when remap failed
+ * -EINVAL, invalid requested length
+ *
+ * Caller should already have grabbed mmap_sem.
+ */
+int
+__relay_mmap_buffer(struct rchan *rchan,
+ struct vm_area_struct *vma)
+{
+ int err = 0;
+ unsigned long length = vma->vm_end - vma->vm_start;
+ struct file *filp = vma->vm_file;
+
+ if (rchan == NULL) {
+ err = -EBADF;
+ goto exit;
+ }
+
+ if (rchan->init_buf) {
+ err = -EPERM;
+ goto exit;
+ }
+
+ if (length != (unsigned long)rchan->alloc_size) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ err = relay_mmap_region(vma,
+ (char *)vma->vm_start,
+ rchan->buf,
+ rchan->alloc_size);
+
+ if (err == 0) {
+ vma->vm_ops = &relay_file_mmap_ops;
+ err = rchan->callbacks->fileop_notify(rchan->id, filp,
+ RELAY_FILE_MAP);
+ if (err == 0)
+ atomic_inc(&rchan->mapped);
+ }
+exit:
+ return err;
+}
+
+/*
+ * High-level relayfs kernel API. See Documentation/filesystems/relafys.txt.
+ */
+
+/*
+ * rchan_callback implementations defining default channel behavior. Used
+ * in place of corresponding NULL values in client callback struct.
+ */
+
+/*
+ * buffer_end() default callback. Does nothing.
+ */
+static int
+buffer_end_default_callback(int rchan_id,
+ char *current_write_pos,
+ char *end_of_buffer,
+ struct timeval end_time,
+ u32 end_tsc,
+ int using_tsc)
+{
+ return 0;
+}
+
+/*
+ * buffer_start() default callback. Does nothing.
+ */
+static int
+buffer_start_default_callback(int rchan_id,
+ char *current_write_pos,
+ u32 buffer_id,
+ struct timeval start_time,
+ u32 start_tsc,
+ int using_tsc)
+{
+ return 0;
+}
+
+/*
+ * deliver() default callback. Does nothing.
+ */
+static void
+deliver_default_callback(int rchan_id, char *from, u32 len)
+{
+}
+
+/*
+ * user_deliver() default callback. Does nothing.
+ */
+static void
+user_deliver_default_callback(int rchan_id, char *from, u32 len)
+{
+}
+
+/*
+ * needs_resize() default callback. Does nothing.
+ */
+static void
+needs_resize_default_callback(int rchan_id,
+ int resize_type,
+ u32 suggested_buf_size,
+ u32 suggested_n_bufs)
+{
+}
+
+/*
+ * fileop_notify() default callback. Does nothing.
+ */
+static int
+fileop_notify_default_callback(int rchan_id,
+ struct file *filp,
+ enum relay_fileop fileop)
+{
+ return 0;
+}
+
+/*
+ * ioctl() default callback. Does nothing.
+ */
+static int
+ioctl_default_callback(int rchan_id,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return 0;
+}
+
/* relay channel default callbacks, used when the client passes a NULL
 * callback struct to relay_open() */
static struct rchan_callbacks default_channel_callbacks = {
	.buffer_start = buffer_start_default_callback,
	.buffer_end = buffer_end_default_callback,
	.deliver = deliver_default_callback,
	.user_deliver = user_deliver_default_callback,
	.needs_resize = needs_resize_default_callback,
	.fileop_notify = fileop_notify_default_callback,
	.ioctl = ioctl_default_callback,
};
+
+/**
+ * check_attribute_flags - check sanity of channel attributes
+ * @flags: channel attributes
+ * @resizeable: 1 if true
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+static int
+check_attribute_flags(u32 *attribute_flags, int resizeable)
+{
+ u32 flags = *attribute_flags;
+
+ if (!(flags & RELAY_DELIVERY_BULK) && !(flags & RELAY_DELIVERY_PACKET))
+ return -EINVAL; /* Delivery mode must be specified */
+
+ if (!(flags & RELAY_USAGE_SMP) && !(flags & RELAY_USAGE_GLOBAL))
+ return -EINVAL; /* Usage must be specified */
+
+ if (resizeable) { /* Resizeable can never be continuous */
+ *attribute_flags &= ~RELAY_MODE_CONTINUOUS;
+ *attribute_flags |= RELAY_MODE_NO_OVERWRITE;
+ }
+
+ if ((flags & RELAY_MODE_CONTINUOUS) &&
+ (flags & RELAY_MODE_NO_OVERWRITE))
+ return -EINVAL; /* Can't have it both ways */
+
+ if (!(flags & RELAY_MODE_CONTINUOUS) &&
+ !(flags & RELAY_MODE_NO_OVERWRITE))
+ *attribute_flags |= RELAY_MODE_CONTINUOUS; /* Default to continuous */
+
+ if (!(flags & RELAY_SCHEME_ANY))
+ return -EINVAL; /* One or both must be specified */
+ else if (flags & RELAY_SCHEME_LOCKLESS) {
+ if (have_cmpxchg())
+ *attribute_flags &= ~RELAY_SCHEME_LOCKING;
+ else if (flags & RELAY_SCHEME_LOCKING)
+ *attribute_flags &= ~RELAY_SCHEME_LOCKLESS;
+ else
+ return -EINVAL; /* Locking scheme not an alternative */
+ }
+
+ if (!(flags & RELAY_TIMESTAMP_ANY))
+ return -EINVAL; /* One or both must be specified */
+ else if (flags & RELAY_TIMESTAMP_TSC) {
+ if (have_tsc())
+ *attribute_flags &= ~RELAY_TIMESTAMP_GETTIMEOFDAY;
+ else if (flags & RELAY_TIMESTAMP_GETTIMEOFDAY)
+ *attribute_flags &= ~RELAY_TIMESTAMP_TSC;
+ else
+ return -EINVAL; /* gettimeofday not an alternative */
+ }
+
+ return 0;
+}
+
+/*
+ * High-level API functions.
+ */
+
+/**
+ * __relay_reset - internal reset function
+ * @rchan: the channel
+ * @init: 1 if this is a first-time channel initialization
+ *
+ * See relay_reset for description of effect.
+ */
+void
+__relay_reset(struct rchan *rchan, int init)
+{
+ int i;
+
+ if (init) {
+ rchan->version = RELAYFS_CHANNEL_VERSION;
+ init_MUTEX(&rchan->resize_sem);
+ init_waitqueue_head(&rchan->read_wait);
+ init_waitqueue_head(&rchan->write_wait);
+ atomic_set(&rchan->refcount, 0);
+ INIT_LIST_HEAD(&rchan->open_readers);
+ rchan->open_readers_lock = RW_LOCK_UNLOCKED;
+ }
+
+ rchan->buf_id = rchan->buf_idx = 0;
+ atomic_set(&rchan->suspended, 0);
+ atomic_set(&rchan->mapped, 0);
+ rchan->half_switch = 0;
+ rchan->bufs_produced = 0;
+ rchan->bufs_consumed = 0;
+ rchan->bytes_consumed = 0;
+ rchan->initialized = 0;
+ rchan->finalized = 0;
+ rchan->resize_min = rchan->resize_max = 0;
+ rchan->resizing = 0;
+ rchan->replace_buffer = 0;
+ rchan->resize_buf = NULL;
+ rchan->resize_buf_size = 0;
+ rchan->resize_alloc_size = 0;
+ rchan->resize_n_bufs = 0;
+ rchan->resize_err = 0;
+ rchan->resize_failures = 0;
+ rchan->resize_order = 0;
+
+ rchan->expand_page_array = NULL;
+ rchan->expand_page_count = 0;
+ rchan->shrink_page_array = NULL;
+ rchan->shrink_page_count = 0;
+ rchan->resize_page_array = NULL;
+ rchan->resize_page_count = 0;
+ rchan->old_buf_page_array = NULL;
+ rchan->expand_buf_id = 0;
+
+ INIT_WORK(&rchan->wake_readers, NULL, NULL);
+ INIT_WORK(&rchan->wake_writers, NULL, NULL);
+
+ for (i = 0; i < RELAY_MAX_BUFS; i++)
+ rchan->unused_bytes[i] = 0;
+
+ rchan->relay_ops->reset(rchan, init);
+}
+
+/**
+ * relay_reset - reset the channel
+ * @rchan: the channel
+ *
+ * Returns 0 if successful, negative if not.
+ *
+ * This has the effect of erasing all data from the buffer and
+ * restarting the channel in its initial state. The buffer itself
+ * is not freed, so any mappings are still in effect.
+ *
+ * NOTE: Care should be taken that the channnel isn't actually
+ * being used by anything when this call is made.
+ */
+int
+relay_reset(int rchan_id)
+{
+ struct rchan *rchan;
+
+ rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return -EBADF;
+
+ __relay_reset(rchan, 0);
+ update_readers_consumed(rchan, 0, 0);
+
+ rchan_put(rchan);
+
+ return 0;
+}
+
+/**
+ * check_init_buf - check the sanity of init_buf, if present
+ * @init_buf: the initbuf
+ * @init_buf_size: the total initbuf size
+ * @bufsize: the channel's sub-buffer size
+ * @nbufs: the number of sub-buffers in the channel
+ *
+ * Returns 0 if ok, negative otherwise.
+ */
+static int
+check_init_buf(char *init_buf, u32 init_buf_size, u32 bufsize, u32 nbufs)
+{
+ int err = 0;
+
+ if (init_buf && nbufs == 1) /* 1 sub-buffer makes no sense */
+ err = -EINVAL;
+
+ if (init_buf && (bufsize * nbufs != init_buf_size))
+ err = -EINVAL;
+
+ return err;
+}
+
+/**
+ * rchan_create_buf - allocate the initial channel buffer
+ * @rchan: the channel
+ * @size_alloc: the total size of the channel buffer
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+static inline int
+rchan_create_buf(struct rchan *rchan, int size_alloc)
+{
+ struct page **page_array;
+ int page_count;
+
+ if ((rchan->buf = (char *)alloc_rchan_buf(size_alloc, &page_array, &page_count)) == NULL) {
+ rchan->buf_page_array = NULL;
+ rchan->buf_page_count = 0;
+ return -ENOMEM;
+ }
+
+ rchan->buf_page_array = page_array;
+ rchan->buf_page_count = page_count;
+
+ return 0;
+}
+
+/**
+ * rchan_create - allocate and initialize a channel, including buffer
+ * @chanpath: path specifying the relayfs channel file to create
+ * @bufsize: the size of the sub-buffers within the channel buffer
+ * @nbufs: the number of sub-buffers within the channel buffer
+ * @rchan_flags: flags specifying buffer attributes
+ * @err: err code
+ *
+ * Returns channel if successful, NULL otherwise, err receives errcode.
+ *
+ * Allocates a struct rchan representing a relay channel, according
+ * to the attributes passed in via rchan_flags. Does some basic sanity
+ * checking but doesn't try to do anything smart. In particular, the
+ * number of buffers must be a power of 2, and if the lockless scheme
+ * is being used, the sub-buffer size must also be a power of 2. The
+ * locking scheme can use buffers of any size.
+ */
+static struct rchan *
+rchan_create(const char *chanpath,
+ int bufsize,
+ int nbufs,
+ u32 rchan_flags,
+ char *init_buf,
+ u32 init_buf_size,
+ int *err)
+{
+ int size_alloc;
+ struct rchan *rchan = NULL;
+
+ *err = 0;
+
+ rchan = (struct rchan *)kmalloc(sizeof(struct rchan), GFP_KERNEL);
+ if (rchan == NULL) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+ rchan->buf = rchan->init_buf = NULL;
+
+ *err = check_init_buf(init_buf, init_buf_size, bufsize, nbufs);
+ if (*err)
+ goto exit;
+
+ if (nbufs == 1 && bufsize) {
+ rchan->n_bufs = nbufs;
+ rchan->buf_size = bufsize;
+ size_alloc = bufsize;
+ goto alloc;
+ }
+
+ if (bufsize <= 0 ||
+ (rchan_flags & RELAY_SCHEME_LOCKLESS && hweight32(bufsize) != 1) ||
+ hweight32(nbufs) != 1 ||
+ nbufs < RELAY_MIN_BUFS ||
+ nbufs > RELAY_MAX_BUFS) {
+ *err = -EINVAL;
+ goto exit;
+ }
+
+ size_alloc = FIX_SIZE(bufsize * nbufs);
+ if (size_alloc > RELAY_MAX_BUF_SIZE) {
+ *err = -EINVAL;
+ goto exit;
+ }
+ rchan->n_bufs = nbufs;
+ rchan->buf_size = bufsize;
+
+ if (rchan_flags & RELAY_SCHEME_LOCKLESS) {
+ offset_bits(rchan) = ffs(bufsize) - 1;
+ offset_mask(rchan) = RELAY_BUF_OFFSET_MASK(offset_bits(rchan));
+ bufno_bits(rchan) = ffs(nbufs) - 1;
+ }
+alloc:
+ if (rchan_alloc_id(rchan) == -1) {
+ *err = -ENOMEM;
+ goto exit;
+ }
+
+ if (init_buf == NULL) {
+ *err = rchan_create_buf(rchan, size_alloc);
+ if (*err) {
+ rchan_free_id(rchan->id);
+ goto exit;
+ }
+ } else
+ rchan->buf = rchan->init_buf = init_buf;
+
+ rchan->alloc_size = size_alloc;
+
+ if (rchan_flags & RELAY_SCHEME_LOCKLESS)
+ rchan->relay_ops = &lockless_ops;
+ else
+ rchan->relay_ops = &locking_ops;
+
+exit:
+ if (*err) {
+ kfree(rchan);
+ rchan = NULL;
+ }
+
+ return rchan;
+}
+
+
/* Scratch buffer for path-component parsing in rchan_create_dir().
 * File-scope and unlocked - concurrent channel creation would race on
 * it; TODO(review): confirm callers are serialized. */
static char tmpname[NAME_MAX];
+
+/**
+ * rchan_create_dir - create directory for file
+ * @chanpath: path to file, including filename
+ * @residual: filename remaining after parse
+ * @topdir: the directory filename should be created in
+ *
+ * Returns 0 if successful, negative otherwise.
+ *
+ * Inspired by xlate_proc_name() in procfs. Given a file path which
+ * includes the filename, creates any and all directories necessary
+ * to create the file.
+ */
+static int
+rchan_create_dir(const char * chanpath,
+ const char **residual,
+ struct dentry **topdir)
+{
+ const char *cp = chanpath, *next;
+ struct dentry *parent = NULL;
+ int len, err = 0;
+
+ while (1) {
+ next = strchr(cp, '/');
+ if (!next)
+ break;
+
+ len = next - cp;
+
+ strncpy(tmpname, cp, len);
+ tmpname[len] = '\0';
+ err = relayfs_create_dir(tmpname, parent, &parent);
+ if (err && (err != -EEXIST))
+ return err;
+ cp += len + 1;
+ }
+
+ *residual = cp;
+ *topdir = parent;
+
+ return err;
+}
+
+/**
+ * rchan_create_file - create file, including parent directories
+ * @chanpath: path to file, including filename
+ * @dentry: result dentry
+ * @data: data to associate with the file
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+static int
+rchan_create_file(const char * chanpath,
+ struct dentry **dentry,
+ struct rchan * data,
+ int mode)
+{
+ int err;
+ const char * fname;
+ struct dentry *topdir;
+
+ err = rchan_create_dir(chanpath, &fname, &topdir);
+ if (err && (err != -EEXIST))
+ return err;
+
+ err = relayfs_create_file(fname, topdir, dentry, (void *)data, mode);
+
+ return err;
+}
+
+/**
+ * relay_open - create a new file/channel buffer in relayfs
+ * @chanpath: name of file to create, including path
+ * @bufsize: size of sub-buffers
+ * @nbufs: number of sub-buffers
+ * @flags: channel attributes
+ * @callbacks: client callback functions
+ * @start_reserve: number of bytes to reserve at start of each sub-buffer
+ * @end_reserve: number of bytes to reserve at end of each sub-buffer
+ * @rchan_start_reserve: additional reserve at start of first sub-buffer
+ * @resize_min: minimum total buffer size, if set
+ * @resize_max: maximum total buffer size, if set
+ * @mode: the perms to be given to the relayfs file, 0 to accept defaults
+ * @init_buf: initial memory buffer to start out with, NULL if N/A
+ * @init_buf_size: initial memory buffer size to start out with, 0 if N/A
+ *
+ * Returns channel id if successful, negative otherwise.
+ *
+ * Creates a relay channel using the sizes and attributes specified.
+ * The default permissions, used if mode == 0 are S_IRUSR | S_IWUSR. See
+ * Documentation/filesystems/relayfs.txt for details.
+ */
+int
+relay_open(const char *chanpath,
+ int bufsize,
+ int nbufs,
+ u32 flags,
+ struct rchan_callbacks *channel_callbacks,
+ u32 start_reserve,
+ u32 end_reserve,
+ u32 rchan_start_reserve,
+ u32 resize_min,
+ u32 resize_max,
+ int mode,
+ char *init_buf,
+ u32 init_buf_size)
+{
+ int err;
+ struct rchan *rchan;
+ struct dentry *dentry;
+ struct rchan_callbacks *callbacks = NULL;
+
+ if (chanpath == NULL)
+ return -EINVAL;
+
+ if (nbufs != 1) {
+ err = check_attribute_flags(&flags, resize_min ? 1 : 0);
+ if (err)
+ return err;
+ }
+
+ rchan = rchan_create(chanpath, bufsize, nbufs, flags, init_buf, init_buf_size, &err);
+
+ if (err < 0)
+ return err;
+
+ /* Create file in fs */
+ if ((err = rchan_create_file(chanpath, &dentry, rchan, mode)) < 0) {
+ rchan_destroy_buf(rchan);
+ rchan_free_id(rchan->id);
+ kfree(rchan);
+ return err;
+ }
+
+ rchan->dentry = dentry;
+
+ if (channel_callbacks == NULL)
+ callbacks = &default_channel_callbacks;
+ else
+ callbacks = channel_callbacks;
+
+ if (callbacks->buffer_end == NULL)
+ callbacks->buffer_end = buffer_end_default_callback;
+ if (callbacks->buffer_start == NULL)
+ callbacks->buffer_start = buffer_start_default_callback;
+ if (callbacks->deliver == NULL)
+ callbacks->deliver = deliver_default_callback;
+ if (callbacks->user_deliver == NULL)
+ callbacks->user_deliver = user_deliver_default_callback;
+ if (callbacks->needs_resize == NULL)
+ callbacks->needs_resize = needs_resize_default_callback;
+ if (callbacks->fileop_notify == NULL)
+ callbacks->fileop_notify = fileop_notify_default_callback;
+ if (callbacks->ioctl == NULL)
+ callbacks->ioctl = ioctl_default_callback;
+ rchan->callbacks = callbacks;
+
+ /* Just to let the client know the sizes used */
+ rchan->callbacks->needs_resize(rchan->id,
+ RELAY_RESIZE_REPLACED,
+ rchan->buf_size,
+ rchan->n_bufs);
+
+ rchan->flags = flags;
+ rchan->start_reserve = start_reserve;
+ rchan->end_reserve = end_reserve;
+ rchan->rchan_start_reserve = rchan_start_reserve;
+
+ __relay_reset(rchan, 1);
+
+ if (resize_min > 0 && resize_max > 0 &&
+ resize_max < RELAY_MAX_TOTAL_BUF_SIZE) {
+ rchan->resize_min = resize_min;
+ rchan->resize_max = resize_max;
+ init_shrink_timer(rchan);
+ }
+
+ rchan_get(rchan->id);
+
+ return rchan->id;
+}
+
+/**
+ * relay_discard_init_buf - alloc channel buffer and copy init_buf into it
+ * @rchan_id: the channel id
+ *
+ * Returns 0 if successful, negative otherwise.
+ *
+ * NOTE: May sleep. Should also be called only when the channel isn't
+ * actively being written into.
+ */
+int
+relay_discard_init_buf(int rchan_id)
+{
+ struct rchan *rchan;
+ int err = 0;
+
+ rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return -EBADF;
+
+ if (rchan->init_buf == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = rchan_create_buf(rchan, rchan->alloc_size);
+ if (err)
+ goto out;
+
+ memcpy(rchan->buf, rchan->init_buf, rchan->n_bufs * rchan->buf_size);
+ rchan->init_buf = NULL;
+out:
+ rchan_put(rchan);
+
+ return err;
+}
+
+/**
+ * relay_finalize - perform end-of-buffer processing for last buffer
+ * @rchan_id: the channel id
+ * @releasing: true if called when releasing file
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+static int
+relay_finalize(int rchan_id)
+{
+ struct rchan *rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return -EBADF;
+
+ if (rchan->finalized == 0) {
+ rchan->relay_ops->finalize(rchan);
+ rchan->finalized = 1;
+ }
+
+ if (waitqueue_active(&rchan->read_wait)) {
+ PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan);
+ schedule_delayed_work(&rchan->wake_readers, 1);
+ }
+
+ rchan_put(rchan);
+
+ return 0;
+}
+
+/**
+ * restore_callbacks - restore default channel callbacks
+ * @rchan: the channel
+ *
+ * Restore callbacks to the default versions.
+ */
+static inline void
+restore_callbacks(struct rchan *rchan)
+{
+ if (rchan->callbacks != &default_channel_callbacks)
+ rchan->callbacks = &default_channel_callbacks;
+}
+
+/**
+ * relay_close - close the channel
+ * @rchan_id: relay channel id
+ *
+ * Finalizes the last sub-buffer and marks the channel as finalized.
+ * The channel buffer and channel data structure are then freed
+ * automatically when the last reference to the channel is given up.
+ */
+int
+relay_close(int rchan_id)
+{
+ int err;
+ struct rchan *rchan;
+
+ if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS))
+ return -EBADF;
+
+ err = relay_finalize(rchan_id);
+
+ if (!err) {
+ read_lock(&rchan_table_lock);
+ rchan = rchan_table[rchan_id];
+ read_unlock(&rchan_table_lock);
+
+ if (rchan) {
+ restore_callbacks(rchan);
+ if (rchan->resize_min)
+ del_timer(&rchan->shrink_timer);
+ rchan_put(rchan);
+ }
+ }
+
+ return err;
+}
+
+/**
+ * relay_write - reserve a slot in the channel and write data into it
+ * @rchan_id: relay channel id
+ * @data_ptr: data to be written into reserved slot
+ * @count: number of bytes to write
+ * @td_offset: optional offset where time delta should be written
+ * @wrote_pos: optional ptr returning buf pos written to, ignored if NULL
+ *
+ * Returns the number of bytes written, 0 or negative on failure.
+ *
+ * Reserves space in the channel and writes count bytes of data_ptr
+ * to it. Automatically performs any necessary locking, depending
+ * on the scheme and SMP usage in effect (no locking is done for the
+ * lockless scheme regardless of usage).
+ *
+ * If td_offset is >= 0, the internal time delta calculated when
+ * slot was reserved will be written at that offset.
+ *
+ * If wrote_pos is non-NULL, it will receive the location the data
+ * was written to, which may be needed for some applications but is not
+ * normally interesting.
+ */
+int
+relay_write(int rchan_id,
+ const void *data_ptr,
+ size_t count,
+ int td_offset,
+ void **wrote_pos)
+{
+ unsigned long flags;
+ char *reserved, *write_pos;
+ int bytes_written = 0;
+ int reserve_code, interrupting;
+ struct timeval ts;
+ u32 td;
+ struct rchan *rchan;
+
+ rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return -EBADF;
+
+ relay_lock_channel(rchan, flags); /* nop for lockless */
+
+ write_pos = reserved = relay_reserve(rchan, count, &ts, &td,
+ &reserve_code, &interrupting);
+
+ if (reserved != NULL) {
+ relay_write_direct(write_pos, data_ptr, count);
+ if ((td_offset >= 0) && (td_offset < count - sizeof(td)))
+ *((u32 *)(reserved + td_offset)) = td;
+ bytes_written = count;
+ } else if (reserve_code == RELAY_WRITE_TOO_LONG)
+ bytes_written = -EINVAL;
+
+ if (bytes_written > 0)
+ relay_commit(rchan, reserved, bytes_written, reserve_code, interrupting);
+
+ relay_unlock_channel(rchan, flags); /* nop for lockless */
+
+ rchan_put(rchan);
+
+ if (wrote_pos)
+ *wrote_pos = reserved;
+
+ return bytes_written;
+}
+
+/**
+ * wakeup_writers - wake up VFS writers waiting on a channel
+ * @private: the channel
+ *
+ * This is the work function used to defer writer waking. The
+ * reason waking is deferred is that calling directly from
+ * buffers_consumed causes problems if you're writing from say
+ * the scheduler.
+ */
+static void
+wakeup_writers(void *private)
+{
+ struct rchan *rchan = (struct rchan *)private;
+
+ wake_up_interruptible(&rchan->write_wait);
+}
+
+
/**
 * __relay_buffers_consumed - internal version of relay_buffers_consumed
 * @rchan: the relay channel
 * @bufs_consumed: number of buffers to add to current count for channel
 *
 * Internal - updates the channel's consumed buffer count.
 */
static void
__relay_buffers_consumed(struct rchan *rchan, u32 bufs_consumed)
{
	rchan->bufs_consumed += bufs_consumed;

	/* Consumed can never run ahead of produced - clamp it. */
	if (rchan->bufs_consumed > rchan->bufs_produced)
		rchan->bufs_consumed = rchan->bufs_produced;

	/* Consuming data un-suspends a channel that had filled up. */
	atomic_set(&rchan->suspended, 0);

	/* Wake writers from a work item rather than directly - see
	 * wakeup_writers() for why the deferral is needed. */
	PREPARE_WORK(&rchan->wake_writers, wakeup_writers, rchan);
	schedule_delayed_work(&rchan->wake_writers, 1);
}
+
/**
 * __reader_buffers_consumed - update reader/channel consumed buffer count
 * @reader: channel reader
 * @bufs_consumed: number of buffers to add to current count for channel
 *
 * Internal - updates the reader's consumed buffer count. If the reader's
 * resulting total is greater than the channel's, update the channel's.
*/
static void
__reader_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed)
{
	reader->bufs_consumed += bufs_consumed;

	/* NOTE(review): the channel is advanced by the whole delta, not
	 * by (reader total - channel total) - presumably fine with one
	 * consuming reader; verify for multiple concurrent readers. */
	if (reader->bufs_consumed > reader->rchan->bufs_consumed)
		__relay_buffers_consumed(reader->rchan, bufs_consumed);
}
+
+/**
+ * relay_buffers_consumed - add to the # buffers consumed for the channel
+ * @reader: channel reader
+ * @bufs_consumed: number of buffers to add to current count for channel
+ *
+ * Adds to the channel's consumed buffer count. buffers_consumed should
+ * be the number of buffers newly consumed, not the total number consumed.
+ *
+ * NOTE: kernel clients don't need to call this function if the reader
+ * is auto-consuming or the channel is MODE_CONTINUOUS.
+ */
+void
+relay_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed)
+{
+ if (reader && reader->rchan)
+ __reader_buffers_consumed(reader, bufs_consumed);
+}
+
/**
 * __relay_bytes_consumed - internal version of relay_bytes_consumed
 * @rchan: the relay channel
 * @bytes_consumed: number of bytes to add to current count for channel
 * @read_offset: where the bytes were consumed from
 *
 * Internal - updates the channel's consumed count.
*/
static void
__relay_bytes_consumed(struct rchan *rchan, u32 bytes_consumed, u32 read_offset)
{
	u32 consuming_idx;
	u32 unused;

	/* Sub-buffer the bytes came from, clamped to a valid index. */
	consuming_idx = read_offset / rchan->buf_size;

	if (consuming_idx >= rchan->n_bufs)
		consuming_idx = rchan->n_bufs - 1;
	rchan->bytes_consumed += bytes_consumed;

	unused = rchan->unused_bytes[consuming_idx];

	/* Padding at the end of a sub-buffer counts as consumed; once
	 * the whole sub-buffer is accounted for, roll the byte count
	 * over into the consumed-buffer count. */
	if (rchan->bytes_consumed + unused >= rchan->buf_size) {
		__relay_buffers_consumed(rchan, 1);
		rchan->bytes_consumed = 0;
	}
}
+
/**
 * __reader_bytes_consumed - update reader/channel consumed count
 * @reader: channel reader
 * @bytes_consumed: number of bytes to add to current count for channel
 * @read_offset: where the bytes were consumed from
 *
 * Internal - updates the reader's consumed count. If the reader's
 * resulting total is greater than the channel's, update the channel's.
*/
static void
__reader_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset)
{
	u32 consuming_idx;
	u32 unused;

	/* Sub-buffer the bytes came from, clamped to a valid index. */
	consuming_idx = read_offset / reader->rchan->buf_size;

	if (consuming_idx >= reader->rchan->n_bufs)
		consuming_idx = reader->rchan->n_bufs - 1;

	reader->bytes_consumed += bytes_consumed;

	unused = reader->rchan->unused_bytes[consuming_idx];

	/* Same rollover rule as __relay_bytes_consumed(), applied to
	 * the reader's private counters. */
	if (reader->bytes_consumed + unused >= reader->rchan->buf_size) {
		reader->bufs_consumed++;
		reader->bytes_consumed = 0;
	}

	/* Propagate to the channel only when this reader has moved
	 * past the channel-wide consumed position. */
	if ((reader->bufs_consumed > reader->rchan->bufs_consumed) ||
	    ((reader->bufs_consumed == reader->rchan->bufs_consumed) &&
	     (reader->bytes_consumed > reader->rchan->bytes_consumed)))
		__relay_bytes_consumed(reader->rchan, bytes_consumed, read_offset);
}
+
+/**
+ * relay_bytes_consumed - add to the # bytes consumed for the channel
+ * @reader: channel reader
+ * @bytes_consumed: number of bytes to add to current count for channel
+ * @read_offset: where the bytes were consumed from
+ *
+ * Adds to the channel's consumed count. bytes_consumed should be the
+ * number of bytes actually read e.g. return value of relay_read() and
+ * the read_offset should be the actual offset the bytes were read from
+ * e.g. the actual_read_offset set by relay_read(). See
+ * Documentation/filesystems/relayfs.txt for more details.
+ *
+ * NOTE: kernel clients don't need to call this function if the reader
+ * is auto-consuming or the channel is MODE_CONTINUOUS.
+ */
+void
+relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset)
+{
+ if (reader && reader->rchan)
+ __reader_bytes_consumed(reader, bytes_consumed, read_offset);
+}
+
/**
 * update_readers_consumed - apply offset change to reader
 * @rchan: the channel
 * @bufs_consumed: consumed sub-buffer count imposed on each reader
 * @bytes_consumed: consumed byte count imposed on each reader
 *
 * Apply the consumed counts to all readers open on the channel and
 * reset each reader's file position to the start of the channel.
 */
void
update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed)
{
	struct list_head *p;
	struct rchan_reader *reader;

	/* NOTE(review): reader fields are written while holding only the
	 * read lock - presumably each reader is private to one task so
	 * this is safe; confirm against other writers of these fields. */
	read_lock(&rchan->open_readers_lock);
	list_for_each(p, &rchan->open_readers) {
		reader = list_entry(p, struct rchan_reader, list);
		reader->bufs_consumed = bufs_consumed;
		reader->bytes_consumed = bytes_consumed;
		if (reader->vfs_reader)
			reader->pos.file->f_pos = 0;
		else
			reader->pos.f_pos = 0;
		/* Signals a blocked reader that its position moved. */
		reader->offset_changed = 1;
	}
	read_unlock(&rchan->open_readers_lock);
}
+
/**
 * do_read - utility function to do the actual read to user
 * @rchan: the channel
 * @buf: user buf to read into, NULL if just getting info
 * @count: bytes requested
 * @read_offset: offset into channel
 * @new_offset: new offset into channel after read
 * @actual_read_offset: read offset actually used
 *
 * Returns the number of bytes read, 0 if none, -EFAULT if the copy
 * to userspace fails.
 *
 * Reads at most up to the end of the sub-buffer containing
 * read_offset, adjusting the starting offset first when it sits on
 * the write position of a suspended channel or on the last byte of
 * a sub-buffer.
 */
static ssize_t
do_read(struct rchan *rchan, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset)
{
	u32 read_bufno, cur_bufno;
	u32 avail_offset, cur_idx, max_offset, buf_end_offset;
	u32 avail_count, buf_size;
	int unused_bytes = 0;
	size_t read_count = 0;
	u32 last_buf_byte_offset;

	*actual_read_offset = read_offset;

	buf_size = rchan->buf_size;
	if (unlikely(!buf_size)) BUG();

	read_bufno = read_offset / buf_size;
	if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG();
	unused_bytes = rchan->unused_bytes[read_bufno];

	/* Current write index; max_offset is the channel wrap point. */
	avail_offset = cur_idx = relay_get_offset(rchan, &max_offset);

	if (cur_idx == read_offset) {
		/* Reader has caught up with the writer: on a suspended
		 * (full) channel, step past the write position (wrapping
		 * if needed); otherwise there is nothing to read. */
		if (atomic_read(&rchan->suspended) == 1) {
			read_offset += 1;
			if (read_offset >= max_offset)
				read_offset = 0;
			*actual_read_offset = read_offset;
		} else {
			*new_offset = read_offset;
			return 0;
		}
	} else {
		/* Skip over the final byte of a sub-buffer unless that
		 * byte is the only unused one. */
		last_buf_byte_offset = (read_bufno + 1) * buf_size - 1;
		if (read_offset == last_buf_byte_offset) {
			if (unused_bytes != 1) {
				read_offset += 1;
				if (read_offset >= max_offset)
					read_offset = 0;
				*actual_read_offset = read_offset;
			}
		}
	}

	/* Recompute the sub-buffer after any offset adjustment above. */
	read_bufno = read_offset / buf_size;
	if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG();
	unused_bytes = rchan->unused_bytes[read_bufno];

	cur_bufno = cur_idx / buf_size;

	/* Available data ends at the used portion of this sub-buffer. */
	buf_end_offset = (read_bufno + 1) * buf_size - unused_bytes;
	if (avail_offset > buf_end_offset)
		avail_offset = buf_end_offset;
	else if (avail_offset < read_offset)
		avail_offset = buf_end_offset;
	avail_count = avail_offset - read_offset;
	read_count = avail_count >= count ? count : avail_count;

	if (read_count && buf != NULL)
		if (copy_to_user(buf, rchan->buf + read_offset, read_count))
			return -EFAULT;

	/* Read ended inside the writer's sub-buffer at or past its data
	 * end: park the new offset on the write position itself. */
	if (read_bufno == cur_bufno)
		if (read_count && (read_offset + read_count >= buf_end_offset) && (read_offset + read_count <= cur_idx)) {
			*new_offset = cur_idx;
			return read_count;
		}

	/* Otherwise advance past any trailing unused bytes, wrapping to
	 * the start of the channel at max_offset. */
	if (read_offset + read_count + unused_bytes > max_offset)
		*new_offset = 0;
	else if (read_offset + read_count >= buf_end_offset)
		*new_offset = read_offset + read_count + unused_bytes;
	else
		*new_offset = read_offset + read_count;

	return read_count;
}
+
+/**
+ * __relay_read - read bytes from channel, relative to current reader pos
+ * @reader: channel reader
+ * @buf: user buf to read into, NULL if just getting info
+ * @count: bytes requested
+ * @read_offset: offset into channel
+ * @new_offset: new offset into channel after read
+ * @actual_read_offset: read offset actually used
+ * @wait: if non-zero, wait for something to read
+ *
+ * Internal - see relay_read() for details.
+ *
+ * Returns the number of bytes read, 0 if none, negative on failure.
+ */
+static ssize_t
+__relay_read(struct rchan_reader *reader, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset, int wait)
+{
+ int err = 0;
+ size_t read_count = 0;
+ struct rchan *rchan = reader->rchan;
+
+ if (!wait && !rchan->initialized)
+ return -EAGAIN;
+
+ if (using_lockless(rchan))
+ read_offset &= idx_mask(rchan);
+
+ if (read_offset >= rchan->n_bufs * rchan->buf_size) {
+ *new_offset = 0;
+ if (!wait)
+ return -EAGAIN;
+ else
+ return -EINTR;
+ }
+
+ if (buf != NULL && wait) {
+ err = wait_event_interruptible(rchan->read_wait,
+ ((rchan->finalized == 1) ||
+ (atomic_read(&rchan->suspended) == 1) ||
+ (relay_get_offset(rchan, NULL) != read_offset)));
+
+ if (rchan->finalized)
+ return 0;
+
+ if (reader->offset_changed) {
+ reader->offset_changed = 0;
+ return -EINTR;
+ }
+
+ if (err)
+ return err;
+ }
+
+ read_count = do_read(rchan, buf, count, read_offset, new_offset, actual_read_offset);
+
+ if (read_count < 0)
+ err = read_count;
+
+ if (err)
+ return err;
+ else
+ return read_count;
+}
+
/**
 * relay_read - read bytes from channel, relative to current reader pos
 * @reader: channel reader
 * @buf: user buf to read into, NULL if just getting info
 * @count: bytes requested
 * @wait: if non-zero, wait for something to read
 * @actual_read_offset: set read offset actually used, must not be NULL
 *
 * Reads count bytes from the channel, or as much as is available within
 * the sub-buffer currently being read.  The read offset that will be
 * read from is the position contained within the reader object.  If the
 * wait flag is set, buf is non-NULL, and there is nothing available,
 * it will wait until there is.  If the wait flag is 0 and there is
 * nothing available, -EAGAIN is returned.  If buf is NULL, the value
 * returned is the number of bytes that would have been read.
 * actual_read_offset is the value that should be passed as the read
 * offset to relay_bytes_consumed, needed only if the reader is not
 * auto-consuming and the channel is MODE_NO_OVERWRITE, but in any case,
 * it must not be NULL.  See Documentation/filesystems/relayfs.txt for
 * more details.
 */
ssize_t
relay_read(struct rchan_reader *reader, char *buf, size_t count, int wait, u32 *actual_read_offset)
{
	u32 new_offset;
	u32 read_offset;
	ssize_t read_count;

	if (reader == NULL || reader->rchan == NULL)
		return -EBADF;

	if (actual_read_offset == NULL)
		return -EINVAL;

	/* VFS readers keep their position in the struct file; kernel
	 * clients use the reader's private f_pos. */
	if (reader->vfs_reader)
		read_offset = (u32)(reader->pos.file->f_pos);
	else
		read_offset = reader->pos.f_pos;
	*actual_read_offset = read_offset;

	read_count = __relay_read(reader, buf, count, read_offset,
				  &new_offset, actual_read_offset, wait);

	if (read_count < 0)
		return read_count;

	if (reader->vfs_reader)
		reader->pos.file->f_pos = new_offset;
	else
		reader->pos.f_pos = new_offset;

	/* Auto-consuming readers account for what was just read, even a
	 * zero-byte read that moved the offset (sub-buffer skip). */
	if (reader->auto_consume && ((read_count) || (new_offset != read_offset)))
		__reader_bytes_consumed(reader, read_count, *actual_read_offset);

	if (read_count == 0 && !wait)
		return -EAGAIN;

	return read_count;
}
+
/**
 * relay_bytes_avail - number of bytes available in current sub-buffer
 * @reader: channel reader
 *
 * Returns the number of bytes available relative to the reader's
 * current read position within the corresponding sub-buffer, 0 if
 * there is nothing available.  See Documentation/filesystems/relayfs.txt
 * for more details.
 */
ssize_t
relay_bytes_avail(struct rchan_reader *reader)
{
	u32 f_pos;
	u32 new_offset;
	u32 actual_read_offset;
	ssize_t bytes_read;

	if (reader == NULL || reader->rchan == NULL)
		return -EBADF;

	if (reader->vfs_reader)
		f_pos = (u32)reader->pos.file->f_pos;
	else
		f_pos = reader->pos.f_pos;
	new_offset = f_pos;

	/* Probe with a NULL buf and wait == 0: computes what a read
	 * would return without copying data or blocking. */
	bytes_read = __relay_read(reader, NULL, reader->rchan->buf_size,
				  f_pos, &new_offset, &actual_read_offset, 0);

	/* Offset moved but nothing is readable yet: report -EAGAIN;
	 * any other error is flattened to 0 ("nothing available"). */
	if ((new_offset != f_pos) &&
	    ((bytes_read == -EINTR) || (bytes_read == 0)))
		bytes_read = -EAGAIN;
	else if ((bytes_read < 0) && (bytes_read != -EAGAIN))
		bytes_read = 0;

	return bytes_read;
}
+
+/**
+ * rchan_empty - boolean, is the channel empty wrt reader?
+ * @reader: channel reader
+ *
+ * Returns 1 if the channel is empty, 0 otherwise.
+ */
+int
+rchan_empty(struct rchan_reader *reader)
+{
+ ssize_t avail_count;
+ u32 buffers_ready;
+ struct rchan *rchan = reader->rchan;
+ u32 cur_idx, curbuf_bytes;
+ int mapped;
+
+ if (atomic_read(&rchan->suspended) == 1)
+ return 0;
+
+ mapped = atomic_read(&rchan->mapped);
+
+ if (mapped && bulk_delivery(rchan)) {
+ buffers_ready = rchan->bufs_produced - rchan->bufs_consumed;
+ return buffers_ready ? 0 : 1;
+ }
+
+ if (mapped && packet_delivery(rchan)) {
+ buffers_ready = rchan->bufs_produced - rchan->bufs_consumed;
+ if (buffers_ready)
+ return 0;
+ else {
+ cur_idx = relay_get_offset(rchan, NULL);
+ curbuf_bytes = cur_idx % rchan->buf_size;
+ return curbuf_bytes == rchan->bytes_consumed ? 1 : 0;
+ }
+ }
+
+ avail_count = relay_bytes_avail(reader);
+
+ return avail_count ? 0 : 1;
+}
+
+/**
+ * rchan_full - boolean, is the channel full wrt consuming reader?
+ * @reader: channel reader
+ *
+ * Returns 1 if the channel is full, 0 otherwise.
+ */
+int
+rchan_full(struct rchan_reader *reader)
+{
+ u32 buffers_ready;
+ struct rchan *rchan = reader->rchan;
+
+ if (mode_continuous(rchan))
+ return 0;
+
+ buffers_ready = rchan->bufs_produced - rchan->bufs_consumed;
+
+ return buffers_ready > reader->rchan->n_bufs - 1 ? 1 : 0;
+}
+
+/**
+ * relay_info - get status and other information about a relay channel
+ * @rchan_id: relay channel id
+ * @rchan_info: pointer to the rchan_info struct to be filled in
+ *
+ * Fills in an rchan_info struct with channel status and attribute
+ * information. See Documentation/filesystems/relayfs.txt for details.
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+int
+relay_info(int rchan_id, struct rchan_info *rchan_info)
+{
+ int i;
+ struct rchan *rchan;
+
+ rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return -EBADF;
+
+ rchan_info->flags = rchan->flags;
+ rchan_info->buf_size = rchan->buf_size;
+ rchan_info->buf_addr = rchan->buf;
+ rchan_info->alloc_size = rchan->alloc_size;
+ rchan_info->n_bufs = rchan->n_bufs;
+ rchan_info->cur_idx = relay_get_offset(rchan, NULL);
+ rchan_info->bufs_produced = rchan->bufs_produced;
+ rchan_info->bufs_consumed = rchan->bufs_consumed;
+ rchan_info->buf_id = rchan->buf_id;
+
+ for (i = 0; i < rchan->n_bufs; i++) {
+ rchan_info->unused_bytes[i] = rchan->unused_bytes[i];
+ if (using_lockless(rchan))
+ rchan_info->buffer_complete[i] = (atomic_read(&fill_count(rchan, i)) == rchan->buf_size);
+ else
+ rchan_info->buffer_complete[i] = 0;
+ }
+
+ rchan_put(rchan);
+
+ return 0;
+}
+
+/**
+ * __add_rchan_reader - creates and adds a reader to a channel
+ * @rchan: relay channel
+ * @filp: the file associated with rchan, if applicable
+ * @auto_consume: boolean, whether reader's reads automatically consume
+ * @map_reader: boolean, whether reader's reading via a channel mapping
+ *
+ * Returns a pointer to the reader object create, NULL if unsuccessful
+ *
+ * Creates and initializes an rchan_reader object for reading the channel.
+ * If filp is non-NULL, the reader is a VFS reader, otherwise not.
+ *
+ * If the reader is a map reader, it isn't considered a VFS reader for
+ * our purposes. Also, map_readers can't be auto-consuming.
+ */
+struct rchan_reader *
+__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader)
+{
+ struct rchan_reader *reader;
+ u32 will_read;
+
+ reader = kmalloc(sizeof(struct rchan_reader), GFP_KERNEL);
+
+ if (reader) {
+ write_lock(&rchan->open_readers_lock);
+ reader->rchan = rchan;
+ if (filp) {
+ reader->vfs_reader = 1;
+ reader->pos.file = filp;
+ } else {
+ reader->vfs_reader = 0;
+ reader->pos.f_pos = 0;
+ }
+ reader->map_reader = map_reader;
+ reader->auto_consume = auto_consume;
+
+ if (!map_reader) {
+ will_read = rchan->bufs_produced % rchan->n_bufs;
+ if (!will_read && atomic_read(&rchan->suspended))
+ will_read = rchan->n_bufs;
+ reader->bufs_consumed = rchan->bufs_produced - will_read;
+ rchan->bufs_consumed = reader->bufs_consumed;
+ rchan->bytes_consumed = reader->bytes_consumed = 0;
+ reader->offset_changed = 0;
+ }
+
+ list_add(&reader->list, &rchan->open_readers);
+ write_unlock(&rchan->open_readers_lock);
+ }
+
+ return reader;
+}
+
+/**
+ * add_rchan_reader - create a reader for a channel
+ * @rchan_id: relay channel handle
+ * @auto_consume: boolean, whether reader's reads automatically consume
+ *
+ * Returns a pointer to the reader object created, NULL if unsuccessful
+ *
+ * Creates and initializes an rchan_reader object for reading the channel.
+ * This function is useful only for non-VFS readers.
+ */
+struct rchan_reader *
+add_rchan_reader(int rchan_id, int auto_consume)
+{
+ struct rchan *rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return NULL;
+
+ return __add_rchan_reader(rchan, NULL, auto_consume, 0);
+}
+
+/**
+ * add_map_reader - create a map reader for a channel
+ * @rchan_id: relay channel handle
+ *
+ * Returns a pointer to the reader object created, NULL if unsuccessful
+ *
+ * Creates and initializes an rchan_reader object for reading the channel.
+ * This function is useful only for map readers.
+ */
+struct rchan_reader *
+add_map_reader(int rchan_id)
+{
+ struct rchan *rchan = rchan_get(rchan_id);
+ if (rchan == NULL)
+ return NULL;
+
+ return __add_rchan_reader(rchan, NULL, 0, 1);
+}
+
+/**
+ * __remove_rchan_reader - destroy a channel reader
+ * @reader: channel reader
+ *
+ * Internal - removes reader from the open readers list, and frees it.
+ */
+void
+__remove_rchan_reader(struct rchan_reader *reader)
+{
+ struct list_head *p;
+ struct rchan_reader *found_reader = NULL;
+
+ write_lock(&reader->rchan->open_readers_lock);
+ list_for_each(p, &reader->rchan->open_readers) {
+ found_reader = list_entry(p, struct rchan_reader, list);
+ if (found_reader == reader) {
+ list_del(&found_reader->list);
+ break;
+ }
+ }
+ write_unlock(&reader->rchan->open_readers_lock);
+
+ if (found_reader)
+ kfree(found_reader);
+}
+
+/**
+ * remove_rchan_reader - destroy a channel reader
+ * @reader: channel reader
+ *
+ * Finds and removes the given reader from the channel. This function
+ * is useful only for non-VFS readers.
+ *
+ * Returns 0 if successful, negative otherwise.
+ */
+int
+remove_rchan_reader(struct rchan_reader *reader)
+{
+ int err = 0;
+
+ if (reader) {
+ rchan_put(reader->rchan);
+ __remove_rchan_reader(reader);
+ } else
+ err = -EINVAL;
+
+ return err;
+}
+
/**
 * remove_map_reader - destroy a map reader
 * @reader: channel reader
 *
 * Finds and removes the given map reader from the channel.  This function
 * is useful only for map readers.
 *
 * Returns 0 if successful, negative otherwise.
 */
int
remove_map_reader(struct rchan_reader *reader)
{
	/* Map readers are tracked exactly like ordinary readers. */
	return remove_rchan_reader(reader);
}
+
/* Public relay API exported for use by modules. */
EXPORT_SYMBOL(relay_open);
EXPORT_SYMBOL(relay_close);
EXPORT_SYMBOL(relay_reset);
EXPORT_SYMBOL(relay_reserve);
EXPORT_SYMBOL(relay_commit);
EXPORT_SYMBOL(relay_read);
EXPORT_SYMBOL(relay_write);
EXPORT_SYMBOL(relay_bytes_avail);
EXPORT_SYMBOL(relay_buffers_consumed);
EXPORT_SYMBOL(relay_bytes_consumed);
EXPORT_SYMBOL(relay_info);
EXPORT_SYMBOL(relay_discard_init_buf);
+
+
--- /dev/null
+#ifndef __UM_CPUFEATURE_H
+#define __UM_CPUFEATURE_H
+
+#include "asm/arch/cpufeature.h"
+
+#endif
--- /dev/null
+#ifndef __UM_LOCAL_H
+#define __UM_LOCAL_H
+
+#include "asm/arch/local.h"
+
+#endif
--- /dev/null
+#ifndef __UM_MODULE_GENERIC_H
+#define __UM_MODULE_GENERIC_H
+
+#include "asm/arch/module.h"
+
+#endif
--- /dev/null
+#ifndef _UM_SECTIONS_H
+#define _UM_SECTIONS_H
+
+/* nothing to see, move along */
+#include <asm-generic/sections.h>
+
+#endif
--- /dev/null
+/*
+ * linux/include/linux/relayfs_fs.h
+ *
+ * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp
+ * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com)
+ *
+ * RelayFS definitions and declarations
+ *
+ * Please see Documentation/filesystems/relayfs.txt for more info.
+ */
+
+#ifndef _LINUX_RELAYFS_FS_H
+#define _LINUX_RELAYFS_FS_H
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/*
+ * Tracks changes to rchan struct
+ */
+#define RELAYFS_CHANNEL_VERSION 1
+
+/*
+ * Maximum number of simultaneously open channels
+ */
+#define RELAY_MAX_CHANNELS 256
+
+/*
+ * Relay properties
+ */
+#define RELAY_MIN_BUFS 2
+#define RELAY_MIN_BUFSIZE 4096
+#define RELAY_MAX_BUFS 256
+#define RELAY_MAX_BUF_SIZE 0x1000000
+#define RELAY_MAX_TOTAL_BUF_SIZE 0x8000000
+
+/*
+ * Lockless scheme utility macros
+ */
+#define RELAY_MAX_BUFNO(bufno_bits) (1UL << (bufno_bits))
+#define RELAY_BUF_SIZE(offset_bits) (1UL << (offset_bits))
+#define RELAY_BUF_OFFSET_MASK(offset_bits) (RELAY_BUF_SIZE(offset_bits) - 1)
+#define RELAY_BUFNO_GET(index, offset_bits) ((index) >> (offset_bits))
+#define RELAY_BUF_OFFSET_GET(index, mask) ((index) & (mask))
+#define RELAY_BUF_OFFSET_CLEAR(index, mask) ((index) & ~(mask))
+
+/*
+ * Flags returned by relay_reserve()
+ */
+#define RELAY_BUFFER_SWITCH_NONE 0x0
+#define RELAY_WRITE_DISCARD_NONE 0x0
+#define RELAY_BUFFER_SWITCH 0x1
+#define RELAY_WRITE_DISCARD 0x2
+#define RELAY_WRITE_TOO_LONG 0x4
+
+/*
+ * Relay attribute flags
+ */
+#define RELAY_DELIVERY_BULK 0x1
+#define RELAY_DELIVERY_PACKET 0x2
+#define RELAY_SCHEME_LOCKLESS 0x4
+#define RELAY_SCHEME_LOCKING 0x8
+#define RELAY_SCHEME_ANY 0xC
+#define RELAY_TIMESTAMP_TSC 0x10
+#define RELAY_TIMESTAMP_GETTIMEOFDAY 0x20
+#define RELAY_TIMESTAMP_ANY 0x30
+#define RELAY_USAGE_SMP 0x40
+#define RELAY_USAGE_GLOBAL 0x80
+#define RELAY_MODE_CONTINUOUS 0x100
+#define RELAY_MODE_NO_OVERWRITE 0x200
+
+/*
+ * Flags for needs_resize() callback
+ */
+#define RELAY_RESIZE_NONE 0x0
+#define RELAY_RESIZE_EXPAND 0x1
+#define RELAY_RESIZE_SHRINK 0x2
+#define RELAY_RESIZE_REPLACE 0x4
+#define RELAY_RESIZE_REPLACED 0x8
+
+/*
+ * Values for fileop_notify() callback
+ */
+enum relay_fileop
+{
+ RELAY_FILE_OPEN,
+ RELAY_FILE_CLOSE,
+ RELAY_FILE_MAP,
+ RELAY_FILE_UNMAP
+};
+
+/*
+ * Data structure returned by relay_info()
+ */
+struct rchan_info
+{
+ u32 flags; /* relay attribute flags for channel */
+ u32 buf_size; /* channel's sub-buffer size */
+ char *buf_addr; /* address of channel start */
+ u32 alloc_size; /* total buffer size actually allocated */
+ u32 n_bufs; /* number of sub-buffers in channel */
+ u32 cur_idx; /* current write index into channel */
+ u32 bufs_produced; /* current count of sub-buffers produced */
+ u32 bufs_consumed; /* current count of sub-buffers consumed */
+ u32 buf_id; /* buf_id of current sub-buffer */
+ int buffer_complete[RELAY_MAX_BUFS]; /* boolean per sub-buffer */
+ int unused_bytes[RELAY_MAX_BUFS]; /* count per sub-buffer */
+};
+
+/*
+ * Relay channel client callbacks
+ */
+struct rchan_callbacks
+{
+ /*
+ * buffer_start - called at the beginning of a new sub-buffer
+ * @rchan_id: the channel id
+ * @current_write_pos: position in sub-buffer client should write to
+ * @buffer_id: the id of the new sub-buffer
+ * @start_time: the timestamp associated with the start of sub-buffer
+ * @start_tsc: the TSC associated with the timestamp, if using_tsc
+ * @using_tsc: boolean, indicates whether start_tsc is valid
+ *
+ * Return value should be the number of bytes written by the client.
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ int (*buffer_start) (int rchan_id,
+ char *current_write_pos,
+ u32 buffer_id,
+ struct timeval start_time,
+ u32 start_tsc,
+ int using_tsc);
+
+ /*
+ * buffer_end - called at the end of a sub-buffer
+ * @rchan_id: the channel id
+ * @current_write_pos: position in sub-buffer of end of data
+ * @end_of_buffer: the position of the end of the sub-buffer
+ * @end_time: the timestamp associated with the end of the sub-buffer
+ * @end_tsc: the TSC associated with the end_time, if using_tsc
+ * @using_tsc: boolean, indicates whether end_tsc is valid
+ *
+ * Return value should be the number of bytes written by the client.
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ int (*buffer_end) (int rchan_id,
+ char *current_write_pos,
+ char *end_of_buffer,
+ struct timeval end_time,
+ u32 end_tsc,
+ int using_tsc);
+
+ /*
+ * deliver - called when data is ready for the client
+ * @rchan_id: the channel id
+ * @from: the start of the delivered data
+ * @len: the length of the delivered data
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ void (*deliver) (int rchan_id, char *from, u32 len);
+
+ /*
+ * user_deliver - called when data has been written from userspace
+ * @rchan_id: the channel id
+ * @from: the start of the delivered data
+ * @len: the length of the delivered data
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ void (*user_deliver) (int rchan_id, char *from, u32 len);
+
+ /*
+ * needs_resize - called when a resizing event occurs
+ * @rchan_id: the channel id
+ * @resize_type: the type of resizing event
+ * @suggested_buf_size: the suggested new sub-buffer size
 * @suggested_n_bufs: the suggested new number of sub-buffers
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ void (*needs_resize)(int rchan_id,
+ int resize_type,
+ u32 suggested_buf_size,
+ u32 suggested_n_bufs);
+
+ /*
+ * fileop_notify - called on open/close/mmap/munmap of a relayfs file
+ * @rchan_id: the channel id
+ * @filp: relayfs file pointer
+ * @fileop: which file operation is in progress
+ *
+ * The return value can direct the outcome of the operation.
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ int (*fileop_notify)(int rchan_id,
+ struct file *filp,
+ enum relay_fileop fileop);
+
+ /*
+ * ioctl - called in ioctl context from userspace
+ * @rchan_id: the channel id
+ * @cmd: ioctl cmd
+ * @arg: ioctl cmd arg
+ *
+ * The return value is returned as the value from the ioctl call.
+ *
+ * See Documentation/filesystems/relayfs.txt for details.
+ */
+ int (*ioctl) (int rchan_id, unsigned int cmd, unsigned long arg);
+};
+
+/*
+ * Lockless scheme-specific data
+ */
+struct lockless_rchan
+{
+ u8 bufno_bits; /* # bits used for sub-buffer id */
+ u8 offset_bits; /* # bits used for offset within sub-buffer */
+ u32 index; /* current index = sub-buffer id and offset */
+ u32 offset_mask; /* used to obtain offset portion of index */
	u32 index_mask;		/* used to mask off unused bits of the index */
+ atomic_t fill_count[RELAY_MAX_BUFS]; /* fill count per sub-buffer */
+};
+
+/*
+ * Locking scheme-specific data
+ */
+struct locking_rchan
+{
+ char *write_buf; /* start of write sub-buffer */
+ char *write_buf_end; /* end of write sub-buffer */
+ char *current_write_pos; /* current write pointer */
+ char *write_limit; /* takes reserves into account */
+ char *in_progress_event_pos; /* used for interrupted writes */
+ u16 in_progress_event_size; /* used for interrupted writes */
+ char *interrupted_pos; /* used for interrupted writes */
+ u16 interrupting_size; /* used for interrupted writes */
+ spinlock_t lock; /* channel lock for locking scheme */
+};
+
+struct relay_ops;
+
+/*
+ * Offset resizing data structure
+ */
+struct resize_offset
+{
+ u32 ge;
+ u32 le;
+ int delta;
+};
+
+/*
+ * Relay channel data structure
+ */
struct rchan
{
	u32 version;			/* the version of this struct */
	char *buf;			/* the channel buffer */
	union
	{
		struct lockless_rchan lockless;
		struct locking_rchan locking;
	} scheme;			/* scheme-specific channel data */

	int id;				/* the channel id */
	struct rchan_callbacks *callbacks;	/* client callbacks */
	u32 flags;			/* relay channel attributes */
	u32 buf_id;			/* current sub-buffer id */
	u32 buf_idx;			/* current sub-buffer index */

	atomic_t mapped;		/* map count */

	atomic_t suspended;		/* channel suspended i.e full? */
	int half_switch;		/* used internally for suspend */

	struct timeval buf_start_time;	/* current sub-buffer start time */
	u32 buf_start_tsc;		/* current sub-buffer start TSC */

	u32 buf_size;			/* sub-buffer size */
	u32 alloc_size;			/* total buffer size allocated */
	u32 n_bufs;			/* number of sub-buffers */

	u32 bufs_produced;		/* count of sub-buffers produced */
	u32 bufs_consumed;		/* count of sub-buffers consumed */
	u32 bytes_consumed;		/* bytes consumed in cur sub-buffer */

	int initialized;		/* first buffer initialized? */
	int finalized;			/* channel finalized? */

	u32 start_reserve;		/* reserve at start of sub-buffers */
	u32 end_reserve;		/* reserve at end of sub-buffers */
	u32 rchan_start_reserve;	/* additional reserve sub-buffer 0 */

	struct dentry *dentry;		/* channel file dentry */

	wait_queue_head_t read_wait;	/* VFS read wait queue */
	wait_queue_head_t write_wait;	/* VFS write wait queue */
	struct work_struct wake_readers; /* reader wake-up work struct */
	struct work_struct wake_writers; /* writer wake-up work struct */
	atomic_t refcount;		/* channel refcount */

	struct relay_ops *relay_ops;	/* scheme-specific channel ops */

	int unused_bytes[RELAY_MAX_BUFS]; /* unused count per sub-buffer */

	struct semaphore resize_sem;	/* serializes alloc/replace */
	struct work_struct work;	/* resize allocation work struct */

	struct list_head open_readers;	/* open readers for this channel */
	rwlock_t open_readers_lock;	/* protection for open_readers list */

	char *init_buf;			/* init channel buffer, if non-NULL */

	u32 resize_min;			/* minimum resized total buffer size */
	u32 resize_max;			/* maximum resized total buffer size */
	char *resize_buf;		/* for autosize alloc/free */
	u32 resize_buf_size;		/* resized sub-buffer size */
	u32 resize_n_bufs;		/* resized number of sub-buffers */
	u32 resize_alloc_size;		/* resized actual total size */
	int resizing;			/* is resizing in progress? */
	int resize_err;			/* resizing err code */
	int resize_failures;		/* number of resize failures */
	int replace_buffer;		/* is the alloced buffer ready? */
	struct resize_offset resize_offset; /* offset change */
	struct timer_list shrink_timer;	/* timer used for shrinking */
	int resize_order;		/* size of last resize */
	u32 expand_buf_id;		/* subbuf id expand will occur at */

	struct page **buf_page_array;	/* array of current buffer pages */
	int buf_page_count;		/* number of current buffer pages */
	struct page **expand_page_array;/* new pages to be inserted */
	int expand_page_count;		/* number of new pages */
	struct page **shrink_page_array;/* old pages to be freed */
	int shrink_page_count;		/* number of old pages */
	struct page **resize_page_array;/* will become current pages */
	int resize_page_count;		/* number of resize pages */
	struct page **old_buf_page_array; /* hold for freeing */
} ____cacheline_aligned;
+
+/*
+ * Relay channel reader struct
+ */
struct rchan_reader
{
	struct list_head list;		/* for list inclusion */
	struct rchan *rchan;		/* the channel we're reading from */
	int auto_consume;		/* does this reader auto-consume? */
	u32 bufs_consumed;		/* buffers this reader has consumed */
	u32 bytes_consumed;		/* bytes consumed in cur sub-buffer */
	int offset_changed;		/* have channel offsets changed? */
	int vfs_reader;			/* are we a VFS reader? */
	int map_reader;			/* are we an mmap reader? */

	/* presumably 'file' is used when vfs_reader is set and 'f_pos'
	 * otherwise — confirm against the add_*_reader() callers */
	union
	{
		struct file *file;
		u32 f_pos;
	} pos;				/* current read offset */
};
+
+/*
+ * These help make union member access less tedious
+ */
/* generic and lockless-scheme accessors */
#define channel_buffer(rchan)			((rchan)->buf)
#define idx(rchan)				((rchan)->scheme.lockless.index)
#define bufno_bits(rchan)			((rchan)->scheme.lockless.bufno_bits)
#define offset_bits(rchan)			((rchan)->scheme.lockless.offset_bits)
#define offset_mask(rchan)			((rchan)->scheme.lockless.offset_mask)
#define idx_mask(rchan)				((rchan)->scheme.lockless.index_mask)
/* channel-flag predicates, each evaluating to 0 or 1 */
#define bulk_delivery(rchan)		(((rchan)->flags & RELAY_DELIVERY_BULK) ? 1 : 0)
#define packet_delivery(rchan)	(((rchan)->flags & RELAY_DELIVERY_PACKET) ? 1 : 0)
#define using_lockless(rchan)		(((rchan)->flags & RELAY_SCHEME_LOCKLESS) ? 1 : 0)
#define using_locking(rchan)		(((rchan)->flags & RELAY_SCHEME_LOCKING) ? 1 : 0)
#define using_tsc(rchan)		(((rchan)->flags & RELAY_TIMESTAMP_TSC) ? 1 : 0)
#define using_gettimeofday(rchan)	(((rchan)->flags & RELAY_TIMESTAMP_GETTIMEOFDAY) ? 1 : 0)
#define usage_smp(rchan)		(((rchan)->flags & RELAY_USAGE_SMP) ? 1 : 0)
#define usage_global(rchan)		(((rchan)->flags & RELAY_USAGE_GLOBAL) ? 1 : 0)
#define mode_continuous(rchan)	(((rchan)->flags & RELAY_MODE_CONTINUOUS) ? 1 : 0)
#define fill_count(rchan, i)		((rchan)->scheme.lockless.fill_count[(i)])
/* locking-scheme accessors */
#define write_buf(rchan)		((rchan)->scheme.locking.write_buf)
#define read_buf(rchan)			((rchan)->scheme.locking.read_buf)
#define write_buf_end(rchan)		((rchan)->scheme.locking.write_buf_end)
#define read_buf_end(rchan)		((rchan)->scheme.locking.read_buf_end)
#define cur_write_pos(rchan)		((rchan)->scheme.locking.current_write_pos)
#define read_limit(rchan)		((rchan)->scheme.locking.read_limit)
#define write_limit(rchan)		((rchan)->scheme.locking.write_limit)
#define in_progress_event_pos(rchan)	((rchan)->scheme.locking.in_progress_event_pos)
#define in_progress_event_size(rchan)	((rchan)->scheme.locking.in_progress_event_size)
#define interrupted_pos(rchan)		((rchan)->scheme.locking.interrupted_pos)
#define interrupting_size(rchan)	((rchan)->scheme.locking.interrupting_size)
#define channel_lock(rchan)		((rchan)->scheme.locking.lock)
+
+
+/**
+ * calc_time_delta - utility function for time delta calculation
+ * @now: current time
+ * @start: start time
+ *
+ * Returns the time delta produced by subtracting start time from now.
+ */
+static inline u32
+calc_time_delta(struct timeval *now,
+ struct timeval *start)
+{
+ return (now->tv_sec - start->tv_sec) * 1000000
+ + (now->tv_usec - start->tv_usec);
+}
+
+/**
+ * recalc_time_delta - utility function for time delta recalculation
+ * @now: current time
+ * @new_delta: the new time delta calculated
+ * @cpu: the associated CPU id
+ */
+static inline void
+recalc_time_delta(struct timeval *now,
+ u32 *new_delta,
+ struct rchan *rchan)
+{
+ if (using_tsc(rchan) == 0)
+ *new_delta = calc_time_delta(now, &rchan->buf_start_time);
+}
+
+/**
+ * have_cmpxchg - does this architecture have a cmpxchg?
+ *
+ * Returns 1 if this architecture has a cmpxchg useable by
+ * the lockless scheme, 0 otherwise.
+ */
+static inline int
+have_cmpxchg(void)
+{
+#if defined(__HAVE_ARCH_CMPXCHG)
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+/**
+ * relay_write_direct - write data directly into destination buffer
+ */
+#define relay_write_direct(DEST, SRC, SIZE) \
+do\
+{\
+ memcpy(DEST, SRC, SIZE);\
+ DEST += SIZE;\
+} while (0);
+
+/**
+ * relay_lock_channel - lock the relay channel if applicable
+ *
+ * This macro only affects the locking scheme. If the locking scheme
+ * is in use and the channel usage is SMP, does a local_irq_save. If the
+ * locking sheme is in use and the channel usage is GLOBAL, uses
+ * spin_lock_irqsave. FLAGS is initialized to 0 since we know that
+ * it is being initialized prior to use and we avoid the compiler warning.
+ */
+#define relay_lock_channel(RCHAN, FLAGS) \
+do\
+{\
+ FLAGS = 0;\
+ if (using_locking(RCHAN)) {\
+ if (usage_smp(RCHAN)) {\
+ local_irq_save(FLAGS); \
+ } else {\
+ spin_lock_irqsave(&(RCHAN)->scheme.locking.lock, FLAGS); \
+ }\
+ }\
+} while (0);
+
+/**
+ * relay_unlock_channel - unlock the relay channel if applicable
+ *
+ * This macro only affects the locking scheme. See relay_lock_channel.
+ */
+#define relay_unlock_channel(RCHAN, FLAGS) \
+do\
+{\
+ if (using_locking(RCHAN)) {\
+ if (usage_smp(RCHAN)) {\
+ local_irq_restore(FLAGS); \
+ } else {\
+ spin_unlock_irqrestore(&(RCHAN)->scheme.locking.lock, FLAGS); \
+ }\
+ }\
+} while (0);
+
+/*
+ * Define cmpxchg if we don't have it
+ */
+#ifndef __HAVE_ARCH_CMPXCHG
+#define cmpxchg(p,o,n) 0
+#endif
+
+/*
+ * High-level relayfs kernel API, fs/relayfs/relay.c
+ */
+extern int
+relay_open(const char *chanpath,
+ int bufsize,
+ int nbufs,
+ u32 flags,
+ struct rchan_callbacks *channel_callbacks,
+ u32 start_reserve,
+ u32 end_reserve,
+ u32 rchan_start_reserve,
+ u32 resize_min,
+ u32 resize_max,
+ int mode,
+ char *init_buf,
+ u32 init_buf_size);
+
+extern int
+relay_close(int rchan_id);
+
+extern int
+relay_write(int rchan_id,
+ const void *data_ptr,
+ size_t count,
+ int td_offset,
+ void **wrote_pos);
+
+extern ssize_t
+relay_read(struct rchan_reader *reader,
+ char *buf,
+ size_t count,
+ int wait,
+ u32 *actual_read_offset);
+
+extern int
+relay_discard_init_buf(int rchan_id);
+
+extern struct rchan_reader *
+add_rchan_reader(int rchan_id, int autoconsume);
+
+extern int
+remove_rchan_reader(struct rchan_reader *reader);
+
+extern struct rchan_reader *
+add_map_reader(int rchan_id);
+
+extern int
+remove_map_reader(struct rchan_reader *reader);
+
+extern int
+relay_info(int rchan_id, struct rchan_info *rchan_info);
+
+extern void
+relay_buffers_consumed(struct rchan_reader *reader, u32 buffers_consumed);
+
+extern void
+relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset);
+
+extern ssize_t
+relay_bytes_avail(struct rchan_reader *reader);
+
+extern int
+relay_realloc_buffer(int rchan_id, u32 new_nbufs, int in_background);
+
+extern int
+relay_replace_buffer(int rchan_id);
+
+extern int
+rchan_empty(struct rchan_reader *reader);
+
+extern int
+rchan_full(struct rchan_reader *reader);
+
+extern void
+update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed);
+
+extern int
+__relay_mmap_buffer(struct rchan *rchan, struct vm_area_struct *vma);
+
+extern struct rchan_reader *
+__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader);
+
+extern void
+__remove_rchan_reader(struct rchan_reader *reader);
+
+/*
+ * Low-level relayfs kernel API, fs/relayfs/relay.c
+ */
+extern struct rchan *
+rchan_get(int rchan_id);
+
+extern void
+rchan_put(struct rchan *rchan);
+
+extern char *
+relay_reserve(struct rchan *rchan,
+ u32 data_len,
+ struct timeval *time_stamp,
+ u32 *time_delta,
+ int *errcode,
+ int *interrupting);
+
+extern void
+relay_commit(struct rchan *rchan,
+ char *from,
+ u32 len,
+ int reserve_code,
+ int interrupting);
+
+extern u32
+relay_get_offset(struct rchan *rchan, u32 *max_offset);
+
+extern int
+relay_reset(int rchan_id);
+
+/*
+ * VFS functions, fs/relayfs/inode.c
+ */
+extern int
+relayfs_create_dir(const char *name,
+ struct dentry *parent,
+ struct dentry **dentry);
+
+extern int
+relayfs_create_file(const char * name,
+ struct dentry *parent,
+ struct dentry **dentry,
+ void * data,
+ int mode);
+
+extern int
+relayfs_remove_file(struct dentry *dentry);
+
+extern int
+reset_index(struct rchan *rchan, u32 old_index);
+
+
+/*
+ * klog functions, fs/relayfs/klog.c
+ */
+extern int
+create_klog_channel(void);
+
+extern int
+remove_klog_channel(void);
+
+/*
+ * Scheme-specific channel ops
+ */
struct relay_ops
{
	/* reserve a slot_len-byte slot in the channel buffer; returns the
	 * write position (error/interrupt details via the out params) */
	char * (*reserve) (struct rchan *rchan,
			   u32 slot_len,
			   struct timeval *time_stamp,
			   u32 *tsc,
			   int * errcode,
			   int * interrupting);

	/* finish a write started at 'from' of 'len' bytes */
	void (*commit) (struct rchan *rchan,
			char *from,
			u32 len,
			int deliver,
			int interrupting);

	/* current write offset (and optionally the maximum offset) */
	u32 (*get_offset) (struct rchan *rchan,
			   u32 *max_offset);

	void (*resume) (struct rchan *rchan);
	void (*finalize) (struct rchan *rchan);
	void (*reset) (struct rchan *rchan,
		       int init);
	int (*reset_index) (struct rchan *rchan,
			    u32 old_index);
};
+
+#endif /* _LINUX_RELAYFS_FS_H */
+
+
+
+
+
--- /dev/null
+#ifndef _VX_VS_BASE_H
+#define _VX_VS_BASE_H
+
+#include "vserver/context.h"
+
+// #define VX_DEBUG
+
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+#define vx_task_xid(t) ((t)->xid)
+
+#define vx_current_xid() vx_task_xid(current)
+
+#define vx_check(c,m) __vx_check(vx_current_xid(),c,m)
+
+#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1)
+
+
+/*
+ * check current context for ADMIN/WATCH and
+ * optionally agains supplied argument
+ */
+static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode)
+{
+ if (mode & VX_ARG_MASK) {
+ if ((mode & VX_IDENT) &&
+ (id == cid))
+ return 1;
+ }
+ if (mode & VX_ATR_MASK) {
+ if ((mode & VX_DYNAMIC) &&
+ (id >= MIN_D_CONTEXT) &&
+ (id <= MAX_S_CONTEXT))
+ return 1;
+ if ((mode & VX_STATIC) &&
+ (id > 1) && (id < MIN_D_CONTEXT))
+ return 1;
+ }
+ return (((mode & VX_ADMIN) && (cid == 0)) ||
+ ((mode & VX_WATCH) && (cid == 1)));
+}
+
+
/* mask v with m and xor against f: nonzero iff the masked bits differ
 * from the expected pattern f */
#define __vx_flags(v,m,f)	(((v) & (m)) ^ (f))

#define	__vx_task_flags(t,m,f) \
	(((t) && ((t)->vx_info)) ? \
		__vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0)

#define vx_current_flags() \
	((current->vx_info) ? current->vx_info->vx_flags : 0)

#define vx_flags(m,f)	__vx_flags(vx_current_flags(),(m),(f))


#define vx_current_ccaps() \
	((current->vx_info) ? current->vx_info->vx_ccaps : 0)

#define vx_ccaps(c)	(vx_current_ccaps() & (c))

/* during context setup (VXF_STATE_SETUP) fall back to the global
 * capability bounding set */
#define vx_current_bcaps() \
	(((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \
	current->vx_info->vx_bcaps : cap_bset)


/* generic flag merging */

/* replace the m-masked bits of v with those of f */
#define vx_mask_flags(v,f,m)	(((v) & ~(m)) | ((f) & (m)))

/* clear the m-masked bits of v that are not also set in f */
#define vx_mask_mask(v,f,m)	(((v) & ~(m)) | ((v) & (f) & (m)))
+#endif
--- /dev/null
+#ifndef _VX_VS_CONTEXT_H
+#define _VX_VS_CONTEXT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+
+#undef vxdprintk
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+
+extern int proc_pid_vx_info(struct task_struct *, char *);
+
+
+#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
+
+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
+ const char *_file, int _line)
+{
+ if (!vxi)
+ return NULL;
+ vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n",
+ vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
+ _file, _line);
+ atomic_inc(&vxi->vx_usecnt);
+ return vxi;
+}
+
+
/* schedule RCU destruction of the vx_info; argument parenthesized and
 * no trailing semicolon, so the macro behaves like a function call */
#define free_vx_info(i) \
	call_rcu(&(i)->vx_rcu, rcu_free_vx_info, (i))
+
#define put_vx_info(i)	__put_vx_info(i,__FILE__,__LINE__)

/* drop a usage reference (NULL-safe); queues RCU destruction when the
 * last usage reference goes away */
static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
{
	if (!vxi)
		return;
	vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n",
		vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
		_file, _line);
	if (atomic_dec_and_test(&vxi->vx_usecnt))
		free_vx_info(vxi);
}
+
#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__)

/* install vxi into *vxp (which must be NULL), taking both a refcnt and
 * a usecnt reference; no-op when vxi is NULL */
static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
	const char *_file, int _line)
{
	BUG_ON(*vxp);
	if (!vxi)
		return;
	vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
		vxi, vxi?vxi->vx_id:0,
		vxi?atomic_read(&vxi->vx_usecnt):0,
		vxi?atomic_read(&vxi->vx_refcnt):0,
		_file, _line);
	atomic_inc(&vxi->vx_refcnt);
	*vxp = __get_vx_info(vxi, _file, _line);
}
+
#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__)

/* clear *vxp and release the references taken by set_vx_info: the
 * refcnt reference (unhashing the context on the last one) and the
 * usecnt reference */
static inline void __clr_vx_info(struct vx_info **vxp,
	const char *_file, int _line)
{
	struct vx_info *vxo = *vxp;

	if (!vxo)
		return;
	vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
		vxo, vxo?vxo->vx_id:0,
		vxo?atomic_read(&vxo->vx_usecnt):0,
		vxo?atomic_read(&vxo->vx_refcnt):0,
		_file, _line);
	*vxp = NULL;
	wmb();	/* make the cleared pointer visible before dropping refs */
	/* the 'vxo &&' test is redundant after the early return above */
	if (vxo && atomic_dec_and_test(&vxo->vx_refcnt))
		unhash_vx_info(vxo);
	__put_vx_info(vxo, _file, _line);
}
+
+
#define task_get_vx_info(i)	__task_get_vx_info(i,__FILE__,__LINE__)

/* fetch a usage-referenced vx_info from task p, under task_lock so the
 * pointer cannot change while we take the reference */
static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p,
	const char *_file, int _line)
{
	struct vx_info *vxi;

	task_lock(p);
	vxi = __get_vx_info(p->vx_info, _file, _line);
	task_unlock(p);
	return vxi;
}
+
+
#define vx_verify_info(p,i)	\
	__vx_verify_info((p)->vx_info,i,__FILE__,__LINE__)

/* sanity check: log an error when the two vx_info pointers differ */
static __inline__ void __vx_verify_info(
	struct vx_info *vxa, struct vx_info *vxb,
	const char *_file, int _line)
{
	if (vxa == vxb)
		return;
	printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n",
		vxa, vxb, _file, _line);
}
+
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_VS_CVIRT_H
+#define _VX_VS_CVIRT_H
+
+
+// #define VX_DEBUG
+
+#include "vserver/cvirt.h"
+#include "vs_base.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
+/* utsname virtualization */
+
+static inline struct new_utsname *vx_new_utsname(void)
+{
+ if (current->vx_info)
+ return ¤t->vx_info->cvirt.utsname;
+ return &system_utsname;
+}
+
+#define vx_new_uts(x) ((vx_new_utsname())->x)
+
+
+/* pid faking stuff */
+
+
#define vx_map_tgid(v,p) \
	__vx_map_tgid((v), (p), __FILE__, __LINE__)

/* pid faking: inside a context with VXF_INFO_INIT, report the context's
 * init process (vx_initpid) as pid 1; all other pids pass through */
static inline int __vx_map_tgid(struct vx_info *vxi, int pid,
	char *file, int line)
{
	if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
		vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n",
			vxi, vxi->vx_flags, pid,
			(pid == vxi->vx_initpid)?1:pid,
			file, line);
		if (pid == vxi->vx_initpid)
			return 1;
	}
	return pid;
}
+
#define vx_rmap_tgid(v,p) \
	__vx_rmap_tgid((v), (p), __FILE__, __LINE__)

/* reverse of vx_map_tgid: map pid 1 back to the context's vx_initpid
 * (only when one is set); note the debug print does not include the
 * 'vx_initpid != 0' condition the actual mapping applies */
static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid,
	char *file, int line)
{
	if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) {
		vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n",
			vxi, vxi->vx_flags, pid,
			(pid == 1)?vxi->vx_initpid:pid,
			file, line);
		if ((pid == 1) && vxi->vx_initpid)
			return vxi->vx_initpid;
	}
	return pid;
}
+
+#undef vxdprintk
+#define vxdprintk(x...)
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_VS_DLIMIT_H
+#define _VX_VS_DLIMIT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/dlimit.h"
+
+#if defined(VX_DEBUG)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
#define get_dl_info(i)	__get_dl_info(i,__FILE__,__LINE__)

/* take a usage reference on the disk-limit info (NULL-safe) */
static inline struct dl_info *__get_dl_info(struct dl_info *dli,
	const char *_file, int _line)
{
	if (!dli)
		return NULL;
	vxdprintk("get_dl_info(%p[#%d.%d])\t%s:%d\n",
		dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0,
		_file, _line);
	atomic_inc(&dli->dl_usecnt);
	return dli;
}
+
+
/* schedule RCU destruction of the dl_info; argument parenthesized and
 * no trailing semicolon, so the macro behaves like a function call */
#define free_dl_info(i) \
	call_rcu(&(i)->dl_rcu, rcu_free_dl_info, (i))
+
#define put_dl_info(i)	__put_dl_info(i,__FILE__,__LINE__)

/* drop a usage reference (NULL-safe); queues RCU destruction when the
 * last usage reference goes away */
static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _line)
{
	if (!dli)
		return;
	vxdprintk("put_dl_info(%p[#%d.%d])\t%s:%d\n",
		dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0,
		_file, _line);
	if (atomic_dec_and_test(&dli->dl_usecnt))
		free_dl_info(dli);
}
+
+
+extern int vx_debug_dlimit;
+
+#define __dlimit_char(d) ((d)?'*':' ')
+
/* charge nr bytes of disk space to xid's limit on sb.
 * Returns nonzero when the charge would exceed dl_space_total (nothing
 * is charged in that case), 0 on success or when no limit exists. */
static inline int __dl_alloc_space(struct super_block *sb,
	xid_t xid, dlsize_t nr, const char *file, int line)
{
	struct dl_info *dli = NULL;
	int ret = 0;

	if (nr == 0)
		goto out;
	dli = locate_dl_info(sb, xid);
	if (!dli)
		goto out;

	spin_lock(&dli->dl_lock);
	ret = (dli->dl_space_used + nr > dli->dl_space_total);
	if (!ret)
		dli->dl_space_used += nr;
	spin_unlock(&dli->dl_lock);
	put_dl_info(dli);
out:
	if (vx_debug_dlimit)
		printk("ALLOC (%p,#%d)%c %lld bytes (%d)@ %s:%d\n",
			sb, xid, __dlimit_char(dli), nr, ret, file, line);
	return ret;
}
+
/* uncharge nr bytes of disk space from xid's limit on sb; no-op when
 * nr is 0 or no limit info exists */
static inline void __dl_free_space(struct super_block *sb,
	xid_t xid, dlsize_t nr, const char *file, int line)
{
	struct dl_info *dli = NULL;

	if (nr == 0)
		goto out;
	dli = locate_dl_info(sb, xid);
	if (!dli)
		goto out;

	spin_lock(&dli->dl_lock);
	dli->dl_space_used -= nr;
	spin_unlock(&dli->dl_lock);
	put_dl_info(dli);
out:
	if (vx_debug_dlimit)
		printk("FREE (%p,#%d)%c %lld bytes @ %s:%d\n",
			sb, xid, __dlimit_char(dli), nr, file, line);
}
+
/* charge one inode to xid's limit on sb.
 * Returns nonzero when the inode limit is already reached (nothing is
 * charged), 0 on success or when no limit info exists. */
static inline int __dl_alloc_inode(struct super_block *sb,
	xid_t xid, const char *file, int line)
{
	struct dl_info *dli;
	int ret = 0;

	dli = locate_dl_info(sb, xid);
	if (!dli)
		goto out;

	spin_lock(&dli->dl_lock);
	ret = (dli->dl_inodes_used >= dli->dl_inodes_total);
	if (!ret)
		dli->dl_inodes_used++;
	spin_unlock(&dli->dl_lock);
	put_dl_info(dli);
out:
	if (vx_debug_dlimit)
		printk("ALLOC (%p,#%d)%c inode (%d)@ %s:%d\n",
			sb, xid, __dlimit_char(dli), ret, file, line);
	return ret;
}
+
/* uncharge one inode from xid's limit on sb; no-op when no limit
 * info exists */
static inline void __dl_free_inode(struct super_block *sb,
	xid_t xid, const char *file, int line)
{
	struct dl_info *dli;

	dli = locate_dl_info(sb, xid);
	if (!dli)
		goto out;

	spin_lock(&dli->dl_lock);
	dli->dl_inodes_used--;
	spin_unlock(&dli->dl_lock);
	put_dl_info(dli);
out:
	if (vx_debug_dlimit)
		printk("FREE (%p,#%d)%c inode @ %s:%d\n",
			sb, xid, __dlimit_char(dli), file, line);
}
+
+
+
/* block-based wrappers: convert a block count to bytes via the
 * superblock's block size before charging/uncharging */
#define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \
	__dl_alloc_space(sb, xid, \
		((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \
		__FILE__, __LINE__ )

#define DLIMIT_FREE_BLOCK(sb, xid, nr) \
	__dl_free_space(sb, xid, \
		((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \
		__FILE__, __LINE__ )

#define DLIMIT_ALLOC_INODE(sb, xid) \
	__dl_alloc_inode(sb, xid, __FILE__, __LINE__ )

#define DLIMIT_FREE_INODE(sb, xid) \
	__dl_free_inode(sb, xid, __FILE__, __LINE__ )


/* deliberately expands to nothing (placeholder) */
#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb)
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_VS_LIMIT_H
+#define _VX_VS_LIMIT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+
+
+/* file limits */
+
+#define VX_DEBUG_ACC_FILE 0
+#define VX_DEBUG_ACC_OPENFD 0
+
+#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+
#define vx_acc_cres(v,d,r) \
	__vx_acc_cres((v), (r), (d), __FILE__, __LINE__)

/* step the context's current-usage counter for rlimit res: up when
 * dir > 0, down otherwise (NULL-safe).  The debug printk reads the
 * counter before the update. */
static inline void __vx_acc_cres(struct vx_info *vxi,
	int res, int dir, char *file, int line)
{
	if (vxi) {
		if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
			(res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
			printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n",
				(vxi?vxi->vx_id:-1), res,
				(vxi?atomic_read(&vxi->limit.rcur[res]):0),
				(dir>0)?"++":"--", file, line);
		if (dir > 0)
			atomic_inc(&vxi->limit.rcur[res]);
		else
			atomic_dec(&vxi->limit.rcur[res]);
	}
}
+
+#define vx_nproc_inc(p) vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC)
+#define vx_nproc_dec(p) vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC)
+
+#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE)
+#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE)
+
+#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD)
+#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD)
+
+/*
+#define vx_openfd_inc(f) do { \
+ vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD); \
+ printk("vx_openfd_inc: %d[#%d] in %s:%d\n", \
+ f, current->xid, __FILE__, __LINE__); \
+ } while (0)
+
+#define vx_openfd_dec(f) do { \
+ vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD); \
+ printk("vx_openfd_dec: %d[#%d] in %s:%d\n", \
+ f, current->xid, __FILE__, __LINE__); \
+ } while (0)
+*/
+
#define vx_cres_avail(v,n,r) \
	__vx_cres_avail((v), (r), (n), __FILE__, __LINE__)

/* check whether num more units of resource res fit under the context's
 * limit.  Returns 1 when available (also when there is no context or
 * the limit is RLIM_INFINITY), 0 otherwise, recording a limit hit.
 * As a side effect, tracks the high-water mark in rmax. */
static inline int __vx_cres_avail(struct vx_info *vxi,
	int res, int num, char *file, int line)
{
	unsigned long value;

	if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) ||
		(res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD))
		printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
			(vxi?vxi->vx_id:-1), res,
			(vxi?vxi->limit.rlim[res]:1),
			(vxi?atomic_read(&vxi->limit.rcur[res]):0),
			num, file, line);
	if (!vxi)
		return 1;
	value = atomic_read(&vxi->limit.rcur[res]);
	if (value > vxi->limit.rmax[res])
		vxi->limit.rmax[res] = value;
	if (vxi->limit.rlim[res] == RLIM_INFINITY)
		return 1;
	if (value + num <= vxi->limit.rlim[res])
		return 1;
	atomic_inc(&vxi->limit.lhit[res]);
	return 0;
}
+
+#define vx_nproc_avail(n) \
+ vx_cres_avail(current->vx_info, (n), RLIMIT_NPROC)
+
+#define vx_files_avail(n) \
+ vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE)
+
+#define vx_openfd_avail(n) \
+ vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD)
+
+
+/* socket limits */
+
+#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK)
+#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK)
+
+#define vx_sock_avail(n) \
+ vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK)
+
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_VS_MEMORY_H
+#define _VX_VS_MEMORY_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/limit.h"
+
+
+#define VX_DEBUG_ACC_RSS 0
+#define VX_DEBUG_ACC_VM 0
+#define VX_DEBUG_ACC_VML 0
+
+#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML)
+#define vxdprintk(x...) printk("vxd: " x)
+#else
+#define vxdprintk(x...)
+#endif
+
+#define vx_acc_page(m, d, v, r) \
+ __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__)
+
+static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi,
+ int res, int dir, char *file, int line)
+{
+ if (v) {
+ if (dir > 0)
+ ++(*v);
+ else
+ --(*v);
+ }
+ if (vxi) {
+ if (dir > 0)
+ atomic_inc(&vxi->limit.rcur[res]);
+ else
+ atomic_dec(&vxi->limit.rcur[res]);
+ }
+}
+
+
+#define vx_acc_pages(m, p, v, r) \
+ __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__)
+
+static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi,
+ int res, int pages, char *file, int line)
+{
+ if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
+ (res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
+ (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
+ vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n",
+ (vxi?vxi->vx_id:-1), res,
+ (vxi?atomic_read(&vxi->limit.res[res]):0),
+ pages, file, line);
+ if (pages == 0)
+ return;
+ if (v)
+ *v += pages;
+ if (vxi)
+ atomic_add(pages, &vxi->limit.rcur[res]);
+}
+
+
+
#define vx_acc_vmpage(m,d)	vx_acc_page(m, d, total_vm, RLIMIT_AS)
#define vx_acc_vmlpage(m,d)	vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK)
#define vx_acc_rsspage(m,d)	vx_acc_page(m, d, rss, RLIMIT_RSS)

#define vx_acc_vmpages(m,p)	vx_acc_pages(m, p, total_vm, RLIMIT_AS)
#define vx_acc_vmlpages(m,p)	vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK)
#define vx_acc_rsspages(m,p)	vx_acc_pages(m, p, rss, RLIMIT_RSS)

#define vx_pages_add(s,r,p)	__vx_acc_pages(0, s, r, p, __FILE__, __LINE__)
/* was __vx_pages_add(), which is defined nowhere; the helper behind
 * vx_pages_add() is __vx_acc_pages(), so delegate through that */
#define vx_pages_sub(s,r,p)	vx_pages_add(s, r, -(p))

#define vx_vmpages_inc(m)	vx_acc_vmpage(m, 1)
#define vx_vmpages_dec(m)	vx_acc_vmpage(m,-1)
#define vx_vmpages_add(m,p)	vx_acc_vmpages(m, p)
#define vx_vmpages_sub(m,p)	vx_acc_vmpages(m,-(p))

#define vx_vmlocked_inc(m)	vx_acc_vmlpage(m, 1)
#define vx_vmlocked_dec(m)	vx_acc_vmlpage(m,-1)
#define vx_vmlocked_add(m,p)	vx_acc_vmlpages(m, p)
#define vx_vmlocked_sub(m,p)	vx_acc_vmlpages(m,-(p))

#define vx_rsspages_inc(m)	vx_acc_rsspage(m, 1)
#define vx_rsspages_dec(m)	vx_acc_rsspage(m,-1)
#define vx_rsspages_add(m,p)	vx_acc_rsspages(m, p)
#define vx_rsspages_sub(m,p)	vx_acc_rsspages(m,-(p))
+
+
+
#define vx_pages_avail(m, p, r) \
	__vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__)

/* check whether 'pages' more pages fit under the context's limit for
 * rlimit res; same contract as __vx_cres_avail() above: returns 1 when
 * available (no context / unlimited included), 0 on a limit hit, and
 * tracks the high-water mark in rmax */
static inline int __vx_pages_avail(struct vx_info *vxi,
	int res, int pages, char *file, int line)
{
	unsigned long value;

	if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) ||
		(res == RLIMIT_AS && VX_DEBUG_ACC_VM) ||
		(res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML))
		printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n",
			(vxi?vxi->vx_id:-1), res,
			(vxi?vxi->limit.rlim[res]:1),
			(vxi?atomic_read(&vxi->limit.rcur[res]):0),
			pages, file, line);
	if (!vxi)
		return 1;
	value = atomic_read(&vxi->limit.rcur[res]);
	if (value > vxi->limit.rmax[res])
		vxi->limit.rmax[res] = value;
	if (vxi->limit.rlim[res] == RLIM_INFINITY)
		return 1;
	if (value + pages <= vxi->limit.rlim[res])
		return 1;
	atomic_inc(&vxi->limit.lhit[res]);
	return 0;
}
+
+#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS)
+#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK)
+#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS)
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _NX_VS_NETWORK_H
+#define _NX_VS_NETWORK_H
+
+
+// #define NX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/network.h"
+
+#if defined(NX_DEBUG)
+#define nxdprintk(x...) printk("nxd: " x)
+#else
+#define nxdprintk(x...)
+#endif
+
+
+extern int proc_pid_nx_info(struct task_struct *, char *);
+
+
#define get_nx_info(i)	__get_nx_info(i,__FILE__,__LINE__)

/* take a usage reference on the network context info (NULL-safe) */
static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
	const char *_file, int _line)
{
	if (!nxi)
		return NULL;
	nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n",
		nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
		_file, _line);
	atomic_inc(&nxi->nx_usecnt);
	return nxi;
}
+
+
/* schedule RCU destruction of the nx_info; argument parenthesized and
 * no trailing semicolon, so the macro behaves like a function call */
#define free_nx_info(nxi) \
	call_rcu(&(nxi)->nx_rcu, rcu_free_nx_info, (nxi))
+
#define put_nx_info(i)	__put_nx_info(i,__FILE__,__LINE__)

/* drop a usage reference (NULL-safe); queues RCU destruction when the
 * last usage reference goes away */
static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
{
	if (!nxi)
		return;
	nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n",
		nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
		_file, _line);
	if (atomic_dec_and_test(&nxi->nx_usecnt))
		free_nx_info(nxi);
}
+
+
#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__)

/* install nxi into *nxp (which must be NULL), taking both a refcnt and
 * a usecnt reference; no-op when nxi is NULL */
static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
	const char *_file, int _line)
{
	BUG_ON(*nxp);
	if (!nxi)
		return;
	nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
		nxi, nxi?nxi->nx_id:0,
		nxi?atomic_read(&nxi->nx_usecnt):0,
		nxi?atomic_read(&nxi->nx_refcnt):0,
		_file, _line);
	atomic_inc(&nxi->nx_refcnt);
	*nxp = __get_nx_info(nxi, _file, _line);
}
+
#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__)

/* clear *nxp and release the references taken by set_nx_info: the
 * refcnt reference (unhashing on the last one) and the usecnt
 * reference */
static inline void __clr_nx_info(struct nx_info **nxp,
	const char *_file, int _line)
{
	struct nx_info *nxo = *nxp;

	if (!nxo)
		return;
	nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
		nxo, nxo?nxo->nx_id:0,
		nxo?atomic_read(&nxo->nx_usecnt):0,
		nxo?atomic_read(&nxo->nx_refcnt):0,
		_file, _line);
	*nxp = NULL;
	wmb();	/* make the cleared pointer visible before dropping refs */
	/* the 'nxo &&' test is redundant after the early return above */
	if (nxo && atomic_dec_and_test(&nxo->nx_refcnt))
		unhash_nx_info(nxo);
	__put_nx_info(nxo, _file, _line);
}
+
+
#define task_get_nx_info(i)	__task_get_nx_info(i,__FILE__,__LINE__)

/* fetch a usage-referenced nx_info from task p, under task_lock so the
 * pointer cannot change while we take the reference */
static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p,
	const char *_file, int _line)
{
	struct nx_info *nxi;

	task_lock(p);
	nxi = __get_nx_info(p->nx_info, _file, _line);
	task_unlock(p);
	return nxi;
}
+
#define nx_verify_info(p,i)	\
	__nx_verify_info((p)->nx_info,i,__FILE__,__LINE__)

/* sanity check: log an error when the two nx_info pointers differ */
static __inline__ void __nx_verify_info(
	struct nx_info *ipa, struct nx_info *ipb,
	const char *_file, int _line)
{
	if (ipa == ipb)
		return;
	printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n",
		ipa, ipb, _file, _line);
}
+
+
+#define nx_task_nid(t) ((t)->nid)
+
+#define nx_current_nid() nx_task_nid(current)
+
+#define nx_check(c,m) __nx_check(nx_current_nid(),c,m)
+
+#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1)
+
+#undef nxdprintk
+#define nxdprintk(x...)
+
+
+#define __nx_flags(v,m,f) (((v) & (m)) ^ (f))
+
+#define __nx_task_flags(t,m,f) \
+ (((t) && ((t)->nx_info)) ? \
+ __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0)
+
+#define nx_current_flags() \
+ ((current->nx_info) ? current->nx_info->nx_flags : 0)
+
+#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f))
+
+
+#define nx_current_ncaps() \
+ ((current->nx_info) ? current->nx_info->nx_ncaps : 0)
+
+#define nx_ncaps(c) (nx_current_ncaps() & (c))
+
+
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_VS_LIMIT_H
+#define _VX_VS_LIMIT_H
+
+
+// #define VX_DEBUG
+
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "vserver/context.h"
+#include "vserver/network.h"
+
+
+/* socket accounting */
+
+#include <linux/socket.h>
+
/*
 * Map an address family onto one of the per-context socket accounting
 * rows: families 1 and 2 map to themselves, PF_INET6 to row 3, and
 * everything else to the catch-all row 4.
 */
static inline int vx_sock_type(int family)
{
	switch (family) {
	case 1:
	case 2:
		return family;
	case PF_INET6:
		return 3;
	default:
		return 4;
	}
}
+
#define vx_acc_sock(v,f,p,s) \
	__vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__)

/* account one socket event of 'size' bytes in the context's cacct
 * bucket [family type][pos] — callers below use pos 0 for recv,
 * 1 for send, 2 for failures.  NULL-safe. */
static inline void __vx_acc_sock(struct vx_info *vxi,
	int family, int pos, int size, char *file, int line)
{
	if (vxi) {
		int type = vx_sock_type(family);

		atomic_inc(&vxi->cacct.sock[type][pos].count);
		atomic_add(size, &vxi->cacct.sock[type][pos].total);
	}
}
+
/* per-direction accounting wrappers: pos 0 = recv, 1 = send, 2 = fail */
#define vx_sock_recv(sk,s) \
	vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s))
#define vx_sock_send(sk,s) \
	vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s))
#define vx_sock_fail(sk,s) \
	vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s))


/* detach a socket from any vserver/network context */
#define sock_vx_init(s) do {		\
	(s)->sk_xid = 0;		\
	(s)->sk_vx_info = NULL;		\
	} while (0)

#define sock_nx_init(s) do {		\
	(s)->sk_nid = 0;		\
	(s)->sk_nx_info = NULL;		\
	} while (0)
+
+
+#else
+#warning duplicate inclusion
+#endif
--- /dev/null
+#ifndef _VX_DLIMIT_H
+#define _VX_DLIMIT_H
+
+#include "switch.h"
+#include <linux/spinlock.h>
+
+/* inode vserver commands */
+
+#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0)
+#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0)
+
+#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0)
+#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0)
+
+
+/* argument block for VCMD_add_dlimit / VCMD_rem_dlimit:
+ * names the filesystem whose limit entry is created or removed */
+struct vcmd_ctx_dlimit_base_v0 {
+	const char __user *name;
+	uint32_t flags;
+};
+
+/* argument block for VCMD_set_dlimit / VCMD_get_dlimit */
+struct vcmd_ctx_dlimit_v0 {
+	const char __user *name;
+	uint32_t space_used;	/* used space in kbytes */
+	uint32_t space_total;	/* maximum space in kbytes */
+	uint32_t inodes_used;	/* used inodes */
+	uint32_t inodes_total;	/* maximum inodes */
+	uint32_t reserved;	/* reserved for root in % */
+	uint32_t flags;
+};
+
+/* sentinel values; note that the vcmd fields above are 32 bit, so
+ * users pass the truncated forms 0xffffffff (INFINITY) and
+ * 0xfffffffe (KEEP) — see the (uint32_t) casts in dlimit.c */
+#define CDLIM_UNSET	(0ULL)
+#define CDLIM_INFINITY	(~0ULL)
+#define CDLIM_KEEP	(~1ULL)
+
+
+#ifdef __KERNEL__
+
+struct super_block;
+
+/* per-(superblock, context) disk limit accounting entry; entries are
+ * kept in a global hash and freed via RCU (see dlimit.c) */
+struct dl_info {
+	struct hlist_node dl_hlist;	/* linked list of contexts */
+	struct rcu_head dl_rcu;		/* the rcu head */
+	xid_t dl_xid;			/* context id */
+	atomic_t dl_usecnt;		/* usage count */
+	atomic_t dl_refcnt;		/* reference count */
+
+	struct super_block *dl_sb;	/* associated superblock */
+
+//	struct rw_semaphore dl_sem;	/* protect the values */
+	spinlock_t dl_lock;		/* protect the values */
+
+	uint64_t dl_space_used;		/* used space in bytes */
+	uint64_t dl_space_total;	/* maximum space in bytes */
+	uint32_t dl_inodes_used;	/* used inodes */
+	uint32_t dl_inodes_total;	/* maximum inodes */
+
+	unsigned int dl_nrlmult;	/* non root limit mult */
+};
+
+extern void rcu_free_dl_info(void *);
+extern void unhash_dl_info(struct dl_info *);
+
+extern struct dl_info *locate_dl_info(struct super_block *, xid_t);
+
+
+struct kstatfs;
+
+extern void vx_vsi_statfs(struct super_block *, struct kstatfs *);
+
+
+extern int vc_add_dlimit(uint32_t, void __user *);
+extern int vc_rem_dlimit(uint32_t, void __user *);
+
+extern int vc_set_dlimit(uint32_t, void __user *);
+extern int vc_get_dlimit(uint32_t, void __user *);
+
+
+typedef uint64_t dlsize_t;
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _VX_DLIMIT_H */
--- /dev/null
+/*
+ * linux/kernel/vserver/dlimit.c
+ *
+ * Virtual Server: Context Disk Limits
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 initial version
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/namespace.h>
+#include <linux/namei.h>
+#include <linux/statfs.h>
+#include <linux/vserver/switch.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+#include <linux/vs_dlimit.h>
+
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+/* __alloc_dl_info()
+
+ * allocate an initialized dl_info struct
+ * doesn't make it visible (hash) */
+
+/* allocate and zero a dl_info for (sb, xid); returns NULL on OOM and
+ * does not make the entry visible (caller hashes it) */
+static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid)
+{
+	struct dl_info *new = NULL;
+
+	vxdprintk("alloc_dl_info(%p,%d)\n", sb, xid);
+
+	/* would this benefit from a slab cache? */
+	new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
+	if (!new)
+		return NULL;	/* was 'return 0' — this is a pointer */
+
+	memset (new, 0, sizeof(struct dl_info));
+	new->dl_xid = xid;
+	new->dl_sb = sb;
+	INIT_RCU_HEAD(&new->dl_rcu);
+	INIT_HLIST_NODE(&new->dl_hlist);
+	spin_lock_init(&new->dl_lock);
+	/* both counters start at zero; callers take references */
+	atomic_set(&new->dl_refcnt, 0);
+	atomic_set(&new->dl_usecnt, 0);
+
+	/* rest of init goes here */
+
+	vxdprintk("alloc_dl_info(%p,%d) = %p\n", sb, xid, new);
+	return new;
+}
+
+/* __dealloc_dl_info()
+
+ * final disposal of dl_info */
+
+/* final disposal of a dl_info; must only be called once both
+ * counters have dropped to zero */
+static void __dealloc_dl_info(struct dl_info *dli)
+{
+	vxdprintk("dealloc_dl_info(%p)\n", dli);
+
+	/* poison the fields so a use-after-free is noticed quickly */
+	dli->dl_hlist.next = LIST_POISON1;
+	dli->dl_xid = -1;
+	dli->dl_sb = NULL;	/* was '= 0' — this is a pointer */
+
+	BUG_ON(atomic_read(&dli->dl_usecnt));
+	BUG_ON(atomic_read(&dli->dl_refcnt));
+
+	kfree(dli);
+}
+
+
+/* hash table for dl_info hash */
+
+#define DL_HASH_SIZE 13
+
+struct hlist_head dl_info_hash[DL_HASH_SIZE];
+
+static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;
+
+
+/* fold the superblock pointer and the context id into a hash bucket;
+ * cast the pointer via unsigned long so no bits are silently dropped
+ * (and no pointer-truncation warning is raised) on 64 bit arches */
+static inline unsigned int __hashval(struct super_block *sb, xid_t xid)
+{
+	return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE);
+}
+
+
+
+/* __hash_dl_info()
+
+ * add the dli to the global hash table
+ * requires the hash_lock to be held */
+
+static inline void __hash_dl_info(struct dl_info *dli)
+{
+	struct hlist_head *head;
+
+	vxdprintk("__hash_dl_info: %p[#%d]\n", dli, dli->dl_xid);
+	/* the hash holds its own reference on the entry
+	 * (NOTE(review): get_dl_info() is defined elsewhere — confirm) */
+	get_dl_info(dli);
+	head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)];
+	hlist_add_head_rcu(&dli->dl_hlist, head);
+}
+
+/* __unhash_dl_info()
+
+ * remove the dli from the global hash table
+ * requires the hash_lock to be held */
+
+static inline void __unhash_dl_info(struct dl_info *dli)
+{
+	vxdprintk("__unhash_dl_info: %p[#%d]\n", dli, dli->dl_xid);
+	hlist_del_rcu(&dli->dl_hlist);
+	/* drop the reference taken by __hash_dl_info() */
+	put_dl_info(dli);
+}
+
+
+/* RCU-safe hlist traversal: the barrier orders the dependent load of
+ * each node behind the load of its predecessor's next pointer.
+ * NOTE(review): looks like a local copy of the kernel's own
+ * hlist_for_each_rcu — confirm whether it can be dropped. */
+#define hlist_for_each_rcu(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
+		pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
+
+
+/* __lookup_dl_info()
+
+ * requires the rcu_read_lock()
+ * doesn't increment the dl_refcnt */
+
+/* find the dl_info for (sb, xid) in the hash, NULL when absent;
+ * caller must hold rcu_read_lock(); no reference is taken */
+static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid)
+{
+	struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)];
+	struct hlist_node *pos;
+
+	hlist_for_each_rcu(pos, head) {
+		struct dl_info *dli =
+			hlist_entry(pos, struct dl_info, dl_hlist);
+
+		/* both keys must match: one superblock can carry
+		 * entries for several contexts */
+		if (dli->dl_xid == xid && dli->dl_sb == sb) {
+			return dli;
+		}
+	}
+	return NULL;
+}
+
+
+/* look up the dl_info for (sb, xid) and return it with a reference
+ * held, or NULL when no entry exists.
+ * NOTE(review): relies on get_dl_info() tolerating a NULL argument
+ * and taking a reference — defined elsewhere, confirm. */
+struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid)
+{
+	struct dl_info *dli;
+
+	rcu_read_lock();
+	dli = get_dl_info(__lookup_dl_info(sb, xid));
+	rcu_read_unlock();
+	return dli;
+}
+
+/* RCU callback: dispose of a dl_info after the grace period, unless
+ * it is unexpectedly still in use */
+void rcu_free_dl_info(void *obj)
+{
+	struct dl_info *dli = obj;
+	int usecnt, refcnt;
+
+	BUG_ON(!dli);
+
+	usecnt = atomic_read(&dli->dl_usecnt);
+	BUG_ON(usecnt < 0);
+
+	refcnt = atomic_read(&dli->dl_refcnt);
+	BUG_ON(refcnt < 0);
+
+	if (!usecnt)
+		__dealloc_dl_info(dli);
+	else
+		/* leaking the entry is preferred over freeing a live
+		 * one; log with an explicit level (was a bare printk) */
+		printk(KERN_ERR "!!! rcu didn't free\n");
+}
+
+
+
+
+/* VCMD_add_dlimit: create and hash a dl_info for context id on the
+ * filesystem containing the path named by vc_data.name.
+ * Returns 0, -ENOSYS (not admin context), -EFAULT, -EINVAL (bad
+ * path), -ENOMEM, or -EEXIST (entry already present). */
+int vc_add_dlimit(uint32_t id, void __user *data)
+{
+	struct nameidata nd;
+	struct vcmd_ctx_dlimit_base_v0 vc_data;
+	int ret;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = user_path_walk_link(vc_data.name, &nd);
+	if (!ret) {
+		struct super_block *sb;
+		struct dl_info *dli;
+
+		ret = -EINVAL;
+		if (!nd.dentry->d_inode)
+			goto out_release;
+		if (!(sb = nd.dentry->d_inode->i_sb))
+			goto out_release;
+
+		/* the allocation can fail; previously the NULL result
+		 * was hashed unchecked, which would oops on the next
+		 * lookup of this superblock */
+		ret = -ENOMEM;
+		dli = __alloc_dl_info(sb, id);
+		if (!dli)
+			goto out_release;
+
+		spin_lock(&dl_info_hash_lock);
+
+		ret = -EEXIST;
+		if (__lookup_dl_info(sb, id))
+			goto out_unlock;
+		__hash_dl_info(dli);
+		dli = NULL;	/* ownership moved to the hash */
+		ret = 0;
+
+	out_unlock:
+		spin_unlock(&dl_info_hash_lock);
+		if (dli)
+			__dealloc_dl_info(dli);
+	out_release:
+		path_release(&nd);
+	}
+	return ret;
+}
+
+
+/* VCMD_rem_dlimit: unhash the dl_info for context id on the
+ * filesystem containing the path named by vc_data.name.
+ * Returns 0, -ENOSYS, -EFAULT, -EINVAL (bad path) or -ESRCH
+ * (no such entry). Final disposal happens via RCU once all
+ * references are gone. */
+int vc_rem_dlimit(uint32_t id, void __user *data)
+{
+	struct nameidata nd;
+	struct vcmd_ctx_dlimit_base_v0 vc_data;
+	int ret;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = user_path_walk_link(vc_data.name, &nd);
+	if (!ret) {
+		struct super_block *sb;
+		struct dl_info *dli;
+
+		ret = -EINVAL;
+		if (!nd.dentry->d_inode)
+			goto out_release;
+		if (!(sb = nd.dentry->d_inode->i_sb))
+			goto out_release;
+
+		/* lookup and unhash under the same lock so the entry
+		 * cannot be removed twice */
+		spin_lock(&dl_info_hash_lock);
+		dli = __lookup_dl_info(sb, id);
+
+		ret = -ESRCH;
+		if (!dli)
+			goto out_unlock;
+
+		__unhash_dl_info(dli);
+		ret = 0;
+
+	out_unlock:
+		spin_unlock(&dl_info_hash_lock);
+	out_release:
+		path_release(&nd);
+	}
+	return ret;
+}
+
+
+/* VCMD_set_dlimit: update the limits of an existing dl_info.
+ * Fields holding the 32 bit truncation of CDLIM_KEEP (0xfffffffe)
+ * are left unchanged; space values are passed in kbytes and stored
+ * in bytes. Returns 0, -ENOSYS, -EFAULT, -EINVAL or -ESRCH. */
+int vc_set_dlimit(uint32_t id, void __user *data)
+{
+	struct nameidata nd;
+	struct vcmd_ctx_dlimit_v0 vc_data;
+	int ret;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = user_path_walk_link(vc_data.name, &nd);
+	if (!ret) {
+		struct super_block *sb;
+		struct dl_info *dli;
+
+		ret = -EINVAL;
+		if (!nd.dentry->d_inode)
+			goto out_release;
+		if (!(sb = nd.dentry->d_inode->i_sb))
+			goto out_release;
+		/* basic plausibility checks on the new limits */
+		if (vc_data.reserved > 100 ||
+			vc_data.inodes_used > vc_data.inodes_total ||
+			vc_data.space_used > vc_data.space_total)
+			goto out_release;
+
+		ret = -ESRCH;
+		dli = locate_dl_info(sb, id);
+		if (!dli)
+			goto out_release;
+
+		spin_lock(&dli->dl_lock);
+
+		if (vc_data.inodes_used != (uint32_t)CDLIM_KEEP)
+			dli->dl_inodes_used = vc_data.inodes_used;
+		if (vc_data.inodes_total != (uint32_t)CDLIM_KEEP)
+			dli->dl_inodes_total = vc_data.inodes_total;
+		if (vc_data.space_used != (uint32_t)CDLIM_KEEP) {
+			/* kbytes -> bytes */
+			dli->dl_space_used = vc_data.space_used;
+			dli->dl_space_used <<= 10;
+		}
+		if (vc_data.space_total == (uint32_t)CDLIM_INFINITY)
+			dli->dl_space_total = (uint64_t)CDLIM_INFINITY;
+		else if (vc_data.space_total != (uint32_t)CDLIM_KEEP) {
+			/* kbytes -> bytes */
+			dli->dl_space_total = vc_data.space_total;
+			dli->dl_space_total <<= 10;
+		}
+		/* store the non-root multiplier as a 10 bit fixed
+		 * point fraction of (100 - reserved)% */
+		if (vc_data.reserved != (uint32_t)CDLIM_KEEP)
+			dli->dl_nrlmult = (1 << 10) * (100 - vc_data.reserved) / 100;
+
+		spin_unlock(&dli->dl_lock);
+
+		put_dl_info(dli);
+		ret = 0;
+
+	out_release:
+		path_release(&nd);
+	}
+	return ret;
+}
+
+/* VCMD_get_dlimit: copy the current limit values of the dl_info for
+ * context id back to userspace (space values converted bytes ->
+ * kbytes). Returns 0, -ENOSYS, -EFAULT, -EINVAL or -ESRCH.
+ * The input-range validation copy-pasted from vc_set_dlimit() has
+ * been dropped: it rejected requests based on fields this function
+ * only writes, so callers passing an uninitialized struct failed. */
+int vc_get_dlimit(uint32_t id, void __user *data)
+{
+	struct nameidata nd;
+	struct vcmd_ctx_dlimit_v0 vc_data;
+	int ret;
+
+	if (!vx_check(0, VX_ADMIN))
+		return -ENOSYS;
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = user_path_walk_link(vc_data.name, &nd);
+	if (!ret) {
+		struct super_block *sb;
+		struct dl_info *dli;
+
+		ret = -EINVAL;
+		if (!nd.dentry->d_inode)
+			goto out_release;
+		if (!(sb = nd.dentry->d_inode->i_sb))
+			goto out_release;
+
+		ret = -ESRCH;
+		dli = locate_dl_info(sb, id);
+		if (!dli)
+			goto out_release;
+
+		spin_lock(&dli->dl_lock);
+		vc_data.inodes_used = dli->dl_inodes_used;
+		vc_data.inodes_total = dli->dl_inodes_total;
+		vc_data.space_used = dli->dl_space_used >> 10;
+		if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
+			vc_data.space_total = (uint32_t)CDLIM_INFINITY;
+		else
+			vc_data.space_total = dli->dl_space_total >> 10;
+
+		/* undo the fixed point multiplier, rounding to nearest */
+		vc_data.reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
+		spin_unlock(&dli->dl_lock);
+
+		put_dl_info(dli);
+		ret = -EFAULT;
+		if (copy_to_user(data, &vc_data, sizeof(vc_data)))
+			goto out_release;
+
+		ret = 0;
+	out_release:
+		path_release(&nd);
+	}
+	return ret;
+}
+
+
+/* clamp the statfs results in buf to the disk limits of the current
+ * context on superblock sb; a no-op when no limit entry exists or a
+ * value is set to CDLIM_INFINITY */
+void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+	struct dl_info *dli;
+	__u64 blimit, bfree, bavail;
+	__u32 ifree;
+
+	dli = locate_dl_info(sb, current->xid);
+	if (!dli)
+		return;
+
+	spin_lock(&dli->dl_lock);
+	if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY)
+		goto no_ilim;
+
+	/* reduce max inodes available to limit */
+	if (buf->f_files > dli->dl_inodes_total)
+		buf->f_files = dli->dl_inodes_total;
+
+	ifree = dli->dl_inodes_total - dli->dl_inodes_used;
+	/* reduce free inodes to min */
+	if (ifree < buf->f_ffree)
+		buf->f_ffree = ifree;
+
+no_ilim:
+	if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
+		goto no_blim;
+
+	/* convert the byte limit into filesystem blocks */
+	blimit = dli->dl_space_total >> sb->s_blocksize_bits;
+
+	if (dli->dl_space_total < dli->dl_space_used)
+		bfree = 0;
+	else
+		bfree = (dli->dl_space_total - dli->dl_space_used)
+			>> sb->s_blocksize_bits;
+
+	/* space available to non-root: total scaled by the 10 bit
+	 * fixed point multiplier dl_nrlmult, minus what is used */
+	bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
+	if (bavail < dli->dl_space_used)
+		bavail = 0;
+	else
+		bavail = (bavail - dli->dl_space_used)
+			>> sb->s_blocksize_bits;
+
+	/* reduce max space available to limit */
+	if (buf->f_blocks > blimit)
+		buf->f_blocks = blimit;
+
+	/* reduce free space to min */
+	if (bfree < buf->f_bfree)
+		buf->f_bfree = bfree;
+
+	/* reduce avail space to min */
+	if (bavail < buf->f_bavail)
+		buf->f_bavail = bavail;
+
+no_blim:
+	spin_unlock(&dli->dl_lock);
+	put_dl_info(dli);
+
+	return;
+}
+
--- /dev/null
+/*
+ * linux/kernel/vserver/helper.c
+ *
+ * Virtual Context Support
+ *
+ * Copyright (C) 2004 Herbert Pötzl
+ *
+ * V0.01 basic helper
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/kmod.h>
+#include <linux/vserver.h>
+#include <linux/vs_base.h>
+#include <linux/vs_context.h>
+
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+
+char vshelper_path[255] = "/sbin/vshelper";
+
+
+/*
+ * vshelper path is set via /proc/sys
+ * invoked by vserver sys_reboot(), with
+ * the following arguments
+ *
+ * argv [0] = vshelper_path;
+ * argv [1] = action: "restart", "halt", "poweroff", ...
+ * argv [2] = context identifier
+ * argv [3] = additional argument (restart2)
+ *
+ * envp [*] = type-specific parameters
+ */
+
+/* invoke the vshelper userspace tool on behalf of a context calling
+ * sys_reboot(); argv/envp layout is described in the comment above.
+ * Returns 0 on success, -EFAULT on a bad RESTART2 argument, -EPERM
+ * when the helper could not be started. */
+long vs_reboot(unsigned int cmd, void * arg)
+{
+	char id_buf[8], cmd_buf[32];
+	char uid_buf[32], pid_buf[32];
+	char buffer[256];
+	long len;
+
+	char *argv[] = {vshelper_path, NULL, id_buf, NULL, NULL};
+	char *envp[] = {"HOME=/", "TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			uid_buf, pid_buf, cmd_buf, NULL};
+
+	snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+
+	snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+	snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
+	snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
+
+	switch (cmd) {
+	case LINUX_REBOOT_CMD_RESTART:
+		argv[1] = "restart";
+		break;
+
+	case LINUX_REBOOT_CMD_HALT:
+		argv[1] = "halt";
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+		argv[1] = "poweroff";
+		break;
+
+	case LINUX_REBOOT_CMD_SW_SUSPEND:
+		argv[1] = "swsusp";
+		break;
+
+	case LINUX_REBOOT_CMD_RESTART2:
+		/* strncpy_from_user() does not NUL terminate the
+		 * destination when the source string fills the whole
+		 * count, so terminate explicitly before handing the
+		 * buffer to the helper */
+		len = strncpy_from_user(&buffer[0], (char *)arg,
+			sizeof(buffer) - 1);
+		if (len < 0)
+			return -EFAULT;
+		buffer[len] = '\0';
+		argv[3] = buffer;
+		/* fallthrough — RESTART2 is reported as "restart2" */
+	default:
+		argv[1] = "restart2";
+		break;
+	}
+
+	/* maybe we should wait ? */
+	if (call_usermodehelper(*argv, argv, envp, 0)) {
+		printk(	KERN_WARNING
+			"vs_reboot(): failed to exec (%s %s %s %s)\n",
+			vshelper_path, argv[1], argv[2], argv[3]);
+		return -EPERM;
+	}
+	return 0;
+}
+