From: Planet-Lab Support Date: Tue, 13 Jul 2004 17:57:18 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create branch 'vserver'. X-Git-Tag: vserver-2_6_7-vs1_9_1_12~2 X-Git-Url: http://git.onelab.eu/?p=linux-2.6.git;a=commitdiff_plain;h=c449269f45c2cdf53af08c8d0af37472f66539d9 This commit was manufactured by cvs2svn to create branch 'vserver'. --- diff --git a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h new file mode 100644 index 000000000..d875d0435 --- /dev/null +++ b/arch/um/drivers/cow.h @@ -0,0 +1,41 @@ +#ifndef __COW_H__ +#define __COW_H__ + +#include + +#if __BYTE_ORDER == __BIG_ENDIAN +# define ntohll(x) (x) +# define htonll(x) (x) +#elif __BYTE_ORDER == __LITTLE_ENDIAN +# define ntohll(x) bswap_64(x) +# define htonll(x) bswap_64(x) +#else +#error "__BYTE_ORDER not defined" +#endif + +extern int init_cow_file(int fd, char *cow_file, char *backing_file, + int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out); + +extern int file_reader(__u64 offset, char *buf, int len, void *arg); +extern int read_cow_header(int (*reader)(__u64, char *, int, void *), + void *arg, __u32 *version_out, + char **backing_file_out, time_t *mtime_out, + __u64 *size_out, int *sectorsize_out, + __u32 *align_out, int *bitmap_offset_out); + +extern int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size); + +extern void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out); + +#endif + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c new file mode 100644 index 000000000..014c2c853 --- /dev/null +++ b/arch/um/drivers/cow_user.c @@ -0,0 +1,375 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "os.h" + +#include "cow.h" +#include "cow_sys.h" + +#define PATH_LEN_V1 256 + +struct cow_header_v1 { + int magic; + int version; + char backing_file[PATH_LEN_V1]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +#define PATH_LEN_V2 MAXPATHLEN + +struct cow_header_v2 { + unsigned long magic; + unsigned long version; + char backing_file[PATH_LEN_V2]; + time_t mtime; + __u64 size; + int sectorsize; +}; + +/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in + * case other systems have different values for MAXPATHLEN + */ +#define PATH_LEN_V3 4096 + +/* Changes from V2 - + * PATH_LEN_V3 as described above + * Explicitly specify field bit lengths for systems with different + * lengths for the usual C types. Not sure whether char or + * time_t should be changed, this can be changed later without + * breaking compatibility + * Add alignment field so that different alignments can be used for the + * bitmap and data + * Add cow_format field to allow for the possibility of different ways + * of specifying the COW blocks. For now, the only value is 0, + * for the traditional COW bitmap. + * Move the backing_file field to the end of the header. This allows + * for the possibility of expanding it into the padding required + * by the bitmap alignment. + * The bitmap and data portions of the file will be aligned as specified + * by the alignment field. 
This is to allow COW files to be + * put on devices with restrictions on access alignments, such as + * /dev/raw, with a 512 byte alignment restriction. This also + * allows the data to be more aligned more strictly than on + * sector boundaries. This is needed for ubd-mmap, which needs + * the data to be page aligned. + * Fixed (finally!) the rounding bug + */ + +struct cow_header_v3 { + __u32 magic; + __u32 version; + time_t mtime; + __u64 size; + __u32 sectorsize; + __u32 alignment; + __u32 cow_format; + char backing_file[PATH_LEN_V3]; +}; + +/* COW format definitions - for now, we have only the usual COW bitmap */ +#define COW_BITMAP 0 + +union cow_header { + struct cow_header_v1 v1; + struct cow_header_v2 v2; + struct cow_header_v3 v3; +}; + +#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +#define COW_VERSION 3 + +#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) +#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) + +void cow_sizes(int version, __u64 size, int sectorsize, int align, + int bitmap_offset, unsigned long *bitmap_len_out, + int *data_offset_out) +{ + if(version < 3){ + *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = (*data_offset_out + sectorsize - 1) / + sectorsize; + *data_offset_out *= sectorsize; + } + else { + *bitmap_len_out = DIV_ROUND(size, sectorsize); + *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); + + *data_offset_out = bitmap_offset + *bitmap_len_out; + *data_offset_out = ROUND_UP(*data_offset_out, align); + } +} + +static int absolutize(char *to, int size, char *from) +{ + char save_cwd[256], *slash; + int remaining; + + if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { + cow_printf("absolutize : unable to get cwd - errno = %d\n", + errno); + return(-1); + } + slash = strrchr(from, '/'); + if(slash != NULL){ + *slash = '\0'; + if(chdir(from)){ + *slash = '/'; + cow_printf("absolutize : Can't cd to '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + *slash = '/'; + if(getcwd(to, size) == NULL){ + cow_printf("absolutize : unable to get cwd of '%s' - " + "errno = %d\n", from, errno); + return(-1); + } + remaining = size - strlen(to); + if(strlen(slash) + 1 > remaining){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcat(to, slash); + } + else { + if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ + cow_printf("absolutize : unable to fit '%s' into %d " + "chars\n", from, size); + return(-1); + } + strcpy(to, save_cwd); + strcat(to, "/"); + strcat(to, from); + } + chdir(save_cwd); + return(0); +} + +int write_cow_header(char *cow_file, int fd, char *backing_file, + int sectorsize, int alignment, long long *size) +{ + struct cow_header_v3 *header; + unsigned long modtime; + int err; + + err = cow_seek_file(fd, 0); + if(err < 0){ + cow_printf("write_cow_header - lseek failed, err = %d\n", -err); + goto out; + } + + err = -ENOMEM; + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("Failed to allocate COW V3 header\n"); + goto out; + } + header->magic = htonl(COW_MAGIC); + header->version = htonl(COW_VERSION); + + err = -EINVAL; + if(strlen(backing_file) > sizeof(header->backing_file) - 1){ + cow_printf("Backing file name \"%s\" is too long - names are " + "limited to %d characters\n", backing_file, + sizeof(header->backing_file) - 1); + goto out_free; + } + + if(absolutize(header->backing_file, sizeof(header->backing_file), + backing_file)) + goto out_free; + + err = 
os_file_modtime(header->backing_file, &modtime); + if(err < 0){ + cow_printf("Backing file '%s' mtime request failed, " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + err = cow_file_size(header->backing_file, size); + if(err < 0){ + cow_printf("Couldn't get size of backing file '%s', " + "err = %d\n", header->backing_file, -err); + goto out_free; + } + + header->mtime = htonl(modtime); + header->size = htonll(*size); + header->sectorsize = htonl(sectorsize); + header->alignment = htonl(alignment); + header->cow_format = COW_BITMAP; + + err = os_write_file(fd, header, sizeof(*header)); + if(err != sizeof(*header)){ + cow_printf("Write of header to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + goto out_free; + } + err = 0; + out_free: + cow_free(header); + out: + return(err); +} + +int file_reader(__u64 offset, char *buf, int len, void *arg) +{ + int fd = *((int *) arg); + + return(pread(fd, buf, len, offset)); +} + +/* XXX Need to sanity-check the values read from the header */ + +int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, + __u32 *version_out, char **backing_file_out, + time_t *mtime_out, __u64 *size_out, + int *sectorsize_out, __u32 *align_out, + int *bitmap_offset_out) +{ + union cow_header *header; + char *file; + int err, n; + unsigned long version, magic; + + header = cow_malloc(sizeof(*header)); + if(header == NULL){ + cow_printf("read_cow_header - Failed to allocate header\n"); + return(-ENOMEM); + } + err = -EINVAL; + n = (*reader)(0, (char *) header, sizeof(*header), arg); + if(n < offsetof(typeof(header->v1), backing_file)){ + cow_printf("read_cow_header - short header\n"); + goto out; + } + + magic = header->v1.magic; + if(magic == COW_MAGIC) { + version = header->v1.version; + } + else if(magic == ntohl(COW_MAGIC)){ + version = ntohl(header->v1.version); + } + /* No error printed because the non-COW case comes through here */ + else goto out; + + *version_out = version; + + if(version == 1){ + if(n < sizeof(header->v1)){ + cow_printf("read_cow_header - failed to read V1 " + "header\n"); + goto out; + } + *mtime_out = header->v1.mtime; + *size_out = header->v1.size; + *sectorsize_out = header->v1.sectorsize; + *bitmap_offset_out = sizeof(header->v1); + *align_out = *sectorsize_out; + file = header->v1.backing_file; + } + else if(version == 2){ + if(n < sizeof(header->v2)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v2.mtime); + *size_out = ntohll(header->v2.size); + *sectorsize_out = ntohl(header->v2.sectorsize); + *bitmap_offset_out = sizeof(header->v2); + *align_out = *sectorsize_out; + file = header->v2.backing_file; + } + else if(version == 3){ + if(n < sizeof(header->v3)){ + cow_printf("read_cow_header - failed to read V2 " + "header\n"); + goto out; + } + *mtime_out = ntohl(header->v3.mtime); + *size_out = ntohll(header->v3.size); + *sectorsize_out = ntohl(header->v3.sectorsize); + *align_out = ntohl(header->v3.alignment); + *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); + file = header->v3.backing_file; + } + else { + cow_printf("read_cow_header - invalid COW version\n"); + goto out; + } + err = -ENOMEM; + *backing_file_out = cow_strdup(file); + if(*backing_file_out == NULL){ + cow_printf("read_cow_header - failed to allocate backing " + "file\n"); + goto out; + } + err = 0; + out: + cow_free(header); + return(err); +} + +int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, + int alignment, int 
*bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) +{ + __u64 size, offset; + char zero = 0; + int err; + + err = write_cow_header(cow_file, fd, backing_file, sectorsize, + alignment, &size); + if(err) + goto out; + + *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); + cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, + bitmap_len_out, data_offset_out); + + offset = *data_offset_out + size - sizeof(zero); + err = cow_seek_file(fd, offset); + if(err < 0){ + cow_printf("cow bitmap lseek failed : err = %d\n", -err); + goto out; + } + + /* does not really matter how much we write it is just to set EOF + * this also sets the entire COW bitmap + * to zero without having to allocate it + */ + err = cow_write_file(fd, &zero, sizeof(zero)); + if(err != sizeof(zero)){ + cow_printf("Write of bitmap to new COW file '%s' failed, " + "err = %d\n", cow_file, -err); + err = -EINVAL; + goto out; + } + + return(0); + + out: + return(err); +} + +/* + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h new file mode 100644 index 000000000..4bcb829d7 --- /dev/null +++ b/arch/um/include/irq_kern.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __IRQ_KERN_H__ +#define __IRQ_KERN_H__ + +#include "linux/interrupt.h" + +extern int um_request_irq(unsigned int irq, int fd, int type, + irqreturn_t (*handler)(int, void *, + struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/include/mem_kern.h b/arch/um/include/mem_kern.h new file mode 100644 index 000000000..b39f03d94 --- /dev/null +++ b/arch/um/include/mem_kern.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __MEM_KERN_H__ +#define __MEM_KERN_H__ + +#include "linux/list.h" +#include "linux/types.h" + +struct remapper { + struct list_head list; + int (*proc)(int, unsigned long, int, __u64); +}; + +extern void register_remapper(struct remapper *info); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c new file mode 100644 index 000000000..d0e0f50dc --- /dev/null +++ b/arch/um/kernel/physmem.c @@ -0,0 +1,468 @@ +/* + * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/mm.h" +#include "linux/ghash.h" +#include "linux/slab.h" +#include "linux/vmalloc.h" +#include "linux/bootmem.h" +#include "asm/types.h" +#include "asm/pgtable.h" +#include "kern_util.h" +#include "user_util.h" +#include "mode_kern.h" +#include "mem.h" +#include "mem_user.h" +#include "os.h" +#include "kern.h" +#include "init.h" + +#if 0 +static pgd_t physmem_pgd[PTRS_PER_PGD]; + +static struct phys_desc *lookup_mapping(void *addr) +{ + pgd = &physmem_pgd[pgd_index(addr)]; + if(pgd_none(pgd)) + return(NULL); + + pmd = pmd_offset(pgd, addr); + if(pmd_none(pmd)) + return(NULL); + + pte = pte_offset_kernel(pmd, addr); + return((struct phys_desc *) pte_val(pte)); +} + +static struct add_mapping(void *addr, struct phys_desc *new) +{ +} +#endif + +#define PHYS_HASHSIZE (8192) + +struct phys_desc; + +DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); + +struct phys_desc { + struct virtmem_ptrs virt_ptrs; + int fd; + __u64 offset; + void *virt; + unsigned long phys; + struct list_head list; +}; + +struct virtmem_table virtmem_hash; + +static int virt_cmp(void *virt1, void *virt2) +{ + return(virt1 != virt2); +} + +static int virt_hash(void *virt) +{ + unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; + return(addr % PHYS_HASHSIZE); +} + +DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, + virt_hash); + +LIST_HEAD(descriptor_mappings); + +struct desc_mapping { + int fd; + struct list_head list; + struct list_head pages; +}; + +static struct desc_mapping *find_mapping(int fd) +{ + struct desc_mapping *desc; + struct list_head *ele; + + list_for_each(ele, &descriptor_mappings){ + desc = list_entry(ele, struct desc_mapping, list); + if(desc->fd == fd) + return(desc); + } + + return(NULL); +} + +static struct desc_mapping *descriptor_mapping(int fd) +{ + struct desc_mapping *desc; + + desc = find_mapping(fd); + if(desc != NULL) + return(desc); + + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + return(NULL); + + *desc = ((struct desc_mapping) + { .fd = fd, + .list = LIST_HEAD_INIT(desc->list), + .pages = LIST_HEAD_INIT(desc->pages) }); + list_add(&desc->list, &descriptor_mappings); + + return(desc); +} + +int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) +{ + struct desc_mapping *fd_maps; + struct phys_desc *desc; + unsigned long phys; + int err; + + fd_maps = descriptor_mapping(fd); + if(fd_maps == NULL) + return(-ENOMEM); + + phys = __pa(virt); + if(find_virtmem_hash(&virtmem_hash, virt) != NULL) + panic("Address 0x%p is already substituted\n", virt); + + err = -ENOMEM; + desc = kmalloc(sizeof(*desc), GFP_ATOMIC); + if(desc == NULL) + goto out; + + *desc = ((struct phys_desc) + { .virt_ptrs = { NULL, NULL }, + .fd = fd, + .offset = offset, + .virt = virt, + .phys = __pa(virt), + .list = LIST_HEAD_INIT(desc->list) }); + insert_virtmem_hash(&virtmem_hash, desc); + + list_add(&desc->list, &fd_maps->pages); + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); + if(!err) + goto out; + + remove_virtmem_hash(&virtmem_hash, desc); + 
kfree(desc); + out: + return(err); +} + +static int physmem_fd = -1; + +static void remove_mapping(struct phys_desc *desc) +{ + void *virt = desc->virt; + int err; + + remove_virtmem_hash(&virtmem_hash, desc); + list_del(&desc->list); + kfree(desc); + + err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); + if(err) + panic("Failed to unmap block device page from physical memory, " + "errno = %d", -err); +} + +int physmem_remove_mapping(void *virt) +{ + struct phys_desc *desc; + + virt = (void *) ((unsigned long) virt & PAGE_MASK); + desc = find_virtmem_hash(&virtmem_hash, virt); + if(desc == NULL) + return(0); + + remove_mapping(desc); + return(1); +} + +void physmem_forget_descriptor(int fd) +{ + struct desc_mapping *desc; + struct phys_desc *page; + struct list_head *ele, *next; + __u64 offset; + void *addr; + int err; + + desc = find_mapping(fd); + if(desc == NULL) + return; + + list_for_each_safe(ele, next, &desc->pages){ + page = list_entry(ele, struct phys_desc, list); + offset = page->offset; + addr = page->virt; + remove_mapping(page); + err = os_seek_file(fd, offset); + if(err) + panic("physmem_forget_descriptor - failed to seek " + "to %lld in fd %d, error = %d\n", + offset, fd, -err); + err = os_read_file(fd, addr, PAGE_SIZE); + if(err < 0) + panic("physmem_forget_descriptor - failed to read " + "from fd %d to 0x%p, error = %d\n", + fd, addr, -err); + } + + list_del(&desc->list); + kfree(desc); +} + +void arch_free_page(struct page *page, int order) +{ + void *virt; + int i; + + for(i = 0; i < (1 << order); i++){ + virt = __va(page_to_phys(page + i)); + physmem_remove_mapping(virt); + } +} + +int is_remapped(void *virt) +{ + return(find_virtmem_hash(&virtmem_hash, virt) != NULL); +} + +/* Changed during early boot */ +unsigned long high_physmem; + +extern unsigned long physmem_size; + +void *to_virt(unsigned long phys) +{ + return((void *) uml_physmem + phys); +} + +unsigned long to_phys(void *virt) +{ + return(((unsigned long) virt) - uml_physmem); +} + +int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) +{ + struct page *p, *map; + unsigned long phys_len, phys_pages, highmem_len, highmem_pages; + unsigned long iomem_len, iomem_pages, total_len, total_pages; + int i; + + phys_pages = physmem >> PAGE_SHIFT; + phys_len = phys_pages * sizeof(struct page); + + iomem_pages = iomem >> PAGE_SHIFT; + iomem_len = iomem_pages * sizeof(struct page); + + highmem_pages = highmem >> PAGE_SHIFT; + highmem_len = highmem_pages * sizeof(struct page); + + total_pages = phys_pages + iomem_pages + highmem_pages; + total_len = phys_len + iomem_pages + highmem_len; + + if(kmalloc_ok){ + map = kmalloc(total_len, GFP_KERNEL); + if(map == NULL) + map = vmalloc(total_len); + } + else map = alloc_bootmem_low_pages(total_len); + + if(map == NULL) + return(-ENOMEM); + + for(i = 0; i < total_pages; i++){ + p = &map[i]; + set_page_count(p, 0); + SetPageReserved(p); + INIT_LIST_HEAD(&p->lru); + } + + mem_map = map; + max_mapnr = total_pages; + return(0); +} + +struct page *phys_to_page(const unsigned long phys) +{ + return(&mem_map[phys >> PAGE_SHIFT]); +} + +struct page *__virt_to_page(const unsigned long virt) +{ + return(&mem_map[__pa(virt) >> PAGE_SHIFT]); +} + +unsigned long page_to_phys(struct page *page) +{ + return((page - mem_map) << PAGE_SHIFT); +} + +pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + pte_t pte; + + pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); + if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); + return(pte); +} + 
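[Editor's illustration, not part of the patch] The to_virt()/to_phys() helpers above work because UML's "physical" memory is a single contiguous region that begins at the host virtual address uml_physmem, so physical-to-virtual translation is plain offset arithmetic rather than a page-table walk. A minimal, self-contained sketch of that arithmetic (the base address below is made up for the example):

#include <assert.h>
#include <stdio.h>

/* Hypothetical base of the "physical" memory region, standing in for
 * UML's uml_physmem. */
static unsigned long uml_physmem = 0x60000000UL;

static void *to_virt(unsigned long phys)
{
	/* Virtual address = base of the region + physical offset. */
	return (void *) (uml_physmem + phys);
}

static unsigned long to_phys(void *virt)
{
	/* Physical offset = virtual address - base of the region. */
	return ((unsigned long) virt) - uml_physmem;
}

int main(void)
{
	unsigned long phys = 0x1000;
	void *virt = to_virt(phys);

	/* The two translations are exact inverses of each other. */
	assert(to_phys(virt) == phys);
	printf("phys 0x%lx maps to virt %p\n", phys, virt);
	return 0;
}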
+/* Changed during early boot */ +static unsigned long kmem_top = 0; + +unsigned long get_kmem_end(void) +{ + if(kmem_top == 0) + kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); + return(kmem_top); +} + +void map_memory(unsigned long virt, unsigned long phys, unsigned long len, + int r, int w, int x) +{ + __u64 offset; + int fd, err; + + fd = phys_mapping(phys, &offset); + err = os_map_memory((void *) virt, fd, offset, len, r, w, x); + if(err) + panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " + "err = %d\n", virt, fd, offset, len, r, w, x, err); +} + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +void setup_physmem(unsigned long start, unsigned long reserve_end, + unsigned long len, unsigned long highmem) +{ + unsigned long reserve = reserve_end - start; + int pfn = PFN_UP(__pa(reserve_end)); + int delta = (len - reserve) >> PAGE_SHIFT; + int err, offset, bootmap_size; + + physmem_fd = create_mem_file(len + highmem); + + offset = uml_reserved - uml_physmem; + err = os_map_memory((void *) uml_reserved, physmem_fd, offset, + len - offset, 1, 1, 0); + if(err < 0){ + os_print_error(err, "Mapping memory"); + exit(1); + } + + bootmap_size = init_bootmem(pfn, pfn + delta); + free_bootmem(__pa(reserve_end) + bootmap_size, + len - bootmap_size - reserve); +} + +int phys_mapping(unsigned long phys, __u64 *offset_out) +{ + struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, + __va(phys & PAGE_MASK)); + int fd = -1; + + if(desc != NULL){ + fd = desc->fd; + *offset_out = desc->offset; + } + else if(phys < physmem_size){ + fd = physmem_fd; + *offset_out = phys; + } + else if(phys < __pa(end_iomem)){ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if((phys >= region->phys) && + (phys < region->phys + region->size)){ + fd = region->fd; + *offset_out = phys - region->phys; + break; + } + region = region->next; + } + } + else if(phys < __pa(end_iomem) + highmem){ + fd = physmem_fd; + *offset_out = phys - iomem_size; + } + + return(fd); +} + +static int __init uml_mem_setup(char *line, int *add) +{ + char *retptr; + physmem_size = memparse(line,&retptr); + return 0; +} +__uml_setup("mem=", uml_mem_setup, +"mem=\n" +" This controls how much \"physical\" memory the kernel allocates\n" +" for the system. The size is specified as a number followed by\n" +" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +" This is not related to the amount of memory in the host. It can\n" +" be more, and the excess, if it's ever used, will just be swapped out.\n" +" Example: mem=64M\n\n" +); + +unsigned long find_iomem(char *driver, unsigned long *len_out) +{ + struct iomem_region *region = iomem_regions; + + while(region != NULL){ + if(!strcmp(region->driver, driver)){ + *len_out = region->size; + return(region->virt); + } + } + + return(0); +} + +int setup_iomem(void) +{ + struct iomem_region *region = iomem_regions; + unsigned long iomem_start = high_physmem + PAGE_SIZE; + int err; + + while(region != NULL){ + err = os_map_memory((void *) iomem_start, region->fd, 0, + region->size, 1, 1, 0); + if(err) + printk("Mapping iomem region for driver '%s' failed, " + "errno = %d\n", region->driver, -err); + else { + region->virt = iomem_start; + region->phys = __pa(region->virt); + } + + iomem_start += region->size + PAGE_SIZE; + region = region->next; + } + + return(0); +} + +__initcall(setup_iomem); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c new file mode 100644 index 000000000..ea82f19b2 --- /dev/null +++ b/arch/um/kernel/skas/uaccess.c @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include "linux/stddef.h" +#include "linux/kernel.h" +#include "linux/string.h" +#include "linux/fs.h" +#include "linux/highmem.h" +#include "asm/page.h" +#include "asm/pgtable.h" +#include "asm/uaccess.h" +#include "kern_util.h" + +extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, + pte_t *pte_out); + +static unsigned long maybe_map(unsigned long virt, int is_write) +{ + pte_t pte; + int err; + + void *phys = um_virt_to_phys(current, virt, &pte); + int dummy_code; + + if(IS_ERR(phys) || (is_write && !pte_write(pte))){ + err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); + if(err) + return(0); + phys = um_virt_to_phys(current, virt, NULL); + } + return((unsigned long) phys); +} + +static int do_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), void *arg) +{ + struct page *page; + int n; + + addr = maybe_map(addr, is_write); + if(addr == -1) + return(-1); + + page = phys_to_page(addr); + addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + n = (*op)(addr, len, arg); + kunmap(page); + + return(n); +} + +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long addr, int len, void *arg), + void *arg) +{ + int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); + int remain = len, n; + + n = do_op(addr, size, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += size; + remain -= size; + if(remain == 0) + return(0); + + while(addr < ((addr + remain) & PAGE_MASK)){ + n = do_op(addr, PAGE_SIZE, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + + addr += PAGE_SIZE; + remain -= PAGE_SIZE; + } + if(remain == 0) + return(0); + + n = do_op(addr, remain, is_write, op, arg); + if(n != 0) + return(n < 0 ? remain : 0); + return(0); +} + +static int copy_chunk_from_user(unsigned long from, int len, void *arg) +{ + unsigned long *to_ptr = arg, to = *to_ptr; + + memcpy((void *) to, (void *) from, len); + *to_ptr += len; + return(0); +} + +int copy_from_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_READ, from, n) ? + buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): + n); +} + +static int copy_chunk_to_user(unsigned long to, int len, void *arg) +{ + unsigned long *from_ptr = arg, from = *from_ptr; + + memcpy((void *) to, (void *) from, len); + *from_ptr += len; + return(0); +} + +int copy_to_user_skas(void *to, const void *from, int n) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memcpy(to, from, n); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, to, n) ? 
+ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : + n); +} + +static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) +{ + char **to_ptr = arg, *to = *to_ptr; + int n; + + strncpy(to, (void *) from, len); + n = strnlen(to, len); + *to_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strncpy_from_user_skas(char *dst, const char *src, int count) +{ + int n; + char *ptr = dst; + + if(segment_eq(get_fs(), KERNEL_DS)){ + strncpy(dst, src, count); + return(strnlen(dst, count)); + } + + if(!access_ok_skas(VERIFY_READ, src, 1)) + return(-EFAULT); + + n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, + &ptr); + if(n != 0) + return(-EFAULT); + return(strnlen(dst, count)); +} + +static int clear_chunk(unsigned long addr, int len, void *unused) +{ + memset((void *) addr, 0, len); + return(0); +} + +int __clear_user_skas(void *mem, int len) +{ + return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); +} + +int clear_user_skas(void *mem, int len) +{ + if(segment_eq(get_fs(), KERNEL_DS)){ + memset(mem, 0, len); + return(0); + } + + return(access_ok_skas(VERIFY_WRITE, mem, len) ? + buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); +} + +static int strnlen_chunk(unsigned long str, int len, void *arg) +{ + int *len_ptr = arg, n; + + n = strnlen((void *) str, len); + *len_ptr += n; + + if(n < len) + return(1); + return(0); +} + +int strnlen_user_skas(const void *str, int len) +{ + int count = 0, n; + + if(segment_eq(get_fs(), KERNEL_DS)) + return(strnlen(str, len) + 1); + + n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); + if(n == 0) + return(count + 1); + return(-EFAULT); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c
new file mode 100644
index 000000000..9c8401120
--- /dev/null
+++ b/arch/um/kernel/tt/uaccess.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+#include "linux/sched.h"
+#include "asm/uaccess.h"
+
+int copy_from_user_tt(void *to, const void *from, int n)
+{
+	if(!access_ok_tt(VERIFY_READ, from, n))
+		return(n);
+
+	return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
+				   &current->thread.fault_catcher));
+}
+
+int copy_to_user_tt(void *to, const void *from, int n)
+{
+	if(!access_ok_tt(VERIFY_WRITE, to, n))
+		return(n);
+
+	return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
+				 &current->thread.fault_catcher));
+}
+
+int strncpy_from_user_tt(char *dst, const char *src, int count)
+{
+	int n;
+
+	if(!access_ok_tt(VERIFY_READ, src, 1))
+		return(-EFAULT);
+
+	n = __do_strncpy_from_user(dst, src, count,
+				   &current->thread.fault_addr,
+				   &current->thread.fault_catcher);
+	if(n < 0) return(-EFAULT);
+	return(n);
+}
+
+int __clear_user_tt(void *mem, int len)
+{
+	return(__do_clear_user(mem, len,
+			       &current->thread.fault_addr,
+			       &current->thread.fault_catcher));
+}
+
+int clear_user_tt(void *mem, int len)
+{
+	if(!access_ok_tt(VERIFY_WRITE, mem, len))
+		return(len);
+
+	return(__do_clear_user(mem, len, &current->thread.fault_addr,
+			       &current->thread.fault_catcher));
+}
+
+int strnlen_user_tt(const void *str, int len)
+{
+	return(__do_strnlen_user(str, len,
+				 &current->thread.fault_addr,
+				 &current->thread.fault_catcher));
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only. This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
new file mode 100644
index 000000000..ef0fb71e4
--- /dev/null
+++ b/arch/um/os-Linux/user_syms.c
@@ -0,0 +1,88 @@
+#include "linux/types.h"
+#include "linux/module.h"
+
+/* Some of this are builtin function (some are not but could in the future),
+ * so I *must* declare good prototypes for them and then EXPORT them.
+ * The kernel code uses the macro defined by include/linux/string.h,
+ * so I undef macros; the userspace code does not include that and I
+ * add an EXPORT for the glibc one.*/
+
+#undef strlen
+#undef strstr
+#undef memcpy
+#undef memset
+
+extern size_t strlen(const char *);
+extern void *memcpy(void *, const void *, size_t);
+extern void *memset(void *, int, size_t);
+extern int printf(const char *, ...);
+
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(printf);
+
+EXPORT_SYMBOL(strstr);
+
+/* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms.
+ * However, the modules will use the CRC defined *here*, no matter if it is + * good; so the versions of these symbols will always match + */ +#define EXPORT_SYMBOL_PROTO(sym) \ + int sym(void); \ + EXPORT_SYMBOL(sym); + +EXPORT_SYMBOL_PROTO(__errno_location); + +EXPORT_SYMBOL_PROTO(access); +EXPORT_SYMBOL_PROTO(open); +EXPORT_SYMBOL_PROTO(open64); +EXPORT_SYMBOL_PROTO(close); +EXPORT_SYMBOL_PROTO(read); +EXPORT_SYMBOL_PROTO(write); +EXPORT_SYMBOL_PROTO(dup2); +EXPORT_SYMBOL_PROTO(__xstat); +EXPORT_SYMBOL_PROTO(__lxstat); +EXPORT_SYMBOL_PROTO(__lxstat64); +EXPORT_SYMBOL_PROTO(lseek); +EXPORT_SYMBOL_PROTO(lseek64); +EXPORT_SYMBOL_PROTO(chown); +EXPORT_SYMBOL_PROTO(truncate); +EXPORT_SYMBOL_PROTO(utime); +EXPORT_SYMBOL_PROTO(chmod); +EXPORT_SYMBOL_PROTO(rename); +EXPORT_SYMBOL_PROTO(__xmknod); + +EXPORT_SYMBOL_PROTO(symlink); +EXPORT_SYMBOL_PROTO(link); +EXPORT_SYMBOL_PROTO(unlink); +EXPORT_SYMBOL_PROTO(readlink); + +EXPORT_SYMBOL_PROTO(mkdir); +EXPORT_SYMBOL_PROTO(rmdir); +EXPORT_SYMBOL_PROTO(opendir); +EXPORT_SYMBOL_PROTO(readdir); +EXPORT_SYMBOL_PROTO(closedir); +EXPORT_SYMBOL_PROTO(seekdir); +EXPORT_SYMBOL_PROTO(telldir); + +EXPORT_SYMBOL_PROTO(ioctl); + +EXPORT_SYMBOL_PROTO(pread64); +EXPORT_SYMBOL_PROTO(pwrite64); + +EXPORT_SYMBOL_PROTO(statfs); +EXPORT_SYMBOL_PROTO(statfs64); + +EXPORT_SYMBOL_PROTO(getuid); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/Makefile b/fs/hostfs/Makefile new file mode 100644 index 000000000..794292e0a --- /dev/null +++ b/fs/hostfs/Makefile @@ -0,0 +1,26 @@ +# +# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino +# to __st_ino. It stayed in the same place, so as long as the correct name +# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. + +STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ + echo __)st_ino + +hostfs-objs := hostfs_kern.o hostfs_user.o + +obj-y = +obj-$(CONFIG_HOSTFS) += hostfs.o + +SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) + +USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS)) +USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + +USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) + +$(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h new file mode 100644 index 000000000..d1f6c339f --- /dev/null +++ b/fs/hostfs/hostfs.h @@ -0,0 +1,79 @@ +#ifndef __UM_FS_HOSTFS +#define __UM_FS_HOSTFS + +#include "os.h" + +/* These are exactly the same definitions as in fs.h, but the names are + * changed so that this file can be included in both kernel and user files. 
+ */ + +#define HOSTFS_ATTR_MODE 1 +#define HOSTFS_ATTR_UID 2 +#define HOSTFS_ATTR_GID 4 +#define HOSTFS_ATTR_SIZE 8 +#define HOSTFS_ATTR_ATIME 16 +#define HOSTFS_ATTR_MTIME 32 +#define HOSTFS_ATTR_CTIME 64 +#define HOSTFS_ATTR_ATIME_SET 128 +#define HOSTFS_ATTR_MTIME_SET 256 +#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ +#define HOSTFS_ATTR_ATTR_FLAG 1024 + +struct hostfs_iattr { + unsigned int ia_valid; + mode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + struct timespec ia_atime; + struct timespec ia_mtime; + struct timespec ia_ctime; + unsigned int ia_attr_flags; +}; + +extern int stat_file(const char *path, unsigned long long *inode_out, + int *mode_out, int *nlink_out, int *uid_out, int *gid_out, + unsigned long long *size_out, struct timespec *atime_out, + struct timespec *mtime_out, struct timespec *ctime_out, + int *blksize_out, unsigned long long *blocks_out); +extern int access_file(char *path, int r, int w, int x); +extern int open_file(char *path, int r, int w, int append); +extern int file_type(const char *path, int *rdev); +extern void *open_dir(char *path, int *err_out); +extern char *read_dir(void *stream, unsigned long long *pos, + unsigned long long *ino_out, int *len_out); +extern void close_file(void *stream); +extern void close_dir(void *stream); +extern int read_file(int fd, unsigned long long *offset, char *buf, int len); +extern int write_file(int fd, unsigned long long *offset, const char *buf, + int len); +extern int lseek_file(int fd, long long offset, int whence); +extern int file_create(char *name, int ur, int uw, int ux, int gr, + int gw, int gx, int or, int ow, int ox); +extern int set_attr(const char *file, struct hostfs_iattr *attrs); +extern int make_symlink(const char *from, const char *to); +extern int unlink_file(const char *file); +extern int do_mkdir(const char *file, int mode); +extern int do_rmdir(const char *file); +extern int do_mknod(const char *file, int mode, int dev); +extern int link_file(const char *from, const char *to); +extern int do_readlink(char *file, char *buf, int size); +extern int rename_file(char *from, char *to); +extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, + long long *bfree_out, long long *bavail_out, + long long *files_out, long long *ffree_out, + void *fsid_out, int fsid_size, long *namelen_out, + long *spare_out); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c new file mode 100644 index 000000000..ef5d5d1bf --- /dev/null +++ b/fs/hostfs/hostfs_kern.c @@ -0,0 +1,1008 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + * + * Ported the filesystem routines to 2.5. 
+ * 2003-02-10 Petr Baudis + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hostfs.h" +#include "kern_util.h" +#include "kern.h" +#include "user_util.h" +#include "2_5compat.h" +#include "init.h" + +struct hostfs_inode_info { + char *host_filename; + int fd; + int mode; + struct inode vfs_inode; +}; + +static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) +{ + return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); +} + +#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode) + +int hostfs_d_delete(struct dentry *dentry) +{ + return(1); +} + +struct dentry_operations hostfs_dentry_ops = { + .d_delete = hostfs_d_delete, +}; + +/* Changed in hostfs_args before the kernel starts running */ +static char *root_ino = "/"; +static int append = 0; + +#define HOSTFS_SUPER_MAGIC 0x00c0ffee + +static struct inode_operations hostfs_iops; +static struct inode_operations hostfs_dir_iops; +static struct address_space_operations hostfs_link_aops; + +static int __init hostfs_args(char *options, int *add) +{ + char *ptr; + + ptr = strchr(options, ','); + if(ptr != NULL) + *ptr++ = '\0'; + if(*options != '\0') + root_ino = options; + + options = ptr; + while(options){ + ptr = strchr(options, ','); + if(ptr != NULL) + *ptr++ = '\0'; + if(*options != '\0'){ + if(!strcmp(options, "append")) + append = 1; + else printf("hostfs_args - unsupported option - %s\n", + options); + } + options = ptr; + } + return(0); +} + +__uml_setup("hostfs=", hostfs_args, +"hostfs=,,...\n" +" This is used to set hostfs parameters. The root directory argument\n" +" is used to confine all hostfs mounts to within the specified directory\n" +" tree on the host. If this isn't specified, then a user inside UML can\n" +" mount anything on the host that's accessible to the user that's running\n" +" it.\n" +" The only flag currently supported is 'append', which specifies that all\n" +" files opened by hostfs will be opened in append mode.\n\n" +); + +static char *dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + int len; + + len = 0; + parent = dentry; + while(parent->d_parent != parent){ + len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = HOSTFS_I(parent->d_inode)->host_filename; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if(name == NULL) return(NULL); + + name[len] = '\0'; + parent = dentry; + while(parent->d_parent != parent){ + len -= parent->d_name.len + 1; + name[len] = '/'; + strncpy(&name[len + 1], parent->d_name.name, + parent->d_name.len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return(name); +} + +static char *inode_name(struct inode *ino, int extra) +{ + struct dentry *dentry; + + dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); + return(dentry_name(dentry, extra)); +} + +static int read_name(struct inode *ino, char *name) +{ + /* The non-int inode fields are copied into ints by stat_file and + * then copied into the inode because passing the actual pointers + * in and having them treated as int * breaks on big-endian machines + */ + int err; + int i_mode, i_nlink, i_blksize; + unsigned long long i_size; + unsigned long long i_ino; + unsigned long long i_blocks; + + err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, + &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, + &ino->i_ctime, &i_blksize, &i_blocks); + if(err) + return(err); + + 
ino->i_ino = i_ino; + ino->i_mode = i_mode; + ino->i_nlink = i_nlink; + ino->i_size = i_size; + ino->i_blksize = i_blksize; + ino->i_blocks = i_blocks; + if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid())) + ino->i_uid = 0; + return(0); +} + +static char *follow_link(char *link) +{ + int len, n; + char *name, *resolved, *end; + + len = 64; + while(1){ + n = -ENOMEM; + name = kmalloc(len, GFP_KERNEL); + if(name == NULL) + goto out; + + n = do_readlink(link, name, len); + if(n < len) + break; + len *= 2; + kfree(name); + } + if(n < 0) + goto out_free; + + if(*name == '/') + return(name); + + end = strrchr(link, '/'); + if(end == NULL) + return(name); + + *(end + 1) = '\0'; + len = strlen(link) + strlen(name) + 1; + + resolved = kmalloc(len, GFP_KERNEL); + if(resolved == NULL){ + n = -ENOMEM; + goto out_free; + } + + sprintf(resolved, "%s%s", link, name); + kfree(name); + kfree(link); + return(resolved); + + out_free: + kfree(name); + out: + return(ERR_PTR(n)); +} + +static int read_inode(struct inode *ino) +{ + char *name; + int err = 0; + + /* Unfortunately, we are called from iget() when we don't have a dentry + * allocated yet. + */ + if(list_empty(&ino->i_dentry)) + goto out; + + err = -ENOMEM; + name = inode_name(ino, 0); + if(name == NULL) + goto out; + + if(file_type(name, NULL) == OS_TYPE_SYMLINK){ + name = follow_link(name); + if(IS_ERR(name)){ + err = PTR_ERR(name); + goto out; + } + } + + err = read_name(ino, name); + kfree(name); + out: + return(err); +} + +int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) +{ + /* do_statfs uses struct statfs64 internally, but the linux kernel + * struct statfs still has 32-bit versions for most of these fields, + * so we convert them here + */ + int err; + long long f_blocks; + long long f_bfree; + long long f_bavail; + long long f_files; + long long f_ffree; + + err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, + &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, + &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), + &sf->f_namelen, sf->f_spare); + if(err) return(err); + sf->f_blocks = f_blocks; + sf->f_bfree = f_bfree; + sf->f_bavail = f_bavail; + sf->f_files = f_files; + sf->f_ffree = f_ffree; + sf->f_type = HOSTFS_SUPER_MAGIC; + return(0); +} + +static struct inode *hostfs_alloc_inode(struct super_block *sb) +{ + struct hostfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if(hi == NULL) + return(NULL); + + *hi = ((struct hostfs_inode_info) { .host_filename = NULL, + .fd = -1, + .mode = 0 }); + inode_init_once(&hi->vfs_inode); + return(&hi->vfs_inode); +} + +static void hostfs_destroy_inode(struct inode *inode) +{ + if(HOSTFS_I(inode)->host_filename) + kfree(HOSTFS_I(inode)->host_filename); + + if(HOSTFS_I(inode)->fd != -1) + close_file(&HOSTFS_I(inode)->fd); + + kfree(HOSTFS_I(inode)); +} + +static void hostfs_read_inode(struct inode *inode) +{ + read_inode(inode); +} + +static struct super_operations hostfs_sbops = { + .alloc_inode = hostfs_alloc_inode, + .destroy_inode = hostfs_destroy_inode, + .read_inode = hostfs_read_inode, + .statfs = hostfs_statfs, +}; + +int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + void *dir; + char *name; + unsigned long long next, ino; + int error, len; + + name = dentry_name(file->f_dentry, 0); + if(name == NULL) return(-ENOMEM); + dir = open_dir(name, &error); + kfree(name); + if(dir == NULL) return(-error); + next = file->f_pos; + while((name = read_dir(dir, &next, &ino, &len)) != NULL){ + error = (*filldir)(ent, name, len, file->f_pos, + 
ino, DT_UNKNOWN); + if(error) break; + file->f_pos = next; + } + close_dir(dir); + return(0); +} + +int hostfs_file_open(struct inode *ino, struct file *file) +{ + char *name; + int mode = 0, r = 0, w = 0, fd; + + mode = file->f_mode & (FMODE_READ | FMODE_WRITE); + if((mode & HOSTFS_I(ino)->mode) == mode) + return(0); + + /* The file may already have been opened, but with the wrong access, + * so this resets things and reopens the file with the new access. + */ + if(HOSTFS_I(ino)->fd != -1){ + close_file(&HOSTFS_I(ino)->fd); + HOSTFS_I(ino)->fd = -1; + } + + HOSTFS_I(ino)->mode |= mode; + if(HOSTFS_I(ino)->mode & FMODE_READ) + r = 1; + if(HOSTFS_I(ino)->mode & FMODE_WRITE) + w = 1; + if(w) + r = 1; + + name = dentry_name(file->f_dentry, 0); + if(name == NULL) + return(-ENOMEM); + + fd = open_file(name, r, w, append); + kfree(name); + if(fd < 0) return(fd); + FILE_HOSTFS_I(file)->fd = fd; + + return(0); +} + +int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return(0); +} + +static struct file_operations hostfs_file_fops = { + .llseek = generic_file_llseek, + .read = generic_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .open = hostfs_file_open, + .release = NULL, + .fsync = hostfs_fsync, +}; + +static struct file_operations hostfs_dir_fops = { + .readdir = hostfs_readdir, + .read = generic_read_dir, +}; + +int hostfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + unsigned long long base; + int count = PAGE_CACHE_SIZE; + int end_index = inode->i_size >> PAGE_CACHE_SHIFT; + int err; + + if (page->index >= end_index) + count = inode->i_size & (PAGE_CACHE_SIZE-1); + + buffer = kmap(page); + base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; + + err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); + if(err != count){ + ClearPageUptodate(page); + goto out; + } + + if (base > inode->i_size) + inode->i_size = base; + + if (PageError(page)) + ClearPageError(page); + err = 0; + + out: + kunmap(page); + + unlock_page(page); + return err; +} + +int hostfs_readpage(struct file *file, struct page *page) +{ + char *buffer; + long long start; + int err = 0; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, + PAGE_CACHE_SIZE); + if(err < 0) goto out; + + memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); + + flush_dcache_page(page); + SetPageUptodate(page); + if (PageError(page)) ClearPageError(page); + err = 0; + out: + kunmap(page); + unlock_page(page); + return(err); +} + +int hostfs_prepare_write(struct file *file, struct page *page, + unsigned int from, unsigned int to) +{ + char *buffer; + long long start, tmp; + int err; + + start = (long long) page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + if(from != 0){ + tmp = start; + err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, + from); + if(err < 0) goto out; + } + if(to != PAGE_CACHE_SIZE){ + start += to; + err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, + PAGE_CACHE_SIZE - to); + if(err < 0) goto out; + } + err = 0; + out: + kunmap(page); + return(err); +} + +int hostfs_commit_write(struct file *file, struct page *page, unsigned from, + unsigned to) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + char *buffer; + long long start; + int err = 0; + + start = (long long) (page->index << PAGE_CACHE_SHIFT) + 
from; + buffer = kmap(page); + err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, + to - from); + if(err > 0) err = 0; + if(!err && (start > inode->i_size)) + inode->i_size = start; + + kunmap(page); + return(err); +} + +static struct address_space_operations hostfs_aops = { + .writepage = hostfs_writepage, + .readpage = hostfs_readpage, +/* .set_page_dirty = __set_page_dirty_nobuffers, */ + .prepare_write = hostfs_prepare_write, + .commit_write = hostfs_commit_write +}; + +static int init_inode(struct inode *inode, struct dentry *dentry) +{ + char *name; + int type, err = -ENOMEM, rdev; + + if(dentry){ + name = dentry_name(dentry, 0); + if(name == NULL) + goto out; + type = file_type(name, &rdev); + kfree(name); + } + else type = OS_TYPE_DIR; + + err = 0; + if(type == OS_TYPE_SYMLINK) + inode->i_op = &page_symlink_inode_operations; + else if(type == OS_TYPE_DIR) + inode->i_op = &hostfs_dir_iops; + else inode->i_op = &hostfs_iops; + + if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; + else inode->i_fop = &hostfs_file_fops; + + if(type == OS_TYPE_SYMLINK) + inode->i_mapping->a_ops = &hostfs_link_aops; + else inode->i_mapping->a_ops = &hostfs_aops; + + switch (type) { + case OS_TYPE_CHARDEV: + init_special_inode(inode, S_IFCHR, rdev); + break; + case OS_TYPE_BLOCKDEV: + init_special_inode(inode, S_IFBLK, rdev); + break; + case OS_TYPE_FIFO: + init_special_inode(inode, S_IFIFO, 0); + break; + case OS_TYPE_SOCK: + init_special_inode(inode, S_IFSOCK, 0); + break; + } + out: + return(err); +} + +int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + struct inode *inode; + char *name; + int error, fd; + + error = -ENOMEM; + inode = iget(dir->i_sb, 0); + if(inode == NULL) goto out; + + error = init_inode(inode, dentry); + if(error) + goto out_put; + + error = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + fd = file_create(name, + mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, + mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, + mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); + if(fd < 0) + error = fd; + else error = read_name(inode, name); + + kfree(name); + if(error) + goto out_put; + + HOSTFS_I(inode)->fd = fd; + HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; + d_instantiate(dentry, inode); + return(0); + + out_put: + iput(inode); + out: + return(error); +} + +struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + char *name; + int err; + + err = -ENOMEM; + inode = iget(ino->i_sb, 0); + if(inode == NULL) + goto out; + + err = init_inode(inode, dentry); + if(err) + goto out_put; + + err = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + err = read_name(inode, name); + kfree(name); + if(err == -ENOENT){ + iput(inode); + inode = NULL; + } + else if(err) + goto out_put; + + d_add(dentry, inode); + dentry->d_op = &hostfs_dentry_ops; + return(NULL); + + out_put: + iput(inode); + out: + return(ERR_PTR(err)); +} + +static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) +{ + char *file; + int len; + + file = inode_name(ino, dentry->d_name.len + 1); + if(file == NULL) return(NULL); + strcat(file, "/"); + len = strlen(file); + strncat(file, dentry->d_name.name, dentry->d_name.len); + file[len + dentry->d_name.len] = '\0'; + return(file); +} + +int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) +{ + char *from_name, *to_name; + int err; + + if((from_name = 
inode_dentry_name(ino, from)) == NULL) + return(-ENOMEM); + to_name = dentry_name(to, 0); + if(to_name == NULL){ + kfree(from_name); + return(-ENOMEM); + } + err = link_file(to_name, from_name); + kfree(from_name); + kfree(to_name); + return(err); +} + +int hostfs_unlink(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + if(append) + return(-EPERM); + + err = unlink_file(file); + kfree(file); + return(err); +} + +int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = make_symlink(file, to); + kfree(file); + return(err); +} + +int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_mkdir(file, mode); + kfree(file); + return(err); +} + +int hostfs_rmdir(struct inode *ino, struct dentry *dentry) +{ + char *file; + int err; + + if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); + err = do_rmdir(file); + kfree(file); + return(err); +} + +int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode *inode; + char *name; + int err = -ENOMEM; + + inode = iget(dir->i_sb, 0); + if(inode == NULL) + goto out; + + err = init_inode(inode, dentry); + if(err) + goto out_put; + + err = -ENOMEM; + name = dentry_name(dentry, 0); + if(name == NULL) + goto out_put; + + init_special_inode(inode, mode, dev); + err = do_mknod(name, mode, dev); + if(err) + goto out_free; + + err = read_name(inode, name); + kfree(name); + if(err) + goto out_put; + + d_instantiate(dentry, inode); + return(0); + + out_free: + kfree(name); + out_put: + iput(inode); + out: + return(err); +} + +int hostfs_rename(struct inode *from_ino, struct dentry *from, + struct inode *to_ino, struct dentry *to) +{ + char *from_name, *to_name; + int err; + + if((from_name = inode_dentry_name(from_ino, from)) == NULL) + return(-ENOMEM); + if((to_name = inode_dentry_name(to_ino, to)) == NULL){ + kfree(from_name); + return(-ENOMEM); + } + err = rename_file(from_name, to_name); + kfree(from_name); + kfree(to_name); + return(err); +} + +void hostfs_truncate(struct inode *ino) +{ + not_implemented(); +} + +int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) +{ + char *name; + int r = 0, w = 0, x = 0, err; + + if(desired & MAY_READ) r = 1; + if(desired & MAY_WRITE) w = 1; + if(desired & MAY_EXEC) x = 1; + name = inode_name(ino, 0); + if(name == NULL) return(-ENOMEM); + err = access_file(name, r, w, x); + kfree(name); + if(!err) err = vfs_permission(ino, desired); + return(err); +} + +int hostfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct hostfs_iattr attrs; + char *name; + int err; + + if(append) + attr->ia_valid &= ~ATTR_SIZE; + + attrs.ia_valid = 0; + if(attr->ia_valid & ATTR_MODE){ + attrs.ia_valid |= HOSTFS_ATTR_MODE; + attrs.ia_mode = attr->ia_mode; + } + if(attr->ia_valid & ATTR_UID){ + if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && + (attr->ia_uid == 0)) + attr->ia_uid = getuid(); + attrs.ia_valid |= HOSTFS_ATTR_UID; + attrs.ia_uid = attr->ia_uid; + } + if(attr->ia_valid & ATTR_GID){ + if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && + (attr->ia_gid == 0)) + attr->ia_gid = getuid(); + attrs.ia_valid |= HOSTFS_ATTR_GID; + attrs.ia_gid = attr->ia_gid; + } + if(attr->ia_valid & ATTR_SIZE){ + attrs.ia_valid |= 
HOSTFS_ATTR_SIZE; + attrs.ia_size = attr->ia_size; + } + if(attr->ia_valid & ATTR_ATIME){ + attrs.ia_valid |= HOSTFS_ATTR_ATIME; + attrs.ia_atime = attr->ia_atime; + } + if(attr->ia_valid & ATTR_MTIME){ + attrs.ia_valid |= HOSTFS_ATTR_MTIME; + attrs.ia_mtime = attr->ia_mtime; + } + if(attr->ia_valid & ATTR_CTIME){ + attrs.ia_valid |= HOSTFS_ATTR_CTIME; + attrs.ia_ctime = attr->ia_ctime; + } + if(attr->ia_valid & ATTR_ATIME_SET){ + attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; + } + if(attr->ia_valid & ATTR_MTIME_SET){ + attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; + } + name = dentry_name(dentry, 0); + if(name == NULL) return(-ENOMEM); + err = set_attr(name, &attrs); + kfree(name); + if(err) + return(err); + + return(inode_setattr(dentry->d_inode, attr)); +} + +int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + generic_fillattr(dentry->d_inode, stat); + return(0); +} + +static struct inode_operations hostfs_iops = { + .create = hostfs_create, + .link = hostfs_link, + .unlink = hostfs_unlink, + .symlink = hostfs_symlink, + .mkdir = hostfs_mkdir, + .rmdir = hostfs_rmdir, + .mknod = hostfs_mknod, + .rename = hostfs_rename, + .truncate = hostfs_truncate, + .permission = hostfs_permission, + .setattr = hostfs_setattr, + .getattr = hostfs_getattr, +}; + +static struct inode_operations hostfs_dir_iops = { + .create = hostfs_create, + .lookup = hostfs_lookup, + .link = hostfs_link, + .unlink = hostfs_unlink, + .symlink = hostfs_symlink, + .mkdir = hostfs_mkdir, + .rmdir = hostfs_rmdir, + .mknod = hostfs_mknod, + .rename = hostfs_rename, + .truncate = hostfs_truncate, + .permission = hostfs_permission, + .setattr = hostfs_setattr, + .getattr = hostfs_getattr, +}; + +int hostfs_link_readpage(struct file *file, struct page *page) +{ + char *buffer, *name; + long long start; + int err; + + start = page->index << PAGE_CACHE_SHIFT; + buffer = kmap(page); + name = inode_name(page->mapping->host, 0); + if(name == NULL) return(-ENOMEM); + err = do_readlink(name, buffer, PAGE_CACHE_SIZE); + kfree(name); + if(err == PAGE_CACHE_SIZE) + err = -E2BIG; + else if(err > 0){ + flush_dcache_page(page); + SetPageUptodate(page); + if (PageError(page)) ClearPageError(page); + err = 0; + } + kunmap(page); + unlock_page(page); + return(err); +} + +static struct address_space_operations hostfs_link_aops = { + .readpage = hostfs_link_readpage, +}; + +static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + char *name, *data = d; + int err; + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HOSTFS_SUPER_MAGIC; + sb->s_op = &hostfs_sbops; + + if((data == NULL) || (*data == '\0')) + data = root_ino; + + err = -ENOMEM; + name = kmalloc(strlen(data) + 1, GFP_KERNEL); + if(name == NULL) + goto out; + + strcpy(name, data); + + root_inode = iget(sb, 0); + if(root_inode == NULL) + goto out_free; + + err = init_inode(root_inode, NULL); + if(err) + goto out_put; + + HOSTFS_I(root_inode)->host_filename = name; + + err = -ENOMEM; + sb->s_root = d_alloc_root(root_inode); + if(sb->s_root == NULL) + goto out_put; + + err = read_inode(root_inode); + if(err) + goto out_put; + + return(0); + + out_put: + iput(root_inode); + out_free: + kfree(name); + out: + return(err); +} + +static struct super_block *hostfs_read_sb(struct file_system_type *type, + int flags, const char *dev_name, + void *data) +{ + return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); +} + +static struct file_system_type hostfs_type = { + .owner = 
THIS_MODULE, + .name = "hostfs", + .get_sb = hostfs_read_sb, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hostfs(void) +{ + return(register_filesystem(&hostfs_type)); +} + +static void __exit exit_hostfs(void) +{ + unregister_filesystem(&hostfs_type); +} + +module_init(init_hostfs) +module_exit(exit_hostfs) +MODULE_LICENSE("GPL"); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c new file mode 100644 index 000000000..c40626609 --- /dev/null +++ b/fs/hostfs/hostfs_user.c @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hostfs.h" +#include "kern_util.h" +#include "user.h" + +int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, + int *nlink_out, int *uid_out, int *gid_out, + unsigned long long *size_out, struct timespec *atime_out, + struct timespec *mtime_out, struct timespec *ctime_out, + int *blksize_out, unsigned long long *blocks_out) +{ + struct stat64 buf; + + if(lstat64(path, &buf) < 0) + return(-errno); + + /* See the Makefile for why STAT64_INO_FIELD is passed in + * by the build + */ + if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; + if(mode_out != NULL) *mode_out = buf.st_mode; + if(nlink_out != NULL) *nlink_out = buf.st_nlink; + if(uid_out != NULL) *uid_out = buf.st_uid; + if(gid_out != NULL) *gid_out = buf.st_gid; + if(size_out != NULL) *size_out = buf.st_size; + if(atime_out != NULL) { + atime_out->tv_sec = buf.st_atime; + atime_out->tv_nsec = 0; + } + if(mtime_out != NULL) { + mtime_out->tv_sec = buf.st_mtime; + mtime_out->tv_nsec = 0; + } + if(ctime_out != NULL) { + ctime_out->tv_sec = buf.st_ctime; + ctime_out->tv_nsec = 0; + } + if(blksize_out != NULL) *blksize_out = buf.st_blksize; + if(blocks_out != NULL) *blocks_out = buf.st_blocks; + return(0); +} + +int file_type(const char *path, int *rdev) +{ + struct stat64 buf; + + if(lstat64(path, &buf) < 0) + return(-errno); + if(rdev != NULL) + *rdev = buf.st_rdev; + + if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); + else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); + else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); + else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); + else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); + else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); + else return(OS_TYPE_FILE); +} + +int access_file(char *path, int r, int w, int x) +{ + int mode = 0; + + if(r) mode = R_OK; + if(w) mode |= W_OK; + if(x) mode |= X_OK; + if(access(path, mode) != 0) return(-errno); + else return(0); +} + +int open_file(char *path, int r, int w, int append) +{ + int mode = 0, fd; + + if(r && !w) + mode = O_RDONLY; + else if(!r && w) + mode = O_WRONLY; + else if(r && w) + mode = O_RDWR; + else panic("Impossible mode in open_file"); + + if(append) + mode |= O_APPEND; + fd = open64(path, mode); + if(fd < 0) return(-errno); + else return(fd); +} + +void *open_dir(char *path, int *err_out) +{ + DIR *dir; + + dir = opendir(path); + *err_out = errno; + if(dir == NULL) return(NULL); + return(dir); +} + 
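+/*
+ * The open_dir()/read_dir()/close_dir() helpers in this file wrap the
+ * host's opendir()/readdir()/telldir()/closedir() so the kernel side of
+ * hostfs can walk a host directory.  A minimal caller, sketched here only
+ * to show the intended calling convention (the path and consume() are
+ * illustrative, not part of this file):
+ *
+ *	unsigned long long pos = 0, ino;
+ *	int err, len;
+ *	char *name;
+ *	void *dir = open_dir("/some/host/path", &err);
+ *
+ *	if(dir == NULL)
+ *		return(-err);
+ *	while((name = read_dir(dir, &pos, &ino, &len)) != NULL)
+ *		consume(name, ino, len);
+ *	close_dir(dir);
+ */
+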
+char *read_dir(void *stream, unsigned long long *pos, + unsigned long long *ino_out, int *len_out) +{ + DIR *dir = stream; + struct dirent *ent; + + seekdir(dir, *pos); + ent = readdir(dir); + if(ent == NULL) return(NULL); + *len_out = strlen(ent->d_name); + *ino_out = ent->d_ino; + *pos = telldir(dir); + return(ent->d_name); +} + +int read_file(int fd, unsigned long long *offset, char *buf, int len) +{ + int n; + + n = pread64(fd, buf, len, *offset); + if(n < 0) return(-errno); + *offset += n; + return(n); +} + +int write_file(int fd, unsigned long long *offset, const char *buf, int len) +{ + int n; + + n = pwrite64(fd, buf, len, *offset); + if(n < 0) return(-errno); + *offset += n; + return(n); +} + +int lseek_file(int fd, long long offset, int whence) +{ + int ret; + + ret = lseek64(fd, offset, whence); + if(ret < 0) return(-errno); + return(0); +} + +void close_file(void *stream) +{ + close(*((int *) stream)); +} + +void close_dir(void *stream) +{ + closedir(stream); +} + +int file_create(char *name, int ur, int uw, int ux, int gr, + int gw, int gx, int or, int ow, int ox) +{ + int mode, fd; + + mode = 0; + mode |= ur ? S_IRUSR : 0; + mode |= uw ? S_IWUSR : 0; + mode |= ux ? S_IXUSR : 0; + mode |= gr ? S_IRGRP : 0; + mode |= gw ? S_IWGRP : 0; + mode |= gx ? S_IXGRP : 0; + mode |= or ? S_IROTH : 0; + mode |= ow ? S_IWOTH : 0; + mode |= ox ? S_IXOTH : 0; + fd = open64(name, O_CREAT | O_RDWR, mode); + if(fd < 0) + return(-errno); + return(fd); +} + +int set_attr(const char *file, struct hostfs_iattr *attrs) +{ + struct utimbuf buf; + int err, ma; + + if(attrs->ia_valid & HOSTFS_ATTR_MODE){ + if(chmod(file, attrs->ia_mode) != 0) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_UID){ + if(chown(file, attrs->ia_uid, -1)) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_GID){ + if(chown(file, -1, attrs->ia_gid)) return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ + if(truncate(file, attrs->ia_size)) return(-errno); + } + ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; + if((attrs->ia_valid & ma) == ma){ + buf.actime = attrs->ia_atime.tv_sec; + buf.modtime = attrs->ia_mtime.tv_sec; + if(utime(file, &buf) != 0) return(-errno); + } + else { + struct timespec ts; + + if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, &ts, NULL, NULL, NULL); + if(err != 0) + return(err); + buf.actime = attrs->ia_atime.tv_sec; + buf.modtime = ts.tv_sec; + if(utime(file, &buf) != 0) + return(-errno); + } + if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, + NULL, &ts, NULL, NULL, NULL, NULL); + if(err != 0) + return(err); + buf.actime = ts.tv_sec; + buf.modtime = attrs->ia_mtime.tv_sec; + if(utime(file, &buf) != 0) + return(-errno); + } + } + if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; + if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ + err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, + &attrs->ia_atime, &attrs->ia_mtime, NULL, + NULL, NULL); + if(err != 0) return(err); + } + return(0); +} + +int make_symlink(const char *from, const char *to) +{ + int err; + + err = symlink(to, from); + if(err) return(-errno); + return(0); +} + +int unlink_file(const char *file) +{ + int err; + + err = unlink(file); + if(err) return(-errno); + return(0); +} + +int do_mkdir(const char *file, int mode) +{ + int err; + + err = mkdir(file, mode); + if(err) return(-errno); + return(0); +} + +int do_rmdir(const char *file) +{ + int err; + + err = rmdir(file); + if(err) 
return(-errno); + return(0); +} + +int do_mknod(const char *file, int mode, int dev) +{ + int err; + + err = mknod(file, mode, dev); + if(err) return(-errno); + return(0); +} + +int link_file(const char *to, const char *from) +{ + int err; + + err = link(to, from); + if(err) return(-errno); + return(0); +} + +int do_readlink(char *file, char *buf, int size) +{ + int n; + + n = readlink(file, buf, size); + if(n < 0) + return(-errno); + if(n < size) + buf[n] = '\0'; + return(n); +} + +int rename_file(char *from, char *to) +{ + int err; + + err = rename(from, to); + if(err < 0) return(-errno); + return(0); +} + +int do_statfs(char *root, long *bsize_out, long long *blocks_out, + long long *bfree_out, long long *bavail_out, + long long *files_out, long long *ffree_out, + void *fsid_out, int fsid_size, long *namelen_out, + long *spare_out) +{ + struct statfs64 buf; + int err; + + err = statfs64(root, &buf); + if(err < 0) return(-errno); + *bsize_out = buf.f_bsize; + *blocks_out = buf.f_blocks; + *bfree_out = buf.f_bfree; + *bavail_out = buf.f_bavail; + *files_out = buf.f_files; + *ffree_out = buf.f_ffree; + memcpy(fsid_out, &buf.f_fsid, + sizeof(buf.f_fsid) > fsid_size ? fsid_size : + sizeof(buf.f_fsid)); + *namelen_out = buf.f_namelen; + spare_out[0] = buf.f_spare[0]; + spare_out[1] = buf.f_spare[1]; + spare_out[2] = buf.f_spare[2]; + spare_out[3] = buf.f_spare[3]; + spare_out[4] = buf.f_spare[4]; + spare_out[5] = buf.f_spare[5]; + return(0); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile new file mode 100644 index 000000000..e67f03848 --- /dev/null +++ b/fs/hppfs/Makefile @@ -0,0 +1,19 @@ +# +# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) +# Licensed under the GPL +# + +hppfs-objs := hppfs_kern.o + +obj-y = +obj-$(CONFIG_HPPFS) += hppfs.o + +clean: + +modules: + +fastdep: + +dep: + +archmrproper: clean diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c new file mode 100644 index 000000000..ebf08cb55 --- /dev/null +++ b/fs/hppfs/hppfs_kern.c @@ -0,0 +1,811 @@ +/* + * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + +static int init_inode(struct inode *inode, struct dentry *dentry); + +struct hppfs_data { + struct list_head list; + char contents[PAGE_SIZE - sizeof(struct list_head)]; +}; + +struct hppfs_private { + struct file proc_file; + int host_fd; + loff_t len; + struct hppfs_data *contents; +}; + +struct hppfs_inode_info { + struct dentry *proc_dentry; + struct inode vfs_inode; +}; + +static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) +{ + return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); +} + +#define HPPFS_SUPER_MAGIC 0xb00000ee + +static struct super_operations hppfs_sbops; + +static int is_pid(struct dentry *dentry) +{ + struct super_block *sb; + int i; + + sb = dentry->d_sb; + if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) + return(0); + + for(i = 0; i < dentry->d_name.len; i++){ + if(!isdigit(dentry->d_name.name[i])) + return(0); + } + return(1); +} + +static char 
*dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + const char *seg_name; + int len, seg_len; + + len = 0; + parent = dentry; + while(parent->d_parent != parent){ + if(is_pid(parent)) + len += strlen("pid") + 1; + else len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = "proc"; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if(name == NULL) return(NULL); + + name[len] = '\0'; + parent = dentry; + while(parent->d_parent != parent){ + if(is_pid(parent)){ + seg_name = "pid"; + seg_len = strlen("pid"); + } + else { + seg_name = parent->d_name.name; + seg_len = parent->d_name.len; + } + + len -= seg_len + 1; + name[len] = '/'; + strncpy(&name[len + 1], seg_name, seg_len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return(name); +} + +struct dentry_operations hppfs_dentry_ops = { +}; + +static int file_removed(struct dentry *dentry, const char *file) +{ + char *host_file; + int extra, fd; + + extra = 0; + if(file != NULL) extra += strlen(file) + 1; + + host_file = dentry_name(dentry, extra + strlen("/remove")); + if(host_file == NULL){ + printk("file_removed : allocation failed\n"); + return(-ENOMEM); + } + + if(file != NULL){ + strcat(host_file, "/"); + strcat(host_file, file); + } + strcat(host_file, "/remove"); + + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + kfree(host_file); + if(fd > 0){ + os_close_file(fd); + return(1); + } + return(0); +} + +static void hppfs_read_inode(struct inode *ino) +{ + struct inode *proc_ino; + + if(HPPFS_I(ino)->proc_dentry == NULL) + return; + + proc_ino = HPPFS_I(ino)->proc_dentry->d_inode; + ino->i_uid = proc_ino->i_uid; + ino->i_gid = proc_ino->i_gid; + ino->i_atime = proc_ino->i_atime; + ino->i_mtime = proc_ino->i_mtime; + ino->i_ctime = proc_ino->i_ctime; + ino->i_ino = proc_ino->i_ino; + ino->i_mode = proc_ino->i_mode; + ino->i_nlink = proc_ino->i_nlink; + ino->i_size = proc_ino->i_size; + ino->i_blksize = proc_ino->i_blksize; + ino->i_blocks = proc_ino->i_blocks; +} + +static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *proc_dentry, *new, *parent; + struct inode *inode; + int err, deleted; + + deleted = file_removed(dentry, NULL); + if(deleted < 0) + return(ERR_PTR(deleted)); + else if(deleted) + return(ERR_PTR(-ENOENT)); + + err = -ENOMEM; + parent = HPPFS_I(ino)->proc_dentry; + down(&parent->d_inode->i_sem); + proc_dentry = d_lookup(parent, &dentry->d_name); + if(proc_dentry == NULL){ + proc_dentry = d_alloc(parent, &dentry->d_name); + if(proc_dentry == NULL){ + up(&parent->d_inode->i_sem); + goto out; + } + new = (*parent->d_inode->i_op->lookup)(parent->d_inode, + proc_dentry, NULL); + if(new){ + dput(proc_dentry); + proc_dentry = new; + } + } + up(&parent->d_inode->i_sem); + + if(IS_ERR(proc_dentry)) + return(proc_dentry); + + inode = iget(ino->i_sb, 0); + if(inode == NULL) + goto out_dput; + + err = init_inode(inode, proc_dentry); + if(err) + goto out_put; + + hppfs_read_inode(inode); + + d_add(dentry, inode); + dentry->d_op = &hppfs_dentry_ops; + return(NULL); + + out_put: + iput(inode); + out_dput: + dput(proc_dentry); + out: + return(ERR_PTR(err)); +} + +static struct inode_operations hppfs_file_iops = { +}; + +static ssize_t read_proc(struct file *file, char *buf, ssize_t count, + loff_t *ppos, int is_user) +{ + ssize_t (*read)(struct file *, char *, size_t, loff_t *); + ssize_t n; + + read = file->f_dentry->d_inode->i_fop->read; + + 
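+	/*
+	 * When the caller passes a kernel buffer (is_user == 0), widen the
+	 * address limit so the underlying /proc read's copy_to_user()
+	 * accepts the kernel pointer; USER_DS is restored below once the
+	 * read has completed.
+	 */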
if(!is_user) + set_fs(KERNEL_DS); + + n = (*read)(file, buf, count, &file->f_pos); + + if(!is_user) + set_fs(USER_DS); + + if(ppos) *ppos = file->f_pos; + return(n); +} + +static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) +{ + ssize_t n; + int cur, err; + char *new_buf; + + n = -ENOMEM; + new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if(new_buf == NULL){ + printk("hppfs_read_file : kmalloc failed\n"); + goto out; + } + n = 0; + while(count > 0){ + cur = min_t(ssize_t, count, PAGE_SIZE); + err = os_read_file(fd, new_buf, cur); + if(err < 0){ + printk("hppfs_read : read failed, errno = %d\n", + count); + n = err; + goto out_free; + } + else if(err == 0) + break; + + if(copy_to_user(buf, new_buf, err)){ + n = -EFAULT; + goto out_free; + } + n += err; + count -= err; + } + out_free: + kfree(new_buf); + out: + return(n); +} + +static ssize_t hppfs_read(struct file *file, char *buf, size_t count, + loff_t *ppos) +{ + struct hppfs_private *hppfs = file->private_data; + struct hppfs_data *data; + loff_t off; + int err; + + if(hppfs->contents != NULL){ + if(*ppos >= hppfs->len) return(0); + + data = hppfs->contents; + off = *ppos; + while(off >= sizeof(data->contents)){ + data = list_entry(data->list.next, struct hppfs_data, + list); + off -= sizeof(data->contents); + } + + if(off + count > hppfs->len) + count = hppfs->len - off; + copy_to_user(buf, &data->contents[off], count); + *ppos += count; + } + else if(hppfs->host_fd != -1){ + err = os_seek_file(hppfs->host_fd, *ppos); + if(err){ + printk("hppfs_read : seek failed, errno = %d\n", err); + return(err); + } + count = hppfs_read_file(hppfs->host_fd, buf, count); + if(count > 0) + *ppos += count; + } + else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); + + return(count); +} + +static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, + loff_t *ppos) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + ssize_t (*write)(struct file *, const char *, size_t, loff_t *); + int err; + + write = proc_file->f_dentry->d_inode->i_fop->write; + + proc_file->f_pos = file->f_pos; + err = (*write)(proc_file, buf, len, &proc_file->f_pos); + file->f_pos = proc_file->f_pos; + + return(err); +} + +static int open_host_sock(char *host_file, int *filter_out) +{ + char *end; + int fd; + + end = &host_file[strlen(host_file)]; + strcpy(end, "/rw"); + *filter_out = 1; + fd = os_connect_socket(host_file); + if(fd > 0) + return(fd); + + strcpy(end, "/r"); + *filter_out = 0; + fd = os_connect_socket(host_file); + return(fd); +} + +static void free_contents(struct hppfs_data *head) +{ + struct hppfs_data *data; + struct list_head *ele, *next; + + if(head == NULL) return; + + list_for_each_safe(ele, next, &head->list){ + data = list_entry(ele, struct hppfs_data, list); + kfree(data); + } + kfree(head); +} + +static struct hppfs_data *hppfs_get_data(int fd, int filter, + struct file *proc_file, + struct file *hppfs_file, + loff_t *size_out) +{ + struct hppfs_data *data, *new, *head; + int n, err; + + err = -ENOMEM; + data = kmalloc(sizeof(*data), GFP_KERNEL); + if(data == NULL){ + printk("hppfs_get_data : head allocation failed\n"); + goto failed; + } + + INIT_LIST_HEAD(&data->list); + + head = data; + *size_out = 0; + + if(filter){ + while((n = read_proc(proc_file, data->contents, + sizeof(data->contents), NULL, 0)) > 0) + os_write_file(fd, data->contents, n); + err = os_shutdown_socket(fd, 0, 1); + if(err){ + printk("hppfs_get_data : failed to shut down " + "socket\n"); + goto 
failed_free; + } + } + while(1){ + n = os_read_file(fd, data->contents, sizeof(data->contents)); + if(n < 0){ + err = n; + printk("hppfs_get_data : read failed, errno = %d\n", + err); + goto failed_free; + } + else if(n == 0) + break; + + *size_out += n; + + if(n < sizeof(data->contents)) + break; + + new = kmalloc(sizeof(*data), GFP_KERNEL); + if(new == 0){ + printk("hppfs_get_data : data allocation failed\n"); + err = -ENOMEM; + goto failed_free; + } + + INIT_LIST_HEAD(&new->list); + list_add(&new->list, &data->list); + data = new; + } + return(head); + + failed_free: + free_contents(head); + failed: + return(ERR_PTR(err)); +} + +static struct hppfs_private *hppfs_data(void) +{ + struct hppfs_private *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if(data == NULL) + return(data); + + *data = ((struct hppfs_private ) { .host_fd = -1, + .len = -1, + .contents = NULL } ); + return(data); +} + +static int file_mode(int fmode) +{ + if(fmode == (FMODE_READ | FMODE_WRITE)) + return(O_RDWR); + if(fmode == FMODE_READ) + return(O_RDONLY); + if(fmode == FMODE_WRITE) + return(O_WRONLY); + return(0); +} + +static int hppfs_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct dentry *proc_dentry; + char *host_file; + int err, fd, type, filter; + + err = -ENOMEM; + data = hppfs_data(); + if(data == NULL) + goto out; + + host_file = dentry_name(file->f_dentry, strlen("/rw")); + if(host_file == NULL) + goto out_free2; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + + /* XXX This isn't closed anywhere */ + err = open_private_file(&data->proc_file, proc_dentry, + file_mode(file->f_mode)); + if(err) + goto out_free1; + + type = os_file_type(host_file); + if(type == OS_TYPE_FILE){ + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + if(fd >= 0) + data->host_fd = fd; + else printk("hppfs_open : failed to open '%s', errno = %d\n", + host_file, -fd); + + data->contents = NULL; + } + else if(type == OS_TYPE_DIR){ + fd = open_host_sock(host_file, &filter); + if(fd > 0){ + data->contents = hppfs_get_data(fd, filter, + &data->proc_file, + file, &data->len); + if(!IS_ERR(data->contents)) + data->host_fd = fd; + } + else printk("hppfs_open : failed to open a socket in " + "'%s', errno = %d\n", host_file, -fd); + } + kfree(host_file); + + file->private_data = data; + return(0); + + out_free1: + kfree(host_file); + out_free2: + free_contents(data->contents); + kfree(data); + out: + return(err); +} + +static int hppfs_dir_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct dentry *proc_dentry; + int err; + + err = -ENOMEM; + data = hppfs_data(); + if(data == NULL) + goto out; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + err = open_private_file(&data->proc_file, proc_dentry, + file_mode(file->f_mode)); + if(err) + goto out_free; + + file->private_data = data; + return(0); + + out_free: + kfree(data); + out: + return(err); +} + +static loff_t hppfs_llseek(struct file *file, loff_t off, int where) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + loff_t (*llseek)(struct file *, loff_t, int); + loff_t ret; + + llseek = proc_file->f_dentry->d_inode->i_fop->llseek; + if(llseek != NULL){ + ret = (*llseek)(proc_file, off, where); + if(ret < 0) + return(ret); + } + + return(default_llseek(file, off, where)); +} + +static struct file_operations hppfs_file_fops = { + .owner = NULL, + .llseek = hppfs_llseek, + .read = hppfs_read, + .write = hppfs_write, + .open = hppfs_open, +}; + +struct 
hppfs_dirent { + void *vfs_dirent; + filldir_t filldir; + struct dentry *dentry; +}; + +static int hppfs_filldir(void *d, const char *name, int size, + loff_t offset, ino_t inode, unsigned int type) +{ + struct hppfs_dirent *dirent = d; + + if(file_removed(dirent->dentry, name)) + return(0); + + return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, + inode, type)); +} + +static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = &data->proc_file; + int (*readdir)(struct file *, void *, filldir_t); + struct hppfs_dirent dirent = ((struct hppfs_dirent) + { .vfs_dirent = ent, + .filldir = filldir, + .dentry = file->f_dentry } ); + int err; + + readdir = proc_file->f_dentry->d_inode->i_fop->readdir; + + proc_file->f_pos = file->f_pos; + err = (*readdir)(proc_file, &dirent, hppfs_filldir); + file->f_pos = proc_file->f_pos; + + return(err); +} + +static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return(0); +} + +static struct file_operations hppfs_dir_fops = { + .owner = NULL, + .readdir = hppfs_readdir, + .open = hppfs_dir_open, + .fsync = hppfs_fsync, +}; + +static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) +{ + sf->f_blocks = 0; + sf->f_bfree = 0; + sf->f_bavail = 0; + sf->f_files = 0; + sf->f_ffree = 0; + sf->f_type = HPPFS_SUPER_MAGIC; + return(0); +} + +static struct inode *hppfs_alloc_inode(struct super_block *sb) +{ + struct hppfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if(hi == NULL) + return(NULL); + + *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL }); + inode_init_once(&hi->vfs_inode); + return(&hi->vfs_inode); +} + +void hppfs_delete_inode(struct inode *ino) +{ + clear_inode(ino); +} + +static void hppfs_destroy_inode(struct inode *inode) +{ + kfree(HPPFS_I(inode)); +} + +static struct super_operations hppfs_sbops = { + .alloc_inode = hppfs_alloc_inode, + .destroy_inode = hppfs_destroy_inode, + .read_inode = hppfs_read_inode, + .delete_inode = hppfs_delete_inode, + .statfs = hppfs_statfs, +}; + +static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + struct file proc_file; + struct dentry *proc_dentry; + int (*readlink)(struct dentry *, char *, int); + int err, n; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + err = open_private_file(&proc_file, proc_dentry, O_RDONLY); + if(err) + return(err); + + readlink = proc_dentry->d_inode->i_op->readlink; + n = (*readlink)(proc_dentry, buffer, buflen); + + close_private_file(&proc_file); + + return(n); +} + +static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct file proc_file; + struct dentry *proc_dentry; + int (*follow_link)(struct dentry *, struct nameidata *); + int err, n; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + err = open_private_file(&proc_file, proc_dentry, O_RDONLY); + if(err) + return(err); + + follow_link = proc_dentry->d_inode->i_op->follow_link; + n = (*follow_link)(proc_dentry, nd); + + close_private_file(&proc_file); + + return(n); +} + +static struct inode_operations hppfs_dir_iops = { + .lookup = hppfs_lookup, +}; + +static struct inode_operations hppfs_link_iops = { + .readlink = hppfs_readlink, + .follow_link = hppfs_follow_link, +}; + +static int init_inode(struct inode *inode, struct dentry *dentry) +{ + if(S_ISDIR(dentry->d_inode->i_mode)){ + inode->i_op = &hppfs_dir_iops; + inode->i_fop = &hppfs_dir_fops; + } + else 
if(S_ISLNK(dentry->d_inode->i_mode)){ + inode->i_op = &hppfs_link_iops; + inode->i_fop = &hppfs_file_fops; + } + else { + inode->i_op = &hppfs_file_iops; + inode->i_fop = &hppfs_file_fops; + } + + HPPFS_I(inode)->proc_dentry = dentry; + + return(0); +} + +static int hppfs_fill_super(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + struct file_system_type *procfs; + struct super_block *proc_sb; + int err; + + err = -ENOENT; + procfs = get_fs_type("proc"); + if(procfs == NULL) + goto out; + + if(list_empty(&procfs->fs_supers)) + goto out; + + proc_sb = list_entry(procfs->fs_supers.next, struct super_block, + s_instances); + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HPPFS_SUPER_MAGIC; + sb->s_op = &hppfs_sbops; + + root_inode = iget(sb, 0); + if(root_inode == NULL) + goto out; + + err = init_inode(root_inode, proc_sb->s_root); + if(err) + goto out_put; + + err = -ENOMEM; + sb->s_root = d_alloc_root(root_inode); + if(sb->s_root == NULL) + goto out_put; + + hppfs_read_inode(root_inode); + + return(0); + + out_put: + iput(root_inode); + out: + return(err); +} + +static struct super_block *hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data) +{ + return(get_sb_nodev(type, flags, data, hppfs_fill_super)); +} + +static struct file_system_type hppfs_type = { + .owner = THIS_MODULE, + .name = "hppfs", + .get_sb = hppfs_read_super, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hppfs(void) +{ + return(register_filesystem(&hppfs_type)); +} + +static void __exit exit_hppfs(void) +{ + unregister_filesystem(&hppfs_type); +} + +module_init(init_hppfs) +module_exit(exit_hppfs) +MODULE_LICENSE("GPL"); + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile new file mode 100644 index 000000000..09f098a10 --- /dev/null +++ b/fs/relayfs/Makefile @@ -0,0 +1,8 @@ +# +# relayfs Makefile +# + +obj-$(CONFIG_RELAYFS_FS) += relayfs.o + +relayfs-y := relay.o relay_lockless.o relay_locking.o inode.o resize.o +relayfs-$(CONFIG_KLOG_CHANNEL) += klog.o diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c new file mode 100644 index 000000000..6e8736015 --- /dev/null +++ b/fs/relayfs/inode.c @@ -0,0 +1,629 @@ +/* + * VFS-related code for RelayFS, a high-speed data relay filesystem. + * + * Copyright (C) 2003 - Tom Zanussi , IBM Corp + * Copyright (C) 2003 - Karim Yaghmour + * + * Based on ramfs, Copyright (C) 2002 - Linus Torvalds + * + * This file is released under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RELAYFS_MAGIC 0x26F82121 + +static struct super_operations relayfs_ops; +static struct address_space_operations relayfs_aops; +static struct inode_operations relayfs_file_inode_operations; +static struct file_operations relayfs_file_operations; +static struct inode_operations relayfs_dir_inode_operations; + +static struct vfsmount * relayfs_mount; +static int relayfs_mount_count; + +static struct backing_dev_info relayfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .memory_backed = 1, /* Does not contribute to dirty memory */ +}; + +static struct inode * +relayfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode; + + inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &relayfs_aops; + inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &relayfs_file_inode_operations; + inode->i_fop = &relayfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &relayfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int +relayfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode * inode; + int error = -ENOSPC; + + inode = relayfs_get_inode(dir->i_sb, mode, dev); + + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + } + return error; +} + +static int +relayfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval; + + retval = relayfs_mknod(dir, dentry, mode | S_IFDIR, 0); + + if (!retval) + dir->i_nlink++; + return retval; +} + +static int +relayfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + return relayfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int +relayfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; + + inode = relayfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + d_instantiate(dentry, inode); + dget(dentry); + } else + iput(inode); + } + return error; +} + +/** + * relayfs_create_entry - create a relayfs directory or file + * @name: the name of the file to create + * @parent: parent directory + * @dentry: result dentry + * @entry_type: type of file to create (S_IFREG, S_IFDIR) + * @mode: mode + * @data: data to associate with the file + * + * Creates a file or directory with the specifed permissions. 
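+ *
+ * Returns 0 on success, a negative error code otherwise (for example
+ * -EEXIST if the entry already exists, or -EINVAL if no parent
+ * directory could be determined).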
+ */ +static int +relayfs_create_entry(const char * name, struct dentry * parent, struct dentry **dentry, int entry_type, int mode, void * data) +{ + struct qstr qname; + struct dentry * d; + + int error = 0; + + error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); + if (error) { + printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); + return error; + } + + qname.name = name; + qname.len = strlen(name); + qname.hash = full_name_hash(name, qname.len); + + if (parent == NULL) + if (relayfs_mount && relayfs_mount->mnt_sb) + parent = relayfs_mount->mnt_sb->s_root; + + if (parent == NULL) { + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + return -EINVAL; + } + + parent = dget(parent); + down(&parent->d_inode->i_sem); + d = lookup_hash(&qname, parent); + if (IS_ERR(d)) { + error = PTR_ERR(d); + goto release_mount; + } + + if (d->d_inode) { + error = -EEXIST; + goto release_mount; + } + + if (entry_type == S_IFREG) + error = relayfs_create(parent->d_inode, d, entry_type | mode, NULL); + else + error = relayfs_mkdir(parent->d_inode, d, entry_type | mode); + if (error) + goto release_mount; + + if ((entry_type == S_IFREG) && data) { + d->d_inode->u.generic_ip = data; + goto exit; /* don't release mount for regular files */ + } + +release_mount: + simple_release_fs(&relayfs_mount, &relayfs_mount_count); +exit: + *dentry = d; + up(&parent->d_inode->i_sem); + dput(parent); + + return error; +} + +/** + * relayfs_create_file - create a file in the relay filesystem + * @name: the name of the file to create + * @parent: parent directory + * @dentry: result dentry + * @data: data to associate with the file + * @mode: mode, if not specied the default perms are used + * + * The file will be created user rw on behalf of current user. + */ +int +relayfs_create_file(const char * name, struct dentry * parent, struct dentry **dentry, void * data, int mode) +{ + if (!mode) + mode = S_IRUSR | S_IWUSR; + + return relayfs_create_entry(name, parent, dentry, S_IFREG, + mode, data); +} + +/** + * relayfs_create_dir - create a directory in the relay filesystem + * @name: the name of the directory to create + * @parent: parent directory + * @dentry: result dentry + * + * The directory will be created world rwx on behalf of current user. + */ +int +relayfs_create_dir(const char * name, struct dentry * parent, struct dentry **dentry) +{ + return relayfs_create_entry(name, parent, dentry, S_IFDIR, + S_IRWXU | S_IRUGO | S_IXUGO, NULL); +} + +/** + * relayfs_remove_file - remove a file in the relay filesystem + * @dentry: file dentry + * + * Remove a file previously created by relayfs_create_file. + */ +int +relayfs_remove_file(struct dentry *dentry) +{ + struct dentry *parent; + int is_reg; + + parent = dentry->d_parent; + if (parent == NULL) + return -EINVAL; + + is_reg = S_ISREG(dentry->d_inode->i_mode); + + parent = dget(parent); + down(&parent->d_inode->i_sem); + if (dentry->d_inode) { + simple_unlink(parent->d_inode, dentry); + d_delete(dentry); + } + dput(dentry); + up(&parent->d_inode->i_sem); + dput(parent); + + if(is_reg) + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + + return 0; +} + +/** + * relayfs_open - open file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Associates the channel with the file, and increments the + * channel refcount. Reads will be 'auto-consuming'. 
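+ *
+ * Returns 0 on success, -EACCES if the inode carries no usable channel,
+ * -ENOMEM if a reader cannot be allocated, or -EPERM if the channel's
+ * fileop_notify() callback rejects the open.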
+ */ +int +relayfs_open(struct inode *inode, struct file *filp) +{ + struct rchan *rchan; + struct rchan_reader *reader; + int retval = 0; + + if (inode->u.generic_ip) { + rchan = (struct rchan *)inode->u.generic_ip; + if (rchan == NULL) + return -EACCES; + reader = __add_rchan_reader(rchan, filp, 1, 0); + if (reader == NULL) + return -ENOMEM; + filp->private_data = reader; + retval = rchan->callbacks->fileop_notify(rchan->id, filp, + RELAY_FILE_OPEN); + if (retval == 0) + /* Inc relay channel refcount for file */ + rchan_get(rchan->id); + else { + __remove_rchan_reader(reader); + retval = -EPERM; + } + } + + return retval; +} + +/** + * relayfs_mmap - mmap file op for relayfs files + * @filp: the file + * @vma: the vma describing what to map + * + * Calls upon relay_mmap_buffer to map the file into user space. + */ +int +relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct rchan *rchan; + + rchan = ((struct rchan_reader *)filp->private_data)->rchan; + + return __relay_mmap_buffer(rchan, vma); +} + +/** + * relayfs_file_read - read file op for relayfs files + * @filp: the file + * @buf: user buf to read into + * @count: bytes requested + * @offset: offset into file + * + * Reads count bytes from the channel, or as much as is available within + * the sub-buffer currently being read. Reads are 'auto-consuming'. + * See relay_read() for details. + * + * Returns bytes read on success, 0 or -EAGAIN if nothing available, + * negative otherwise. + */ +ssize_t +relayfs_file_read(struct file *filp, char * buf, size_t count, loff_t *offset) +{ + size_t read_count; + struct rchan_reader *reader; + u32 dummy; /* all VFS readers are auto-consuming */ + + if (offset != &filp->f_pos) /* pread, seeking not supported */ + return -ESPIPE; + + if (count == 0) + return 0; + + reader = (struct rchan_reader *)filp->private_data; + read_count = relay_read(reader, buf, count, + filp->f_flags & (O_NDELAY | O_NONBLOCK) ? 0 : 1, &dummy); + + return read_count; +} + +/** + * relayfs_file_write - write file op for relayfs files + * @filp: the file + * @buf: user buf to write from + * @count: bytes to write + * @offset: offset into file + * + * Reserves a slot in the relay buffer and writes count bytes + * into it. The current limit for a single write is 2 pages + * worth. The user_deliver() channel callback will be invoked on + * + * Returns bytes written on success, 0 or -EAGAIN if nothing available, + * negative otherwise. 
+ */ +ssize_t +relayfs_file_write(struct file *filp, const char *buf, size_t count, loff_t *offset) +{ + int write_count; + char * write_buf; + struct rchan *rchan; + int err = 0; + void *wrote_pos; + struct rchan_reader *reader; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL) + return -EPERM; + + rchan = reader->rchan; + if (rchan == NULL) + return -EPERM; + + if (count == 0) + return 0; + + /* Change this if need to write more than 2 pages at once */ + if (count > 2 * PAGE_SIZE) + return -EINVAL; + + write_buf = (char *)__get_free_pages(GFP_KERNEL, 1); + if (write_buf == NULL) + return -ENOMEM; + + if (copy_from_user(write_buf, buf, count)) + return -EFAULT; + + if (filp->f_flags & (O_NDELAY | O_NONBLOCK)) { + write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos); + if (write_count == 0) + return -EAGAIN; + } else { + err = wait_event_interruptible(rchan->write_wait, + (write_count = relay_write(rchan->id, write_buf, count, -1, &wrote_pos))); + if (err) + return err; + } + + free_pages((unsigned long)write_buf, 1); + + rchan->callbacks->user_deliver(rchan->id, wrote_pos, write_count); + + return write_count; +} + +/** + * relayfs_ioctl - ioctl file op for relayfs files + * @inode: the inode + * @filp: the file + * @cmd: the command + * @arg: command arg + * + * Passes the specified cmd/arg to the kernel client. arg may be a + * pointer to user-space data, in which case the kernel client is + * responsible for copying the data to/from user space appropriately. + * The kernel client is also responsible for returning a meaningful + * return value for ioctl calls. + * + * Returns result of relay channel callback, -EPERM if unsuccessful. + */ +int +relayfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct rchan *rchan; + struct rchan_reader *reader; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL) + return -EPERM; + + rchan = reader->rchan; + if (rchan == NULL) + return -EPERM; + + return rchan->callbacks->ioctl(rchan->id, cmd, arg); +} + +/** + * relayfs_poll - poll file op for relayfs files + * @filp: the file + * @wait: poll table + * + * Poll implemention. + */ +static unsigned int +relayfs_poll(struct file *filp, poll_table *wait) +{ + struct rchan_reader *reader; + unsigned int mask = 0; + + reader = (struct rchan_reader *)filp->private_data; + + if (reader->rchan->finalized) + return POLLERR; + + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &reader->rchan->read_wait, wait); + if (!rchan_empty(reader)) + mask |= POLLIN | POLLRDNORM; + } + + if (filp->f_mode & FMODE_WRITE) { + poll_wait(filp, &reader->rchan->write_wait, wait); + if (!rchan_full(reader)) + mask |= POLLOUT | POLLWRNORM; + } + + return mask; +} + +/** + * relayfs_release - release file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Decrements the channel refcount, as the filesystem is + * no longer using it. 
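+ *
+ * Always returns 0; a file that never had a reader attached is simply
+ * ignored.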
+ */ +int +relayfs_release(struct inode *inode, struct file *filp) +{ + struct rchan_reader *reader; + struct rchan *rchan; + + reader = (struct rchan_reader *)filp->private_data; + if (reader == NULL || reader->rchan == NULL) + return 0; + rchan = reader->rchan; + + rchan->callbacks->fileop_notify(reader->rchan->id, filp, + RELAY_FILE_CLOSE); + __remove_rchan_reader(reader); + /* The channel is no longer in use as far as this file is concerned */ + rchan_put(rchan); + + return 0; +} + +static struct address_space_operations relayfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +static struct file_operations relayfs_file_operations = { + .open = relayfs_open, + .read = relayfs_file_read, + .write = relayfs_file_write, + .ioctl = relayfs_ioctl, + .poll = relayfs_poll, + .mmap = relayfs_mmap, + .fsync = simple_sync_file, + .release = relayfs_release, +}; + +static struct inode_operations relayfs_file_inode_operations = { + .getattr = simple_getattr, +}; + +static struct inode_operations relayfs_dir_inode_operations = { + .create = relayfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .symlink = relayfs_symlink, + .mkdir = relayfs_mkdir, + .rmdir = simple_rmdir, + .mknod = relayfs_mknod, + .rename = simple_rename, +}; + +static struct super_operations relayfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int +relayfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RELAYFS_MAGIC; + sb->s_op = &relayfs_ops; + inode = relayfs_get_inode(sb, S_IFDIR | 0755, 0); + + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + + return 0; +} + +static struct super_block * +relayfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, relayfs_fill_super); +} + +static struct file_system_type relayfs_fs_type = { + .owner = THIS_MODULE, + .name = "relayfs", + .get_sb = relayfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init +init_relayfs_fs(void) +{ + int err = register_filesystem(&relayfs_fs_type); +#ifdef CONFIG_KLOG_CHANNEL + if (!err) + create_klog_channel(); +#endif + return err; +} + +static void __exit +exit_relayfs_fs(void) +{ +#ifdef CONFIG_KLOG_CHANNEL + remove_klog_channel(); +#endif + unregister_filesystem(&relayfs_fs_type); +} + +module_init(init_relayfs_fs) +module_exit(exit_relayfs_fs) + +MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); +MODULE_DESCRIPTION("Relay Filesystem"); +MODULE_LICENSE("GPL"); + diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c new file mode 100644 index 000000000..11f4636ce --- /dev/null +++ b/fs/relayfs/relay.c @@ -0,0 +1,1911 @@ +/* + * Public API and common code for RelayFS. + * + * Please see Documentation/filesystems/relayfs.txt for API description. + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "relay_lockless.h" +#include "relay_locking.h" +#include "resize.h" + +/* Relay channel table, indexed by channel id */ +static struct rchan * rchan_table[RELAY_MAX_CHANNELS]; +static rwlock_t rchan_table_lock = RW_LOCK_UNLOCKED; + +/* Relay operation structs, one per scheme */ +static struct relay_ops lockless_ops = { + .reserve = lockless_reserve, + .commit = lockless_commit, + .get_offset = lockless_get_offset, + .finalize = lockless_finalize, + .reset = lockless_reset, + .reset_index = lockless_reset_index +}; + +static struct relay_ops locking_ops = { + .reserve = locking_reserve, + .commit = locking_commit, + .get_offset = locking_get_offset, + .finalize = locking_finalize, + .reset = locking_reset, + .reset_index = locking_reset_index +}; + +/* + * Low-level relayfs kernel API. These functions should not normally be + * used by clients. See high-level kernel API below. + */ + +/** + * rchan_get - get channel associated with id, incrementing refcount + * @rchan_id: the channel id + * + * Returns channel if successful, NULL otherwise. + */ +struct rchan * +rchan_get(int rchan_id) +{ + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return NULL; + + read_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + if (rchan) + atomic_inc(&rchan->refcount); + read_unlock(&rchan_table_lock); + + return rchan; +} + +/** + * clear_readers - clear non-VFS readers + * @rchan: the channel + * + * Clear the channel pointers of all non-VFS readers open on the channel. + */ +static inline void +clear_readers(struct rchan *rchan) +{ + struct list_head *p; + struct rchan_reader *reader; + + read_lock(&rchan->open_readers_lock); + list_for_each(p, &rchan->open_readers) { + reader = list_entry(p, struct rchan_reader, list); + if (!reader->vfs_reader) + reader->rchan = NULL; + } + read_unlock(&rchan->open_readers_lock); +} + +/** + * rchan_alloc_id - reserve a channel id and store associated channel + * @rchan: the channel + * + * Returns channel id if successful, -1 otherwise. + */ +static inline int +rchan_alloc_id(struct rchan *rchan) +{ + int i; + int rchan_id = -1; + + if (rchan == NULL) + return -1; + + write_lock(&rchan_table_lock); + for (i = 0; i < RELAY_MAX_CHANNELS; i++) { + if (rchan_table[i] == NULL) { + rchan_table[i] = rchan; + rchan_id = rchan->id = i; + break; + } + } + if (rchan_id != -1) + atomic_inc(&rchan->refcount); + write_unlock(&rchan_table_lock); + + return rchan_id; +} + +/** + * rchan_free_id - revoke a channel id and remove associated channel + * @rchan_id: the channel id + */ +static inline void +rchan_free_id(int rchan_id) +{ + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return; + + write_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + rchan_table[rchan_id] = NULL; + write_unlock(&rchan_table_lock); +} + +/** + * rchan_destroy_buf - destroy the current channel buffer + * @rchan: the channel + */ +static inline void +rchan_destroy_buf(struct rchan *rchan) +{ + if (rchan->buf && !rchan->init_buf) + free_rchan_buf(rchan->buf, + rchan->buf_page_array, + rchan->buf_page_count); +} + +/** + * relay_release - perform end-of-buffer processing for last buffer + * @rchan: the channel + * + * Returns 0 if successful, negative otherwise. 
+ * + * Releases the channel buffer, destroys the channel, and removes the + * relay file from the relayfs filesystem. Should only be called from + * rchan_put(). If we're here, it means by definition refcount is 0. + */ +static int +relay_release(struct rchan *rchan) +{ + if (rchan == NULL) + return -EBADF; + + rchan_destroy_buf(rchan); + rchan_free_id(rchan->id); + relayfs_remove_file(rchan->dentry); + clear_readers(rchan); + kfree(rchan); + + return 0; +} + +/** + * rchan_get - decrement channel refcount, releasing it if 0 + * @rchan: the channel + * + * If the refcount reaches 0, the channel will be destroyed. + */ +void +rchan_put(struct rchan *rchan) +{ + if (atomic_dec_and_test(&rchan->refcount)) + relay_release(rchan); +} + +/** + * relay_reserve - reserve a slot in the channel buffer + * @rchan: the channel + * @len: the length of the slot to reserve + * @td: the time delta between buffer start and current write, or TSC + * @err: receives the result flags + * @interrupting: 1 if interrupting previous, used only in locking scheme + * + * Returns pointer to the beginning of the reserved slot, NULL if error. + * + * The errcode value contains the result flags and is an ORed combination + * of the following: + * + * RELAY_BUFFER_SWITCH_NONE - no buffer switch occurred + * RELAY_EVENT_DISCARD_NONE - event should not be discarded + * RELAY_BUFFER_SWITCH - buffer switch occurred + * RELAY_EVENT_DISCARD - event should be discarded (all buffers are full) + * RELAY_EVENT_TOO_LONG - event won't fit into even an empty buffer + * + * buffer_start and buffer_end callbacks are triggered at this point + * if applicable. + */ +char * +relay_reserve(struct rchan *rchan, + u32 len, + struct timeval *ts, + u32 *td, + int *err, + int *interrupting) +{ + if (rchan == NULL) + return NULL; + + *interrupting = 0; + + return rchan->relay_ops->reserve(rchan, len, ts, td, err, interrupting); +} + + +/** + * wakeup_readers - wake up VFS readers waiting on a channel + * @private: the channel + * + * This is the work function used to defer reader waking. The + * reason waking is deferred is that calling directly from commit + * causes problems if you're writing from say the scheduler. + */ +static void +wakeup_readers(void *private) +{ + struct rchan *rchan = (struct rchan *)private; + + wake_up_interruptible(&rchan->read_wait); +} + + +/** + * relay_commit - commit a reserved slot in the buffer + * @rchan: the channel + * @from: commit the length starting here + * @len: length committed + * @interrupting: 1 if interrupting previous, used only in locking scheme + * + * After the write into the reserved buffer has been complted, this + * function must be called in order for the relay to determine whether + * buffers are complete and to wake up VFS readers. + * + * delivery callback is triggered at this point if applicable. 
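+ *
+ * A low-level client normally pairs this with relay_reserve(), roughly
+ * as follows (sketch only; declarations, discard handling and names
+ * such as 'event' are illustrative):
+ *
+ *	from = relay_reserve(rchan, len, &ts, &td, &err, &interrupting);
+ *	if (from != NULL) {
+ *		memcpy(from, event, len);
+ *		relay_commit(rchan, from, len, err, interrupting);
+ *	}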
+ */ +void +relay_commit(struct rchan *rchan, + char *from, + u32 len, + int reserve_code, + int interrupting) +{ + int deliver; + + if (rchan == NULL) + return; + + deliver = packet_delivery(rchan) || + (reserve_code & RELAY_BUFFER_SWITCH); + + rchan->relay_ops->commit(rchan, from, len, deliver, interrupting); + + /* The params are always the same, so no worry about re-queuing */ + if (deliver && waitqueue_active(&rchan->read_wait)) { + PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan); + schedule_delayed_work(&rchan->wake_readers, 1); + } +} + +/** + * relay_get_offset - get current and max channel buffer offsets + * @rchan: the channel + * @max_offset: maximum channel offset + * + * Returns the current and maximum channel buffer offsets. + */ +u32 +relay_get_offset(struct rchan *rchan, u32 *max_offset) +{ + return rchan->relay_ops->get_offset(rchan, max_offset); +} + +/** + * reset_index - try once to reset the current channel index + * @rchan: the channel + * @old_index: the index read before reset + * + * Attempts to reset the channel index to 0. It tries once, and + * if it fails, returns negative, 0 otherwise. + */ +int +reset_index(struct rchan *rchan, u32 old_index) +{ + return rchan->relay_ops->reset_index(rchan, old_index); +} + +/* + * close() vm_op implementation for relayfs file mapping. + */ +static void +relay_file_mmap_close(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct rchan_reader *reader; + struct rchan *rchan; + + reader = (struct rchan_reader *)filp->private_data; + rchan = reader->rchan; + + atomic_dec(&rchan->mapped); + + rchan->callbacks->fileop_notify(reader->rchan->id, filp, + RELAY_FILE_UNMAP); +} + +/* + * vm_ops for relay file mappings. + */ +static struct vm_operations_struct relay_file_mmap_ops = { + .close = relay_file_mmap_close +}; + +/* \begin{Code inspired from BTTV driver} */ +static inline unsigned long +kvirt_to_pa(unsigned long adr) +{ + unsigned long kva, ret; + + kva = (unsigned long) page_address(vmalloc_to_page((void *) adr)); + kva |= adr & (PAGE_SIZE - 1); + ret = __pa(kva); + return ret; +} + +static int +relay_mmap_region(struct vm_area_struct *vma, + const char *adr, + const char *start_pos, + unsigned long size) +{ + unsigned long start = (unsigned long) adr; + unsigned long page, pos; + + pos = (unsigned long) start_pos; + + while (size > 0) { + page = kvirt_to_pa(pos); + if (remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED)) + return -EAGAIN; + start += PAGE_SIZE; + pos += PAGE_SIZE; + size -= PAGE_SIZE; + } + + return 0; +} +/* \end{Code inspired from BTTV driver} */ + +/** + * relay_mmap_buffer: - mmap buffer to process address space + * @rchan_id: relay channel id + * @vma: vm_area_struct describing memory to be mapped + * + * Returns: + * 0 if ok + * -EAGAIN, when remap failed + * -EINVAL, invalid requested length + * + * Caller should already have grabbed mmap_sem. 
+ */ +int +__relay_mmap_buffer(struct rchan *rchan, + struct vm_area_struct *vma) +{ + int err = 0; + unsigned long length = vma->vm_end - vma->vm_start; + struct file *filp = vma->vm_file; + + if (rchan == NULL) { + err = -EBADF; + goto exit; + } + + if (rchan->init_buf) { + err = -EPERM; + goto exit; + } + + if (length != (unsigned long)rchan->alloc_size) { + err = -EINVAL; + goto exit; + } + + err = relay_mmap_region(vma, + (char *)vma->vm_start, + rchan->buf, + rchan->alloc_size); + + if (err == 0) { + vma->vm_ops = &relay_file_mmap_ops; + err = rchan->callbacks->fileop_notify(rchan->id, filp, + RELAY_FILE_MAP); + if (err == 0) + atomic_inc(&rchan->mapped); + } +exit: + return err; +} + +/* + * High-level relayfs kernel API. See Documentation/filesystems/relafys.txt. + */ + +/* + * rchan_callback implementations defining default channel behavior. Used + * in place of corresponding NULL values in client callback struct. + */ + +/* + * buffer_end() default callback. Does nothing. + */ +static int +buffer_end_default_callback(int rchan_id, + char *current_write_pos, + char *end_of_buffer, + struct timeval end_time, + u32 end_tsc, + int using_tsc) +{ + return 0; +} + +/* + * buffer_start() default callback. Does nothing. + */ +static int +buffer_start_default_callback(int rchan_id, + char *current_write_pos, + u32 buffer_id, + struct timeval start_time, + u32 start_tsc, + int using_tsc) +{ + return 0; +} + +/* + * deliver() default callback. Does nothing. + */ +static void +deliver_default_callback(int rchan_id, char *from, u32 len) +{ +} + +/* + * user_deliver() default callback. Does nothing. + */ +static void +user_deliver_default_callback(int rchan_id, char *from, u32 len) +{ +} + +/* + * needs_resize() default callback. Does nothing. + */ +static void +needs_resize_default_callback(int rchan_id, + int resize_type, + u32 suggested_buf_size, + u32 suggested_n_bufs) +{ +} + +/* + * fileop_notify() default callback. Does nothing. + */ +static int +fileop_notify_default_callback(int rchan_id, + struct file *filp, + enum relay_fileop fileop) +{ + return 0; +} + +/* + * ioctl() default callback. Does nothing. + */ +static int +ioctl_default_callback(int rchan_id, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + +/* relay channel default callbacks */ +static struct rchan_callbacks default_channel_callbacks = { + .buffer_start = buffer_start_default_callback, + .buffer_end = buffer_end_default_callback, + .deliver = deliver_default_callback, + .user_deliver = user_deliver_default_callback, + .needs_resize = needs_resize_default_callback, + .fileop_notify = fileop_notify_default_callback, + .ioctl = ioctl_default_callback, +}; + +/** + * check_attribute_flags - check sanity of channel attributes + * @flags: channel attributes + * @resizeable: 1 if true + * + * Returns 0 if successful, negative otherwise. 
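+ *
+ * For example, RELAY_DELIVERY_PACKET | RELAY_USAGE_GLOBAL |
+ * RELAY_SCHEME_LOCKING | RELAY_TIMESTAMP_GETTIMEOFDAY should satisfy
+ * these checks; the buffer mode defaults to RELAY_MODE_CONTINUOUS when
+ * neither mode flag is given.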
+ */ +static int +check_attribute_flags(u32 *attribute_flags, int resizeable) +{ + u32 flags = *attribute_flags; + + if (!(flags & RELAY_DELIVERY_BULK) && !(flags & RELAY_DELIVERY_PACKET)) + return -EINVAL; /* Delivery mode must be specified */ + + if (!(flags & RELAY_USAGE_SMP) && !(flags & RELAY_USAGE_GLOBAL)) + return -EINVAL; /* Usage must be specified */ + + if (resizeable) { /* Resizeable can never be continuous */ + *attribute_flags &= ~RELAY_MODE_CONTINUOUS; + *attribute_flags |= RELAY_MODE_NO_OVERWRITE; + } + + if ((flags & RELAY_MODE_CONTINUOUS) && + (flags & RELAY_MODE_NO_OVERWRITE)) + return -EINVAL; /* Can't have it both ways */ + + if (!(flags & RELAY_MODE_CONTINUOUS) && + !(flags & RELAY_MODE_NO_OVERWRITE)) + *attribute_flags |= RELAY_MODE_CONTINUOUS; /* Default to continuous */ + + if (!(flags & RELAY_SCHEME_ANY)) + return -EINVAL; /* One or both must be specified */ + else if (flags & RELAY_SCHEME_LOCKLESS) { + if (have_cmpxchg()) + *attribute_flags &= ~RELAY_SCHEME_LOCKING; + else if (flags & RELAY_SCHEME_LOCKING) + *attribute_flags &= ~RELAY_SCHEME_LOCKLESS; + else + return -EINVAL; /* Locking scheme not an alternative */ + } + + if (!(flags & RELAY_TIMESTAMP_ANY)) + return -EINVAL; /* One or both must be specified */ + else if (flags & RELAY_TIMESTAMP_TSC) { + if (have_tsc()) + *attribute_flags &= ~RELAY_TIMESTAMP_GETTIMEOFDAY; + else if (flags & RELAY_TIMESTAMP_GETTIMEOFDAY) + *attribute_flags &= ~RELAY_TIMESTAMP_TSC; + else + return -EINVAL; /* gettimeofday not an alternative */ + } + + return 0; +} + +/* + * High-level API functions. + */ + +/** + * __relay_reset - internal reset function + * @rchan: the channel + * @init: 1 if this is a first-time channel initialization + * + * See relay_reset for description of effect. + */ +void +__relay_reset(struct rchan *rchan, int init) +{ + int i; + + if (init) { + rchan->version = RELAYFS_CHANNEL_VERSION; + init_MUTEX(&rchan->resize_sem); + init_waitqueue_head(&rchan->read_wait); + init_waitqueue_head(&rchan->write_wait); + atomic_set(&rchan->refcount, 0); + INIT_LIST_HEAD(&rchan->open_readers); + rchan->open_readers_lock = RW_LOCK_UNLOCKED; + } + + rchan->buf_id = rchan->buf_idx = 0; + atomic_set(&rchan->suspended, 0); + atomic_set(&rchan->mapped, 0); + rchan->half_switch = 0; + rchan->bufs_produced = 0; + rchan->bufs_consumed = 0; + rchan->bytes_consumed = 0; + rchan->initialized = 0; + rchan->finalized = 0; + rchan->resize_min = rchan->resize_max = 0; + rchan->resizing = 0; + rchan->replace_buffer = 0; + rchan->resize_buf = NULL; + rchan->resize_buf_size = 0; + rchan->resize_alloc_size = 0; + rchan->resize_n_bufs = 0; + rchan->resize_err = 0; + rchan->resize_failures = 0; + rchan->resize_order = 0; + + rchan->expand_page_array = NULL; + rchan->expand_page_count = 0; + rchan->shrink_page_array = NULL; + rchan->shrink_page_count = 0; + rchan->resize_page_array = NULL; + rchan->resize_page_count = 0; + rchan->old_buf_page_array = NULL; + rchan->expand_buf_id = 0; + + INIT_WORK(&rchan->wake_readers, NULL, NULL); + INIT_WORK(&rchan->wake_writers, NULL, NULL); + + for (i = 0; i < RELAY_MAX_BUFS; i++) + rchan->unused_bytes[i] = 0; + + rchan->relay_ops->reset(rchan, init); +} + +/** + * relay_reset - reset the channel + * @rchan: the channel + * + * Returns 0 if successful, negative if not. + * + * This has the effect of erasing all data from the buffer and + * restarting the channel in its initial state. The buffer itself + * is not freed, so any mappings are still in effect. 
+ * + * NOTE: Care should be taken that the channnel isn't actually + * being used by anything when this call is made. + */ +int +relay_reset(int rchan_id) +{ + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + __relay_reset(rchan, 0); + update_readers_consumed(rchan, 0, 0); + + rchan_put(rchan); + + return 0; +} + +/** + * check_init_buf - check the sanity of init_buf, if present + * @init_buf: the initbuf + * @init_buf_size: the total initbuf size + * @bufsize: the channel's sub-buffer size + * @nbufs: the number of sub-buffers in the channel + * + * Returns 0 if ok, negative otherwise. + */ +static int +check_init_buf(char *init_buf, u32 init_buf_size, u32 bufsize, u32 nbufs) +{ + int err = 0; + + if (init_buf && nbufs == 1) /* 1 sub-buffer makes no sense */ + err = -EINVAL; + + if (init_buf && (bufsize * nbufs != init_buf_size)) + err = -EINVAL; + + return err; +} + +/** + * rchan_create_buf - allocate the initial channel buffer + * @rchan: the channel + * @size_alloc: the total size of the channel buffer + * + * Returns 0 if successful, negative otherwise. + */ +static inline int +rchan_create_buf(struct rchan *rchan, int size_alloc) +{ + struct page **page_array; + int page_count; + + if ((rchan->buf = (char *)alloc_rchan_buf(size_alloc, &page_array, &page_count)) == NULL) { + rchan->buf_page_array = NULL; + rchan->buf_page_count = 0; + return -ENOMEM; + } + + rchan->buf_page_array = page_array; + rchan->buf_page_count = page_count; + + return 0; +} + +/** + * rchan_create - allocate and initialize a channel, including buffer + * @chanpath: path specifying the relayfs channel file to create + * @bufsize: the size of the sub-buffers within the channel buffer + * @nbufs: the number of sub-buffers within the channel buffer + * @rchan_flags: flags specifying buffer attributes + * @err: err code + * + * Returns channel if successful, NULL otherwise, err receives errcode. + * + * Allocates a struct rchan representing a relay channel, according + * to the attributes passed in via rchan_flags. Does some basic sanity + * checking but doesn't try to do anything smart. In particular, the + * number of buffers must be a power of 2, and if the lockless scheme + * is being used, the sub-buffer size must also be a power of 2. The + * locking scheme can use buffers of any size. 
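+ *
+ * These constraints are visible to relay_open() callers.  A sketch of
+ * a hypothetical client (file name, callbacks and event variable are
+ * invented for illustration):
+ *
+ *	id = relay_open("mydir/mychan", 32768, 8,
+ *			RELAY_DELIVERY_PACKET | RELAY_USAGE_GLOBAL |
+ *			RELAY_SCHEME_ANY | RELAY_TIMESTAMP_ANY,
+ *			&my_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0);
+ *
+ * Here 8 sub-buffers of 32768 bytes each are valid for either scheme,
+ * since both values are powers of 2.  Events could then be logged
+ * with e.g. relay_write(id, &ev, sizeof(ev), -1, NULL).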
+ */ +static struct rchan * +rchan_create(const char *chanpath, + int bufsize, + int nbufs, + u32 rchan_flags, + char *init_buf, + u32 init_buf_size, + int *err) +{ + int size_alloc; + struct rchan *rchan = NULL; + + *err = 0; + + rchan = (struct rchan *)kmalloc(sizeof(struct rchan), GFP_KERNEL); + if (rchan == NULL) { + *err = -ENOMEM; + return NULL; + } + rchan->buf = rchan->init_buf = NULL; + + *err = check_init_buf(init_buf, init_buf_size, bufsize, nbufs); + if (*err) + goto exit; + + if (nbufs == 1 && bufsize) { + rchan->n_bufs = nbufs; + rchan->buf_size = bufsize; + size_alloc = bufsize; + goto alloc; + } + + if (bufsize <= 0 || + (rchan_flags & RELAY_SCHEME_LOCKLESS && hweight32(bufsize) != 1) || + hweight32(nbufs) != 1 || + nbufs < RELAY_MIN_BUFS || + nbufs > RELAY_MAX_BUFS) { + *err = -EINVAL; + goto exit; + } + + size_alloc = FIX_SIZE(bufsize * nbufs); + if (size_alloc > RELAY_MAX_BUF_SIZE) { + *err = -EINVAL; + goto exit; + } + rchan->n_bufs = nbufs; + rchan->buf_size = bufsize; + + if (rchan_flags & RELAY_SCHEME_LOCKLESS) { + offset_bits(rchan) = ffs(bufsize) - 1; + offset_mask(rchan) = RELAY_BUF_OFFSET_MASK(offset_bits(rchan)); + bufno_bits(rchan) = ffs(nbufs) - 1; + } +alloc: + if (rchan_alloc_id(rchan) == -1) { + *err = -ENOMEM; + goto exit; + } + + if (init_buf == NULL) { + *err = rchan_create_buf(rchan, size_alloc); + if (*err) { + rchan_free_id(rchan->id); + goto exit; + } + } else + rchan->buf = rchan->init_buf = init_buf; + + rchan->alloc_size = size_alloc; + + if (rchan_flags & RELAY_SCHEME_LOCKLESS) + rchan->relay_ops = &lockless_ops; + else + rchan->relay_ops = &locking_ops; + +exit: + if (*err) { + kfree(rchan); + rchan = NULL; + } + + return rchan; +} + + +static char tmpname[NAME_MAX]; + +/** + * rchan_create_dir - create directory for file + * @chanpath: path to file, including filename + * @residual: filename remaining after parse + * @topdir: the directory filename should be created in + * + * Returns 0 if successful, negative otherwise. + * + * Inspired by xlate_proc_name() in procfs. Given a file path which + * includes the filename, creates any and all directories necessary + * to create the file. + */ +static int +rchan_create_dir(const char * chanpath, + const char **residual, + struct dentry **topdir) +{ + const char *cp = chanpath, *next; + struct dentry *parent = NULL; + int len, err = 0; + + while (1) { + next = strchr(cp, '/'); + if (!next) + break; + + len = next - cp; + + strncpy(tmpname, cp, len); + tmpname[len] = '\0'; + err = relayfs_create_dir(tmpname, parent, &parent); + if (err && (err != -EEXIST)) + return err; + cp += len + 1; + } + + *residual = cp; + *topdir = parent; + + return err; +} + +/** + * rchan_create_file - create file, including parent directories + * @chanpath: path to file, including filename + * @dentry: result dentry + * @data: data to associate with the file + * + * Returns 0 if successful, negative otherwise. 
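+ *
+ * For example (illustrative only):
+ *
+ *	err = rchan_create_file("mydir/sub/mychan", &dentry, rchan, 0);
+ *
+ * creates the relayfs directories "mydir" and "sub" if they don't
+ * already exist, then creates the file "mychan" inside "sub"; a mode
+ * of 0 selects the default permissions described in relay_open().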
+ */ +static int +rchan_create_file(const char * chanpath, + struct dentry **dentry, + struct rchan * data, + int mode) +{ + int err; + const char * fname; + struct dentry *topdir; + + err = rchan_create_dir(chanpath, &fname, &topdir); + if (err && (err != -EEXIST)) + return err; + + err = relayfs_create_file(fname, topdir, dentry, (void *)data, mode); + + return err; +} + +/** + * relay_open - create a new file/channel buffer in relayfs + * @chanpath: name of file to create, including path + * @bufsize: size of sub-buffers + * @nbufs: number of sub-buffers + * @flags: channel attributes + * @callbacks: client callback functions + * @start_reserve: number of bytes to reserve at start of each sub-buffer + * @end_reserve: number of bytes to reserve at end of each sub-buffer + * @rchan_start_reserve: additional reserve at start of first sub-buffer + * @resize_min: minimum total buffer size, if set + * @resize_max: maximum total buffer size, if set + * @mode: the perms to be given to the relayfs file, 0 to accept defaults + * @init_buf: initial memory buffer to start out with, NULL if N/A + * @init_buf_size: initial memory buffer size to start out with, 0 if N/A + * + * Returns channel id if successful, negative otherwise. + * + * Creates a relay channel using the sizes and attributes specified. + * The default permissions, used if mode == 0 are S_IRUSR | S_IWUSR. See + * Documentation/filesystems/relayfs.txt for details. + */ +int +relay_open(const char *chanpath, + int bufsize, + int nbufs, + u32 flags, + struct rchan_callbacks *channel_callbacks, + u32 start_reserve, + u32 end_reserve, + u32 rchan_start_reserve, + u32 resize_min, + u32 resize_max, + int mode, + char *init_buf, + u32 init_buf_size) +{ + int err; + struct rchan *rchan; + struct dentry *dentry; + struct rchan_callbacks *callbacks = NULL; + + if (chanpath == NULL) + return -EINVAL; + + if (nbufs != 1) { + err = check_attribute_flags(&flags, resize_min ? 
1 : 0); + if (err) + return err; + } + + rchan = rchan_create(chanpath, bufsize, nbufs, flags, init_buf, init_buf_size, &err); + + if (err < 0) + return err; + + /* Create file in fs */ + if ((err = rchan_create_file(chanpath, &dentry, rchan, mode)) < 0) { + rchan_destroy_buf(rchan); + rchan_free_id(rchan->id); + kfree(rchan); + return err; + } + + rchan->dentry = dentry; + + if (channel_callbacks == NULL) + callbacks = &default_channel_callbacks; + else + callbacks = channel_callbacks; + + if (callbacks->buffer_end == NULL) + callbacks->buffer_end = buffer_end_default_callback; + if (callbacks->buffer_start == NULL) + callbacks->buffer_start = buffer_start_default_callback; + if (callbacks->deliver == NULL) + callbacks->deliver = deliver_default_callback; + if (callbacks->user_deliver == NULL) + callbacks->user_deliver = user_deliver_default_callback; + if (callbacks->needs_resize == NULL) + callbacks->needs_resize = needs_resize_default_callback; + if (callbacks->fileop_notify == NULL) + callbacks->fileop_notify = fileop_notify_default_callback; + if (callbacks->ioctl == NULL) + callbacks->ioctl = ioctl_default_callback; + rchan->callbacks = callbacks; + + /* Just to let the client know the sizes used */ + rchan->callbacks->needs_resize(rchan->id, + RELAY_RESIZE_REPLACED, + rchan->buf_size, + rchan->n_bufs); + + rchan->flags = flags; + rchan->start_reserve = start_reserve; + rchan->end_reserve = end_reserve; + rchan->rchan_start_reserve = rchan_start_reserve; + + __relay_reset(rchan, 1); + + if (resize_min > 0 && resize_max > 0 && + resize_max < RELAY_MAX_TOTAL_BUF_SIZE) { + rchan->resize_min = resize_min; + rchan->resize_max = resize_max; + init_shrink_timer(rchan); + } + + rchan_get(rchan->id); + + return rchan->id; +} + +/** + * relay_discard_init_buf - alloc channel buffer and copy init_buf into it + * @rchan_id: the channel id + * + * Returns 0 if successful, negative otherwise. + * + * NOTE: May sleep. Should also be called only when the channel isn't + * actively being written into. + */ +int +relay_discard_init_buf(int rchan_id) +{ + struct rchan *rchan; + int err = 0; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + if (rchan->init_buf == NULL) { + err = -EINVAL; + goto out; + } + + err = rchan_create_buf(rchan, rchan->alloc_size); + if (err) + goto out; + + memcpy(rchan->buf, rchan->init_buf, rchan->n_bufs * rchan->buf_size); + rchan->init_buf = NULL; +out: + rchan_put(rchan); + + return err; +} + +/** + * relay_finalize - perform end-of-buffer processing for last buffer + * @rchan_id: the channel id + * @releasing: true if called when releasing file + * + * Returns 0 if successful, negative otherwise. + */ +static int +relay_finalize(int rchan_id) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + if (rchan->finalized == 0) { + rchan->relay_ops->finalize(rchan); + rchan->finalized = 1; + } + + if (waitqueue_active(&rchan->read_wait)) { + PREPARE_WORK(&rchan->wake_readers, wakeup_readers, rchan); + schedule_delayed_work(&rchan->wake_readers, 1); + } + + rchan_put(rchan); + + return 0; +} + +/** + * restore_callbacks - restore default channel callbacks + * @rchan: the channel + * + * Restore callbacks to the default versions. 
+ */ +static inline void +restore_callbacks(struct rchan *rchan) +{ + if (rchan->callbacks != &default_channel_callbacks) + rchan->callbacks = &default_channel_callbacks; +} + +/** + * relay_close - close the channel + * @rchan_id: relay channel id + * + * Finalizes the last sub-buffer and marks the channel as finalized. + * The channel buffer and channel data structure are then freed + * automatically when the last reference to the channel is given up. + */ +int +relay_close(int rchan_id) +{ + int err; + struct rchan *rchan; + + if ((rchan_id < 0) || (rchan_id >= RELAY_MAX_CHANNELS)) + return -EBADF; + + err = relay_finalize(rchan_id); + + if (!err) { + read_lock(&rchan_table_lock); + rchan = rchan_table[rchan_id]; + read_unlock(&rchan_table_lock); + + if (rchan) { + restore_callbacks(rchan); + if (rchan->resize_min) + del_timer(&rchan->shrink_timer); + rchan_put(rchan); + } + } + + return err; +} + +/** + * relay_write - reserve a slot in the channel and write data into it + * @rchan_id: relay channel id + * @data_ptr: data to be written into reserved slot + * @count: number of bytes to write + * @td_offset: optional offset where time delta should be written + * @wrote_pos: optional ptr returning buf pos written to, ignored if NULL + * + * Returns the number of bytes written, 0 or negative on failure. + * + * Reserves space in the channel and writes count bytes of data_ptr + * to it. Automatically performs any necessary locking, depending + * on the scheme and SMP usage in effect (no locking is done for the + * lockless scheme regardless of usage). + * + * If td_offset is >= 0, the internal time delta calculated when + * slot was reserved will be written at that offset. + * + * If wrote_pos is non-NULL, it will receive the location the data + * was written to, which may be needed for some applications but is not + * normally interesting. + */ +int +relay_write(int rchan_id, + const void *data_ptr, + size_t count, + int td_offset, + void **wrote_pos) +{ + unsigned long flags; + char *reserved, *write_pos; + int bytes_written = 0; + int reserve_code, interrupting; + struct timeval ts; + u32 td; + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + relay_lock_channel(rchan, flags); /* nop for lockless */ + + write_pos = reserved = relay_reserve(rchan, count, &ts, &td, + &reserve_code, &interrupting); + + if (reserved != NULL) { + relay_write_direct(write_pos, data_ptr, count); + if ((td_offset >= 0) && (td_offset < count - sizeof(td))) + *((u32 *)(reserved + td_offset)) = td; + bytes_written = count; + } else if (reserve_code == RELAY_WRITE_TOO_LONG) + bytes_written = -EINVAL; + + if (bytes_written > 0) + relay_commit(rchan, reserved, bytes_written, reserve_code, interrupting); + + relay_unlock_channel(rchan, flags); /* nop for lockless */ + + rchan_put(rchan); + + if (wrote_pos) + *wrote_pos = reserved; + + return bytes_written; +} + +/** + * wakeup_writers - wake up VFS writers waiting on a channel + * @private: the channel + * + * This is the work function used to defer writer waking. The + * reason waking is deferred is that calling directly from + * buffers_consumed causes problems if you're writing from say + * the scheduler. 
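+ *
+ * The deferral itself is a work-queue submission, exactly as issued
+ * by __relay_buffers_consumed() below:
+ *
+ *	PREPARE_WORK(&rchan->wake_writers, wakeup_writers, rchan);
+ *	schedule_delayed_work(&rchan->wake_writers, 1);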
+ */ +static void +wakeup_writers(void *private) +{ + struct rchan *rchan = (struct rchan *)private; + + wake_up_interruptible(&rchan->write_wait); +} + + +/** + * __relay_buffers_consumed - internal version of relay_buffers_consumed + * @rchan: the relay channel + * @bufs_consumed: number of buffers to add to current count for channel + * + * Internal - updates the channel's consumed buffer count. + */ +static void +__relay_buffers_consumed(struct rchan *rchan, u32 bufs_consumed) +{ + rchan->bufs_consumed += bufs_consumed; + + if (rchan->bufs_consumed > rchan->bufs_produced) + rchan->bufs_consumed = rchan->bufs_produced; + + atomic_set(&rchan->suspended, 0); + + PREPARE_WORK(&rchan->wake_writers, wakeup_writers, rchan); + schedule_delayed_work(&rchan->wake_writers, 1); +} + +/** + * __reader_buffers_consumed - update reader/channel consumed buffer count + * @reader: channel reader + * @bufs_consumed: number of buffers to add to current count for channel + * + * Internal - updates the reader's consumed buffer count. If the reader's + * resulting total is greater than the channel's, update the channel's. +*/ +static void +__reader_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed) +{ + reader->bufs_consumed += bufs_consumed; + + if (reader->bufs_consumed > reader->rchan->bufs_consumed) + __relay_buffers_consumed(reader->rchan, bufs_consumed); +} + +/** + * relay_buffers_consumed - add to the # buffers consumed for the channel + * @reader: channel reader + * @bufs_consumed: number of buffers to add to current count for channel + * + * Adds to the channel's consumed buffer count. buffers_consumed should + * be the number of buffers newly consumed, not the total number consumed. + * + * NOTE: kernel clients don't need to call this function if the reader + * is auto-consuming or the channel is MODE_CONTINUOUS. + */ +void +relay_buffers_consumed(struct rchan_reader *reader, u32 bufs_consumed) +{ + if (reader && reader->rchan) + __reader_buffers_consumed(reader, bufs_consumed); +} + +/** + * __relay_bytes_consumed - internal version of relay_bytes_consumed + * @rchan: the relay channel + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Internal - updates the channel's consumed count. +*/ +static void +__relay_bytes_consumed(struct rchan *rchan, u32 bytes_consumed, u32 read_offset) +{ + u32 consuming_idx; + u32 unused; + + consuming_idx = read_offset / rchan->buf_size; + + if (consuming_idx >= rchan->n_bufs) + consuming_idx = rchan->n_bufs - 1; + rchan->bytes_consumed += bytes_consumed; + + unused = rchan->unused_bytes[consuming_idx]; + + if (rchan->bytes_consumed + unused >= rchan->buf_size) { + __relay_buffers_consumed(rchan, 1); + rchan->bytes_consumed = 0; + } +} + +/** + * __reader_bytes_consumed - update reader/channel consumed count + * @reader: channel reader + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Internal - updates the reader's consumed count. If the reader's + * resulting total is greater than the channel's, update the channel's. 
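+ *
+ * For example (numbers invented for illustration): with a sub-buffer
+ * size of 4096 and 96 unused bytes recorded for the sub-buffer being
+ * read, a reader whose running byte count reaches 4000 is credited
+ * with one more consumed sub-buffer and its byte count restarts at 0.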
+*/ +static void +__reader_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset) +{ + u32 consuming_idx; + u32 unused; + + consuming_idx = read_offset / reader->rchan->buf_size; + + if (consuming_idx >= reader->rchan->n_bufs) + consuming_idx = reader->rchan->n_bufs - 1; + + reader->bytes_consumed += bytes_consumed; + + unused = reader->rchan->unused_bytes[consuming_idx]; + + if (reader->bytes_consumed + unused >= reader->rchan->buf_size) { + reader->bufs_consumed++; + reader->bytes_consumed = 0; + } + + if ((reader->bufs_consumed > reader->rchan->bufs_consumed) || + ((reader->bufs_consumed == reader->rchan->bufs_consumed) && + (reader->bytes_consumed > reader->rchan->bytes_consumed))) + __relay_bytes_consumed(reader->rchan, bytes_consumed, read_offset); +} + +/** + * relay_bytes_consumed - add to the # bytes consumed for the channel + * @reader: channel reader + * @bytes_consumed: number of bytes to add to current count for channel + * @read_offset: where the bytes were consumed from + * + * Adds to the channel's consumed count. bytes_consumed should be the + * number of bytes actually read e.g. return value of relay_read() and + * the read_offset should be the actual offset the bytes were read from + * e.g. the actual_read_offset set by relay_read(). See + * Documentation/filesystems/relayfs.txt for more details. + * + * NOTE: kernel clients don't need to call this function if the reader + * is auto-consuming or the channel is MODE_CONTINUOUS. + */ +void +relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset) +{ + if (reader && reader->rchan) + __reader_bytes_consumed(reader, bytes_consumed, read_offset); +} + +/** + * update_readers_consumed - apply offset change to reader + * @rchan: the channel + * + * Apply the consumed counts to all readers open on the channel. + */ +void +update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed) +{ + struct list_head *p; + struct rchan_reader *reader; + + read_lock(&rchan->open_readers_lock); + list_for_each(p, &rchan->open_readers) { + reader = list_entry(p, struct rchan_reader, list); + reader->bufs_consumed = bufs_consumed; + reader->bytes_consumed = bytes_consumed; + if (reader->vfs_reader) + reader->pos.file->f_pos = 0; + else + reader->pos.f_pos = 0; + reader->offset_changed = 1; + } + read_unlock(&rchan->open_readers_lock); +} + +/** + * do_read - utility function to do the actual read to user + * @rchan: the channel + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @read_offset: offset into channel + * @new_offset: new offset into channel after read + * @actual_read_offset: read offset actually used + * + * Returns the number of bytes read, 0 if none. 
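+ *
+ * For example (figures invented for illustration): with a sub-buffer
+ * size of 4096, a read_offset of 0, 512 bytes currently written into
+ * that sub-buffer, no unused bytes recorded for it, and a count of
+ * 4096, only 512 bytes are copied and *new_offset advances to 512.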
+ */ +static ssize_t +do_read(struct rchan *rchan, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset) +{ + u32 read_bufno, cur_bufno; + u32 avail_offset, cur_idx, max_offset, buf_end_offset; + u32 avail_count, buf_size; + int unused_bytes = 0; + size_t read_count = 0; + u32 last_buf_byte_offset; + + *actual_read_offset = read_offset; + + buf_size = rchan->buf_size; + if (unlikely(!buf_size)) BUG(); + + read_bufno = read_offset / buf_size; + if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG(); + unused_bytes = rchan->unused_bytes[read_bufno]; + + avail_offset = cur_idx = relay_get_offset(rchan, &max_offset); + + if (cur_idx == read_offset) { + if (atomic_read(&rchan->suspended) == 1) { + read_offset += 1; + if (read_offset >= max_offset) + read_offset = 0; + *actual_read_offset = read_offset; + } else { + *new_offset = read_offset; + return 0; + } + } else { + last_buf_byte_offset = (read_bufno + 1) * buf_size - 1; + if (read_offset == last_buf_byte_offset) { + if (unused_bytes != 1) { + read_offset += 1; + if (read_offset >= max_offset) + read_offset = 0; + *actual_read_offset = read_offset; + } + } + } + + read_bufno = read_offset / buf_size; + if (unlikely(read_bufno >= RELAY_MAX_BUFS)) BUG(); + unused_bytes = rchan->unused_bytes[read_bufno]; + + cur_bufno = cur_idx / buf_size; + + buf_end_offset = (read_bufno + 1) * buf_size - unused_bytes; + if (avail_offset > buf_end_offset) + avail_offset = buf_end_offset; + else if (avail_offset < read_offset) + avail_offset = buf_end_offset; + avail_count = avail_offset - read_offset; + read_count = avail_count >= count ? count : avail_count; + + if (read_count && buf != NULL) + if (copy_to_user(buf, rchan->buf + read_offset, read_count)) + return -EFAULT; + + if (read_bufno == cur_bufno) + if (read_count && (read_offset + read_count >= buf_end_offset) && (read_offset + read_count <= cur_idx)) { + *new_offset = cur_idx; + return read_count; + } + + if (read_offset + read_count + unused_bytes > max_offset) + *new_offset = 0; + else if (read_offset + read_count >= buf_end_offset) + *new_offset = read_offset + read_count + unused_bytes; + else + *new_offset = read_offset + read_count; + + return read_count; +} + +/** + * __relay_read - read bytes from channel, relative to current reader pos + * @reader: channel reader + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @read_offset: offset into channel + * @new_offset: new offset into channel after read + * @actual_read_offset: read offset actually used + * @wait: if non-zero, wait for something to read + * + * Internal - see relay_read() for details. + * + * Returns the number of bytes read, 0 if none, negative on failure. 
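+ *
+ * relay_read() below is essentially a wrapper around this call:
+ *
+ *	read_count = __relay_read(reader, buf, count, read_offset,
+ *				  &new_offset, actual_read_offset, wait);
+ *
+ * With wait set, 0 is returned if the channel has been finalized and
+ * -EINTR if the read offsets were changed while the caller slept.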
+ */ +static ssize_t +__relay_read(struct rchan_reader *reader, char *buf, size_t count, u32 read_offset, u32 *new_offset, u32 *actual_read_offset, int wait) +{ + int err = 0; + size_t read_count = 0; + struct rchan *rchan = reader->rchan; + + if (!wait && !rchan->initialized) + return -EAGAIN; + + if (using_lockless(rchan)) + read_offset &= idx_mask(rchan); + + if (read_offset >= rchan->n_bufs * rchan->buf_size) { + *new_offset = 0; + if (!wait) + return -EAGAIN; + else + return -EINTR; + } + + if (buf != NULL && wait) { + err = wait_event_interruptible(rchan->read_wait, + ((rchan->finalized == 1) || + (atomic_read(&rchan->suspended) == 1) || + (relay_get_offset(rchan, NULL) != read_offset))); + + if (rchan->finalized) + return 0; + + if (reader->offset_changed) { + reader->offset_changed = 0; + return -EINTR; + } + + if (err) + return err; + } + + read_count = do_read(rchan, buf, count, read_offset, new_offset, actual_read_offset); + + if (read_count < 0) + err = read_count; + + if (err) + return err; + else + return read_count; +} + +/** + * relay_read - read bytes from channel, relative to current reader pos + * @reader: channel reader + * @buf: user buf to read into, NULL if just getting info + * @count: bytes requested + * @wait: if non-zero, wait for something to read + * @actual_read_offset: set read offset actually used, must not be NULL + * + * Reads count bytes from the channel, or as much as is available within + * the sub-buffer currently being read. The read offset that will be + * read from is the position contained within the reader object. If the + * wait flag is set, buf is non-NULL, and there is nothing available, + * it will wait until there is. If the wait flag is 0 and there is + * nothing available, -EAGAIN is returned. If buf is NULL, the value + * returned is the number of bytes that would have been read. + * actual_read_offset is the value that should be passed as the read + * offset to relay_bytes_consumed, needed only if the reader is not + * auto-consuming and the channel is MODE_NO_OVERWRITE, but in any case, + * it must not be NULL. See Documentation/filesystems/relayfs.txt for + * more details. + */ +ssize_t +relay_read(struct rchan_reader *reader, char *buf, size_t count, int wait, u32 *actual_read_offset) +{ + u32 new_offset; + u32 read_offset; + ssize_t read_count; + + if (reader == NULL || reader->rchan == NULL) + return -EBADF; + + if (actual_read_offset == NULL) + return -EINVAL; + + if (reader->vfs_reader) + read_offset = (u32)(reader->pos.file->f_pos); + else + read_offset = reader->pos.f_pos; + *actual_read_offset = read_offset; + + read_count = __relay_read(reader, buf, count, read_offset, + &new_offset, actual_read_offset, wait); + + if (read_count < 0) + return read_count; + + if (reader->vfs_reader) + reader->pos.file->f_pos = new_offset; + else + reader->pos.f_pos = new_offset; + + if (reader->auto_consume && ((read_count) || (new_offset != read_offset))) + __reader_bytes_consumed(reader, read_count, *actual_read_offset); + + if (read_count == 0 && !wait) + return -EAGAIN; + + return read_count; +} + +/** + * relay_bytes_avail - number of bytes available in current sub-buffer + * @reader: channel reader + * + * Returns the number of bytes available relative to the reader's + * current read position within the corresponding sub-buffer, 0 if + * there is nothing available. See Documentation/filesystems/relayfs.txt + * for more details. 
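+ *
+ * A non-VFS kernel consumer might poll the channel like this (sketch
+ * only; reader, buf, count and read_offset are the client's own, and
+ * the reader is assumed not to be auto-consuming):
+ *
+ *	if (relay_bytes_avail(reader) > 0) {
+ *		n = relay_read(reader, buf, count, 0, &read_offset);
+ *		if (n > 0)
+ *			relay_bytes_consumed(reader, n, read_offset);
+ *	}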
+ */ +ssize_t +relay_bytes_avail(struct rchan_reader *reader) +{ + u32 f_pos; + u32 new_offset; + u32 actual_read_offset; + ssize_t bytes_read; + + if (reader == NULL || reader->rchan == NULL) + return -EBADF; + + if (reader->vfs_reader) + f_pos = (u32)reader->pos.file->f_pos; + else + f_pos = reader->pos.f_pos; + new_offset = f_pos; + + bytes_read = __relay_read(reader, NULL, reader->rchan->buf_size, + f_pos, &new_offset, &actual_read_offset, 0); + + if ((new_offset != f_pos) && + ((bytes_read == -EINTR) || (bytes_read == 0))) + bytes_read = -EAGAIN; + else if ((bytes_read < 0) && (bytes_read != -EAGAIN)) + bytes_read = 0; + + return bytes_read; +} + +/** + * rchan_empty - boolean, is the channel empty wrt reader? + * @reader: channel reader + * + * Returns 1 if the channel is empty, 0 otherwise. + */ +int +rchan_empty(struct rchan_reader *reader) +{ + ssize_t avail_count; + u32 buffers_ready; + struct rchan *rchan = reader->rchan; + u32 cur_idx, curbuf_bytes; + int mapped; + + if (atomic_read(&rchan->suspended) == 1) + return 0; + + mapped = atomic_read(&rchan->mapped); + + if (mapped && bulk_delivery(rchan)) { + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + return buffers_ready ? 0 : 1; + } + + if (mapped && packet_delivery(rchan)) { + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + if (buffers_ready) + return 0; + else { + cur_idx = relay_get_offset(rchan, NULL); + curbuf_bytes = cur_idx % rchan->buf_size; + return curbuf_bytes == rchan->bytes_consumed ? 1 : 0; + } + } + + avail_count = relay_bytes_avail(reader); + + return avail_count ? 0 : 1; +} + +/** + * rchan_full - boolean, is the channel full wrt consuming reader? + * @reader: channel reader + * + * Returns 1 if the channel is full, 0 otherwise. + */ +int +rchan_full(struct rchan_reader *reader) +{ + u32 buffers_ready; + struct rchan *rchan = reader->rchan; + + if (mode_continuous(rchan)) + return 0; + + buffers_ready = rchan->bufs_produced - rchan->bufs_consumed; + + return buffers_ready > reader->rchan->n_bufs - 1 ? 1 : 0; +} + +/** + * relay_info - get status and other information about a relay channel + * @rchan_id: relay channel id + * @rchan_info: pointer to the rchan_info struct to be filled in + * + * Fills in an rchan_info struct with channel status and attribute + * information. See Documentation/filesystems/relayfs.txt for details. + * + * Returns 0 if successful, negative otherwise. 
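+ *
+ * For example (sketch), a client can check how far consumers lag
+ * behind the writer:
+ *
+ *	struct rchan_info info;
+ *	u32 pending;
+ *
+ *	if (!relay_info(rchan_id, &info))
+ *		pending = info.bufs_produced - info.bufs_consumed;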
+ */ +int +relay_info(int rchan_id, struct rchan_info *rchan_info) +{ + int i; + struct rchan *rchan; + + rchan = rchan_get(rchan_id); + if (rchan == NULL) + return -EBADF; + + rchan_info->flags = rchan->flags; + rchan_info->buf_size = rchan->buf_size; + rchan_info->buf_addr = rchan->buf; + rchan_info->alloc_size = rchan->alloc_size; + rchan_info->n_bufs = rchan->n_bufs; + rchan_info->cur_idx = relay_get_offset(rchan, NULL); + rchan_info->bufs_produced = rchan->bufs_produced; + rchan_info->bufs_consumed = rchan->bufs_consumed; + rchan_info->buf_id = rchan->buf_id; + + for (i = 0; i < rchan->n_bufs; i++) { + rchan_info->unused_bytes[i] = rchan->unused_bytes[i]; + if (using_lockless(rchan)) + rchan_info->buffer_complete[i] = (atomic_read(&fill_count(rchan, i)) == rchan->buf_size); + else + rchan_info->buffer_complete[i] = 0; + } + + rchan_put(rchan); + + return 0; +} + +/** + * __add_rchan_reader - creates and adds a reader to a channel + * @rchan: relay channel + * @filp: the file associated with rchan, if applicable + * @auto_consume: boolean, whether reader's reads automatically consume + * @map_reader: boolean, whether reader's reading via a channel mapping + * + * Returns a pointer to the reader object create, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * If filp is non-NULL, the reader is a VFS reader, otherwise not. + * + * If the reader is a map reader, it isn't considered a VFS reader for + * our purposes. Also, map_readers can't be auto-consuming. + */ +struct rchan_reader * +__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader) +{ + struct rchan_reader *reader; + u32 will_read; + + reader = kmalloc(sizeof(struct rchan_reader), GFP_KERNEL); + + if (reader) { + write_lock(&rchan->open_readers_lock); + reader->rchan = rchan; + if (filp) { + reader->vfs_reader = 1; + reader->pos.file = filp; + } else { + reader->vfs_reader = 0; + reader->pos.f_pos = 0; + } + reader->map_reader = map_reader; + reader->auto_consume = auto_consume; + + if (!map_reader) { + will_read = rchan->bufs_produced % rchan->n_bufs; + if (!will_read && atomic_read(&rchan->suspended)) + will_read = rchan->n_bufs; + reader->bufs_consumed = rchan->bufs_produced - will_read; + rchan->bufs_consumed = reader->bufs_consumed; + rchan->bytes_consumed = reader->bytes_consumed = 0; + reader->offset_changed = 0; + } + + list_add(&reader->list, &rchan->open_readers); + write_unlock(&rchan->open_readers_lock); + } + + return reader; +} + +/** + * add_rchan_reader - create a reader for a channel + * @rchan_id: relay channel handle + * @auto_consume: boolean, whether reader's reads automatically consume + * + * Returns a pointer to the reader object created, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * This function is useful only for non-VFS readers. + */ +struct rchan_reader * +add_rchan_reader(int rchan_id, int auto_consume) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return NULL; + + return __add_rchan_reader(rchan, NULL, auto_consume, 0); +} + +/** + * add_map_reader - create a map reader for a channel + * @rchan_id: relay channel handle + * + * Returns a pointer to the reader object created, NULL if unsuccessful + * + * Creates and initializes an rchan_reader object for reading the channel. + * This function is useful only for map readers. 
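+ *
+ * Typical pairing (sketch): a reader created here is later released
+ * with remove_map_reader():
+ *
+ *	reader = add_map_reader(rchan_id);
+ *	...
+ *	remove_map_reader(reader);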
+ */ +struct rchan_reader * +add_map_reader(int rchan_id) +{ + struct rchan *rchan = rchan_get(rchan_id); + if (rchan == NULL) + return NULL; + + return __add_rchan_reader(rchan, NULL, 0, 1); +} + +/** + * __remove_rchan_reader - destroy a channel reader + * @reader: channel reader + * + * Internal - removes reader from the open readers list, and frees it. + */ +void +__remove_rchan_reader(struct rchan_reader *reader) +{ + struct list_head *p; + struct rchan_reader *found_reader = NULL; + + write_lock(&reader->rchan->open_readers_lock); + list_for_each(p, &reader->rchan->open_readers) { + found_reader = list_entry(p, struct rchan_reader, list); + if (found_reader == reader) { + list_del(&found_reader->list); + break; + } + } + write_unlock(&reader->rchan->open_readers_lock); + + if (found_reader) + kfree(found_reader); +} + +/** + * remove_rchan_reader - destroy a channel reader + * @reader: channel reader + * + * Finds and removes the given reader from the channel. This function + * is useful only for non-VFS readers. + * + * Returns 0 if successful, negative otherwise. + */ +int +remove_rchan_reader(struct rchan_reader *reader) +{ + int err = 0; + + if (reader) { + rchan_put(reader->rchan); + __remove_rchan_reader(reader); + } else + err = -EINVAL; + + return err; +} + +/** + * remove_map_reader - destroy a map reader + * @reader: channel reader + * + * Finds and removes the given map reader from the channel. This function + * is useful only for map readers. + * + * Returns 0 if successful, negative otherwise. + */ +int +remove_map_reader(struct rchan_reader *reader) +{ + return remove_rchan_reader(reader); +} + +EXPORT_SYMBOL(relay_open); +EXPORT_SYMBOL(relay_close); +EXPORT_SYMBOL(relay_reset); +EXPORT_SYMBOL(relay_reserve); +EXPORT_SYMBOL(relay_commit); +EXPORT_SYMBOL(relay_read); +EXPORT_SYMBOL(relay_write); +EXPORT_SYMBOL(relay_bytes_avail); +EXPORT_SYMBOL(relay_buffers_consumed); +EXPORT_SYMBOL(relay_bytes_consumed); +EXPORT_SYMBOL(relay_info); +EXPORT_SYMBOL(relay_discard_init_buf); + + diff --git a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h new file mode 100644 index 000000000..fb7bd42a4 --- /dev/null +++ b/include/asm-um/cpufeature.h @@ -0,0 +1,6 @@ +#ifndef __UM_CPUFEATURE_H +#define __UM_CPUFEATURE_H + +#include "asm/arch/cpufeature.h" + +#endif diff --git a/include/asm-um/local.h b/include/asm-um/local.h new file mode 100644 index 000000000..9a280c5bb --- /dev/null +++ b/include/asm-um/local.h @@ -0,0 +1,6 @@ +#ifndef __UM_LOCAL_H +#define __UM_LOCAL_H + +#include "asm/arch/local.h" + +#endif diff --git a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h new file mode 100644 index 000000000..5a265f56b --- /dev/null +++ b/include/asm-um/module-generic.h @@ -0,0 +1,6 @@ +#ifndef __UM_MODULE_GENERIC_H +#define __UM_MODULE_GENERIC_H + +#include "asm/arch/module.h" + +#endif diff --git a/include/asm-um/sections.h b/include/asm-um/sections.h new file mode 100644 index 000000000..6b0231eef --- /dev/null +++ b/include/asm-um/sections.h @@ -0,0 +1,7 @@ +#ifndef _UM_SECTIONS_H +#define _UM_SECTIONS_H + +/* nothing to see, move along */ +#include + +#endif diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h new file mode 100644 index 000000000..2c52874ab --- /dev/null +++ b/include/linux/relayfs_fs.h @@ -0,0 +1,686 @@ +/* + * linux/include/linux/relayfs_fs.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * RelayFS definitions 
and declarations + * + * Please see Documentation/filesystems/relayfs.txt for more info. + */ + +#ifndef _LINUX_RELAYFS_FS_H +#define _LINUX_RELAYFS_FS_H + +#include +#include +#include +#include +#include +#include + +/* + * Tracks changes to rchan struct + */ +#define RELAYFS_CHANNEL_VERSION 1 + +/* + * Maximum number of simultaneously open channels + */ +#define RELAY_MAX_CHANNELS 256 + +/* + * Relay properties + */ +#define RELAY_MIN_BUFS 2 +#define RELAY_MIN_BUFSIZE 4096 +#define RELAY_MAX_BUFS 256 +#define RELAY_MAX_BUF_SIZE 0x1000000 +#define RELAY_MAX_TOTAL_BUF_SIZE 0x8000000 + +/* + * Lockless scheme utility macros + */ +#define RELAY_MAX_BUFNO(bufno_bits) (1UL << (bufno_bits)) +#define RELAY_BUF_SIZE(offset_bits) (1UL << (offset_bits)) +#define RELAY_BUF_OFFSET_MASK(offset_bits) (RELAY_BUF_SIZE(offset_bits) - 1) +#define RELAY_BUFNO_GET(index, offset_bits) ((index) >> (offset_bits)) +#define RELAY_BUF_OFFSET_GET(index, mask) ((index) & (mask)) +#define RELAY_BUF_OFFSET_CLEAR(index, mask) ((index) & ~(mask)) + +/* + * Flags returned by relay_reserve() + */ +#define RELAY_BUFFER_SWITCH_NONE 0x0 +#define RELAY_WRITE_DISCARD_NONE 0x0 +#define RELAY_BUFFER_SWITCH 0x1 +#define RELAY_WRITE_DISCARD 0x2 +#define RELAY_WRITE_TOO_LONG 0x4 + +/* + * Relay attribute flags + */ +#define RELAY_DELIVERY_BULK 0x1 +#define RELAY_DELIVERY_PACKET 0x2 +#define RELAY_SCHEME_LOCKLESS 0x4 +#define RELAY_SCHEME_LOCKING 0x8 +#define RELAY_SCHEME_ANY 0xC +#define RELAY_TIMESTAMP_TSC 0x10 +#define RELAY_TIMESTAMP_GETTIMEOFDAY 0x20 +#define RELAY_TIMESTAMP_ANY 0x30 +#define RELAY_USAGE_SMP 0x40 +#define RELAY_USAGE_GLOBAL 0x80 +#define RELAY_MODE_CONTINUOUS 0x100 +#define RELAY_MODE_NO_OVERWRITE 0x200 + +/* + * Flags for needs_resize() callback + */ +#define RELAY_RESIZE_NONE 0x0 +#define RELAY_RESIZE_EXPAND 0x1 +#define RELAY_RESIZE_SHRINK 0x2 +#define RELAY_RESIZE_REPLACE 0x4 +#define RELAY_RESIZE_REPLACED 0x8 + +/* + * Values for fileop_notify() callback + */ +enum relay_fileop +{ + RELAY_FILE_OPEN, + RELAY_FILE_CLOSE, + RELAY_FILE_MAP, + RELAY_FILE_UNMAP +}; + +/* + * Data structure returned by relay_info() + */ +struct rchan_info +{ + u32 flags; /* relay attribute flags for channel */ + u32 buf_size; /* channel's sub-buffer size */ + char *buf_addr; /* address of channel start */ + u32 alloc_size; /* total buffer size actually allocated */ + u32 n_bufs; /* number of sub-buffers in channel */ + u32 cur_idx; /* current write index into channel */ + u32 bufs_produced; /* current count of sub-buffers produced */ + u32 bufs_consumed; /* current count of sub-buffers consumed */ + u32 buf_id; /* buf_id of current sub-buffer */ + int buffer_complete[RELAY_MAX_BUFS]; /* boolean per sub-buffer */ + int unused_bytes[RELAY_MAX_BUFS]; /* count per sub-buffer */ +}; + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * buffer_start - called at the beginning of a new sub-buffer + * @rchan_id: the channel id + * @current_write_pos: position in sub-buffer client should write to + * @buffer_id: the id of the new sub-buffer + * @start_time: the timestamp associated with the start of sub-buffer + * @start_tsc: the TSC associated with the timestamp, if using_tsc + * @using_tsc: boolean, indicates whether start_tsc is valid + * + * Return value should be the number of bytes written by the client. + * + * See Documentation/filesystems/relayfs.txt for details. 
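+	 *
+	 * A do-nothing client implementation might look like (sketch):
+	 *
+	 *	static int my_buffer_start(int rchan_id, char *pos,
+	 *				   u32 buffer_id, struct timeval t,
+	 *				   u32 tsc, int using_tsc)
+	 *	{
+	 *		return 0;
+	 *	}
+	 *
+	 * returning 0 to indicate that no bytes were written.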
+ */ + int (*buffer_start) (int rchan_id, + char *current_write_pos, + u32 buffer_id, + struct timeval start_time, + u32 start_tsc, + int using_tsc); + + /* + * buffer_end - called at the end of a sub-buffer + * @rchan_id: the channel id + * @current_write_pos: position in sub-buffer of end of data + * @end_of_buffer: the position of the end of the sub-buffer + * @end_time: the timestamp associated with the end of the sub-buffer + * @end_tsc: the TSC associated with the end_time, if using_tsc + * @using_tsc: boolean, indicates whether end_tsc is valid + * + * Return value should be the number of bytes written by the client. + * + * See Documentation/filesystems/relayfs.txt for details. + */ + int (*buffer_end) (int rchan_id, + char *current_write_pos, + char *end_of_buffer, + struct timeval end_time, + u32 end_tsc, + int using_tsc); + + /* + * deliver - called when data is ready for the client + * @rchan_id: the channel id + * @from: the start of the delivered data + * @len: the length of the delivered data + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*deliver) (int rchan_id, char *from, u32 len); + + /* + * user_deliver - called when data has been written from userspace + * @rchan_id: the channel id + * @from: the start of the delivered data + * @len: the length of the delivered data + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*user_deliver) (int rchan_id, char *from, u32 len); + + /* + * needs_resize - called when a resizing event occurs + * @rchan_id: the channel id + * @resize_type: the type of resizing event + * @suggested_buf_size: the suggested new sub-buffer size + * @suggested_buf_size: the suggested new number of sub-buffers + * + * See Documentation/filesystems/relayfs.txt for details. + */ + void (*needs_resize)(int rchan_id, + int resize_type, + u32 suggested_buf_size, + u32 suggested_n_bufs); + + /* + * fileop_notify - called on open/close/mmap/munmap of a relayfs file + * @rchan_id: the channel id + * @filp: relayfs file pointer + * @fileop: which file operation is in progress + * + * The return value can direct the outcome of the operation. + * + * See Documentation/filesystems/relayfs.txt for details. + */ + int (*fileop_notify)(int rchan_id, + struct file *filp, + enum relay_fileop fileop); + + /* + * ioctl - called in ioctl context from userspace + * @rchan_id: the channel id + * @cmd: ioctl cmd + * @arg: ioctl cmd arg + * + * The return value is returned as the value from the ioctl call. + * + * See Documentation/filesystems/relayfs.txt for details. 
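+	 *
+	 * A minimal client implementation (sketch) could simply reject
+	 * every command:
+	 *
+	 *	static int my_ioctl(int rchan_id, unsigned int cmd,
+	 *			    unsigned long arg)
+	 *	{
+	 *		return -ENOTTY;
+	 *	}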
+ */ + int (*ioctl) (int rchan_id, unsigned int cmd, unsigned long arg); +}; + +/* + * Lockless scheme-specific data + */ +struct lockless_rchan +{ + u8 bufno_bits; /* # bits used for sub-buffer id */ + u8 offset_bits; /* # bits used for offset within sub-buffer */ + u32 index; /* current index = sub-buffer id and offset */ + u32 offset_mask; /* used to obtain offset portion of index */ + u32 index_mask; /* used to mask off unused bits index */ + atomic_t fill_count[RELAY_MAX_BUFS]; /* fill count per sub-buffer */ +}; + +/* + * Locking scheme-specific data + */ +struct locking_rchan +{ + char *write_buf; /* start of write sub-buffer */ + char *write_buf_end; /* end of write sub-buffer */ + char *current_write_pos; /* current write pointer */ + char *write_limit; /* takes reserves into account */ + char *in_progress_event_pos; /* used for interrupted writes */ + u16 in_progress_event_size; /* used for interrupted writes */ + char *interrupted_pos; /* used for interrupted writes */ + u16 interrupting_size; /* used for interrupted writes */ + spinlock_t lock; /* channel lock for locking scheme */ +}; + +struct relay_ops; + +/* + * Offset resizing data structure + */ +struct resize_offset +{ + u32 ge; + u32 le; + int delta; +}; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + char *buf; /* the channel buffer */ + union + { + struct lockless_rchan lockless; + struct locking_rchan locking; + } scheme; /* scheme-specific channel data */ + + int id; /* the channel id */ + struct rchan_callbacks *callbacks; /* client callbacks */ + u32 flags; /* relay channel attributes */ + u32 buf_id; /* current sub-buffer id */ + u32 buf_idx; /* current sub-buffer index */ + + atomic_t mapped; /* map count */ + + atomic_t suspended; /* channel suspended i.e full? */ + int half_switch; /* used internally for suspend */ + + struct timeval buf_start_time; /* current sub-buffer start time */ + u32 buf_start_tsc; /* current sub-buffer start TSC */ + + u32 buf_size; /* sub-buffer size */ + u32 alloc_size; /* total buffer size allocated */ + u32 n_bufs; /* number of sub-buffers */ + + u32 bufs_produced; /* count of sub-buffers produced */ + u32 bufs_consumed; /* count of sub-buffers consumed */ + u32 bytes_consumed; /* bytes consumed in cur sub-buffer */ + + int initialized; /* first buffer initialized? */ + int finalized; /* channel finalized? 
*/ + + u32 start_reserve; /* reserve at start of sub-buffers */ + u32 end_reserve; /* reserve at end of sub-buffers */ + u32 rchan_start_reserve; /* additional reserve sub-buffer 0 */ + + struct dentry *dentry; /* channel file dentry */ + + wait_queue_head_t read_wait; /* VFS read wait queue */ + wait_queue_head_t write_wait; /* VFS write wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct work_struct wake_writers; /* reader wake-up work struct */ + atomic_t refcount; /* channel refcount */ + + struct relay_ops *relay_ops; /* scheme-specific channel ops */ + + int unused_bytes[RELAY_MAX_BUFS]; /* unused count per sub-buffer */ + + struct semaphore resize_sem; /* serializes alloc/repace */ + struct work_struct work; /* resize allocation work struct */ + + struct list_head open_readers; /* open readers for this channel */ + rwlock_t open_readers_lock; /* protection for open_readers list */ + + char *init_buf; /* init channel buffer, if non-NULL */ + + u32 resize_min; /* minimum resized total buffer size */ + u32 resize_max; /* maximum resized total buffer size */ + char *resize_buf; /* for autosize alloc/free */ + u32 resize_buf_size; /* resized sub-buffer size */ + u32 resize_n_bufs; /* resized number of sub-buffers */ + u32 resize_alloc_size; /* resized actual total size */ + int resizing; /* is resizing in progress? */ + int resize_err; /* resizing err code */ + int resize_failures; /* number of resize failures */ + int replace_buffer; /* is the alloced buffer ready? */ + struct resize_offset resize_offset; /* offset change */ + struct timer_list shrink_timer; /* timer used for shrinking */ + int resize_order; /* size of last resize */ + u32 expand_buf_id; /* subbuf id expand will occur at */ + + struct page **buf_page_array; /* array of current buffer pages */ + int buf_page_count; /* number of current buffer pages */ + struct page **expand_page_array;/* new pages to be inserted */ + int expand_page_count; /* number of new pages */ + struct page **shrink_page_array;/* old pages to be freed */ + int shrink_page_count; /* number of old pages */ + struct page **resize_page_array;/* will become current pages */ + int resize_page_count; /* number of resize pages */ + struct page **old_buf_page_array; /* hold for freeing */ +} ____cacheline_aligned; + +/* + * Relay channel reader struct + */ +struct rchan_reader +{ + struct list_head list; /* for list inclusion */ + struct rchan *rchan; /* the channel we're reading from */ + int auto_consume; /* does this reader auto-consume? */ + u32 bufs_consumed; /* buffers this reader has consumed */ + u32 bytes_consumed; /* bytes consumed in cur sub-buffer */ + int offset_changed; /* have channel offsets changed? */ + int vfs_reader; /* are we a VFS reader? */ + int map_reader; /* are we an mmap reader? */ + + union + { + struct file *file; + u32 f_pos; + } pos; /* current read offset */ +}; + +/* + * These help make union member access less tedious + */ +#define channel_buffer(rchan) ((rchan)->buf) +#define idx(rchan) ((rchan)->scheme.lockless.index) +#define bufno_bits(rchan) ((rchan)->scheme.lockless.bufno_bits) +#define offset_bits(rchan) ((rchan)->scheme.lockless.offset_bits) +#define offset_mask(rchan) ((rchan)->scheme.lockless.offset_mask) +#define idx_mask(rchan) ((rchan)->scheme.lockless.index_mask) +#define bulk_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_BULK) ? 1 : 0) +#define packet_delivery(rchan) (((rchan)->flags & RELAY_DELIVERY_PACKET) ? 
1 : 0) +#define using_lockless(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKLESS) ? 1 : 0) +#define using_locking(rchan) (((rchan)->flags & RELAY_SCHEME_LOCKING) ? 1 : 0) +#define using_tsc(rchan) (((rchan)->flags & RELAY_TIMESTAMP_TSC) ? 1 : 0) +#define using_gettimeofday(rchan) (((rchan)->flags & RELAY_TIMESTAMP_GETTIMEOFDAY) ? 1 : 0) +#define usage_smp(rchan) (((rchan)->flags & RELAY_USAGE_SMP) ? 1 : 0) +#define usage_global(rchan) (((rchan)->flags & RELAY_USAGE_GLOBAL) ? 1 : 0) +#define mode_continuous(rchan) (((rchan)->flags & RELAY_MODE_CONTINUOUS) ? 1 : 0) +#define fill_count(rchan, i) ((rchan)->scheme.lockless.fill_count[(i)]) +#define write_buf(rchan) ((rchan)->scheme.locking.write_buf) +#define read_buf(rchan) ((rchan)->scheme.locking.read_buf) +#define write_buf_end(rchan) ((rchan)->scheme.locking.write_buf_end) +#define read_buf_end(rchan) ((rchan)->scheme.locking.read_buf_end) +#define cur_write_pos(rchan) ((rchan)->scheme.locking.current_write_pos) +#define read_limit(rchan) ((rchan)->scheme.locking.read_limit) +#define write_limit(rchan) ((rchan)->scheme.locking.write_limit) +#define in_progress_event_pos(rchan) ((rchan)->scheme.locking.in_progress_event_pos) +#define in_progress_event_size(rchan) ((rchan)->scheme.locking.in_progress_event_size) +#define interrupted_pos(rchan) ((rchan)->scheme.locking.interrupted_pos) +#define interrupting_size(rchan) ((rchan)->scheme.locking.interrupting_size) +#define channel_lock(rchan) ((rchan)->scheme.locking.lock) + + +/** + * calc_time_delta - utility function for time delta calculation + * @now: current time + * @start: start time + * + * Returns the time delta produced by subtracting start time from now. + */ +static inline u32 +calc_time_delta(struct timeval *now, + struct timeval *start) +{ + return (now->tv_sec - start->tv_sec) * 1000000 + + (now->tv_usec - start->tv_usec); +} + +/** + * recalc_time_delta - utility function for time delta recalculation + * @now: current time + * @new_delta: the new time delta calculated + * @cpu: the associated CPU id + */ +static inline void +recalc_time_delta(struct timeval *now, + u32 *new_delta, + struct rchan *rchan) +{ + if (using_tsc(rchan) == 0) + *new_delta = calc_time_delta(now, &rchan->buf_start_time); +} + +/** + * have_cmpxchg - does this architecture have a cmpxchg? + * + * Returns 1 if this architecture has a cmpxchg useable by + * the lockless scheme, 0 otherwise. + */ +static inline int +have_cmpxchg(void) +{ +#if defined(__HAVE_ARCH_CMPXCHG) + return 1; +#else + return 0; +#endif +} + +/** + * relay_write_direct - write data directly into destination buffer + */ +#define relay_write_direct(DEST, SRC, SIZE) \ +do\ +{\ + memcpy(DEST, SRC, SIZE);\ + DEST += SIZE;\ +} while (0); + +/** + * relay_lock_channel - lock the relay channel if applicable + * + * This macro only affects the locking scheme. If the locking scheme + * is in use and the channel usage is SMP, does a local_irq_save. If the + * locking sheme is in use and the channel usage is GLOBAL, uses + * spin_lock_irqsave. FLAGS is initialized to 0 since we know that + * it is being initialized prior to use and we avoid the compiler warning. + */ +#define relay_lock_channel(RCHAN, FLAGS) \ +do\ +{\ + FLAGS = 0;\ + if (using_locking(RCHAN)) {\ + if (usage_smp(RCHAN)) {\ + local_irq_save(FLAGS); \ + } else {\ + spin_lock_irqsave(&(RCHAN)->scheme.locking.lock, FLAGS); \ + }\ + }\ +} while (0); + +/** + * relay_unlock_channel - unlock the relay channel if applicable + * + * This macro only affects the locking scheme. 
See relay_lock_channel. + */ +#define relay_unlock_channel(RCHAN, FLAGS) \ +do\ +{\ + if (using_locking(RCHAN)) {\ + if (usage_smp(RCHAN)) {\ + local_irq_restore(FLAGS); \ + } else {\ + spin_unlock_irqrestore(&(RCHAN)->scheme.locking.lock, FLAGS); \ + }\ + }\ +} while (0); + +/* + * Define cmpxchg if we don't have it + */ +#ifndef __HAVE_ARCH_CMPXCHG +#define cmpxchg(p,o,n) 0 +#endif + +/* + * High-level relayfs kernel API, fs/relayfs/relay.c + */ +extern int +relay_open(const char *chanpath, + int bufsize, + int nbufs, + u32 flags, + struct rchan_callbacks *channel_callbacks, + u32 start_reserve, + u32 end_reserve, + u32 rchan_start_reserve, + u32 resize_min, + u32 resize_max, + int mode, + char *init_buf, + u32 init_buf_size); + +extern int +relay_close(int rchan_id); + +extern int +relay_write(int rchan_id, + const void *data_ptr, + size_t count, + int td_offset, + void **wrote_pos); + +extern ssize_t +relay_read(struct rchan_reader *reader, + char *buf, + size_t count, + int wait, + u32 *actual_read_offset); + +extern int +relay_discard_init_buf(int rchan_id); + +extern struct rchan_reader * +add_rchan_reader(int rchan_id, int autoconsume); + +extern int +remove_rchan_reader(struct rchan_reader *reader); + +extern struct rchan_reader * +add_map_reader(int rchan_id); + +extern int +remove_map_reader(struct rchan_reader *reader); + +extern int +relay_info(int rchan_id, struct rchan_info *rchan_info); + +extern void +relay_buffers_consumed(struct rchan_reader *reader, u32 buffers_consumed); + +extern void +relay_bytes_consumed(struct rchan_reader *reader, u32 bytes_consumed, u32 read_offset); + +extern ssize_t +relay_bytes_avail(struct rchan_reader *reader); + +extern int +relay_realloc_buffer(int rchan_id, u32 new_nbufs, int in_background); + +extern int +relay_replace_buffer(int rchan_id); + +extern int +rchan_empty(struct rchan_reader *reader); + +extern int +rchan_full(struct rchan_reader *reader); + +extern void +update_readers_consumed(struct rchan *rchan, u32 bufs_consumed, u32 bytes_consumed); + +extern int +__relay_mmap_buffer(struct rchan *rchan, struct vm_area_struct *vma); + +extern struct rchan_reader * +__add_rchan_reader(struct rchan *rchan, struct file *filp, int auto_consume, int map_reader); + +extern void +__remove_rchan_reader(struct rchan_reader *reader); + +/* + * Low-level relayfs kernel API, fs/relayfs/relay.c + */ +extern struct rchan * +rchan_get(int rchan_id); + +extern void +rchan_put(struct rchan *rchan); + +extern char * +relay_reserve(struct rchan *rchan, + u32 data_len, + struct timeval *time_stamp, + u32 *time_delta, + int *errcode, + int *interrupting); + +extern void +relay_commit(struct rchan *rchan, + char *from, + u32 len, + int reserve_code, + int interrupting); + +extern u32 +relay_get_offset(struct rchan *rchan, u32 *max_offset); + +extern int +relay_reset(int rchan_id); + +/* + * VFS functions, fs/relayfs/inode.c + */ +extern int +relayfs_create_dir(const char *name, + struct dentry *parent, + struct dentry **dentry); + +extern int +relayfs_create_file(const char * name, + struct dentry *parent, + struct dentry **dentry, + void * data, + int mode); + +extern int +relayfs_remove_file(struct dentry *dentry); + +extern int +reset_index(struct rchan *rchan, u32 old_index); + + +/* + * klog functions, fs/relayfs/klog.c + */ +extern int +create_klog_channel(void); + +extern int +remove_klog_channel(void); + +/* + * Scheme-specific channel ops + */ +struct relay_ops +{ + char * (*reserve) (struct rchan *rchan, + u32 slot_len, + struct timeval 
*time_stamp, + u32 *tsc, + int * errcode, + int * interrupting); + + void (*commit) (struct rchan *rchan, + char *from, + u32 len, + int deliver, + int interrupting); + + u32 (*get_offset) (struct rchan *rchan, + u32 *max_offset); + + void (*resume) (struct rchan *rchan); + void (*finalize) (struct rchan *rchan); + void (*reset) (struct rchan *rchan, + int init); + int (*reset_index) (struct rchan *rchan, + u32 old_index); +}; + +#endif /* _LINUX_RELAYFS_FS_H */ + + + + + diff --git a/include/linux/vs_base.h b/include/linux/vs_base.h new file mode 100644 index 000000000..4f04513ff --- /dev/null +++ b/include/linux/vs_base.h @@ -0,0 +1,78 @@ +#ifndef _VX_VS_BASE_H +#define _VX_VS_BASE_H + +#include "vserver/context.h" + +// #define VX_DEBUG + + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + + +#define vx_task_xid(t) ((t)->xid) + +#define vx_current_xid() vx_task_xid(current) + +#define vx_check(c,m) __vx_check(vx_current_xid(),c,m) + +#define vx_weak_check(c,m) ((m) ? vx_check(c,m) : 1) + + +/* + * check current context for ADMIN/WATCH and + * optionally agains supplied argument + */ +static __inline__ int __vx_check(xid_t cid, xid_t id, unsigned int mode) +{ + if (mode & VX_ARG_MASK) { + if ((mode & VX_IDENT) && + (id == cid)) + return 1; + } + if (mode & VX_ATR_MASK) { + if ((mode & VX_DYNAMIC) && + (id >= MIN_D_CONTEXT) && + (id <= MAX_S_CONTEXT)) + return 1; + if ((mode & VX_STATIC) && + (id > 1) && (id < MIN_D_CONTEXT)) + return 1; + } + return (((mode & VX_ADMIN) && (cid == 0)) || + ((mode & VX_WATCH) && (cid == 1))); +} + + +#define __vx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __vx_task_flags(t,m,f) \ + (((t) && ((t)->vx_info)) ? \ + __vx_flags((t)->vx_info->vx_flags,(m),(f)) : 0) + +#define vx_current_flags() \ + ((current->vx_info) ? current->vx_info->vx_flags : 0) + +#define vx_flags(m,f) __vx_flags(vx_current_flags(),(m),(f)) + + +#define vx_current_ccaps() \ + ((current->vx_info) ? current->vx_info->vx_ccaps : 0) + +#define vx_ccaps(c) (vx_current_ccaps() & (c)) + +#define vx_current_bcaps() \ + (((current->vx_info) && !vx_flags(VXF_STATE_SETUP, 0)) ? \ + current->vx_info->vx_bcaps : cap_bset) + + +/* generic flag merging */ + +#define vx_mask_flags(v,f,m) (((v) & ~(m)) | ((f) & (m))) + +#define vx_mask_mask(v,f,m) (((v) & ~(m)) | ((v) & (f) & (m))) + +#endif diff --git a/include/linux/vs_context.h b/include/linux/vs_context.h new file mode 100644 index 000000000..727a16cdc --- /dev/null +++ b/include/linux/vs_context.h @@ -0,0 +1,128 @@ +#ifndef _VX_VS_CONTEXT_H +#define _VX_VS_CONTEXT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" + +#undef vxdprintk +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + + +extern int proc_pid_vx_info(struct task_struct *, char *); + + +#define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) + +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, + const char *_file, int _line) +{ + if (!vxi) + return NULL; + vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + atomic_inc(&vxi->vx_usecnt); + return vxi; +} + + +#define free_vx_info(i) \ + call_rcu(&i->vx_rcu, rcu_free_vx_info, i); + +#define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) + +static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +{ + if (!vxi) + return; + vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&vxi->vx_usecnt)) + free_vx_info(vxi); +} + +#define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__) + +static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, + const char *_file, int _line) +{ + BUG_ON(*vxp); + if (!vxi) + return; + vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_refcnt):0, + _file, _line); + atomic_inc(&vxi->vx_refcnt); + *vxp = __get_vx_info(vxi, _file, _line); +} + +#define clr_vx_info(p) __clr_vx_info(p,__FILE__,__LINE__) + +static inline void __clr_vx_info(struct vx_info **vxp, + const char *_file, int _line) +{ + struct vx_info *vxo = *vxp; + + if (!vxo) + return; + vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + vxo?atomic_read(&vxo->vx_refcnt):0, + _file, _line); + *vxp = NULL; + wmb(); + if (vxo && atomic_dec_and_test(&vxo->vx_refcnt)) + unhash_vx_info(vxo); + __put_vx_info(vxo, _file, _line); +} + + +#define task_get_vx_info(i) __task_get_vx_info(i,__FILE__,__LINE__) + +static __inline__ struct vx_info *__task_get_vx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct vx_info *vxi; + + task_lock(p); + vxi = __get_vx_info(p->vx_info, _file, _line); + task_unlock(p); + return vxi; +} + + +#define vx_verify_info(p,i) \ + __vx_verify_info((p)->vx_info,i,__FILE__,__LINE__) + +static __inline__ void __vx_verify_info( + struct vx_info *vxa, struct vx_info *vxb, + const char *_file, int _line) +{ + if (vxa == vxb) + return; + printk(KERN_ERR "vx bad assumption (%p==%p) at %s:%d\n", + vxa, vxb, _file, _line); +} + + +#undef vxdprintk +#define vxdprintk(x...) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_cvirt.h b/include/linux/vs_cvirt.h new file mode 100644 index 000000000..65f430362 --- /dev/null +++ b/include/linux/vs_cvirt.h @@ -0,0 +1,71 @@ +#ifndef _VX_VS_CVIRT_H +#define _VX_VS_CVIRT_H + + +// #define VX_DEBUG + +#include "vserver/cvirt.h" +#include "vs_base.h" + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + +/* utsname virtualization */ + +static inline struct new_utsname *vx_new_utsname(void) +{ + if (current->vx_info) + return ¤t->vx_info->cvirt.utsname; + return &system_utsname; +} + +#define vx_new_uts(x) ((vx_new_utsname())->x) + + +/* pid faking stuff */ + + +#define vx_map_tgid(v,p) \ + __vx_map_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_map_tgid(struct vx_info *vxi, int pid, + char *file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_map_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == vxi->vx_initpid)?1:pid, + file, line); + if (pid == vxi->vx_initpid) + return 1; + } + return pid; +} + +#define vx_rmap_tgid(v,p) \ + __vx_rmap_tgid((v), (p), __FILE__, __LINE__) + +static inline int __vx_rmap_tgid(struct vx_info *vxi, int pid, + char *file, int line) +{ + if (vxi && __vx_flags(vxi->vx_flags, VXF_INFO_INIT, 0)) { + vxdprintk("vx_rmap_tgid: %p/%llx: %d -> %d in %s:%d\n", + vxi, vxi->vx_flags, pid, + (pid == 1)?vxi->vx_initpid:pid, + file, line); + if ((pid == 1) && vxi->vx_initpid) + return vxi->vx_initpid; + } + return pid; +} + +#undef vxdprintk +#define vxdprintk(x...) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_dlimit.h b/include/linux/vs_dlimit.h new file mode 100644 index 000000000..d80c563f6 --- /dev/null +++ b/include/linux/vs_dlimit.h @@ -0,0 +1,169 @@ +#ifndef _VX_VS_DLIMIT_H +#define _VX_VS_DLIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/dlimit.h" + +#if defined(VX_DEBUG) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) +#endif + + +#define get_dl_info(i) __get_dl_info(i,__FILE__,__LINE__) + +static inline struct dl_info *__get_dl_info(struct dl_info *dli, + const char *_file, int _line) +{ + if (!dli) + return NULL; + vxdprintk("get_dl_info(%p[#%d.%d])\t%s:%d\n", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + atomic_inc(&dli->dl_usecnt); + return dli; +} + + +#define free_dl_info(i) \ + call_rcu(&i->dl_rcu, rcu_free_dl_info, i); + +#define put_dl_info(i) __put_dl_info(i,__FILE__,__LINE__) + +static inline void __put_dl_info(struct dl_info *dli, const char *_file, int _line) +{ + if (!dli) + return; + vxdprintk("put_dl_info(%p[#%d.%d])\t%s:%d\n", + dli, dli?dli->dl_xid:0, dli?atomic_read(&dli->dl_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&dli->dl_usecnt)) + free_dl_info(dli); +} + + +extern int vx_debug_dlimit; + +#define __dlimit_char(d) ((d)?'*':' ') + +static inline int __dl_alloc_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + int ret = 0; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_space_used + nr > dli->dl_space_total); + if (!ret) + dli->dl_space_used += nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("ALLOC (%p,#%d)%c %lld bytes (%d)@ %s:%d\n", + sb, xid, __dlimit_char(dli), nr, ret, file, line); + return ret; +} + +static inline void __dl_free_space(struct super_block *sb, + xid_t xid, dlsize_t nr, const char *file, int line) +{ + struct dl_info *dli = NULL; + + if (nr == 0) + goto out; + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + dli->dl_space_used -= nr; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("FREE 
(%p,#%d)%c %lld bytes @ %s:%d\n", + sb, xid, __dlimit_char(dli), nr, file, line); +} + +static inline int __dl_alloc_inode(struct super_block *sb, + xid_t xid, const char *file, int line) +{ + struct dl_info *dli; + int ret = 0; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + ret = (dli->dl_inodes_used >= dli->dl_inodes_total); + if (!ret) + dli->dl_inodes_used++; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("ALLOC (%p,#%d)%c inode (%d)@ %s:%d\n", + sb, xid, __dlimit_char(dli), ret, file, line); + return ret; +} + +static inline void __dl_free_inode(struct super_block *sb, + xid_t xid, const char *file, int line) +{ + struct dl_info *dli; + + dli = locate_dl_info(sb, xid); + if (!dli) + goto out; + + spin_lock(&dli->dl_lock); + dli->dl_inodes_used--; + spin_unlock(&dli->dl_lock); + put_dl_info(dli); +out: + if (vx_debug_dlimit) + printk("FREE (%p,#%d)%c inode @ %s:%d\n", + sb, xid, __dlimit_char(dli), file, line); +} + + + +#define DLIMIT_ALLOC_BLOCK(sb, xid, nr) \ + __dl_alloc_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_FREE_BLOCK(sb, xid, nr) \ + __dl_free_space(sb, xid, \ + ((dlsize_t)(nr)) << (sb)->s_blocksize_bits, \ + __FILE__, __LINE__ ) + +#define DLIMIT_ALLOC_INODE(sb, xid) \ + __dl_alloc_inode(sb, xid, __FILE__, __LINE__ ) + +#define DLIMIT_FREE_INODE(sb, xid) \ + __dl_free_inode(sb, xid, __FILE__, __LINE__ ) + + +#define DLIMIT_ADJUST_BLOCK(sb, xid, fb, rb) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_limit.h b/include/linux/vs_limit.h new file mode 100644 index 000000000..82e8de4ec --- /dev/null +++ b/include/linux/vs_limit.h @@ -0,0 +1,119 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" + + +/* file limits */ + +#define VX_DEBUG_ACC_FILE 0 +#define VX_DEBUG_ACC_OPENFD 0 + +#if (VX_DEBUG_ACC_FILE) || (VX_DEBUG_ACC_OPENFD) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + + +#define vx_acc_cres(v,d,r) \ + __vx_acc_cres((v), (r), (d), __FILE__, __LINE__) + +static inline void __vx_acc_cres(struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (vxi) { + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_acc_cres[%5d,%2d]: %5d%s in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + (dir>0)?"++":"--", file, line); + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + +#define vx_nproc_inc(p) vx_acc_cres(current->vx_info, 1, RLIMIT_NPROC) +#define vx_nproc_dec(p) vx_acc_cres(current->vx_info,-1, RLIMIT_NPROC) + +#define vx_files_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_NOFILE) +#define vx_files_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_NOFILE) + +#define vx_openfd_inc(f) vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD) +#define vx_openfd_dec(f) vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD) + +/* +#define vx_openfd_inc(f) do { \ + vx_acc_cres(current->vx_info, 1, RLIMIT_OPENFD); \ + printk("vx_openfd_inc: %d[#%d] in %s:%d\n", \ + f, current->xid, __FILE__, __LINE__); \ + } while (0) + +#define vx_openfd_dec(f) do { \ + vx_acc_cres(current->vx_info,-1, RLIMIT_OPENFD); \ + printk("vx_openfd_dec: %d[#%d] in %s:%d\n", \ + f, current->xid, __FILE__, __LINE__); \ + } while (0) +*/ + +#define vx_cres_avail(v,n,r) \ + __vx_cres_avail((v), (r), (n), __FILE__, __LINE__) + +static inline int __vx_cres_avail(struct vx_info *vxi, + int res, int num, char *file, int line) +{ + unsigned long value; + + if ((res == RLIMIT_NOFILE && VX_DEBUG_ACC_FILE) || + (res == RLIMIT_OPENFD && VX_DEBUG_ACC_OPENFD)) + printk("vx_cres_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + num, file, line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + num <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_nproc_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NPROC) + +#define vx_files_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_NOFILE) + +#define vx_openfd_avail(n) \ + vx_cres_avail(current->vx_info, (n), RLIMIT_OPENFD) + + +/* socket limits */ + +#define vx_sock_inc(f) vx_acc_cres(current->vx_info, 1, VLIMIT_SOCK) +#define vx_sock_dec(f) vx_acc_cres(current->vx_info,-1, VLIMIT_SOCK) + +#define vx_sock_avail(n) \ + vx_cres_avail(current->vx_info, (n), VLIMIT_SOCK) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_memory.h b/include/linux/vs_memory.h new file mode 100644 index 000000000..2fe9c0809 --- /dev/null +++ b/include/linux/vs_memory.h @@ -0,0 +1,132 @@ +#ifndef _VX_VS_MEMORY_H +#define _VX_VS_MEMORY_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/limit.h" + + +#define VX_DEBUG_ACC_RSS 0 +#define VX_DEBUG_ACC_VM 0 +#define VX_DEBUG_ACC_VML 0 + +#if (VX_DEBUG_ACC_RSS) || (VX_DEBUG_ACC_VM) || (VX_DEBUG_ACC_VML) +#define vxdprintk(x...) printk("vxd: " x) +#else +#define vxdprintk(x...) 
+#endif + +#define vx_acc_page(m, d, v, r) \ + __vx_acc_page(&(m->v), m->mm_vx_info, r, d, __FILE__, __LINE__) + +static inline void __vx_acc_page(unsigned long *v, struct vx_info *vxi, + int res, int dir, char *file, int line) +{ + if (v) { + if (dir > 0) + ++(*v); + else + --(*v); + } + if (vxi) { + if (dir > 0) + atomic_inc(&vxi->limit.rcur[res]); + else + atomic_dec(&vxi->limit.rcur[res]); + } +} + + +#define vx_acc_pages(m, p, v, r) \ + __vx_acc_pages(&(m->v), m->mm_vx_info, r, p, __FILE__, __LINE__) + +static inline void __vx_acc_pages(unsigned long *v, struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + vxdprintk("vx_acc_pages [%5d,%2d]: %5d += %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?atomic_read(&vxi->limit.res[res]):0), + pages, file, line); + if (pages == 0) + return; + if (v) + *v += pages; + if (vxi) + atomic_add(pages, &vxi->limit.rcur[res]); +} + + + +#define vx_acc_vmpage(m,d) vx_acc_page(m, d, total_vm, RLIMIT_AS) +#define vx_acc_vmlpage(m,d) vx_acc_page(m, d, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspage(m,d) vx_acc_page(m, d, rss, RLIMIT_RSS) + +#define vx_acc_vmpages(m,p) vx_acc_pages(m, p, total_vm, RLIMIT_AS) +#define vx_acc_vmlpages(m,p) vx_acc_pages(m, p, locked_vm, RLIMIT_MEMLOCK) +#define vx_acc_rsspages(m,p) vx_acc_pages(m, p, rss, RLIMIT_RSS) + +#define vx_pages_add(s,r,p) __vx_acc_pages(0, s, r, p, __FILE__, __LINE__) +#define vx_pages_sub(s,r,p) __vx_pages_add(s, r, -(p)) + +#define vx_vmpages_inc(m) vx_acc_vmpage(m, 1) +#define vx_vmpages_dec(m) vx_acc_vmpage(m,-1) +#define vx_vmpages_add(m,p) vx_acc_vmpages(m, p) +#define vx_vmpages_sub(m,p) vx_acc_vmpages(m,-(p)) + +#define vx_vmlocked_inc(m) vx_acc_vmlpage(m, 1) +#define vx_vmlocked_dec(m) vx_acc_vmlpage(m,-1) +#define vx_vmlocked_add(m,p) vx_acc_vmlpages(m, p) +#define vx_vmlocked_sub(m,p) vx_acc_vmlpages(m,-(p)) + +#define vx_rsspages_inc(m) vx_acc_rsspage(m, 1) +#define vx_rsspages_dec(m) vx_acc_rsspage(m,-1) +#define vx_rsspages_add(m,p) vx_acc_rsspages(m, p) +#define vx_rsspages_sub(m,p) vx_acc_rsspages(m,-(p)) + + + +#define vx_pages_avail(m, p, r) \ + __vx_pages_avail((m)->mm_vx_info, (r), (p), __FILE__, __LINE__) + +static inline int __vx_pages_avail(struct vx_info *vxi, + int res, int pages, char *file, int line) +{ + unsigned long value; + + if ((res == RLIMIT_RSS && VX_DEBUG_ACC_RSS) || + (res == RLIMIT_AS && VX_DEBUG_ACC_VM) || + (res == RLIMIT_MEMLOCK && VX_DEBUG_ACC_VML)) + printk("vx_pages_avail[%5d,%2d]: %5ld > %5d + %5d in %s:%d\n", + (vxi?vxi->vx_id:-1), res, + (vxi?vxi->limit.rlim[res]:1), + (vxi?atomic_read(&vxi->limit.rcur[res]):0), + pages, file, line); + if (!vxi) + return 1; + value = atomic_read(&vxi->limit.rcur[res]); + if (value > vxi->limit.rmax[res]) + vxi->limit.rmax[res] = value; + if (vxi->limit.rlim[res] == RLIM_INFINITY) + return 1; + if (value + pages <= vxi->limit.rlim[res]) + return 1; + atomic_inc(&vxi->limit.lhit[res]); + return 0; +} + +#define vx_vmpages_avail(m,p) vx_pages_avail(m, p, RLIMIT_AS) +#define vx_vmlocked_avail(m,p) vx_pages_avail(m, p, RLIMIT_MEMLOCK) +#define vx_rsspages_avail(m,p) vx_pages_avail(m, p, RLIMIT_RSS) + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_network.h b/include/linux/vs_network.h new file mode 100644 index 000000000..0a3349c09 --- /dev/null +++ b/include/linux/vs_network.h @@ -0,0 +1,154 @@ +#ifndef _NX_VS_NETWORK_H +#define 
_NX_VS_NETWORK_H + + +// #define NX_DEBUG + +#include +#include +#include + +#include "vserver/network.h" + +#if defined(NX_DEBUG) +#define nxdprintk(x...) printk("nxd: " x) +#else +#define nxdprintk(x...) +#endif + + +extern int proc_pid_nx_info(struct task_struct *, char *); + + +#define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) + +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) +{ + if (!nxi) + return NULL; + nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + atomic_inc(&nxi->nx_usecnt); + return nxi; +} + + +#define free_nx_info(nxi) \ + call_rcu(&nxi->nx_rcu, rcu_free_nx_info, nxi); + +#define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) + +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +{ + if (!nxi) + return; + nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, + _file, _line); + if (atomic_dec_and_test(&nxi->nx_usecnt)) + free_nx_info(nxi); +} + + +#define set_nx_info(p,i) __set_nx_info(p,i,__FILE__,__LINE__) + +static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, + const char *_file, int _line) +{ + BUG_ON(*nxp); + if (!nxi) + return; + nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_refcnt):0, + _file, _line); + atomic_inc(&nxi->nx_refcnt); + *nxp = __get_nx_info(nxi, _file, _line); +} + +#define clr_nx_info(p) __clr_nx_info(p,__FILE__,__LINE__) + +static inline void __clr_nx_info(struct nx_info **nxp, + const char *_file, int _line) +{ + struct nx_info *nxo = *nxp; + + if (!nxo) + return; + nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + nxo?atomic_read(&nxo->nx_refcnt):0, + _file, _line); + *nxp = NULL; + wmb(); + if (nxo && atomic_dec_and_test(&nxo->nx_refcnt)) + unhash_nx_info(nxo); + __put_nx_info(nxo, _file, _line); +} + + +#define task_get_nx_info(i) __task_get_nx_info(i,__FILE__,__LINE__) + +static __inline__ struct nx_info *__task_get_nx_info(struct task_struct *p, + const char *_file, int _line) +{ + struct nx_info *nxi; + + task_lock(p); + nxi = __get_nx_info(p->nx_info, _file, _line); + task_unlock(p); + return nxi; +} + +#define nx_verify_info(p,i) \ + __nx_verify_info((p)->nx_info,i,__FILE__,__LINE__) + +static __inline__ void __nx_verify_info( + struct nx_info *ipa, struct nx_info *ipb, + const char *_file, int _line) +{ + if (ipa == ipb) + return; + printk(KERN_ERR "ip bad assumption (%p==%p) at %s:%d\n", + ipa, ipb, _file, _line); +} + + +#define nx_task_nid(t) ((t)->nid) + +#define nx_current_nid() nx_task_nid(current) + +#define nx_check(c,m) __nx_check(nx_current_nid(),c,m) + +#define nx_weak_check(c,m) ((m) ? nx_check(c,m) : 1) + +#undef nxdprintk +#define nxdprintk(x...) + + +#define __nx_flags(v,m,f) (((v) & (m)) ^ (f)) + +#define __nx_task_flags(t,m,f) \ + (((t) && ((t)->nx_info)) ? \ + __nx_flags((t)->nx_info->nx_flags,(m),(f)) : 0) + +#define nx_current_flags() \ + ((current->nx_info) ? current->nx_info->nx_flags : 0) + +#define nx_flags(m,f) __nx_flags(nx_current_flags(),(m),(f)) + + +#define nx_current_ncaps() \ + ((current->nx_info) ? 
current->nx_info->nx_ncaps : 0) + +#define nx_ncaps(c) (nx_current_ncaps() & (c)) + + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vs_socket.h b/include/linux/vs_socket.h new file mode 100644 index 000000000..499245822 --- /dev/null +++ b/include/linux/vs_socket.h @@ -0,0 +1,65 @@ +#ifndef _VX_VS_LIMIT_H +#define _VX_VS_LIMIT_H + + +// #define VX_DEBUG + +#include +#include +#include + +#include "vserver/context.h" +#include "vserver/network.h" + + +/* socket accounting */ + +#include + +static inline int vx_sock_type(int family) +{ + int type = 4; + + if (family > 0 && family < 3) + type = family; + else if (family == PF_INET6) + type = 3; + return type; +} + +#define vx_acc_sock(v,f,p,s) \ + __vx_acc_sock((v), (f), (p), (s), __FILE__, __LINE__) + +static inline void __vx_acc_sock(struct vx_info *vxi, + int family, int pos, int size, char *file, int line) +{ + if (vxi) { + int type = vx_sock_type(family); + + atomic_inc(&vxi->cacct.sock[type][pos].count); + atomic_add(size, &vxi->cacct.sock[type][pos].total); + } +} + +#define vx_sock_recv(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 0, (s)) +#define vx_sock_send(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 1, (s)) +#define vx_sock_fail(sk,s) \ + vx_acc_sock((sk)->sk_vx_info, (sk)->sk_family, 2, (s)) + + +#define sock_vx_init(s) do { \ + (s)->sk_xid = 0; \ + (s)->sk_vx_info = NULL; \ + } while (0) + +#define sock_nx_init(s) do { \ + (s)->sk_nid = 0; \ + (s)->sk_nx_info = NULL; \ + } while (0) + + +#else +#warning duplicate inclusion +#endif diff --git a/include/linux/vserver/dlimit.h b/include/linux/vserver/dlimit.h new file mode 100644 index 000000000..74872ed74 --- /dev/null +++ b/include/linux/vserver/dlimit.h @@ -0,0 +1,83 @@ +#ifndef _VX_DLIMIT_H +#define _VX_DLIMIT_H + +#include "switch.h" +#include + +/* inode vserver commands */ + +#define VCMD_add_dlimit VC_CMD(DLIMIT, 1, 0) +#define VCMD_rem_dlimit VC_CMD(DLIMIT, 2, 0) + +#define VCMD_set_dlimit VC_CMD(DLIMIT, 5, 0) +#define VCMD_get_dlimit VC_CMD(DLIMIT, 6, 0) + + +struct vcmd_ctx_dlimit_base_v0 { + const char __user *name; + uint32_t flags; +}; + +struct vcmd_ctx_dlimit_v0 { + const char __user *name; + uint32_t space_used; /* used space in kbytes */ + uint32_t space_total; /* maximum space in kbytes */ + uint32_t inodes_used; /* used inodes */ + uint32_t inodes_total; /* maximum inodes */ + uint32_t reserved; /* reserved for root in % */ + uint32_t flags; +}; + +#define CDLIM_UNSET (0ULL) +#define CDLIM_INFINITY (~0ULL) +#define CDLIM_KEEP (~1ULL) + + +#ifdef __KERNEL__ + +struct super_block; + +struct dl_info { + struct hlist_node dl_hlist; /* linked list of contexts */ + struct rcu_head dl_rcu; /* the rcu head */ + xid_t dl_xid; /* context id */ + atomic_t dl_usecnt; /* usage count */ + atomic_t dl_refcnt; /* reference count */ + + struct super_block *dl_sb; /* associated superblock */ + +// struct rw_semaphore dl_sem; /* protect the values */ + spinlock_t dl_lock; /* protect the values */ + + uint64_t dl_space_used; /* used space in bytes */ + uint64_t dl_space_total; /* maximum space in bytes */ + uint32_t dl_inodes_used; /* used inodes */ + uint32_t dl_inodes_total; /* maximum inodes */ + + unsigned int dl_nrlmult; /* non root limit mult */ +}; + +extern void rcu_free_dl_info(void *); +extern void unhash_dl_info(struct dl_info *); + +extern struct dl_info *locate_dl_info(struct super_block *, xid_t); + + +struct kstatfs; + +extern void vx_vsi_statfs(struct super_block *, struct kstatfs *); + + +extern int 
vc_add_dlimit(uint32_t, void __user *); +extern int vc_rem_dlimit(uint32_t, void __user *); + +extern int vc_set_dlimit(uint32_t, void __user *); +extern int vc_get_dlimit(uint32_t, void __user *); + + +typedef uint64_t dlsize_t; + + +#endif /* __KERNEL__ */ + +#endif /* _VX_DLIMIT_H */ diff --git a/kernel/vserver/dlimit.c b/kernel/vserver/dlimit.c new file mode 100644 index 000000000..eb9282f9f --- /dev/null +++ b/kernel/vserver/dlimit.c @@ -0,0 +1,439 @@ +/* + * linux/kernel/vserver/dlimit.c + * + * Virtual Server: Context Disk Limits + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 initial version + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* __alloc_dl_info() + + * allocate an initialized dl_info struct + * doesn't make it visible (hash) */ + +static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid) +{ + struct dl_info *new = NULL; + + vxdprintk("alloc_dl_info(%p,%d)\n", sb, xid); + + /* would this benefit from a slab cache? */ + new = kmalloc(sizeof(struct dl_info), GFP_KERNEL); + if (!new) + return 0; + + memset (new, 0, sizeof(struct dl_info)); + new->dl_xid = xid; + new->dl_sb = sb; + INIT_RCU_HEAD(&new->dl_rcu); + INIT_HLIST_NODE(&new->dl_hlist); + spin_lock_init(&new->dl_lock); + atomic_set(&new->dl_refcnt, 0); + atomic_set(&new->dl_usecnt, 0); + + /* rest of init goes here */ + + vxdprintk("alloc_dl_info(%p,%d) = %p\n", sb, xid, new); + return new; +} + +/* __dealloc_dl_info() + + * final disposal of dl_info */ + +static void __dealloc_dl_info(struct dl_info *dli) +{ + vxdprintk("dealloc_dl_info(%p)\n", dli); + + dli->dl_hlist.next = LIST_POISON1; + dli->dl_xid = -1; + dli->dl_sb = 0; + + BUG_ON(atomic_read(&dli->dl_usecnt)); + BUG_ON(atomic_read(&dli->dl_refcnt)); + + kfree(dli); +} + + +/* hash table for dl_info hash */ + +#define DL_HASH_SIZE 13 + +struct hlist_head dl_info_hash[DL_HASH_SIZE]; + +static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(struct super_block *sb, xid_t xid) +{ + return ((xid ^ (unsigned int)sb) % DL_HASH_SIZE); +} + + + +/* __hash_dl_info() + + * add the dli to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_dl_info(struct dl_info *dli) +{ + struct hlist_head *head; + + vxdprintk("__hash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + get_dl_info(dli); + head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)]; + hlist_add_head_rcu(&dli->dl_hlist, head); +} + +/* __unhash_dl_info() + + * remove the dli from the global hash table + * requires the hash_lock to be held */ + +static inline void __unhash_dl_info(struct dl_info *dli) +{ + vxdprintk("__unhash_dl_info: %p[#%d]\n", dli, dli->dl_xid); + hlist_del_rcu(&dli->dl_hlist); + put_dl_info(dli); +} + + +#define hlist_for_each_rcu(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ + pos = pos->next, ({ smp_read_barrier_depends(); 0;})) + + +/* __lookup_dl_info() + + * requires the rcu_read_lock() + * doesn't increment the dl_refcnt */ + +static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid) +{ + struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct dl_info *dli = + hlist_entry(pos, struct dl_info, dl_hlist); + + if (dli->dl_xid == xid && dli->dl_sb == sb) { + return dli; + } + } + return NULL; +} + + +struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid) +{ + struct 
dl_info *dli; + + rcu_read_lock(); + dli = get_dl_info(__lookup_dl_info(sb, xid)); + rcu_read_unlock(); + return dli; +} + +void rcu_free_dl_info(void *obj) +{ + struct dl_info *dli = obj; + int usecnt, refcnt; + + BUG_ON(!dli); + + usecnt = atomic_read(&dli->dl_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&dli->dl_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_dl_info(dli); + else + printk("!!! rcu didn't free\n"); +} + + + + +int vc_add_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + dli = __alloc_dl_info(sb, id); + spin_lock(&dl_info_hash_lock); + + ret = -EEXIST; + if (__lookup_dl_info(sb, id)) + goto out_unlock; + __hash_dl_info(dli); + dli = NULL; + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + if (dli) + __dealloc_dl_info(dli); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_rem_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_base_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + + spin_lock(&dl_info_hash_lock); + dli = __lookup_dl_info(sb, id); + + ret = -ESRCH; + if (!dli) + goto out_unlock; + + __unhash_dl_info(dli); + ret = 0; + + out_unlock: + spin_unlock(&dl_info_hash_lock); + out_release: + path_release(&nd); + } + return ret; +} + + +int vc_set_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if (vc_data.reserved > 100 || + vc_data.inodes_used > vc_data.inodes_total || + vc_data.space_used > vc_data.space_total) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + + if (vc_data.inodes_used != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_used = vc_data.inodes_used; + if (vc_data.inodes_total != (uint32_t)CDLIM_KEEP) + dli->dl_inodes_total = vc_data.inodes_total; + if (vc_data.space_used != (uint32_t)CDLIM_KEEP) { + dli->dl_space_used = vc_data.space_used; + dli->dl_space_used <<= 10; + } + if (vc_data.space_total == (uint32_t)CDLIM_INFINITY) + dli->dl_space_total = (uint64_t)CDLIM_INFINITY; + else if (vc_data.space_total != (uint32_t)CDLIM_KEEP) { + dli->dl_space_total = vc_data.space_total; + dli->dl_space_total <<= 10; + } + if (vc_data.reserved != (uint32_t)CDLIM_KEEP) + dli->dl_nrlmult = (1 << 10) * (100 - vc_data.reserved) / 100; + + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = 0; + + out_release: + 
path_release(&nd); + } + return ret; +} + +int vc_get_dlimit(uint32_t id, void __user *data) +{ + struct nameidata nd; + struct vcmd_ctx_dlimit_v0 vc_data; + int ret; + + if (!vx_check(0, VX_ADMIN)) + return -ENOSYS; + if (copy_from_user (&vc_data, data, sizeof(vc_data))) + return -EFAULT; + + ret = user_path_walk_link(vc_data.name, &nd); + if (!ret) { + struct super_block *sb; + struct dl_info *dli; + + ret = -EINVAL; + if (!nd.dentry->d_inode) + goto out_release; + if (!(sb = nd.dentry->d_inode->i_sb)) + goto out_release; + if (vc_data.reserved > 100 || + vc_data.inodes_used > vc_data.inodes_total || + vc_data.space_used > vc_data.space_total) + goto out_release; + + ret = -ESRCH; + dli = locate_dl_info(sb, id); + if (!dli) + goto out_release; + + spin_lock(&dli->dl_lock); + vc_data.inodes_used = dli->dl_inodes_used; + vc_data.inodes_total = dli->dl_inodes_total; + vc_data.space_used = dli->dl_space_used >> 10; + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + vc_data.space_total = (uint32_t)CDLIM_INFINITY; + else + vc_data.space_total = dli->dl_space_total >> 10; + + vc_data.reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10); + spin_unlock(&dli->dl_lock); + + put_dl_info(dli); + ret = -EFAULT; + if (copy_to_user(data, &vc_data, sizeof(vc_data))) + goto out_release; + + ret = 0; + out_release: + path_release(&nd); + } + return ret; +} + + +void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct dl_info *dli; + __u64 blimit, bfree, bavail; + __u32 ifree; + + dli = locate_dl_info(sb, current->xid); + if (!dli) + return; + + spin_lock(&dli->dl_lock); + if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY) + goto no_ilim; + + /* reduce max inodes available to limit */ + if (buf->f_files > dli->dl_inodes_total) + buf->f_files = dli->dl_inodes_total; + + ifree = dli->dl_inodes_total - dli->dl_inodes_used; + /* reduce free inodes to min */ + if (ifree < buf->f_ffree) + buf->f_ffree = ifree; + +no_ilim: + if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY) + goto no_blim; + + blimit = dli->dl_space_total >> sb->s_blocksize_bits; + + if (dli->dl_space_total < dli->dl_space_used) + bfree = 0; + else + bfree = (dli->dl_space_total - dli->dl_space_used) + >> sb->s_blocksize_bits; + + bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult); + if (bavail < dli->dl_space_used) + bavail = 0; + else + bavail = (bavail - dli->dl_space_used) + >> sb->s_blocksize_bits; + + /* reduce max space available to limit */ + if (buf->f_blocks > blimit) + buf->f_blocks = blimit; + + /* reduce free space to min */ + if (bfree < buf->f_bfree) + buf->f_bfree = bfree; + + /* reduce avail space to min */ + if (bavail < buf->f_bavail) + buf->f_bavail = bavail; + +no_blim: + spin_unlock(&dli->dl_lock); + put_dl_info(dli); + + return; +} + diff --git a/kernel/vserver/helper.c b/kernel/vserver/helper.c new file mode 100644 index 000000000..880b84335 --- /dev/null +++ b/kernel/vserver/helper.c @@ -0,0 +1,92 @@ +/* + * linux/kernel/vserver/helper.c + * + * Virtual Context Support + * + * Copyright (C) 2004 Herbert Pötzl + * + * V0.01 basic helper + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +char vshelper_path[255] = "/sbin/vshelper"; + + +/* + * vshelper path is set via /proc/sys + * invoked by vserver sys_reboot(), with + * the following arguments + * + * argv [0] = vshelper_path; + * argv [1] = action: "restart", "halt", "poweroff", ... 
+ * argv [2] = context identifier
+ * argv [3] = additional argument (restart2)
+ *
+ * envp [*] = type-specific parameters
+ */
+
+long vs_reboot(unsigned int cmd, void * arg)
+{
+	char id_buf[8], cmd_buf[32];
+	char uid_buf[32], pid_buf[32];
+	char buffer[256] = "";	/* zero-filled so a max-length restart2 arg stays NUL terminated */
+
+	char *argv[] = {vshelper_path, NULL, id_buf, NULL, NULL};
+	char *envp[] = {"HOME=/", "TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			uid_buf, pid_buf, cmd_buf, NULL};
+
+	snprintf(id_buf, sizeof(id_buf)-1, "%d", vx_current_xid());
+
+	snprintf(cmd_buf, sizeof(cmd_buf)-1, "VS_CMD=%08x", cmd);
+	snprintf(uid_buf, sizeof(uid_buf)-1, "VS_UID=%d", current->uid);
+	snprintf(pid_buf, sizeof(pid_buf)-1, "VS_PID=%d", current->pid);
+
+	switch (cmd) {
+	case LINUX_REBOOT_CMD_RESTART:
+		argv[1] = "restart";
+		break;
+
+	case LINUX_REBOOT_CMD_HALT:
+		argv[1] = "halt";
+		break;
+
+	case LINUX_REBOOT_CMD_POWER_OFF:
+		argv[1] = "poweroff";
+		break;
+
+	case LINUX_REBOOT_CMD_SW_SUSPEND:
+		argv[1] = "swsusp";
+		break;
+
+	case LINUX_REBOOT_CMD_RESTART2:
+		if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0)
+			return -EFAULT;
+		argv[3] = buffer;	/* fall through: action is reported as "restart2" */
+	default:
+		argv[1] = "restart2";
+		break;
+	}
+
+	/* maybe we should wait ? */
+	if (call_usermodehelper(*argv, argv, envp, 0)) {
+		printk(KERN_WARNING
+			"vs_reboot(): failed to exec (%s %s %s %s)\n",
+			vshelper_path, argv[1], argv[2], argv[3]);
+		return -EPERM;
+	}
+	return 0;
+}
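A minimal userspace sketch of the receiving end of this convention, i.e. what a /sbin/vshelper binary sees when vs_reboot() above invokes it. Only the argv layout (action, context id, optional restart2 argument) and the VS_CMD/VS_UID/VS_PID environment variables come from the code above; the handler body itself is an illustrative assumption, not part of this patch.

/* hypothetical vshelper: demonstrates the kernel -> helper calling convention only */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* argv[1] = action ("restart", "halt", "poweroff", "swsusp", "restart2"),
	 * argv[2] = context id (xid), argv[3] = optional restart2 argument */
	const char *action = (argc > 1) ? argv[1] : "?";
	const char *xid    = (argc > 2) ? argv[2] : "?";
	const char *extra  = (argc > 3) ? argv[3] : "";

	/* environment set up by vs_reboot(): reboot command code, caller uid/pid */
	const char *cmd = getenv("VS_CMD");
	const char *uid = getenv("VS_UID");
	const char *pid = getenv("VS_PID");

	/* a real helper would stop or restart the guest context here;
	 * this sketch only records what the kernel handed over */
	fprintf(stderr, "vshelper: action=%s xid=%s arg=%s cmd=%s uid=%s pid=%s\n",
		action, xid, extra,
		cmd ? cmd : "-", uid ? uid : "-", pid ? pid : "-");
	return 0;
}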