Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / drivers / char / mem.c
index 50a32d1..6a7aa93 100644 (file)
 #include <linux/devfs_fs_kernel.h>
 #include <linux/ptrace.h>
 #include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/backing-dev.h>
+#include <linux/bootmem.h>
+#include <linux/pipe_fs_i.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/pgalloc.h>
 
 #ifdef CONFIG_IA64
 # include <linux/efi.h>
 #endif
 
-#ifdef CONFIG_FB
-extern void fbmem_init(void);
-#endif
-#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
-extern void tapechar_init(void);
-#endif
-
 /*
  * Architectures vary in how they handle caching for addresses
  * outside of main memory.
@@ -80,14 +77,6 @@ static inline int uncached_access(struct file *file, unsigned long addr)
         * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
         */
        return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
-       /* On PPC64, we always do non-cacheable access to the IO hole and
-        * cacheable elsewhere. Cache paradox can checkstop the CPU and
-        * the high_memory heuristic below is wrong on machines with memory
-        * above the IO hole... Ah, and of course, XFree86 doesn't pass
-        * O_SYNC when mapping us to tap IO space. Surprised ?
-        */
-       return !page_is_ram(addr);
 #else
        /*
         * Accessing memory above the top the kernel knows about or through a file pointer
@@ -100,54 +89,36 @@ static inline int uncached_access(struct file *file, unsigned long addr)
 }
 
 #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
-static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
+static inline int valid_phys_addr_range(unsigned long addr, size_t count)
 {
-       unsigned long end_mem;
-
-       end_mem = __pa(high_memory);
-       if (addr >= end_mem)
+       if (addr + count > __pa(high_memory))
                return 0;
 
-       if (*count > end_mem - addr)
-               *count = end_mem - addr;
-
        return 1;
 }
-#endif
 
-static ssize_t do_write_mem(void *p, unsigned long realp,
-                           const char __user * buf, size_t count, loff_t *ppos)
+static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
 {
-       ssize_t written;
-       unsigned long copied;
-
-       written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
-       /* we don't have page 0 mapped on sparc and m68k.. */
-       if (realp < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-realp;
-               if (sz > count) sz = count; 
-               /* Hmm. Do something? */
-               buf+=sz;
-               p+=sz;
-               count-=sz;
-               written+=sz;
-       }
+       return 1;
+}
 #endif
-       copied = copy_from_user(p, buf, count);
-       if (copied) {
-               ssize_t ret = written + (count - copied);
 
-               if (ret)
-                       return ret;
-               return -EFAULT;
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+       unsigned long cursor;
+
+       cursor = from >> PAGE_SHIFT;
+       while ((cursor << PAGE_SHIFT) < to) {
+               if (!devmem_is_allowed(cursor)) {
+                       printk ("Program %s tried to read /dev/mem between %lx->%lx.\n",
+                                       current->comm, from, to);
+                       return 0;
+               }
+               cursor++;
        }
-       written += count;
-       *ppos += written;
-       return written;
+       return 1;
 }
 
-
 /*
  * This funcion reads the *physical* memory. The f_pos points directly to the 
  * memory location. 
@@ -156,15 +127,16 @@ static ssize_t read_mem(struct file * file, char __user * buf,
                        size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read;
+       ssize_t read, sz;
+       char *ptr;
 
-       if (!valid_phys_addr_range(p, &count))
+       if (!valid_phys_addr_range(p, count))
                return -EFAULT;
        read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
        /* we don't have page 0 mapped on sparc and m68k.. */
        if (p < PAGE_SIZE) {
-               unsigned long sz = PAGE_SIZE-p;
+               sz = PAGE_SIZE - p;
                if (sz > count) 
                        sz = count; 
                if (sz > 0) {
@@ -177,9 +149,35 @@ static ssize_t read_mem(struct file * file, char __user * buf,
                }
        }
 #endif
-       if (copy_to_user(buf, __va(p), count))
-               return -EFAULT;
-       read += count;
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               if (!range_is_allowed(p, p+count))
+                       return -EPERM;
+               if (copy_to_user(buf, ptr, sz))
+                       return -EFAULT;
+               buf += sz;
+               p += sz;
+               count -= sz;
+               read += sz;
+       }
+
        *ppos += read;
        return read;
 }
@@ -188,38 +186,156 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
+       ssize_t written, sz;
+       unsigned long copied;
+       void *ptr;
 
-       if (!valid_phys_addr_range(p, &count))
+       if (!valid_phys_addr_range(p, count))
                return -EFAULT;
-       return do_write_mem(__va(p), p, buf, count, ppos);
+
+       written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+       /* we don't have page 0 mapped on sparc and m68k.. */
+       if (p < PAGE_SIZE) {
+               unsigned long sz = PAGE_SIZE - p;
+               if (sz > count)
+                       sz = count;
+               /* Hmm. Do something? */
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+#endif
+
+       while (count > 0) {
+               /*
+                * Handle first page in case it's not aligned
+                */
+               if (-p & (PAGE_SIZE - 1))
+                       sz = -p & (PAGE_SIZE - 1);
+               else
+                       sz = PAGE_SIZE;
+
+               sz = min_t(unsigned long, sz, count);
+
+               /*
+                * On ia64 if a page has been mapped somewhere as
+                * uncached, then it must also be accessed uncached
+                * by the kernel or data corruption may occur
+                */
+               ptr = xlate_dev_mem_ptr(p);
+
+               if (!range_is_allowed(ptr, ptr+sz))
+                       return -EPERM;
+               copied = copy_from_user(ptr, buf, sz);
+               if (copied) {
+                       written += sz - copied;
+                       if (written)
+                               break;
+                       return -EFAULT;
+               }
+               buf += sz;
+               p += sz;
+               count -= sz;
+               written += sz;
+       }
+
+       *ppos += written;
+       return written;
 }
 
-static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
+static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+                                    unsigned long size, pgprot_t vma_prot)
 {
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-       int uncached;
-
-       uncached = uncached_access(file, offset);
 #ifdef pgprot_noncached
-       if (uncached)
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       unsigned long offset = pfn << PAGE_SHIFT;
+
+       if (uncached_access(file, offset))
+               return pgprot_noncached(vma_prot);
 #endif
+       return vma_prot;
+}
+#endif
+
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+       size_t size = vma->vm_end - vma->vm_start;
 
-       /* Don't try to swap out physical pages.. */
-       vma->vm_flags |= VM_RESERVED;
+       if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
+               return -EINVAL;
 
-       /*
-        * Don't dump addresses that are not real memory to a core file.
-        */
-       if (uncached)
-               vma->vm_flags |= VM_IO;
+       vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+                                                size,
+                                                vma->vm_page_prot);
 
-       if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start,
-                            vma->vm_page_prot))
+       /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+       if (remap_pfn_range(vma,
+                           vma->vm_start,
+                           vma->vm_pgoff,
+                           size,
+                           vma->vm_page_prot))
                return -EAGAIN;
        return 0;
 }
 
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+       unsigned long pfn;
+
+       /* Turn a kernel-virtual address into a physical page frame */
+       pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
+
+       /*
+        * RED-PEN: on some architectures there is more mapped memory
+        * than available in mem_map which pfn_valid checks
+        * for. Perhaps should add a new macro here.
+        *
+        * RED-PEN: vmalloc is not supported right now.
+        */
+       if (!pfn_valid(pfn))
+               return -EIO;
+
+       vma->vm_pgoff = pfn;
+       return mmap_mem(file, vma);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Read memory corresponding to the old kernel.
+ */
+static ssize_t read_oldmem(struct file *file, char __user *buf,
+                               size_t count, loff_t *ppos)
+{
+       unsigned long pfn, offset;
+       size_t read = 0, csize;
+       int rc = 0;
+
+       while (count) {
+               pfn = *ppos / PAGE_SIZE;
+               if (pfn > saved_max_pfn)
+                       return read;
+
+               offset = (unsigned long)(*ppos % PAGE_SIZE);
+               if (count > PAGE_SIZE - offset)
+                       csize = PAGE_SIZE - offset;
+               else
+                       csize = count;
+
+               rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
+               if (rc < 0)
+                       return rc;
+               buf += csize;
+               *ppos += csize;
+               read += csize;
+               count -= csize;
+       }
+       return read;
+}
+#endif
+
 extern long vread(char *buf, char *addr, unsigned long count);
 extern long vwrite(char *buf, char *addr, unsigned long count);
 
@@ -230,33 +346,57 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
                         size_t count, loff_t *ppos)
 {
        unsigned long p = *ppos;
-       ssize_t read = 0;
-       ssize_t virtr = 0;
+       ssize_t low_count, read, sz;
        char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-               
+
+       return -EPERM;
+
+       read = 0;
        if (p < (unsigned long) high_memory) {
-               read = count;
+               low_count = count;
                if (count > (unsigned long) high_memory - p)
-                       read = (unsigned long) high_memory - p;
+                       low_count = (unsigned long) high_memory - p;
 
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
                /* we don't have page 0 mapped on sparc and m68k.. */
-               if (p < PAGE_SIZE && read > 0) {
+               if (p < PAGE_SIZE && low_count > 0) {
                        size_t tmp = PAGE_SIZE - p;
-                       if (tmp > read) tmp = read;
+                       if (tmp > low_count) tmp = low_count;
                        if (clear_user(buf, tmp))
                                return -EFAULT;
                        buf += tmp;
                        p += tmp;
-                       read -= tmp;
+                       read += tmp;
+                       low_count -= tmp;
                        count -= tmp;
                }
 #endif
-               if (copy_to_user(buf, (char *)p, read))
-                       return -EFAULT;
-               p += read;
-               buf += read;
-               count -= read;
+               while (low_count > 0) {
+                       /*
+                        * Handle first page in case it's not aligned
+                        */
+                       if (-p & (PAGE_SIZE - 1))
+                               sz = -p & (PAGE_SIZE - 1);
+                       else
+                               sz = PAGE_SIZE;
+
+                       sz = min_t(unsigned long, sz, low_count);
+
+                       /*
+                        * On ia64 if a page has been mapped somewhere as
+                        * uncached, then it must also be accessed uncached
+                        * by the kernel or data corruption may occur
+                        */
+                       kbuf = xlate_dev_kmem_ptr((char *)p);
+
+                       if (copy_to_user(buf, kbuf, sz))
+                               return -EFAULT;
+                       buf += sz;
+                       p += sz;
+                       read += sz;
+                       low_count -= sz;
+                       count -= sz;
+               }
        }
 
        if (count > 0) {
@@ -277,73 +417,15 @@ static ssize_t read_kmem(struct file *file, char __user *buf,
                        }
                        count -= len;
                        buf += len;
-                       virtr += len;
+                       read += len;
                        p += len;
                }
                free_page((unsigned long)kbuf);
        }
        *ppos = p;
-       return virtr + read;
+       return read;
 }
 
-/*
- * This function writes to the *virtual* memory as seen by the kernel.
- */
-static ssize_t write_kmem(struct file * file, const char __user * buf, 
-                         size_t count, loff_t *ppos)
-{
-       unsigned long p = *ppos;
-       ssize_t wrote = 0;
-       ssize_t virtr = 0;
-       ssize_t written;
-       char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
-
-       if (p < (unsigned long) high_memory) {
-
-               wrote = count;
-               if (count > (unsigned long) high_memory - p)
-                       wrote = (unsigned long) high_memory - p;
-
-               written = do_write_mem((void*)p, p, buf, wrote, ppos);
-               if (written != wrote)
-                       return written;
-               wrote = written;
-               p += wrote;
-               buf += wrote;
-               count -= wrote;
-       }
-
-       if (count > 0) {
-               kbuf = (char *)__get_free_page(GFP_KERNEL);
-               if (!kbuf)
-                       return wrote ? wrote : -ENOMEM;
-               while (count > 0) {
-                       int len = count;
-
-                       if (len > PAGE_SIZE)
-                               len = PAGE_SIZE;
-                       if (len) {
-                               written = copy_from_user(kbuf, buf, len);
-                               if (written) {
-                                       ssize_t ret;
-
-                                       free_page((unsigned long)kbuf);
-                                       ret = wrote + virtr + (len - written);
-                                       return ret ? ret : -EFAULT;
-                               }
-                       }
-                       len = vwrite(kbuf, (char *)p, len);
-                       count -= len;
-                       buf += len;
-                       virtr += len;
-                       p += len;
-               }
-               free_page((unsigned long)kbuf);
-       }
-
-       *ppos = p;
-       return virtr + wrote;
-}
 
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
 static ssize_t read_port(struct file * file, char __user * buf,
@@ -352,7 +434,7 @@ static ssize_t read_port(struct file * file, char __user * buf,
        unsigned long i = *ppos;
        char __user *tmp = buf;
 
-       if (verify_area(VERIFY_WRITE,buf,count))
+       if (!access_ok(VERIFY_WRITE, buf, count))
                return -EFAULT; 
        while (count-- > 0 && i < 65536) {
                if (__put_user(inb(i),tmp) < 0) 
@@ -370,12 +452,15 @@ static ssize_t write_port(struct file * file, const char __user * buf,
        unsigned long i = *ppos;
        const char __user * tmp = buf;
 
-       if (verify_area(VERIFY_READ,buf,count))
+       if (!access_ok(VERIFY_READ,buf,count))
                return -EFAULT;
        while (count-- > 0 && i < 65536) {
                char c;
-               if (__get_user(c, tmp)) 
+               if (__get_user(c, tmp)) {
+                       if (tmp > buf)
+                               break;
                        return -EFAULT; 
+               }
                outb(c,i);
                i++;
                tmp++;
@@ -397,6 +482,18 @@ static ssize_t write_null(struct file * file, const char __user * buf,
        return count;
 }
 
+static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
+                       struct splice_desc *sd)
+{
+       return sd->len;
+}
+
+static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
+                                loff_t *ppos, size_t len, unsigned int flags)
+{
+       return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
+}
+
 #ifdef CONFIG_MMU
 /*
  * For fun, we are using the MMU for this.
@@ -417,7 +514,7 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
 
                if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
                        goto out_up;
-               if (vma->vm_flags & VM_SHARED)
+               if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
                        break;
                count = vma->vm_end - addr;
                if (count > size)
@@ -552,7 +649,7 @@ static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
 {
        loff_t ret;
 
-       down(&file->f_dentry->d_inode->i_sem);
+       mutex_lock(&file->f_dentry->d_inode->i_mutex);
        switch (orig) {
                case 0:
                        file->f_pos = offset;
@@ -567,7 +664,7 @@ static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
                default:
                        ret = -EINVAL;
        }
-       up(&file->f_dentry->d_inode->i_sem);
+       mutex_unlock(&file->f_dentry->d_inode->i_mutex);
        return ret;
 }
 
@@ -576,13 +673,13 @@ static int open_port(struct inode * inode, struct file * filp)
        return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
 }
 
-#define mmap_kmem      mmap_mem
 #define zero_lseek     null_lseek
 #define full_lseek      null_lseek
 #define write_zero     write_null
 #define read_full       read_zero
 #define open_mem       open_port
 #define open_kmem      open_mem
+#define open_oldmem    open_mem
 
 static struct file_operations mem_fops = {
        .llseek         = memory_lseek,
@@ -595,7 +692,6 @@ static struct file_operations mem_fops = {
 static struct file_operations kmem_fops = {
        .llseek         = memory_lseek,
        .read           = read_kmem,
-       .write          = write_kmem,
        .mmap           = mmap_kmem,
        .open           = open_kmem,
 };
@@ -604,6 +700,7 @@ static struct file_operations null_fops = {
        .llseek         = null_lseek,
        .read           = read_null,
        .write          = write_null,
+       .splice_write   = splice_write_null,
 };
 
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
@@ -622,17 +719,28 @@ static struct file_operations zero_fops = {
        .mmap           = mmap_zero,
 };
 
+static struct backing_dev_info zero_bdi = {
+       .capabilities   = BDI_CAP_MAP_COPY,
+};
+
 static struct file_operations full_fops = {
        .llseek         = full_lseek,
        .read           = read_full,
        .write          = write_full,
 };
 
+#ifdef CONFIG_CRASH_DUMP
+static struct file_operations oldmem_fops = {
+       .read   = read_oldmem,
+       .open   = open_oldmem,
+};
+#endif
+
 static ssize_t kmsg_write(struct file * file, const char __user * buf,
                          size_t count, loff_t *ppos)
 {
        char *tmp;
-       int ret;
+       ssize_t ret;
 
        tmp = kmalloc(count + 1, GFP_KERNEL);
        if (tmp == NULL)
@@ -641,6 +749,9 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
        if (!copy_from_user(tmp, buf, count)) {
                tmp[count] = 0;
                ret = printk("%s", tmp);
+               if (ret > count)
+                       /* printk can add a prefix */
+                       ret = count;
        }
        kfree(tmp);
        return ret;
@@ -668,6 +779,7 @@ static int memory_open(struct inode * inode, struct file * filp)
                        break;
 #endif
                case 5:
+                       filp->f_mapping->backing_dev_info = &zero_bdi;
                        filp->f_op = &zero_fops;
                        break;
                case 7:
@@ -682,6 +794,11 @@ static int memory_open(struct inode * inode, struct file * filp)
                case 11:
                        filp->f_op = &kmsg_fops;
                        break;
+#ifdef CONFIG_CRASH_DUMP
+               case 12:
+                       filp->f_op = &oldmem_fops;
+                       break;
+#endif
                default:
                        return -ENXIO;
        }
@@ -698,10 +815,9 @@ static const struct {
        unsigned int            minor;
        char                    *name;
        umode_t                 mode;
-       struct file_operations  *fops;
+       const struct file_operations    *fops;
 } devlist[] = { /* list of minor devices */
        {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
-       {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
        {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
        {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
@@ -711,9 +827,12 @@ static const struct {
        {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
        {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
        {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
+#ifdef CONFIG_CRASH_DUMP
+       {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
+#endif
 };
 
-static struct class_simple *mem_class;
+static struct class *mem_class;
 
 static int __init chr_dev_init(void)
 {
@@ -722,18 +841,15 @@ static int __init chr_dev_init(void)
        if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
                printk("unable to get major %d for memory devs\n", MEM_MAJOR);
 
-       mem_class = class_simple_create(THIS_MODULE, "mem");
+       mem_class = class_create(THIS_MODULE, "mem");
        for (i = 0; i < ARRAY_SIZE(devlist); i++) {
-               class_simple_device_add(mem_class,
+               class_device_create(mem_class, NULL,
                                        MKDEV(MEM_MAJOR, devlist[i].minor),
                                        NULL, devlist[i].name);
                devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
                                S_IFCHR | devlist[i].mode, devlist[i].name);
        }
        
-#if defined (CONFIG_FB)
-       fbmem_init();
-#endif
        return 0;
 }