#include <linux/devfs_fs_kernel.h>
#include <linux/ptrace.h>
#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/backing-dev.h>
+#include <linux/bootmem.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
#include <asm/io.h>
# include <linux/efi.h>
#endif
-#ifdef CONFIG_FB
-extern void fbmem_init(void);
-#endif
-#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
-extern void tapechar_init(void);
-#endif
+/*
+ * Check every page frame touched by the physical range [from, to) against
+ * devmem_is_allowed().  Returns 1 when all of them are permitted (or when
+ * the range touches no page), and 0 as soon as one frame is refused; a
+ * refusal is logged together with the name of the current task.
+ */
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+	unsigned long pfn, last_pfn;
+
+	if (to == 0)
+		return 1;
+
+	/*
+	 * Compare frame numbers instead of "(pfn << PAGE_SHIFT) < to": the
+	 * shifted value wraps to 0 once pfn passes the last frame of the
+	 * address space, which would keep the loop running.
+	 */
+	last_pfn = (to - 1) >> PAGE_SHIFT;
+	for (pfn = from >> PAGE_SHIFT; pfn <= last_pfn; pfn++) {
+		if (!devmem_is_allowed(pfn)) {
+			/* Callers use this for writes as well as reads. */
+			printk(KERN_INFO "Program %s tried to access /dev/mem between %lx->%lx.\n",
+				current->comm, from, to);
+			return 0;
+		}
+	}
+	return 1;
+}
/*
* Architectures vary in how they handle caching for addresses
* On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
*/
return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
-#elif defined(CONFIG_PPC64)
- /* On PPC64, we always do non-cacheable access to the IO hole and
- * cacheable elsewhere. Cache paradox can checkstop the CPU and
- * the high_memory heuristic below is wrong on machines with memory
- * above the IO hole... Ah, and of course, XFree86 doesn't pass
- * O_SYNC when mapping us to tap IO space. Surprised ?
- */
- return !page_is_ram(addr);
#else
/*
* Accessing memory above the top the kernel knows about or through a file pointer
}
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
-static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
+/*
+ * Return 1 if the physical range [addr, addr + count) lies entirely at or
+ * below __pa(high_memory), 0 otherwise.
+ */
+static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
- unsigned long end_mem;
-
- end_mem = __pa(high_memory);
- if (addr >= end_mem)
+	unsigned long end_mem = __pa(high_memory);
+
+	/* Written without "addr + count", which can wrap and slip past the check. */
+	if (addr > end_mem || count > end_mem - addr)
return 0;
- if (*count > end_mem - addr)
- *count = end_mem - addr;
-
return 1;
}
-#endif
-static inline int range_is_allowed(unsigned long from, unsigned long to)
+static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
{
- unsigned long cursor;
-
- cursor = from >> PAGE_SHIFT;
- while ((cursor << PAGE_SHIFT) < to) {
- if (!devmem_is_allowed(cursor))
- return 0;
- cursor++;
- }
return 1;
}
-static ssize_t do_write_mem(void *p, unsigned long realp,
- const char __user * buf, size_t count, loff_t *ppos)
-{
- ssize_t written;
- unsigned long copied;
-
- written = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (realp < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-realp;
- if (sz > count) sz = count;
- /* Hmm. Do something? */
- buf+=sz;
- p+=sz;
- count-=sz;
- written+=sz;
- }
#endif
- if (!range_is_allowed(realp, realp+count))
- return -EPERM;
- copied = copy_from_user(p, buf, count);
- if (copied) {
- ssize_t ret = written + (count - copied);
-
- if (ret)
- return ret;
- return -EFAULT;
- }
- written += count;
- *ppos += written;
- return written;
-}
-
+#ifndef ARCH_HAS_DEV_MEM
/*
* This funcion reads the *physical* memory. The f_pos points directly to the
* memory location.
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read;
+ ssize_t read, sz;
+ char *ptr;
- if (!valid_phys_addr_range(p, &count))
+ if (!valid_phys_addr_range(p, count))
return -EFAULT;
read = 0;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
if (p < PAGE_SIZE) {
- unsigned long sz = PAGE_SIZE-p;
+ sz = PAGE_SIZE - p;
if (sz > count)
sz = count;
if (sz > 0) {
}
}
#endif
- if (!range_is_allowed(p, p+count))
- return -EPERM;
- if (copy_to_user(buf, __va(p), count))
- return -EFAULT;
- read += count;
+
+ while (count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ ptr = xlate_dev_mem_ptr(p);
+
+ if (!range_is_allowed(p, p+count))
+ return -EPERM;
+ if (copy_to_user(buf, ptr, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ count -= sz;
+ read += sz;
+ }
+
*ppos += read;
return read;
}
size_t count, loff_t *ppos)
{
+	/*
+	 * Copy up to count bytes from the user buffer into the physical
+	 * memory starting at *ppos, one page (or partial page) at a time.
+	 * Returns the number of bytes written and advances *ppos by that
+	 * amount.  Returns -EFAULT if the range fails
+	 * valid_phys_addr_range() or if nothing at all could be copied,
+	 * and -EPERM as soon as a chunk is refused by range_is_allowed().
+	 */
unsigned long p = *ppos;
+	ssize_t written, sz;
+	unsigned long copied;
+	void *ptr;
- if (!valid_phys_addr_range(p, &count))
+	if (!valid_phys_addr_range(p, count))
return -EFAULT;
- return do_write_mem(__va(p), p, buf, count, ppos);
+
+	written = 0;
+
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
+	/* we don't have page 0 mapped on sparc and m68k.. */
+	if (p < PAGE_SIZE) {
+		/* reuse the function-level sz instead of shadowing it */
+		sz = PAGE_SIZE - p;
+		if (sz > count)
+			sz = count;
+		/* Hmm. Do something? */
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+#endif
+
+	while (count > 0) {
+		/*
+		 * Handle first page in case it's not aligned
+		 */
+		if (-p & (PAGE_SIZE - 1))
+			sz = -p & (PAGE_SIZE - 1);
+		else
+			sz = PAGE_SIZE;
+
+		sz = min_t(unsigned long, sz, count);
+
+		/*
+		 * On ia64 if a page has been mapped somewhere as
+		 * uncached, then it must also be accessed uncached
+		 * by the kernel or data corruption may occur
+		 */
+		ptr = xlate_dev_mem_ptr(p);
+
+		/*
+		 * range_is_allowed() takes unsigned long physical addresses
+		 * (the read path passes p), so check p here and not the
+		 * translated kernel pointer.
+		 */
+		if (!range_is_allowed(p, p + sz))
+			return -EPERM;
+		copied = copy_from_user(ptr, buf, sz);
+		if (copied) {
+			/* report a short write if anything got through */
+			written += sz - copied;
+			if (written)
+				break;
+			return -EFAULT;
+		}
+		buf += sz;
+		p += sz;
+		count -= sz;
+		written += sz;
+	}
+
+	*ppos += written;
+	return written;
}
+#endif
-static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
+static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot)
{
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
- int uncached;
- unsigned long cursor;
-
- uncached = uncached_access(file, offset);
#ifdef pgprot_noncached
- if (uncached)
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ unsigned long offset = pfn << PAGE_SHIFT;
+
+ if (uncached_access(file, offset))
+ return pgprot_noncached(vma_prot);
+#endif
+ return vma_prot;
+}
#endif
- /* Don't try to swap out physical pages.. */
- vma->vm_flags |= VM_RESERVED;
+static int mmap_mem(struct file * file, struct vm_area_struct * vma)
+{
+ size_t size = vma->vm_end - vma->vm_start;
- /*
- * Don't dump addresses that are not real memory to a core file.
- */
- if (uncached)
- vma->vm_flags |= VM_IO;
-
- cursor = vma->vm_pgoff;
- while ((cursor << PAGE_SHIFT) < offset + vma->vm_end-vma->vm_start) {
- if (!devmem_is_allowed(cursor))
- return -EPERM;
- cursor++;
- }
+ if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
+ return -EINVAL;
- if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start,
- vma->vm_page_prot))
+ vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
+ size,
+ vma->vm_page_prot);
+
+ /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+ if (remap_pfn_range(vma,
+ vma->vm_start,
+ vma->vm_pgoff,
+ size,
+ vma->vm_page_prot))
return -EAGAIN;
return 0;
}
+/*
+ * mmap for kernel-virtual addresses: vma->vm_pgoff arrives as a
+ * kernel-virtual page number, is converted to a physical page frame and
+ * the request is then handed to mmap_mem().  Returns -EIO when the
+ * resulting frame fails pfn_valid().
+ */
+static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
+{
+	/* Widen before shifting so the virtual address is built in 64 bits */
+	u64 kaddr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long frame = __pa(kaddr) >> PAGE_SHIFT;
+
+	/*
+	 * RED-PEN: some architectures map more memory than mem_map
+	 * describes, and pfn_valid() only knows about mem_map.  A
+	 * dedicated macro may be needed here.
+	 *
+	 * RED-PEN: vmalloc addresses are not supported right now.
+	 */
+	if (pfn_valid(frame)) {
+		vma->vm_pgoff = frame;
+		return mmap_mem(file, vma);
+	}
+
+	return -EIO;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Read from the memory of the old kernel, starting at *ppos.
+ *
+ * The request is served page by page through copy_oldmem_page(); *ppos is
+ * advanced after each chunk.  Reading stops early, returning the bytes
+ * copied so far, once the page frame exceeds saved_max_pfn.  A negative
+ * value from copy_oldmem_page() is returned as is.
+ */
+static ssize_t read_oldmem(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	size_t total = 0;
+
+	while (count != 0) {
+		unsigned long frame = *ppos / PAGE_SIZE;
+		unsigned long in_page = (unsigned long)(*ppos % PAGE_SIZE);
+		size_t chunk = PAGE_SIZE - in_page;
+		int err;
+
+		if (frame > saved_max_pfn)
+			break;
+
+		/* never cross a page boundary in a single copy */
+		if (chunk > count)
+			chunk = count;
+
+		err = copy_oldmem_page(frame, buf, chunk, in_page, 1);
+		if (err < 0)
+			return err;
+
+		count -= chunk;
+		total += chunk;
+		*ppos += chunk;
+		buf += chunk;
+	}
+
+	return total;
+}
+#endif
+
extern long vread(char *buf, char *addr, unsigned long count);
extern long vwrite(char *buf, char *addr, unsigned long count);
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
- ssize_t read = 0;
- ssize_t virtr = 0;
+ ssize_t low_count, read, sz;
char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
-
+
return -EPERM;
-
+
+ read = 0;
if (p < (unsigned long) high_memory) {
- read = count;
+ low_count = count;
if (count > (unsigned long) high_memory - p)
- read = (unsigned long) high_memory - p;
+ low_count = (unsigned long) high_memory - p;
-#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
+#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
/* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE && read > 0) {
+ if (p < PAGE_SIZE && low_count > 0) {
size_t tmp = PAGE_SIZE - p;
- if (tmp > read) tmp = read;
+ if (tmp > low_count) tmp = low_count;
if (clear_user(buf, tmp))
return -EFAULT;
buf += tmp;
p += tmp;
- read -= tmp;
+ read += tmp;
+ low_count -= tmp;
count -= tmp;
}
#endif
- if (copy_to_user(buf, (char *)p, read))
- return -EFAULT;
- p += read;
- buf += read;
- count -= read;
+ while (low_count > 0) {
+ /*
+ * Handle first page in case it's not aligned
+ */
+ if (-p & (PAGE_SIZE - 1))
+ sz = -p & (PAGE_SIZE - 1);
+ else
+ sz = PAGE_SIZE;
+
+ sz = min_t(unsigned long, sz, low_count);
+
+ /*
+ * On ia64 if a page has been mapped somewhere as
+ * uncached, then it must also be accessed uncached
+ * by the kernel or data corruption may occur
+ */
+ kbuf = xlate_dev_kmem_ptr((char *)p);
+
+ if (copy_to_user(buf, kbuf, sz))
+ return -EFAULT;
+ buf += sz;
+ p += sz;
+ read += sz;
+ low_count -= sz;
+ count -= sz;
+ }
}
if (count > 0) {
}
count -= len;
buf += len;
- virtr += len;
+ read += len;
p += len;
}
free_page((unsigned long)kbuf);
}
*ppos = p;
- return virtr + read;
+ return read;
}
+
#if defined(CONFIG_ISA) || !defined(__mc68000__)
static ssize_t read_port(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
unsigned long i = *ppos;
char __user *tmp = buf;
- if (verify_area(VERIFY_WRITE,buf,count))
+ if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
if (__put_user(inb(i),tmp) < 0)
unsigned long i = *ppos;
const char __user * tmp = buf;
- if (verify_area(VERIFY_READ,buf,count))
+ if (!access_ok(VERIFY_READ,buf,count))
return -EFAULT;
while (count-- > 0 && i < 65536) {
char c;
- if (__get_user(c, tmp))
+ if (__get_user(c, tmp)) {
+ if (tmp > buf)
+ break;
return -EFAULT;
+ }
outb(c,i);
i++;
tmp++;
return count;
}
+/*
+ * Per-buffer callback handed to splice_from_pipe() by splice_write_null().
+ * It looks at neither the pipe nor the buffer and simply returns sd->len.
+ */
+static int pipe_to_null(struct pipe_inode_info *info,
+			struct pipe_buffer *buf,
+			struct splice_desc *sd)
+{
+	return sd->len;
+}
+
+/*
+ * splice_write handler: forwards the request to splice_from_pipe() with
+ * pipe_to_null() as the per-buffer callback and returns its result.
+ */
+static ssize_t splice_write_null(struct pipe_inode_info *pipe,
+				 struct file *out, loff_t *ppos,
+				 size_t len, unsigned int flags)
+{
+	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
+}
+
#ifdef CONFIG_MMU
/*
* For fun, we are using the MMU for this.
if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
goto out_up;
- if (vma->vm_flags & VM_SHARED)
+ if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
break;
count = vma->vm_end - addr;
if (count > size)
{
loff_t ret;
- down(&file->f_dentry->d_inode->i_sem);
+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
switch (orig) {
case 0:
file->f_pos = offset;
default:
ret = -EINVAL;
}
- up(&file->f_dentry->d_inode->i_sem);
+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
return ret;
}
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
-#define mmap_kmem mmap_mem
#define zero_lseek null_lseek
#define full_lseek null_lseek
#define write_zero write_null
#define read_full read_zero
#define open_mem open_port
#define open_kmem open_mem
+#define open_oldmem open_mem
+#ifndef ARCH_HAS_DEV_MEM
static struct file_operations mem_fops = {
.llseek = memory_lseek,
.read = read_mem,
.mmap = mmap_mem,
.open = open_mem,
};
+#else
+extern struct file_operations mem_fops;
+#endif
static struct file_operations kmem_fops = {
.llseek = memory_lseek,
.llseek = null_lseek,
.read = read_null,
.write = write_null,
+ .splice_write = splice_write_null,
};
#if defined(CONFIG_ISA) || !defined(__mc68000__)
.mmap = mmap_zero,
};
+static struct backing_dev_info zero_bdi = {	/* installed as f_mapping->backing_dev_info when minor 5 is opened */
+ .capabilities = BDI_CAP_MAP_COPY,
+};
+
static struct file_operations full_fops = {
.llseek = full_lseek,
.read = read_full,
.write = write_full,
};
+#ifdef CONFIG_CRASH_DUMP
+static struct file_operations oldmem_fops = {	/* selected for minor 12; only read and open are provided */
+ .read = read_oldmem,
+ .open = open_oldmem,
+};
+#endif
+
static ssize_t kmsg_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
char *tmp;
- int ret;
+ ssize_t ret;
tmp = kmalloc(count + 1, GFP_KERNEL);
if (tmp == NULL)
if (!copy_from_user(tmp, buf, count)) {
tmp[count] = 0;
ret = printk("%s", tmp);
+ if (ret > count)
+ /* printk can add a prefix */
+ ret = count;
}
kfree(tmp);
return ret;
break;
#endif
case 5:
+ filp->f_mapping->backing_dev_info = &zero_bdi;
filp->f_op = &zero_fops;
break;
case 7:
case 11:
filp->f_op = &kmsg_fops;
break;
+#ifdef CONFIG_CRASH_DUMP
+ case 12:
+ filp->f_op = &oldmem_fops;
+ break;
+#endif
default:
return -ENXIO;
}
unsigned int minor;
char *name;
umode_t mode;
- struct file_operations *fops;
+ const struct file_operations *fops;
} devlist[] = { /* list of minor devices */
{1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
- {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
{3, "null", S_IRUGO | S_IWUGO, &null_fops},
#if defined(CONFIG_ISA) || !defined(__mc68000__)
{4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
{8, "random", S_IRUGO | S_IWUSR, &random_fops},
{9, "urandom", S_IRUGO | S_IWUSR, &urandom_fops},
{11,"kmsg", S_IRUGO | S_IWUSR, &kmsg_fops},
+#ifdef CONFIG_CRASH_DUMP
+ {12,"oldmem", S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
+#endif
};
-static struct class_simple *mem_class;
+static struct class *mem_class;
static int __init chr_dev_init(void)
{
if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
printk("unable to get major %d for memory devs\n", MEM_MAJOR);
- mem_class = class_simple_create(THIS_MODULE, "mem");
+ mem_class = class_create(THIS_MODULE, "mem");
for (i = 0; i < ARRAY_SIZE(devlist); i++) {
- class_simple_device_add(mem_class,
+ class_device_create(mem_class, NULL,
MKDEV(MEM_MAJOR, devlist[i].minor),
NULL, devlist[i].name);
devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
S_IFCHR | devlist[i].mode, devlist[i].name);
}
-#if defined (CONFIG_FB)
- fbmem_init();
-#endif
return 0;
}