ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / drivers / char / mem.c
1 /*
2  *  linux/drivers/char/mem.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  Added devfs support. 
7  *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8  *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
9  */
10
11 #include <linux/config.h>
12 #include <linux/mm.h>
13 #include <linux/miscdevice.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/mman.h>
17 #include <linux/random.h>
18 #include <linux/init.h>
19 #include <linux/raw.h>
20 #include <linux/tty.h>
21 #include <linux/capability.h>
22 #include <linux/smp_lock.h>
23 #include <linux/devfs_fs_kernel.h>
24 #include <linux/ptrace.h>
25 #include <linux/device.h>
26
27 #include <asm/uaccess.h>
28 #include <asm/io.h>
29 #include <asm/pgalloc.h>
30
31 #ifdef CONFIG_IA64
32 # include <linux/efi.h>
33 #endif
34
35 #ifdef CONFIG_FB
36 extern void fbmem_init(void);
37 #endif
38 #if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_CHAR)
39 extern void tapechar_init(void);
40 #endif
41
42 /*
43  * Architectures vary in how they handle caching for addresses
44  * outside of main memory.
45  *
46  */
47 static inline int uncached_access(struct file *file, unsigned long addr)
48 {
49 #if defined(__i386__)
50         /*
51          * On the PPro and successors, the MTRRs are used to set
52          * memory types for physical addresses outside main memory,
53          * so blindly setting PCD or PWT on those pages is wrong.
54          * For Pentiums and earlier, the surround logic should disable
55          * caching for the high addresses through the KEN pin, but
56          * we maintain the tradition of paranoia in this code.
57          */
58         if (file->f_flags & O_SYNC)
59                 return 1;
60         return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
61                   test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
62                   test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
63                   test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
64           && addr >= __pa(high_memory);
65 #elif defined(CONFIG_IA64)
66         /*
67          * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
68          */
69         return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
70 #elif defined(CONFIG_PPC64)
71         /* On PPC64, we always do non-cacheable access to the IO hole and
72          * cacheable elsewhere. Cache paradox can checkstop the CPU and
73          * the high_memory heuristic below is wrong on machines with memory
74          * above the IO hole... Ah, and of course, XFree86 doesn't pass
75          * O_SYNC when mapping us to tap IO space. Surprised ?
76          */
77         return !page_is_ram(addr);
78 #else
79         /*
80          * Accessing memory above the top the kernel knows about or through a file pointer
81          * that was marked O_SYNC will be done non-cached.
82          */
83         if (file->f_flags & O_SYNC)
84                 return 1;
85         return addr >= __pa(high_memory);
86 #endif
87 }
88
89 #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
90 static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
91 {
92         unsigned long end_mem;
93
94         end_mem = __pa(high_memory);
95         if (addr >= end_mem)
96                 return 0;
97
98         if (*count > end_mem - addr)
99                 *count = end_mem - addr;
100
101         return 1;
102 }
103 #endif
104
105 static ssize_t do_write_mem(void *p, unsigned long realp,
106                             const char * buf, size_t count, loff_t *ppos)
107 {
108         ssize_t written;
109         unsigned long copied;
110
111         written = 0;
112 #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
113         /* we don't have page 0 mapped on sparc and m68k.. */
114         if (realp < PAGE_SIZE) {
115                 unsigned long sz = PAGE_SIZE-realp;
116                 if (sz > count) sz = count; 
117                 /* Hmm. Do something? */
118                 buf+=sz;
119                 p+=sz;
120                 count-=sz;
121                 written+=sz;
122         }
123 #endif
124         copied = copy_from_user(p, buf, count);
125         if (copied) {
126                 ssize_t ret = written + (count - copied);
127
128                 if (ret)
129                         return ret;
130                 return -EFAULT;
131         }
132         written += count;
133         *ppos += written;
134         return written;
135 }
136
137
138 /*
139  * This funcion reads the *physical* memory. The f_pos points directly to the 
140  * memory location. 
141  */
142 static ssize_t read_mem(struct file * file, char * buf,
143                         size_t count, loff_t *ppos)
144 {
145         unsigned long p = *ppos;
146         ssize_t read;
147
148         if (!valid_phys_addr_range(p, &count))
149                 return -EFAULT;
150         read = 0;
151 #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
152         /* we don't have page 0 mapped on sparc and m68k.. */
153         if (p < PAGE_SIZE) {
154                 unsigned long sz = PAGE_SIZE-p;
155                 if (sz > count) 
156                         sz = count; 
157                 if (sz > 0) {
158                         if (clear_user(buf, sz))
159                                 return -EFAULT;
160                         buf += sz; 
161                         p += sz; 
162                         count -= sz; 
163                         read += sz; 
164                 }
165         }
166 #endif
167         if (copy_to_user(buf, __va(p), count))
168                 return -EFAULT;
169         read += count;
170         *ppos += read;
171         return read;
172 }
173
174 static ssize_t write_mem(struct file * file, const char * buf, 
175                          size_t count, loff_t *ppos)
176 {
177         unsigned long p = *ppos;
178
179         if (!valid_phys_addr_range(p, &count))
180                 return -EFAULT;
181         return do_write_mem(__va(p), p, buf, count, ppos);
182 }
183
184 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
185 {
186         unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
187         int uncached;
188
189         uncached = uncached_access(file, offset);
190 #ifdef pgprot_noncached
191         if (uncached)
192                 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
193 #endif
194
195         /* Don't try to swap out physical pages.. */
196         vma->vm_flags |= VM_RESERVED;
197
198         /*
199          * Don't dump addresses that are not real memory to a core file.
200          */
201         if (uncached)
202                 vma->vm_flags |= VM_IO;
203
204         if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start,
205                              vma->vm_page_prot))
206                 return -EAGAIN;
207         return 0;
208 }
209
210 extern long vread(char *buf, char *addr, unsigned long count);
211 extern long vwrite(char *buf, char *addr, unsigned long count);
212
213 /*
214  * This function reads the *virtual* memory as seen by the kernel.
215  */
216 static ssize_t read_kmem(struct file *file, char *buf, 
217                          size_t count, loff_t *ppos)
218 {
219         unsigned long p = *ppos;
220         ssize_t read = 0;
221         ssize_t virtr = 0;
222         char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
223                 
224         if (p < (unsigned long) high_memory) {
225                 read = count;
226                 if (count > (unsigned long) high_memory - p)
227                         read = (unsigned long) high_memory - p;
228
229 #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
230                 /* we don't have page 0 mapped on sparc and m68k.. */
231                 if (p < PAGE_SIZE && read > 0) {
232                         size_t tmp = PAGE_SIZE - p;
233                         if (tmp > read) tmp = read;
234                         if (clear_user(buf, tmp))
235                                 return -EFAULT;
236                         buf += tmp;
237                         p += tmp;
238                         read -= tmp;
239                         count -= tmp;
240                 }
241 #endif
242                 if (copy_to_user(buf, (char *)p, read))
243                         return -EFAULT;
244                 p += read;
245                 buf += read;
246                 count -= read;
247         }
248
249         if (count > 0) {
250                 kbuf = (char *)__get_free_page(GFP_KERNEL);
251                 if (!kbuf)
252                         return -ENOMEM;
253                 while (count > 0) {
254                         int len = count;
255
256                         if (len > PAGE_SIZE)
257                                 len = PAGE_SIZE;
258                         len = vread(kbuf, (char *)p, len);
259                         if (!len)
260                                 break;
261                         if (copy_to_user(buf, kbuf, len)) {
262                                 free_page((unsigned long)kbuf);
263                                 return -EFAULT;
264                         }
265                         count -= len;
266                         buf += len;
267                         virtr += len;
268                         p += len;
269                 }
270                 free_page((unsigned long)kbuf);
271         }
272         *ppos = p;
273         return virtr + read;
274 }
275
276 /*
277  * This function writes to the *virtual* memory as seen by the kernel.
278  */
279 static ssize_t write_kmem(struct file * file, const char * buf, 
280                           size_t count, loff_t *ppos)
281 {
282         unsigned long p = *ppos;
283         ssize_t wrote = 0;
284         ssize_t virtr = 0;
285         ssize_t written;
286         char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
287
288         if (p < (unsigned long) high_memory) {
289
290                 wrote = count;
291                 if (count > (unsigned long) high_memory - p)
292                         wrote = (unsigned long) high_memory - p;
293
294                 written = do_write_mem((void*)p, p, buf, wrote, ppos);
295                 if (written != wrote)
296                         return written;
297                 wrote = written;
298                 p += wrote;
299                 buf += wrote;
300                 count -= wrote;
301         }
302
303         if (count > 0) {
304                 kbuf = (char *)__get_free_page(GFP_KERNEL);
305                 if (!kbuf)
306                         return wrote ? wrote : -ENOMEM;
307                 while (count > 0) {
308                         int len = count;
309
310                         if (len > PAGE_SIZE)
311                                 len = PAGE_SIZE;
312                         if (len) {
313                                 written = copy_from_user(kbuf, buf, len);
314                                 if (written) {
315                                         ssize_t ret;
316
317                                         free_page((unsigned long)kbuf);
318                                         ret = wrote + virtr + (len - written);
319                                         return ret ? ret : -EFAULT;
320                                 }
321                         }
322                         len = vwrite(kbuf, (char *)p, len);
323                         count -= len;
324                         buf += len;
325                         virtr += len;
326                         p += len;
327                 }
328                 free_page((unsigned long)kbuf);
329         }
330
331         *ppos = p;
332         return virtr + wrote;
333 }
334
335 #if defined(CONFIG_ISA) || !defined(__mc68000__)
336 static ssize_t read_port(struct file * file, char * buf,
337                          size_t count, loff_t *ppos)
338 {
339         unsigned long i = *ppos;
340         char *tmp = buf;
341
342         if (verify_area(VERIFY_WRITE,buf,count))
343                 return -EFAULT; 
344         while (count-- > 0 && i < 65536) {
345                 if (__put_user(inb(i),tmp) < 0) 
346                         return -EFAULT;  
347                 i++;
348                 tmp++;
349         }
350         *ppos = i;
351         return tmp-buf;
352 }
353
354 static ssize_t write_port(struct file * file, const char * buf,
355                           size_t count, loff_t *ppos)
356 {
357         unsigned long i = *ppos;
358         const char * tmp = buf;
359
360         if (verify_area(VERIFY_READ,buf,count))
361                 return -EFAULT;
362         while (count-- > 0 && i < 65536) {
363                 char c;
364                 if (__get_user(c, tmp)) 
365                         return -EFAULT; 
366                 outb(c,i);
367                 i++;
368                 tmp++;
369         }
370         *ppos = i;
371         return tmp-buf;
372 }
373 #endif
374
375 static ssize_t read_null(struct file * file, char * buf,
376                          size_t count, loff_t *ppos)
377 {
378         return 0;
379 }
380
381 static ssize_t write_null(struct file * file, const char * buf,
382                           size_t count, loff_t *ppos)
383 {
384         return count;
385 }
386
387 #ifdef CONFIG_MMU
388 /*
389  * For fun, we are using the MMU for this.
390  */
391 static inline size_t read_zero_pagealigned(char * buf, size_t size)
392 {
393         struct mm_struct *mm;
394         struct vm_area_struct * vma;
395         unsigned long addr=(unsigned long)buf;
396
397         mm = current->mm;
398         /* Oops, this was forgotten before. -ben */
399         down_read(&mm->mmap_sem);
400
401         /* For private mappings, just map in zero pages. */
402         for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
403                 unsigned long count;
404
405                 if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
406                         goto out_up;
407                 if (vma->vm_flags & VM_SHARED)
408                         break;
409                 count = vma->vm_end - addr;
410                 if (count > size)
411                         count = size;
412
413                 zap_page_range(vma, addr, count, NULL);
414                 zeromap_page_range(vma, addr, count, PAGE_COPY);
415
416                 size -= count;
417                 buf += count;
418                 addr += count;
419                 if (size == 0)
420                         goto out_up;
421         }
422
423         up_read(&mm->mmap_sem);
424         
425         /* The shared case is hard. Let's do the conventional zeroing. */ 
426         do {
427                 unsigned long unwritten = clear_user(buf, PAGE_SIZE);
428                 if (unwritten)
429                         return size + unwritten - PAGE_SIZE;
430                 cond_resched();
431                 buf += PAGE_SIZE;
432                 size -= PAGE_SIZE;
433         } while (size);
434
435         return size;
436 out_up:
437         up_read(&mm->mmap_sem);
438         return size;
439 }
440
441 static ssize_t read_zero(struct file * file, char * buf, 
442                          size_t count, loff_t *ppos)
443 {
444         unsigned long left, unwritten, written = 0;
445
446         if (!count)
447                 return 0;
448
449         if (!access_ok(VERIFY_WRITE, buf, count))
450                 return -EFAULT;
451
452         left = count;
453
454         /* do we want to be clever? Arbitrary cut-off */
455         if (count >= PAGE_SIZE*4) {
456                 unsigned long partial;
457
458                 /* How much left of the page? */
459                 partial = (PAGE_SIZE-1) & -(unsigned long) buf;
460                 unwritten = clear_user(buf, partial);
461                 written = partial - unwritten;
462                 if (unwritten)
463                         goto out;
464                 left -= partial;
465                 buf += partial;
466                 unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
467                 written += (left & PAGE_MASK) - unwritten;
468                 if (unwritten)
469                         goto out;
470                 buf += left & PAGE_MASK;
471                 left &= ~PAGE_MASK;
472         }
473         unwritten = clear_user(buf, left);
474         written += left - unwritten;
475 out:
476         return written ? written : -EFAULT;
477 }
478
479 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
480 {
481         if (vma->vm_flags & VM_SHARED)
482                 return shmem_zero_setup(vma);
483         if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
484                 return -EAGAIN;
485         return 0;
486 }
487 #else /* CONFIG_MMU */
488 static ssize_t read_zero(struct file * file, char * buf, 
489                          size_t count, loff_t *ppos)
490 {
491         size_t todo = count;
492
493         while (todo) {
494                 size_t chunk = todo;
495
496                 if (chunk > 4096)
497                         chunk = 4096;   /* Just for latency reasons */
498                 if (clear_user(buf, chunk))
499                         return -EFAULT;
500                 buf += chunk;
501                 todo -= chunk;
502                 cond_resched();
503         }
504         return count;
505 }
506
507 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
508 {
509         return -ENOSYS;
510 }
511 #endif /* CONFIG_MMU */
512
513 static ssize_t write_full(struct file * file, const char * buf,
514                           size_t count, loff_t *ppos)
515 {
516         return -ENOSPC;
517 }
518
519 /*
520  * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
521  * can fopen() both devices with "a" now.  This was previously impossible.
522  * -- SRB.
523  */
524
525 static loff_t null_lseek(struct file * file, loff_t offset, int orig)
526 {
527         return file->f_pos = 0;
528 }
529
530 /*
531  * The memory devices use the full 32/64 bits of the offset, and so we cannot
532  * check against negative addresses: they are ok. The return value is weird,
533  * though, in that case (0).
534  *
535  * also note that seeking relative to the "end of file" isn't supported:
536  * it has no meaning, so it returns -EINVAL.
537  */
538 static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
539 {
540         loff_t ret;
541
542         down(&file->f_dentry->d_inode->i_sem);
543         switch (orig) {
544                 case 0:
545                         file->f_pos = offset;
546                         ret = file->f_pos;
547                         force_successful_syscall_return();
548                         break;
549                 case 1:
550                         file->f_pos += offset;
551                         ret = file->f_pos;
552                         force_successful_syscall_return();
553                         break;
554                 default:
555                         ret = -EINVAL;
556         }
557         up(&file->f_dentry->d_inode->i_sem);
558         return ret;
559 }
560
561 static int open_port(struct inode * inode, struct file * filp)
562 {
563         return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
564 }
565
/*
 * Several devices share an implementation.  These aliases let each
 * file_operations table use a name of its own while pointing at the
 * one function that does the work.
 */

/* open: mem and kmem use the same capability check as port */
#define open_mem        open_port
#define open_kmem       open_mem

/* mmap: kmem maps exactly like mem */
#define mmap_kmem       mmap_mem

/* zero and full borrow from null and from each other */
#define zero_lseek      null_lseek
#define full_lseek      null_lseek
#define write_zero      write_null
#define read_full       read_zero
573
574 static struct file_operations mem_fops = {
575         .llseek         = memory_lseek,
576         .read           = read_mem,
577         .write          = write_mem,
578         .mmap           = mmap_mem,
579         .open           = open_mem,
580 };
581
582 static struct file_operations kmem_fops = {
583         .llseek         = memory_lseek,
584         .read           = read_kmem,
585         .write          = write_kmem,
586         .mmap           = mmap_kmem,
587         .open           = open_kmem,
588 };
589
590 static struct file_operations null_fops = {
591         .llseek         = null_lseek,
592         .read           = read_null,
593         .write          = write_null,
594 };
595
596 #if defined(CONFIG_ISA) || !defined(__mc68000__)
597 static struct file_operations port_fops = {
598         .llseek         = memory_lseek,
599         .read           = read_port,
600         .write          = write_port,
601         .open           = open_port,
602 };
603 #endif
604
605 static struct file_operations zero_fops = {
606         .llseek         = zero_lseek,
607         .read           = read_zero,
608         .write          = write_zero,
609         .mmap           = mmap_zero,
610 };
611
612 static struct file_operations full_fops = {
613         .llseek         = full_lseek,
614         .read           = read_full,
615         .write          = write_full,
616 };
617
618 static ssize_t kmsg_write(struct file * file, const char * buf,
619                           size_t count, loff_t *ppos)
620 {
621         char *tmp;
622         int ret;
623
624         tmp = kmalloc(count + 1, GFP_KERNEL);
625         if (tmp == NULL)
626                 return -ENOMEM;
627         ret = -EFAULT;
628         if (!copy_from_user(tmp, buf, count)) {
629                 tmp[count] = 0;
630                 ret = printk("%s", tmp);
631         }
632         kfree(tmp);
633         return ret;
634 }
635
636 static struct file_operations kmsg_fops = {
637         .write =        kmsg_write,
638 };
639
640 static int memory_open(struct inode * inode, struct file * filp)
641 {
642         switch (iminor(inode)) {
643                 case 1:
644                         filp->f_op = &mem_fops;
645                         break;
646                 case 2:
647                         filp->f_op = &kmem_fops;
648                         break;
649                 case 3:
650                         filp->f_op = &null_fops;
651                         break;
652 #if defined(CONFIG_ISA) || !defined(__mc68000__)
653                 case 4:
654                         filp->f_op = &port_fops;
655                         break;
656 #endif
657                 case 5:
658                         filp->f_op = &zero_fops;
659                         break;
660                 case 7:
661                         filp->f_op = &full_fops;
662                         break;
663                 case 8:
664                         filp->f_op = &random_fops;
665                         break;
666                 case 9:
667                         filp->f_op = &urandom_fops;
668                         break;
669                 case 11:
670                         filp->f_op = &kmsg_fops;
671                         break;
672                 default:
673                         return -ENXIO;
674         }
675         if (filp->f_op && filp->f_op->open)
676                 return filp->f_op->open(inode,filp);
677         return 0;
678 }
679
680 static struct file_operations memory_fops = {
681         .open           = memory_open,  /* just a selector for the real open */
682 };
683
684 static const struct {
685         unsigned int            minor;
686         char                    *name;
687         umode_t                 mode;
688         struct file_operations  *fops;
689 } devlist[] = { /* list of minor devices */
690         {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
691         {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
692         {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
693 #if defined(CONFIG_ISA) || !defined(__mc68000__)
694         {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
695 #endif
696         {5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
697         {7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
698         {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
699         {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
700         {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
701 };
702
703 static struct class_simple *mem_class;
704
705 static int __init chr_dev_init(void)
706 {
707         int i;
708
709         if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
710                 printk("unable to get major %d for memory devs\n", MEM_MAJOR);
711
712         mem_class = class_simple_create(THIS_MODULE, "mem");
713         for (i = 0; i < ARRAY_SIZE(devlist); i++) {
714                 class_simple_device_add(mem_class,
715                                         MKDEV(MEM_MAJOR, devlist[i].minor),
716                                         NULL, devlist[i].name);
717                 devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
718                                 S_IFCHR | devlist[i].mode, devlist[i].name);
719         }
720         
721 #if defined (CONFIG_FB)
722         fbmem_init();
723 #endif
724         return 0;
725 }
726
727 fs_initcall(chr_dev_init);