Merge to Fedora kernel-2.6.18-1.2255_FC5-vs2.0.2.2-rc9 patched with stable patch...
[linux-2.6.git] / drivers / xen / privcmd / privcmd.c
1 /******************************************************************************
2  * privcmd.c
3  * 
4  * Interface to privileged domain-0 commands.
5  * 
6  * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7  */
8
9 #include <linux/kernel.h>
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/mm.h>
15 #include <linux/mman.h>
16 #include <linux/swap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/highmem.h>
19 #include <linux/pagemap.h>
20 #include <linux/seq_file.h>
21 #include <linux/kthread.h>
22 #include <asm/hypervisor.h>
23
24 #include <asm/pgalloc.h>
25 #include <asm/pgtable.h>
26 #include <asm/uaccess.h>
27 #include <asm/tlb.h>
28 #include <asm/hypervisor.h>
29 #include <xen/public/privcmd.h>
30 #include <xen/interface/xen.h>
31 #include <xen/interface/dom0_ops.h>
32 #include <xen/xen_proc.h>
33
/*
 * /proc entries owned by this driver.  Both are filled in by
 * privcmd_init() and remain NULL if the corresponding entry could not
 * be created.
 */
static struct proc_dir_entry *capabilities_intf;
static struct proc_dir_entry *privcmd_intf;

#ifndef HAVE_ARCH_PRIVCMD_MMAP
/* Defined near the end of the file; needed earlier by privcmd_ioctl(). */
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
40
41 static int privcmd_ioctl(struct inode *inode, struct file *file,
42                          unsigned int cmd, unsigned long data)
43 {
44         int ret = -ENOSYS;
45         void __user *udata = (void __user *) data;
46
47         switch (cmd) {
48         case IOCTL_PRIVCMD_HYPERCALL: {
49                 privcmd_hypercall_t hypercall;
50   
51                 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
52                         return -EFAULT;
53
54 #if defined(__i386__)
55                 __asm__ __volatile__ (
56                         "pushl %%ebx; pushl %%ecx; pushl %%edx; "
57                         "pushl %%esi; pushl %%edi; "
58                         "movl  8(%%eax),%%ebx ;"
59                         "movl 16(%%eax),%%ecx ;"
60                         "movl 24(%%eax),%%edx ;"
61                         "movl 32(%%eax),%%esi ;"
62                         "movl 40(%%eax),%%edi ;"
63                         "movl   (%%eax),%%eax ;"
64                         "shll $5,%%eax ;"
65                         "addl $hypercall_page,%%eax ;"
66                         "call *%%eax ;"
67                         "popl %%edi; popl %%esi; popl %%edx; "
68                         "popl %%ecx; popl %%ebx"
69                         : "=a" (ret) : "0" (&hypercall) : "memory" );
70 #elif defined (__x86_64__)
71                 {
72                         long ign1, ign2, ign3;
73                         __asm__ __volatile__ (
74                                 "movq %8,%%r10; movq %9,%%r8;"
75                                 "shlq $5,%%rax ;"
76                                 "addq $hypercall_page,%%rax ;"
77                                 "call *%%rax"
78                                 : "=a" (ret), "=D" (ign1),
79                                   "=S" (ign2), "=d" (ign3)
80                                 : "0" ((unsigned long)hypercall.op), 
81                                 "1" ((unsigned long)hypercall.arg[0]), 
82                                 "2" ((unsigned long)hypercall.arg[1]),
83                                 "3" ((unsigned long)hypercall.arg[2]), 
84                                 "g" ((unsigned long)hypercall.arg[3]),
85                                 "g" ((unsigned long)hypercall.arg[4])
86                                 : "r8", "r10", "memory" );
87                 }
88 #elif defined (__ia64__)
89                 __asm__ __volatile__ (
90                         ";; mov r14=%2; mov r15=%3; "
91                         "mov r16=%4; mov r17=%5; mov r18=%6;"
92                         "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
93                         : "=r" (ret)
94                         : "r" (hypercall.op),
95                         "r" (hypercall.arg[0]),
96                         "r" (hypercall.arg[1]),
97                         "r" (hypercall.arg[2]),
98                         "r" (hypercall.arg[3]),
99                         "r" (hypercall.arg[4])
100                         : "r14","r15","r16","r17","r18","r2","r8","memory");
101 #endif
102         }
103         break;
104
105         case IOCTL_PRIVCMD_MMAP: {
106                 privcmd_mmap_t mmapcmd;
107                 privcmd_mmap_entry_t msg;
108                 privcmd_mmap_entry_t __user *p;
109                 struct mm_struct *mm = current->mm;
110                 struct vm_area_struct *vma;
111                 unsigned long va;
112                 int i, rc;
113
114                 if (!is_initial_xendomain())
115                         return -EPERM;
116
117                 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
118                         return -EFAULT;
119
120                 p = mmapcmd.entry;
121                 if (copy_from_user(&msg, p, sizeof(msg)))
122                         return -EFAULT;
123
124                 down_read(&mm->mmap_sem);
125
126                 vma = find_vma(mm, msg.va);
127                 rc = -EINVAL;
128                 if (!vma || (msg.va != vma->vm_start) ||
129                     !privcmd_enforce_singleshot_mapping(vma))
130                         goto mmap_out;
131
132                 va = vma->vm_start;
133
134                 for (i = 0; i < mmapcmd.num; i++) {
135                         rc = -EFAULT;
136                         if (copy_from_user(&msg, p, sizeof(msg)))
137                                 goto mmap_out;
138
139                         /* Do not allow range to wrap the address space. */
140                         rc = -EINVAL;
141                         if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
142                             ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
143                                 goto mmap_out;
144
145                         /* Range chunks must be contiguous in va space. */
146                         if ((msg.va != va) ||
147                             ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
148                                 goto mmap_out;
149
150                         if ((rc = direct_remap_pfn_range(
151                                 vma,
152                                 msg.va & PAGE_MASK, 
153                                 msg.mfn, 
154                                 msg.npages << PAGE_SHIFT, 
155                                 vma->vm_page_prot,
156                                 mmapcmd.dom)) < 0)
157                                 goto mmap_out;
158
159                         p++;
160                         va += msg.npages << PAGE_SHIFT;
161                 }
162
163                 rc = 0;
164
165         mmap_out:
166                 up_read(&mm->mmap_sem);
167                 ret = rc;
168         }
169         break;
170
171         case IOCTL_PRIVCMD_MMAPBATCH: {
172                 privcmd_mmapbatch_t m;
173                 struct mm_struct *mm = current->mm;
174                 struct vm_area_struct *vma;
175                 xen_pfn_t __user *p;
176                 unsigned long addr, mfn;
177                 int i;
178
179                 if (!is_initial_xendomain())
180                         return -EPERM;
181
182                 if (copy_from_user(&m, udata, sizeof(m)))
183                         return -EFAULT;
184
185                 if ((m.num <= 0) || (m.num > (LONG_MAX >> PAGE_SHIFT)))
186                         return -EINVAL;
187
188                 down_read(&mm->mmap_sem);
189
190                 vma = find_vma(mm, m.addr);
191                 if (!vma ||
192                     (m.addr != vma->vm_start) ||
193                     ((m.addr + ((unsigned long)m.num<<PAGE_SHIFT)) !=
194                      vma->vm_end) ||
195                     !privcmd_enforce_singleshot_mapping(vma)) {
196                         up_read(&mm->mmap_sem);
197                         return -EINVAL;
198                 }
199
200                 p = m.arr;
201                 addr = m.addr;
202                 for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
203                         if (get_user(mfn, p)) {
204                                 up_read(&mm->mmap_sem);
205                                 return -EFAULT;
206                         }
207
208                         ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
209                                                      mfn, PAGE_SIZE,
210                                                      vma->vm_page_prot, m.dom);
211                         if (ret < 0)
212                                 put_user(0xF0000000 | mfn, p);
213                 }
214
215                 up_read(&mm->mmap_sem);
216                 ret = 0;
217         }
218         break;
219
220         default:
221                 ret = -EINVAL;
222                 break;
223         }
224
225         return ret;
226 }
227
228 #ifndef HAVE_ARCH_PRIVCMD_MMAP
229 static struct page *privcmd_nopage(struct vm_area_struct *vma,
230                                    unsigned long address,
231                                    int *type)
232 {
233         return NOPAGE_SIGBUS;
234 }
235
236 static struct vm_operations_struct privcmd_vm_ops = {
237         .nopage = privcmd_nopage
238 };
239
240 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
241 {
242         /* Unsupported for auto-translate guests. */
243         if (xen_feature(XENFEAT_auto_translated_physmap))
244                 return -ENOSYS;
245
246         /* DONTCOPY is essential for Xen as copy_page_range is broken. */
247         vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
248         vma->vm_ops = &privcmd_vm_ops;
249         vma->vm_private_data = NULL;
250
251         return 0;
252 }
253
254 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
255 {
256         return (xchg(&vma->vm_private_data, (void *)1) == NULL);
257 }
258 #endif
259
260 static struct file_operations privcmd_file_ops = {
261         .ioctl = privcmd_ioctl,
262         .mmap  = privcmd_mmap,
263 };
264
265 static int capabilities_read(char *page, char **start, off_t off,
266                              int count, int *eof, void *data)
267 {
268         int len = 0;
269         *page = 0;
270
271         if (is_initial_xendomain())
272                 len = sprintf( page, "control_d\n" );
273
274         *eof = 1;
275         return len;
276 }
277
278 static int __init privcmd_init(void)
279 {
280         if (!is_running_on_xen())
281                 return -ENODEV;
282
283         privcmd_intf = create_xen_proc_entry("privcmd", 0400);
284         if (privcmd_intf != NULL)
285                 privcmd_intf->proc_fops = &privcmd_file_ops;
286
287         capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
288         if (capabilities_intf != NULL)
289                 capabilities_intf->read_proc = capabilities_read;
290
291         return 0;
292 }
293
294 __initcall(privcmd_init);