Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / drivers / xen / privcmd / privcmd.c
1 /******************************************************************************
2  * privcmd.c
3  * 
4  * Interface to privileged domain-0 commands.
5  * 
6  * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7  */
8
9 #include <linux/kernel.h>
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/mm.h>
15 #include <linux/mman.h>
16 #include <linux/swap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/highmem.h>
19 #include <linux/pagemap.h>
20 #include <linux/seq_file.h>
21 #include <linux/kthread.h>
22 #include <asm/hypervisor.h>
23
24 #include <asm/pgalloc.h>
25 #include <asm/pgtable.h>
26 #include <asm/uaccess.h>
27 #include <asm/tlb.h>
28 #include <asm/hypervisor.h>
29 #include <xen/public/privcmd.h>
30 #include <xen/interface/xen.h>
31 #include <xen/interface/dom0_ops.h>
32 #include <xen/xen_proc.h>
33
34 static struct proc_dir_entry *privcmd_intf;
35 static struct proc_dir_entry *capabilities_intf;
36
37 static int privcmd_ioctl(struct inode *inode, struct file *file,
38                          unsigned int cmd, unsigned long data)
39 {
40         int ret = -ENOSYS;
41         void __user *udata = (void __user *) data;
42
43         switch (cmd) {
44         case IOCTL_PRIVCMD_HYPERCALL: {
45                 privcmd_hypercall_t hypercall;
46   
47                 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
48                         return -EFAULT;
49
50 #if defined(__i386__)
51                 __asm__ __volatile__ (
52                         "pushl %%ebx; pushl %%ecx; pushl %%edx; "
53                         "pushl %%esi; pushl %%edi; "
54                         "movl  8(%%eax),%%ebx ;"
55                         "movl 16(%%eax),%%ecx ;"
56                         "movl 24(%%eax),%%edx ;"
57                         "movl 32(%%eax),%%esi ;"
58                         "movl 40(%%eax),%%edi ;"
59                         "movl   (%%eax),%%eax ;"
60                         "shll $5,%%eax ;"
61                         "addl $hypercall_page,%%eax ;"
62                         "call *%%eax ;"
63                         "popl %%edi; popl %%esi; popl %%edx; "
64                         "popl %%ecx; popl %%ebx"
65                         : "=a" (ret) : "0" (&hypercall) : "memory" );
66 #elif defined (__x86_64__)
67                 {
68                         long ign1, ign2, ign3;
69                         __asm__ __volatile__ (
70                                 "movq %8,%%r10; movq %9,%%r8;"
71                                 "shlq $5,%%rax ;"
72                                 "addq $hypercall_page,%%rax ;"
73                                 "call *%%rax"
74                                 : "=a" (ret), "=D" (ign1),
75                                   "=S" (ign2), "=d" (ign3)
76                                 : "0" ((unsigned long)hypercall.op), 
77                                 "1" ((unsigned long)hypercall.arg[0]), 
78                                 "2" ((unsigned long)hypercall.arg[1]),
79                                 "3" ((unsigned long)hypercall.arg[2]), 
80                                 "g" ((unsigned long)hypercall.arg[3]),
81                                 "g" ((unsigned long)hypercall.arg[4])
82                                 : "r8", "r10", "memory" );
83                 }
84 #elif defined (__ia64__)
85                 __asm__ __volatile__ (
86                         ";; mov r14=%2; mov r15=%3; "
87                         "mov r16=%4; mov r17=%5; mov r18=%6;"
88                         "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
89                         : "=r" (ret)
90                         : "r" (hypercall.op),
91                         "r" (hypercall.arg[0]),
92                         "r" (hypercall.arg[1]),
93                         "r" (hypercall.arg[2]),
94                         "r" (hypercall.arg[3]),
95                         "r" (hypercall.arg[4])
96                         : "r14","r15","r16","r17","r18","r2","r8","memory");
97 #endif
98         }
99         break;
100
101         case IOCTL_PRIVCMD_MMAP: {
102                 privcmd_mmap_t mmapcmd;
103                 privcmd_mmap_entry_t msg;
104                 privcmd_mmap_entry_t __user *p;
105                 struct mm_struct *mm = current->mm;
106                 struct vm_area_struct *vma;
107                 unsigned long va;
108                 int i, rc;
109
110                 if (!is_initial_xendomain())
111                         return -EPERM;
112
113                 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
114                         return -EFAULT;
115
116                 p = mmapcmd.entry;
117                 if (copy_from_user(&msg, p, sizeof(msg)))
118                         return -EFAULT;
119
120                 down_read(&mm->mmap_sem);
121
122                 vma = find_vma(mm, msg.va);
123                 rc = -EINVAL;
124                 if (!vma || (msg.va != vma->vm_start) || vma->vm_private_data)
125                         goto mmap_out;
126
127                 /* Mapping is a one-shot operation per vma. */
128                 vma->vm_private_data = (void *)1;
129
130                 va = vma->vm_start;
131
132                 for (i = 0; i < mmapcmd.num; i++) {
133                         rc = -EFAULT;
134                         if (copy_from_user(&msg, p, sizeof(msg)))
135                                 goto mmap_out;
136
137                         /* Do not allow range to wrap the address space. */
138                         rc = -EINVAL;
139                         if ((msg.npages > (INT_MAX >> PAGE_SHIFT)) ||
140                             ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
141                                 goto mmap_out;
142
143                         /* Range chunks must be contiguous in va space. */
144                         if ((msg.va != va) ||
145                             ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
146                                 goto mmap_out;
147
148                         if ((rc = direct_remap_pfn_range(
149                                 vma,
150                                 msg.va & PAGE_MASK, 
151                                 msg.mfn, 
152                                 msg.npages << PAGE_SHIFT, 
153                                 vma->vm_page_prot,
154                                 mmapcmd.dom)) < 0)
155                                 goto mmap_out;
156
157                         p++;
158                         va += msg.npages << PAGE_SHIFT;
159                 }
160
161                 rc = 0;
162
163         mmap_out:
164                 up_read(&mm->mmap_sem);
165                 ret = rc;
166         }
167         break;
168
169         case IOCTL_PRIVCMD_MMAPBATCH: {
170                 privcmd_mmapbatch_t m;
171                 struct mm_struct *mm = current->mm;
172                 struct vm_area_struct *vma;
173                 xen_pfn_t __user *p;
174                 unsigned long addr, mfn;
175                 int i;
176
177                 if (!is_initial_xendomain())
178                         return -EPERM;
179
180                 if (copy_from_user(&m, udata, sizeof(m)))
181                         return -EFAULT;
182
183                 if ((m.num <= 0) || (m.num > (INT_MAX >> PAGE_SHIFT)))
184                         return -EINVAL;
185
186                 down_read(&mm->mmap_sem);
187
188                 vma = find_vma(mm, m.addr);
189                 if (!vma ||
190                     (m.addr != vma->vm_start) ||
191                     ((m.addr + (m.num<<PAGE_SHIFT)) != vma->vm_end) ||
192                     vma->vm_private_data) {
193                         up_read(&mm->mmap_sem);
194                         return -EINVAL;
195                 }
196
197                 /* Mapping is a one-shot operation per vma. */
198                 vma->vm_private_data = (void *)1;
199
200                 p = m.arr;
201                 addr = m.addr;
202                 for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
203                         if (get_user(mfn, p)) {
204                                 up_read(&mm->mmap_sem);
205                                 return -EFAULT;
206                         }
207
208                         ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
209                                                      mfn, PAGE_SIZE,
210                                                      vma->vm_page_prot, m.dom);
211                         if (ret < 0)
212                                 put_user(0xF0000000 | mfn, p);
213                 }
214
215                 up_read(&mm->mmap_sem);
216                 ret = 0;
217         }
218         break;
219
220         default:
221                 ret = -EINVAL;
222                 break;
223         }
224
225         return ret;
226 }
227
228 #ifndef HAVE_ARCH_PRIVCMD_MMAP
229 static struct page *privcmd_nopage(struct vm_area_struct *vma,
230                                    unsigned long address,
231                                    int *type)
232 {
233         return NOPAGE_SIGBUS;
234 }
235
236 static struct vm_operations_struct privcmd_vm_ops = {
237         .nopage = privcmd_nopage
238 };
239
240 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
241 {
242         /* Unsupported for auto-translate guests. */
243         if (xen_feature(XENFEAT_auto_translated_physmap))
244                 return -ENOSYS;
245
246         /* DONTCOPY is essential for Xen as copy_page_range is broken. */
247         vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
248         vma->vm_ops = &privcmd_vm_ops;
249         vma->vm_private_data = NULL;
250
251         return 0;
252 }
253 #endif
254
255 static struct file_operations privcmd_file_ops = {
256         .ioctl = privcmd_ioctl,
257         .mmap  = privcmd_mmap,
258 };
259
/*
 * read_proc handler for the "capabilities" proc entry.
 *
 * Writes "control_d\n" into @page when running in the initial Xen domain and
 * an empty string otherwise, then flags end-of-file.  @start, @off, @count
 * and @data are not used.
 *
 * Returns the number of characters placed in @page.
 */
static int capabilities_read(char *page, char **start, off_t off,
                             int count, int *eof, void *data)
{
        int written;

        /* Start from an empty string so a non-initial domain reads nothing. */
        *page = 0;

        written = is_initial_xendomain() ? sprintf(page, "control_d\n") : 0;

        *eof = 1;
        return written;
}
272
273 static int __init privcmd_init(void)
274 {
275         if (!is_running_on_xen())
276                 return -ENODEV;
277
278         privcmd_intf = create_xen_proc_entry("privcmd", 0400);
279         if (privcmd_intf != NULL)
280                 privcmd_intf->proc_fops = &privcmd_file_ops;
281
282         capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
283         if (capabilities_intf != NULL)
284                 capabilities_intf->read_proc = capabilities_read;
285
286         return 0;
287 }
288
289 __initcall(privcmd_init);