fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / drivers / xen / privcmd / privcmd.c
1 /******************************************************************************
2  * privcmd.c
3  * 
4  * Interface to privileged domain-0 commands.
5  * 
6  * Copyright (c) 2002-2004, K A Fraser, B Dragovic
7  */
8
9 #include <linux/kernel.h>
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/string.h>
13 #include <linux/errno.h>
14 #include <linux/mm.h>
15 #include <linux/mman.h>
16 #include <linux/swap.h>
17 #include <linux/smp_lock.h>
18 #include <linux/highmem.h>
19 #include <linux/pagemap.h>
20 #include <linux/seq_file.h>
21 #include <linux/kthread.h>
22 #include <asm/hypervisor.h>
23
24 #include <asm/pgalloc.h>
25 #include <asm/pgtable.h>
26 #include <asm/uaccess.h>
27 #include <asm/tlb.h>
28 #include <asm/hypervisor.h>
29 #include <xen/public/privcmd.h>
30 #include <xen/interface/xen.h>
31 #include <xen/interface/dom0_ops.h>
32 #include <xen/xen_proc.h>
33
/* Proc entries handed back by create_xen_proc_entry() in privcmd_init(). */
static struct proc_dir_entry *privcmd_intf, *capabilities_intf;

#ifndef HAVE_ARCH_PRIVCMD_MMAP
/* Forward declaration: privcmd_ioctl() calls this before its definition. */
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
40
41 static int privcmd_ioctl(struct inode *inode, struct file *file,
42                          unsigned int cmd, unsigned long data)
43 {
44         int ret = -ENOSYS;
45         void __user *udata = (void __user *) data;
46
47         switch (cmd) {
48         case IOCTL_PRIVCMD_HYPERCALL: {
49                 privcmd_hypercall_t hypercall;
50   
51                 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
52                         return -EFAULT;
53
54 #if defined(__i386__)
55                 if (hypercall.op >= (PAGE_SIZE >> 5))
56                         break;
57                 __asm__ __volatile__ (
58                         "pushl %%ebx; pushl %%ecx; pushl %%edx; "
59                         "pushl %%esi; pushl %%edi; "
60                         "movl  8(%%eax),%%ebx ;"
61                         "movl 16(%%eax),%%ecx ;"
62                         "movl 24(%%eax),%%edx ;"
63                         "movl 32(%%eax),%%esi ;"
64                         "movl 40(%%eax),%%edi ;"
65                         "movl   (%%eax),%%eax ;"
66                         "shll $5,%%eax ;"
67                         "addl $hypercall_page,%%eax ;"
68                         "call *%%eax ;"
69                         "popl %%edi; popl %%esi; popl %%edx; "
70                         "popl %%ecx; popl %%ebx"
71                         : "=a" (ret) : "0" (&hypercall) : "memory" );
72 #elif defined (__x86_64__)
73                 if (hypercall.op < (PAGE_SIZE >> 5)) {
74                         long ign1, ign2, ign3;
75                         __asm__ __volatile__ (
76                                 "movq %8,%%r10; movq %9,%%r8;"
77                                 "shll $5,%%eax ;"
78                                 "addq $hypercall_page,%%rax ;"
79                                 "call *%%rax"
80                                 : "=a" (ret), "=D" (ign1),
81                                   "=S" (ign2), "=d" (ign3)
82                                 : "0" ((unsigned int)hypercall.op),
83                                 "1" (hypercall.arg[0]),
84                                 "2" (hypercall.arg[1]),
85                                 "3" (hypercall.arg[2]),
86                                 "g" (hypercall.arg[3]),
87                                 "g" (hypercall.arg[4])
88                                 : "r8", "r10", "memory" );
89                 }
90 #elif defined (__ia64__)
91                 __asm__ __volatile__ (
92                         ";; mov r14=%2; mov r15=%3; "
93                         "mov r16=%4; mov r17=%5; mov r18=%6;"
94                         "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
95                         : "=r" (ret)
96                         : "r" (hypercall.op),
97                         "r" (hypercall.arg[0]),
98                         "r" (hypercall.arg[1]),
99                         "r" (hypercall.arg[2]),
100                         "r" (hypercall.arg[3]),
101                         "r" (hypercall.arg[4])
102                         : "r14","r15","r16","r17","r18","r2","r8","memory");
103 #endif
104         }
105         break;
106
107         case IOCTL_PRIVCMD_MMAP: {
108                 privcmd_mmap_t mmapcmd;
109                 privcmd_mmap_entry_t msg;
110                 privcmd_mmap_entry_t __user *p;
111                 struct mm_struct *mm = current->mm;
112                 struct vm_area_struct *vma;
113                 unsigned long va;
114                 int i, rc;
115
116                 if (!is_initial_xendomain())
117                         return -EPERM;
118
119                 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
120                         return -EFAULT;
121
122                 p = mmapcmd.entry;
123                 if (copy_from_user(&msg, p, sizeof(msg)))
124                         return -EFAULT;
125
126                 down_read(&mm->mmap_sem);
127
128                 vma = find_vma(mm, msg.va);
129                 rc = -EINVAL;
130                 if (!vma || (msg.va != vma->vm_start) ||
131                     !privcmd_enforce_singleshot_mapping(vma))
132                         goto mmap_out;
133
134                 va = vma->vm_start;
135
136                 for (i = 0; i < mmapcmd.num; i++) {
137                         rc = -EFAULT;
138                         if (copy_from_user(&msg, p, sizeof(msg)))
139                                 goto mmap_out;
140
141                         /* Do not allow range to wrap the address space. */
142                         rc = -EINVAL;
143                         if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
144                             ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
145                                 goto mmap_out;
146
147                         /* Range chunks must be contiguous in va space. */
148                         if ((msg.va != va) ||
149                             ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
150                                 goto mmap_out;
151
152                         if ((rc = direct_remap_pfn_range(
153                                 vma,
154                                 msg.va & PAGE_MASK, 
155                                 msg.mfn, 
156                                 msg.npages << PAGE_SHIFT, 
157                                 vma->vm_page_prot,
158                                 mmapcmd.dom)) < 0)
159                                 goto mmap_out;
160
161                         p++;
162                         va += msg.npages << PAGE_SHIFT;
163                 }
164
165                 rc = 0;
166
167         mmap_out:
168                 up_read(&mm->mmap_sem);
169                 ret = rc;
170         }
171         break;
172
173         case IOCTL_PRIVCMD_MMAPBATCH: {
174                 privcmd_mmapbatch_t m;
175                 struct mm_struct *mm = current->mm;
176                 struct vm_area_struct *vma;
177                 xen_pfn_t __user *p;
178                 unsigned long addr, mfn;
179                 int i;
180
181                 if (!is_initial_xendomain())
182                         return -EPERM;
183
184                 if (copy_from_user(&m, udata, sizeof(m)))
185                         return -EFAULT;
186
187                 if ((m.num <= 0) || (m.num > (LONG_MAX >> PAGE_SHIFT)))
188                         return -EINVAL;
189
190                 down_read(&mm->mmap_sem);
191
192                 vma = find_vma(mm, m.addr);
193                 if (!vma ||
194                     (m.addr != vma->vm_start) ||
195                     ((m.addr + ((unsigned long)m.num<<PAGE_SHIFT)) !=
196                      vma->vm_end) ||
197                     !privcmd_enforce_singleshot_mapping(vma)) {
198                         up_read(&mm->mmap_sem);
199                         return -EINVAL;
200                 }
201
202                 p = m.arr;
203                 addr = m.addr;
204                 for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
205                         if (get_user(mfn, p)) {
206                                 up_read(&mm->mmap_sem);
207                                 return -EFAULT;
208                         }
209
210                         ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
211                                                      mfn, PAGE_SIZE,
212                                                      vma->vm_page_prot, m.dom);
213                         if (ret < 0)
214                                 put_user(0xF0000000 | mfn, p);
215                 }
216
217                 up_read(&mm->mmap_sem);
218                 ret = 0;
219         }
220         break;
221
222         default:
223                 ret = -EINVAL;
224                 break;
225         }
226
227         return ret;
228 }
229
230 #ifndef HAVE_ARCH_PRIVCMD_MMAP
231 static struct page *privcmd_nopage(struct vm_area_struct *vma,
232                                    unsigned long address,
233                                    int *type)
234 {
235         return NOPAGE_SIGBUS;
236 }
237
238 static struct vm_operations_struct privcmd_vm_ops = {
239         .nopage = privcmd_nopage
240 };
241
242 static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
243 {
244         /* Unsupported for auto-translate guests. */
245         if (xen_feature(XENFEAT_auto_translated_physmap))
246                 return -ENOSYS;
247
248         /* DONTCOPY is essential for Xen as copy_page_range is broken. */
249         vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
250         vma->vm_ops = &privcmd_vm_ops;
251         vma->vm_private_data = NULL;
252
253         return 0;
254 }
255
256 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
257 {
258         return (xchg(&vma->vm_private_data, (void *)1) == NULL);
259 }
260 #endif
261
262 static struct file_operations privcmd_file_ops = {
263         .ioctl = privcmd_ioctl,
264         .mmap  = privcmd_mmap,
265 };
266
267 static int capabilities_read(char *page, char **start, off_t off,
268                              int count, int *eof, void *data)
269 {
270         int len = 0;
271         *page = 0;
272
273         if (is_initial_xendomain())
274                 len = sprintf( page, "control_d\n" );
275
276         *eof = 1;
277         return len;
278 }
279
280 static int __init privcmd_init(void)
281 {
282         if (!is_running_on_xen())
283                 return -ENODEV;
284
285         privcmd_intf = create_xen_proc_entry("privcmd", 0400);
286         if (privcmd_intf != NULL)
287                 privcmd_intf->proc_fops = &privcmd_file_ops;
288
289         capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
290         if (capabilities_intf != NULL)
291                 capabilities_intf->read_proc = capabilities_read;
292
293         return 0;
294 }
295
296 __initcall(privcmd_init);