/******************************************************************************
 * arch/ia64/xen/hypervisor.c
 *
 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

//#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/hypervisor.h>
#include <asm/hypercall.h>
#include <xen/interface/memory.h>
#include <xen/balloon.h>

shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)XSI_BASE;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);

int running_on_xen;
EXPORT_SYMBOL(running_on_xen);

//XXX xen/ia64 copy_from_guest() is broken.
//    This is a temporary work-around until it is fixed.
//    Used by balloon.c and netfront.c.

// get_xen_guest_handle() is defined only when __XEN_TOOLS__ is defined;
// if the definition in arch-ia64.h changes, this must be updated.
#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)

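/*
 * Wrapper around the XENMEM_{increase,decrease}_reservation and
 * XENMEM_populate_physmap hypercalls.  The frame list is submitted one
 * chunk per loop iteration, and the first remaining frame is touched
 * just before each hypercall so that its TLB entry is installed when
 * the hypervisor accesses it (see the copy_from_guest() note above).
 */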
int
ia64_xenmem_reservation_op(unsigned long op,
                           struct xen_memory_reservation* reservation__)
{
        struct xen_memory_reservation reservation = *reservation__;
        unsigned long* frame_list;
        unsigned long nr_extents = reservation__->nr_extents;
        int ret = 0;
        get_xen_guest_handle(frame_list, reservation__->extent_start);

        BUG_ON(op != XENMEM_increase_reservation &&
               op != XENMEM_decrease_reservation &&
               op != XENMEM_populate_physmap);

        while (nr_extents > 0) {
                int tmp_ret;
                volatile unsigned long dummy;

                set_xen_guest_handle(reservation.extent_start, frame_list);
                reservation.nr_extents = nr_extents;

                dummy = frame_list[0]; // re-install the TLB entry before the hypercall
                tmp_ret = ____HYPERVISOR_memory_op(op, &reservation);
                if (tmp_ret < 0) {
                        if (ret == 0) {
                                ret = tmp_ret;
                        }
                        break;
                }
                if (tmp_ret == 0) {
                        //XXX dirty work-around for skbuff_ctor()
                        //    of a non-privileged domain
                        if ((op == XENMEM_increase_reservation ||
                             op == XENMEM_populate_physmap) &&
                            !is_initial_xendomain() &&
                            reservation.extent_order > 0)
                                return ret;
                }
                frame_list += tmp_ret;
                nr_extents -= tmp_ret;
                ret += tmp_ret;
        }
        return ret;
}
EXPORT_SYMBOL(ia64_xenmem_reservation_op);

//XXX same as the i386/x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear();
//    move those to lib/contiguous_bitmap?
//XXX discontigmem/sparsemem

/*
 * Bitmap is indexed by page number. If the bit is set, the page is part of a
 * xen_create_contiguous_region() area of memory.
 */
unsigned long *contiguous_bitmap;

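/*
 * Allocate the contiguous bitmap from low boot memory and zero it:
 * one bit per page frame, rounded up to whole unsigned longs with a
 * couple of longs of slack.
 */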
void
contiguous_bitmap_init(unsigned long end_pfn)
{
        unsigned long size = (end_pfn + 2 * BITS_PER_LONG) >> 3;
        contiguous_bitmap = alloc_bootmem_low_pages(size);
        BUG_ON(!contiguous_bitmap);
        memset(contiguous_bitmap, 0, size);
}

#if 0
int
contiguous_bitmap_test(void* p)
{
        return test_bit(__pa(p) >> PAGE_SHIFT, contiguous_bitmap);
}
#endif

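/*
 * Mark or clear pfns [first_page, first_page + nr_pages) in the bitmap.
 * -(1UL << start_off) has bits start_off..BITS_PER_LONG-1 set and
 * (1UL << end_off) - 1 has bits 0..end_off-1 set, so the partial first
 * and last words are masked and every word in between is filled whole.
 */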
static void contiguous_bitmap_set(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] |=
                        ((1UL<<end_off)-1) & -(1UL<<start_off);
        } else {
                contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
                while (++curr_idx < end_idx)
                        contiguous_bitmap[curr_idx] = ~0UL;
                contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
        }
}

static void contiguous_bitmap_clear(
        unsigned long first_page, unsigned long nr_pages)
{
        unsigned long start_off, end_off, curr_idx, end_idx;

        curr_idx  = first_page / BITS_PER_LONG;
        start_off = first_page & (BITS_PER_LONG-1);
        end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
        end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);

        if (curr_idx == end_idx) {
                contiguous_bitmap[curr_idx] &=
                        -(1UL<<end_off) | ((1UL<<start_off)-1);
        } else {
                contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
                while (++curr_idx != end_idx)
                        contiguous_bitmap[curr_idx] = 0;
                contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
        }
}

// __xen_create_contiguous_region() and __xen_destroy_contiguous_region()
// are based on the i386 xen_create_contiguous_region() and
// xen_destroy_contiguous_region().

/* Protected by balloon_lock. */
#define MAX_CONTIG_ORDER 7
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];

/* Ensure multi-page extents are contiguous in machine memory. */
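/*
 * Ask Xen (XENMEM_exchange) to trade the 2^order individual frames
 * backing [vstart, vstart + 2^order * PAGE_SIZE) for a single
 * machine-contiguous extent assigned back to the same pseudo-physical
 * range, then record the region in contiguous_bitmap.  Falls back to a
 * decrease_reservation / populate_physmap pair when XENMEM_exchange
 * is unsupported (-ENOSYS).
 */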
int
__xen_create_contiguous_region(unsigned long vstart,
                               unsigned int order, unsigned int address_bits)
{
        unsigned long error = 0;
        unsigned long gphys = __pa(vstart);
        unsigned long start_gpfn = gphys >> PAGE_SHIFT;
        unsigned long num_gpfn = 1 << order;
        unsigned long i;
        unsigned long flags;

        unsigned long *in_frames = discontig_frames, out_frame;
        int success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = num_gpfn,
                        .extent_order = 0,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .address_bits = address_bits,
                        .domid        = DOMID_SELF
                },
                .nr_exchanged = 0
        };

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        set_xen_guest_handle(exchange.in.extent_start, in_frames);
        set_xen_guest_handle(exchange.out.extent_start, &out_frame);

        scrub_pages(vstart, num_gpfn);

        balloon_lock(flags);

        /* Get a new contiguous memory extent. */
        for (i = 0; i < num_gpfn; i++) {
                in_frames[i] = start_gpfn + i;
        }
        out_frame = start_gpfn;
        error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == num_gpfn);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
        BUG_ON(success && (error != 0));
        if (unlikely(error == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                             &exchange.in);
                BUG_ON(error != num_gpfn);
                error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                             &exchange.out);
                if (error != 1) {
                        /* Couldn't get special memory: fall back to normal. */
                        for (i = 0; i < num_gpfn; i++) {
                                in_frames[i] = start_gpfn + i;
                        }
                        error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                                     &exchange.in);
                        BUG_ON(error != num_gpfn);
                        success = 0;
                } else
                        success = 1;
        }
        if (success)
                contiguous_bitmap_set(start_gpfn, num_gpfn);
#if 0
        if (success) {
                unsigned long mfn;
                unsigned long mfn_prev = ~0UL;
                for (i = 0; i < num_gpfn; i++) {
                        mfn = pfn_to_mfn_for_dma(start_gpfn + i);
                        if (mfn_prev != ~0UL && mfn != mfn_prev + 1) {
                                xprintk("\n");
                                xprintk("%s:%d order %d "
                                        "start 0x%lx bus 0x%lx "
                                        "machine 0x%lx\n",
                                        __func__, __LINE__, order,
                                        vstart, virt_to_bus((void*)vstart),
                                        phys_to_machine_for_dma(gphys));
                                xprintk("mfn: ");
                                for (i = 0; i < num_gpfn; i++) {
                                        mfn = pfn_to_mfn_for_dma(
                                                start_gpfn + i);
                                        xprintk("0x%lx ", mfn);
                                }
                                xprintk("\n");
                                break;
                        }
                        mfn_prev = mfn;
                }
        }
#endif
        balloon_unlock(flags);
        return success ? 0 : -ENOMEM;
}

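/*
 * Undo __xen_create_contiguous_region(): clear the region's bits in
 * contiguous_bitmap and exchange the single order-sized extent back for
 * 2^order individual frames at the same pseudo-physical addresses,
 * again falling back to decrease_reservation / populate_physmap when
 * XENMEM_exchange is unavailable.
 */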
void
__xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
{
        unsigned long flags;
        unsigned long error = 0;
        unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT;
        unsigned long num_gpfn = 1UL << order;
        unsigned long i;

        unsigned long *out_frames = discontig_frames, in_frame;
        int            success;
        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents   = 1,
                        .extent_order = order,
                        .domid        = DOMID_SELF
                },
                .out = {
                        .nr_extents   = num_gpfn,
                        .extent_order = 0,
                        .address_bits = 0,
                        .domid        = DOMID_SELF
                },
                .nr_exchanged = 0
        };

        if (!test_bit(start_gpfn, contiguous_bitmap))
                return;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return;

        set_xen_guest_handle(exchange.in.extent_start, &in_frame);
        set_xen_guest_handle(exchange.out.extent_start, out_frames);

        scrub_pages(vstart, num_gpfn);

        balloon_lock(flags);

        contiguous_bitmap_clear(start_gpfn, num_gpfn);

        /* Do the exchange for non-contiguous MFNs. */
        in_frame = start_gpfn;
        for (i = 0; i < num_gpfn; i++) {
                out_frames[i] = start_gpfn + i;
        }
        error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == 1);
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0)));
        BUG_ON(success && (error != 0));
        if (unlikely(error == -ENOSYS)) {
                /* Compatibility when XENMEM_exchange is unsupported. */
                error = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                             &exchange.in);
                BUG_ON(error != 1);

                error = HYPERVISOR_memory_op(XENMEM_populate_physmap,
                                             &exchange.out);
                BUG_ON(error != num_gpfn);
        }
        balloon_unlock(flags);
}

///////////////////////////////////////////////////////////////////////////
// grant table hack
// cmd: GNTTABOP_xxx

#include <linux/mm.h>
#include <xen/interface/xen.h>
#include <xen/gnttab.h>

static void
gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
{
        uint32_t flags;

        flags = uop->flags;

        if (flags & GNTMAP_host_map) {
                if (flags & GNTMAP_application_map) {
                        xprintd("GNTMAP_application_map is not supported yet: flags 0x%x\n", flags);
                        BUG();
                }
                if (flags & GNTMAP_contains_pte) {
                        xprintd("GNTMAP_contains_pte is not supported yet: flags 0x%x\n", flags);
                        BUG();
                }
        } else if (flags & GNTMAP_device_map) {
                xprintd("GNTMAP_device_map is not supported yet: flags 0x%x\n", flags);
                BUG(); //XXX not yet; actually this flag is not used.
        } else {
                BUG();
        }
}

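/*
 * Grant table hypercall wrapper.  If the uop buffer lies in region 5
 * beyond the kernel's pinned TR page, pre-translate it with ia64_tpa():
 * pa1 covers the page containing uop and pa2 the second page the
 * argument array may spill into, and both physical addresses are passed
 * down as extra hypercall arguments.
 */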
int
HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
{
        __u64 va1, va2, pa1, pa2;

        if (cmd == GNTTABOP_map_grant_ref) {
                unsigned int i;
                for (i = 0; i < count; i++) {
                        gnttab_map_grant_ref_pre(
                                (struct gnttab_map_grant_ref*)uop + i);
                }
        }
        va1 = (__u64)uop & PAGE_MASK;
        pa1 = pa2 = 0;
        if ((REGION_NUMBER(va1) == 5) &&
            ((va1 - KERNEL_START) >= KERNEL_TR_PAGE_SIZE)) {
                pa1 = ia64_tpa(va1);
                if (cmd <= GNTTABOP_transfer) {
                        static uint32_t uop_size[GNTTABOP_transfer + 1] = {
                                sizeof(struct gnttab_map_grant_ref),
                                sizeof(struct gnttab_unmap_grant_ref),
                                sizeof(struct gnttab_setup_table),
                                sizeof(struct gnttab_dump_table),
                                sizeof(struct gnttab_transfer),
                        };
                        va2 = (__u64)uop + (uop_size[cmd] * count) - 1;
                        va2 &= PAGE_MASK;
                        if (va1 != va2) {
                                /* The uop array spans at most two pages. */
                                BUG_ON(va2 > va1 + PAGE_SIZE);
                                pa2 = ia64_tpa(va2);
                        }
                }
        }
        return ____HYPERVISOR_grant_table_op(cmd, uop, count, pa1, pa2);
}
EXPORT_SYMBOL(HYPERVISOR_grant_table_op);

///////////////////////////////////////////////////////////////////////////
// PageForeign(), SetPageForeign(), ClearPageForeign()

struct address_space xen_ia64_foreign_dummy_mapping;
EXPORT_SYMBOL(xen_ia64_foreign_dummy_mapping);

///////////////////////////////////////////////////////////////////////////
// foreign mapping
#include <linux/efi.h>
#include <asm/meminit.h> // for IA64_GRANULE_SIZE, GRANULEROUND{UP,DOWN}()

static unsigned long privcmd_resource_min = 0;
// Xen/ia64 can currently handle pseudo-physical address bits only up to
// (PAGE_SHIFT * 3).
static unsigned long privcmd_resource_max = GRANULEROUNDDOWN((1UL << (PAGE_SHIFT * 3)) - 1);
static unsigned long privcmd_resource_align = IA64_GRANULE_SIZE;

static unsigned long
md_end_addr(const efi_memory_desc_t *md)
{
        return md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
}

#define XEN_IA64_PRIVCMD_LEAST_GAP_SIZE (1024 * 1024 * 1024UL)
static int
xen_ia64_privcmd_check_size(unsigned long start, unsigned long end)
{
        return (start < end &&
                (end - start) > XEN_IA64_PRIVCMD_LEAST_GAP_SIZE);
}

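/*
 * Pick a pseudo-physical address range to back privcmd mmap()s: first
 * try everything above the highest address described by the EFI memory
 * map; if that window is smaller than XEN_IA64_PRIVCMD_LEAST_GAP_SIZE,
 * fall back to the largest gap between EFI memory descriptors below
 * privcmd_resource_max.
 */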
static int __init
xen_ia64_privcmd_init(void)
{
        void *efi_map_start, *efi_map_end, *p;
        u64 efi_desc_size;
        efi_memory_desc_t *md;
        unsigned long tmp_min;
        unsigned long tmp_max;
        unsigned long gap_size;
        unsigned long prev_end;

        if (!is_running_on_xen())
                return -1;

        efi_map_start = __va(ia64_boot_param->efi_memmap);
        efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
        efi_desc_size = ia64_boot_param->efi_memdesc_size;

        // First check the highest address already in use.
        for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
                // nothing
        }
        md = p - efi_desc_size;
        privcmd_resource_min = GRANULEROUNDUP(md_end_addr(md));
        if (xen_ia64_privcmd_check_size(privcmd_resource_min,
                                        privcmd_resource_max)) {
                goto out;
        }

        // The highest used address is too large; try to find the largest gap.
        tmp_min = privcmd_resource_max;
        tmp_max = 0;
        gap_size = 0;
        prev_end = 0;
        for (p = efi_map_start;
             p < efi_map_end - efi_desc_size;
             p += efi_desc_size) {
                unsigned long end;
                efi_memory_desc_t* next;
                unsigned long next_start;

                md = p;
                end = md_end_addr(md);
                if (end > privcmd_resource_max) {
                        break;
                }
                if (end < prev_end) {
                        // Work around: Xen may pass incompletely sorted
                        // memory descriptors like
                        //   [x, x + length]
                        //   [x, x]
                        // whose order should be reversed.
                        continue;
                }
                next = p + efi_desc_size;
                next_start = next->phys_addr;
                if (next_start > privcmd_resource_max) {
                        next_start = privcmd_resource_max;
                }
                if (end < next_start && gap_size < (next_start - end)) {
                        tmp_min = end;
                        tmp_max = next_start;
                        gap_size = tmp_max - tmp_min;
                }
                prev_end = end;
        }

        privcmd_resource_min = GRANULEROUNDUP(tmp_min);
        if (xen_ia64_privcmd_check_size(privcmd_resource_min, tmp_max)) {
                privcmd_resource_max = tmp_max;
                goto out;
        }

        privcmd_resource_min = tmp_min;
        privcmd_resource_max = tmp_max;
        if (!xen_ia64_privcmd_check_size(privcmd_resource_min,
                                         privcmd_resource_max)) {
                // No large enough gap was found. Go ahead anyway with a
                // warning, hoping that a large region won't be requested.
                printk(KERN_WARNING "xen privcmd: no large enough region for privcmd mmap was found.\n");
        }

out:
        printk(KERN_INFO "xen privcmd uses pseudo physical addr range [0x%lx, 0x%lx] (%ldMB)\n",
               privcmd_resource_min, privcmd_resource_max,
               (privcmd_resource_max - privcmd_resource_min) >> 20);
        BUG_ON(privcmd_resource_min >= privcmd_resource_max);
        return 0;
}
late_initcall(xen_ia64_privcmd_init);

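/*
 * Book-keeping for privcmd mappings.  A xen_ia64_privcmd_range covers
 * one mmap()ed file range and owns the pseudo-physical resource plus a
 * per-page array of entries; a xen_ia64_privcmd_vma is a per-VMA view
 * onto a slice of that range, so the range survives VMA splits and is
 * torn down only when its reference count drops to zero.
 */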
struct xen_ia64_privcmd_entry {
        atomic_t        map_count;
#define INVALID_GPFN    (~0UL)
        unsigned long   gpfn;
};

struct xen_ia64_privcmd_range {
        atomic_t                        ref_count;
        unsigned long                   pgoff; // in PAGE_SIZE units
        struct resource*                res;

        unsigned long                   num_entries;
        struct xen_ia64_privcmd_entry   entries[0];
};

struct xen_ia64_privcmd_vma {
        struct xen_ia64_privcmd_range*  range;

        unsigned long                   num_entries;
        struct xen_ia64_privcmd_entry*  entries;
};

static void
xen_ia64_privcmd_init_entry(struct xen_ia64_privcmd_entry* entry)
{
        atomic_set(&entry->map_count, 0);
        entry->gpfn = INVALID_GPFN;
}

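/*
 * Map one foreign machine frame into the caller's address space:
 * assign the mfn to the gpfn for slot i inside the privcmd resource via
 * HYPERVISOR_add_physmap(), then remap that gpfn into the VMA with
 * remap_pfn_range().  On remap failure the physmap assignment is undone.
 */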
static int
xen_ia64_privcmd_entry_mmap(struct vm_area_struct* vma,
                            unsigned long addr,
                            struct xen_ia64_privcmd_range* privcmd_range,
                            int i,
                            unsigned long mfn,
                            pgprot_t prot,
                            domid_t domid)
{
        int error = 0;
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        unsigned long gpfn;
        unsigned long flags;

        if ((addr & ~PAGE_MASK) != 0 || mfn == INVALID_MFN) {
                error = -EINVAL;
                goto out;
        }

        if (entry->gpfn != INVALID_GPFN) {
                error = -EBUSY;
                goto out;
        }
        gpfn = (privcmd_range->res->start >> PAGE_SHIFT) + i;

        flags = ASSIGN_writable;
        if (pgprot_val(prot) == PROT_READ) {
                flags = ASSIGN_readonly;
        }
        error = HYPERVISOR_add_physmap(gpfn, mfn, flags, domid);
        if (error != 0) {
                goto out;
        }

        prot = vma->vm_page_prot;
        error = remap_pfn_range(vma, addr, gpfn, 1 << PAGE_SHIFT, prot);
        if (error != 0) {
                error = HYPERVISOR_zap_physmap(gpfn, 0);
                if (error) {
                        BUG(); //XXX
                }
        } else {
                atomic_inc(&entry->map_count);
                entry->gpfn = gpfn;
        }

out:
        return error;
}

static void
xen_ia64_privcmd_entry_munmap(struct xen_ia64_privcmd_range* privcmd_range,
                              int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        unsigned long gpfn = entry->gpfn;
        //gpfn = (privcmd_range->res->start >> PAGE_SHIFT) +
        //      (vma->vm_pgoff - privcmd_range->pgoff);
        int error;

        error = HYPERVISOR_zap_physmap(gpfn, 0);
        if (error) {
                BUG(); //XXX
        }
        entry->gpfn = INVALID_GPFN;
}

static void
xen_ia64_privcmd_entry_open(struct xen_ia64_privcmd_range* privcmd_range,
                            int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        if (entry->gpfn != INVALID_GPFN) {
                atomic_inc(&entry->map_count);
        } else {
                BUG_ON(atomic_read(&entry->map_count) != 0);
        }
}

static void
xen_ia64_privcmd_entry_close(struct xen_ia64_privcmd_range* privcmd_range,
                             int i)
{
        struct xen_ia64_privcmd_entry* entry = &privcmd_range->entries[i];
        if (entry->gpfn != INVALID_GPFN &&
            atomic_dec_and_test(&entry->map_count)) {
                xen_ia64_privcmd_entry_munmap(privcmd_range, i);
        }
}

static void xen_ia64_privcmd_vma_open(struct vm_area_struct* vma);
static void xen_ia64_privcmd_vma_close(struct vm_area_struct* vma);

struct vm_operations_struct xen_ia64_privcmd_vm_ops = {
        .open = &xen_ia64_privcmd_vma_open,
        .close = &xen_ia64_privcmd_vma_close,
};

static void
__xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
                            struct xen_ia64_privcmd_vma* privcmd_vma,
                            struct xen_ia64_privcmd_range* privcmd_range)
{
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
        unsigned long num_entries = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
        unsigned long i;

        BUG_ON(entry_offset < 0);
        BUG_ON(entry_offset + num_entries > privcmd_range->num_entries);

        privcmd_vma->range = privcmd_range;
        privcmd_vma->num_entries = num_entries;
        privcmd_vma->entries = &privcmd_range->entries[entry_offset];
        vma->vm_private_data = privcmd_vma;
        for (i = 0; i < privcmd_vma->num_entries; i++) {
                xen_ia64_privcmd_entry_open(privcmd_range, entry_offset + i);
        }

        vma->vm_private_data = privcmd_vma;
        vma->vm_ops = &xen_ia64_privcmd_vm_ops;
}

static void
xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
{
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;

        atomic_inc(&privcmd_range->ref_count);
        // vm_op->open() can't fail.
        privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);

        __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
}

static void
xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
{
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;
        unsigned long i;

        for (i = 0; i < privcmd_vma->num_entries; i++) {
                xen_ia64_privcmd_entry_close(privcmd_range, entry_offset + i);
        }
        vma->vm_private_data = NULL;
        kfree(privcmd_vma);

        if (atomic_dec_and_test(&privcmd_range->ref_count)) {
#if 1
                for (i = 0; i < privcmd_range->num_entries; i++) {
                        struct xen_ia64_privcmd_entry* entry =
                                &privcmd_range->entries[i];
                        BUG_ON(atomic_read(&entry->map_count) != 0);
                        BUG_ON(entry->gpfn != INVALID_GPFN);
                }
#endif
                release_resource(privcmd_range->res);
                kfree(privcmd_range->res);
                vfree(privcmd_range);
        }
}

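/*
 * mmap() handler for the Xen privcmd interface.  Reserve a window of
 * pseudo-physical address space for the VMA from the privcmd resource
 * range and set up the range/entry book-keeping; the foreign frames
 * themselves are mapped later, page by page, via
 * direct_remap_pfn_range().
 */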
int
privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        int error;
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long num_entries = size >> PAGE_SHIFT;
        struct xen_ia64_privcmd_range* privcmd_range = NULL;
        struct xen_ia64_privcmd_vma* privcmd_vma = NULL;
        struct resource* res = NULL;
        unsigned long i;

        BUG_ON(!is_running_on_xen());

        BUG_ON(file->private_data != NULL);

        error = -ENOMEM;
        privcmd_range =
                vmalloc(sizeof(*privcmd_range) +
                        sizeof(privcmd_range->entries[0]) * num_entries);
        if (privcmd_range == NULL) {
                goto out_enomem0;
        }
        privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL);
        if (privcmd_vma == NULL) {
                goto out_enomem1;
        }
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (res == NULL) {
                goto out_enomem1;
        }
        res->name = "Xen privcmd mmap";
        error = allocate_resource(&iomem_resource, res, size,
                                  privcmd_resource_min, privcmd_resource_max,
                                  privcmd_resource_align, NULL, NULL);
        if (error) {
                goto out_enomem1;
        }
        privcmd_range->res = res;

        /* DONTCOPY is essential for Xen as copy_page_range() is broken. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;

        atomic_set(&privcmd_range->ref_count, 1);
        privcmd_range->pgoff = vma->vm_pgoff;
        privcmd_range->num_entries = num_entries;
        for (i = 0; i < privcmd_range->num_entries; i++) {
                xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
        }

        __xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
        return 0;

out_enomem1:
        kfree(res);
        kfree(privcmd_vma);
out_enomem0:
        vfree(privcmd_range);
        return error;
}

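/*
 * Map size/PAGE_SIZE consecutive foreign frames, starting at mfn, into
 * the privcmd VMA at the given process virtual address.  Each page goes
 * through xen_ia64_privcmd_entry_mmap(), which assigns the frame to a
 * gpfn inside this VMA's slice of the privcmd resource and remaps it.
 */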
int
direct_remap_pfn_range(struct vm_area_struct *vma,
                       unsigned long address,   // process virtual address
                       unsigned long mfn,       // mfn, mfn + 1, ... mfn + size/PAGE_SIZE
                       unsigned long size,
                       pgprot_t prot,
                       domid_t  domid)          // target domain
{
        struct xen_ia64_privcmd_vma* privcmd_vma =
                (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
        struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
        unsigned long entry_offset = vma->vm_pgoff - privcmd_range->pgoff;

        unsigned long i;
        unsigned long offset;
        int error = 0;

        BUG_ON(!is_running_on_xen());

#if 0
        if (prot != vm->vm_page_prot) {
                return -EINVAL;
        }
#endif

        i = (address - vma->vm_start) >> PAGE_SHIFT;
        for (offset = 0; offset < size; offset += PAGE_SIZE) {
                error = xen_ia64_privcmd_entry_mmap(vma,
                                (address + offset) & PAGE_MASK,
                                privcmd_range, entry_offset + i,
                                mfn, prot, domid);
                if (error != 0) {
                        break;
                }

                i++;
                mfn++;
        }

        return error;
}


/* Called after suspend, to resume time. */
void
time_resume(void)
{
        extern void ia64_cpu_local_tick(void);

        /* Just trigger a tick. */
        ia64_cpu_local_tick();
}