/*
 * arch/i386/mm/ioremap-xen.c
 *
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PC's
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */

#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>

#define ISA_START_ADDRESS       0x0
#define ISA_END_ADDRESS         0x100000

#if 0 /* not PAE safe */
/* These hacky macros avoid phys->machine translations. */
#define __direct_pte(x) ((pte_t) { (x) } )
#define __direct_mk_pte(page_nr,pgprot) \
  __direct_pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
#define direct_mk_pte_phys(physpage, pgprot) \
  __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
#endif

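/*
 * Callback for apply_to_page_range(): record the machine address of each
 * PTE slot in the pending mmu_update_t array, so the hypervisor can later
 * be asked to install the new entries in a single batch.
 */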
static int direct_remap_area_pte_fn(pte_t *pte,
                                    struct page *pmd_page,
                                    unsigned long address,
                                    void *data)
{
        mmu_update_t **v = (mmu_update_t **)data;

        (*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
                     PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        (*v)++;

        return 0;
}

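/*
 * Map 'size' bytes of machine frames, starting at frame 'mfn', into the
 * page tables of 'mm' at virtual address 'address' on behalf of domain
 * 'domid'.  New PTE values are accumulated in a page-sized array of
 * mmu_update_t requests and handed to the hypervisor in batches via
 * HYPERVISOR_mmu_update().
 */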
static int __direct_remap_pfn_range(struct mm_struct *mm,
                                    unsigned long address,
                                    unsigned long mfn,
                                    unsigned long size,
                                    pgprot_t prot,
                                    domid_t  domid)
{
        int rc;
        unsigned long i, start_address;
        mmu_update_t *u, *v, *w;

        u = v = w = (mmu_update_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
        if (u == NULL)
                return -ENOMEM;

        start_address = address;

        flush_cache_all();

        for (i = 0; i < size; i += PAGE_SIZE) {
                if ((v - u) == (PAGE_SIZE / sizeof(mmu_update_t))) {
                        /* Fill in the PTE pointers. */
                        rc = apply_to_page_range(mm, start_address,
                                                 address - start_address,
                                                 direct_remap_area_pte_fn, &w);
                        if (rc)
                                goto out;
                        w = u;
                        rc = -EFAULT;
                        if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
                                goto out;
                        v = u;
                        start_address = address;
                }

                /*
                 * Fill in the new machine address; the PTE pointer is filled
                 * in later by the apply_to_page_range() calls, via
                 * direct_remap_area_pte_fn().
                 */
                v->val = pte_val_ma(pfn_pte_ma(mfn, prot));

                mfn++;
                address += PAGE_SIZE;
                v++;
        }

        if (v != u) {
                /* get the ptep's filled in */
                rc = apply_to_page_range(mm, start_address,
                                         address - start_address,
                                         direct_remap_area_pte_fn, &w);
                if (rc)
                        goto out;
                rc = -EFAULT;
                if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
                        goto out;
        }

        rc = 0;

 out:
        flush_tlb_all();

        free_page((unsigned long)u);

        return rc;
}

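/*
 * Variant of remap_pfn_range() for mapping machine frames owned by another
 * domain (or I/O memory) into a user VMA; DOMID_SELF is rejected here.
 */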
int direct_remap_pfn_range(struct vm_area_struct *vma,
                           unsigned long address,
                           unsigned long mfn,
                           unsigned long size,
                           pgprot_t prot,
                           domid_t  domid)
{
        /* Same as remap_pfn_range(). */
        vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;

        if (domid == DOMID_SELF)
                return -EINVAL;

        vma->vm_mm->context.has_foreign_mappings = 1;

        return __direct_remap_pfn_range(
                vma->vm_mm, address, mfn, size, prot, domid);
}
EXPORT_SYMBOL(direct_remap_pfn_range);

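/*
 * As direct_remap_pfn_range(), but installs the mapping in the kernel's
 * init_mm rather than a user VMA (and accepts DOMID_SELF).
 */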
int direct_kernel_remap_pfn_range(unsigned long address,
                                  unsigned long mfn,
                                  unsigned long size,
                                  pgprot_t prot,
                                  domid_t  domid)
{
        return __direct_remap_pfn_range(
                &init_mm, address, mfn, size, prot, domid);
}
EXPORT_SYMBOL(direct_kernel_remap_pfn_range);

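/*
 * Callback for apply_to_page_range(): store the machine address of the PTE
 * slot that maps 'addr' into *data (if non-NULL).
 */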
static int lookup_pte_fn(
        pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
        uint64_t *ptep = (uint64_t *)data;
        if (ptep)
                *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pmd_page)) <<
                         PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        return 0;
}

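/*
 * Return, in *ptep, the machine address of the page-table entry in 'mm'
 * that maps 'address'.
 */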
int create_lookup_pte_addr(struct mm_struct *mm,
                           unsigned long address,
                           uint64_t *ptep)
{
        return apply_to_page_range(mm, address, PAGE_SIZE,
                                   lookup_pte_fn, ptep);
}

EXPORT_SYMBOL(create_lookup_pte_addr);

static int noop_fn(
        pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
        return 0;
}

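/*
 * Walk (and, where necessary, populate) the page-table levels covering
 * [address, address + size) in 'mm'.  noop_fn() above exists only to give
 * apply_to_page_range() a callback; no PTEs are modified.
 */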
int touch_pte_range(struct mm_struct *mm,
                    unsigned long address,
                    unsigned long size)
{
        return apply_to_page_range(mm, address, size, noop_fn, NULL);
}

EXPORT_SYMBOL(touch_pte_range);

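/*
 * Map 'vm_size' bytes of machine address space, starting at machine address
 * 'maddr', into a freshly allocated chunk of kernel virtual address space.
 * Returns the kernel virtual address on success, or NULL on failure.
 */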
void *vm_map_xen_pages(unsigned long maddr, int vm_size, pgprot_t prot)
{
        int error;

        struct vm_struct *vma;
        vma = get_vm_area(vm_size, VM_IOREMAP);

        if (vma == NULL) {
                printk("ioremap.c,vm_map_xen_pages(): "
                       "Failed to get VMA area\n");
                return NULL;
        }

        error = direct_kernel_remap_pfn_range((unsigned long)vma->addr,
                                              maddr >> PAGE_SHIFT, vm_size,
                                              prot, DOMID_SELF);
        if (error == 0) {
                return vma->addr;
        } else {
                printk("ioremap.c,vm_map_xen_pages(): "
                       "Failed to map xen shared pages into kernel space\n");
                return NULL;
        }
}
EXPORT_SYMBOL(vm_map_xen_pages);

/*
 * Does @address reside within a non-highmem page that is local to this
 * virtual machine (i.e., not an I/O page, nor a memory page belonging to
 * another VM)?  See the comment that accompanies mfn_to_local_pfn() in
 * page.h to understand why this works.
 */
static inline int is_local_lowmem(unsigned long address)
{
        extern unsigned long max_low_pfn;
        return (mfn_to_local_pfn(address >> PAGE_SHIFT) < max_low_pfn);
}

/*
 * Generic mapping function (not visible outside):
 */

/*
 * Remap an arbitrary physical address space into the kernel virtual
 * address space. Needed when the kernel wants to access high addresses
 * directly.
 *
 * NOTE! We need to allow non-page-aligned mappings too: we will obviously
 * have to convert them into an offset in a page-aligned mapping, but the
 * caller shouldn't need to know that small detail.
 */
void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
{
        void __iomem * addr;
        struct vm_struct * area;
        unsigned long offset, last_addr;
        domid_t domid = DOMID_IO;

        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr)
                return NULL;

        /*
         * Don't remap the low PCI/ISA area, it's always mapped..
         */
        if (xen_start_info->flags & SIF_PRIVILEGED &&
            phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
                return (void __iomem *) isa_bus_to_virt(phys_addr);

        /*
         * Don't allow anybody to remap normal RAM that we're using..
         */
        if (is_local_lowmem(phys_addr)) {
                char *t_addr, *t_end;
                struct page *page;

                t_addr = bus_to_virt(phys_addr);
                t_end = t_addr + (size - 1);

                for (page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
                        if (!PageReserved(page))
                                return NULL;

                domid = DOMID_SELF;
        }

        /*
         * Mappings have to be page-aligned
         */
        offset = phys_addr & ~PAGE_MASK;
        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(last_addr+1) - phys_addr;

        /*
         * Ok, go for it..
         */
        area = get_vm_area(size, VM_IOREMAP | (flags << 20));
        if (!area)
                return NULL;
        area->phys_addr = phys_addr;
        addr = (void __iomem *) area->addr;
        flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
#ifdef __x86_64__
        flags |= _PAGE_USER;
#endif
        if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
                                     phys_addr>>PAGE_SHIFT,
                                     size, __pgprot(flags), domid)) {
                vunmap((void __force *) addr);
                return NULL;
        }
        return (void __iomem *) (offset + (char __iomem *)addr);
}
EXPORT_SYMBOL(__ioremap);
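/*
 * Typical usage: drivers normally reach __ioremap() through the ioremap()
 * and ioremap_nocache() wrappers rather than calling it directly.  A caller
 * might do something like the following (the physical address, length and
 * REG_STATUS offset are made up for illustration):
 *
 *      void __iomem *base = ioremap(0xfebf0000, 0x1000);
 *      if (base) {
 *              unsigned int status = readl(base + REG_STATUS);
 *              ...
 *              iounmap(base);
 *      }
 */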

/**
 * ioremap_nocache     -   map bus memory into CPU space
 * @offset:    bus address of the memory
 * @size:      size of the resource to map
 *
 * ioremap_nocache performs a platform specific sequence of operations to
 * make bus memory CPU accessible via the readb/readw/readl/writeb/
 * writew/writel functions and the other mmio helpers. The returned
 * address is not guaranteed to be usable directly as a virtual
 * address.
 *
 * This version of ioremap ensures that the memory is marked uncachable
 * on the CPU as well as honouring existing caching rules from things like
 * the PCI bus. Note that there are other caches and buffers on many
 * busses. In particular driver authors should read up on PCI writes.
 *
 * It's useful if some control registers are in such an area and
 * write combining or read caching is not desirable.
 *
 * Must be freed with iounmap.
 */

void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
{
        unsigned long last_addr;
        void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
        if (!p)
                return p;

        /* Guaranteed to be > phys_addr, as per __ioremap() */
        last_addr = phys_addr + size - 1;

        if (is_local_lowmem(last_addr)) {
                struct page *ppage = virt_to_page(bus_to_virt(phys_addr));
                unsigned long npages;

                phys_addr &= PAGE_MASK;

                /* This might overflow and become zero.. */
                last_addr = PAGE_ALIGN(last_addr);

                /* .. but that's ok, because modulo-2**n arithmetic will make
                 * the page-aligned "last - first" come out right.
                 */
                npages = (last_addr - phys_addr) >> PAGE_SHIFT;

                if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
                        iounmap(p);
                        p = NULL;
                }
                global_flush_tlb();
        }

        return p;
}
EXPORT_SYMBOL(ioremap_nocache);
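/*
 * Typical usage for uncached control registers (the physical address and
 * CTRL_RESET offset are made up for illustration); every successful mapping
 * must eventually be released with iounmap():
 *
 *      void __iomem *regs = ioremap_nocache(0xfeb00000, PAGE_SIZE);
 *      if (!regs)
 *              return -ENODEV;
 *      writel(1, regs + CTRL_RESET);
 *      ...
 *      iounmap(regs);
 */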

/**
 * iounmap - Free an IO remapping
 * @addr: virtual address from ioremap_*
 *
 * Caller must ensure there is only one unmapping for the same pointer.
 */
void iounmap(volatile void __iomem *addr)
{
        struct vm_struct *p, *o;

        if ((void __force *)addr <= high_memory)
                return;

        /*
         * __ioremap special-cases the PCI/ISA range by not instantiating a
         * vm_area and by simply returning an address into the kernel mapping
         * of ISA space.   So handle that here.
         */
        if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
                return;

        addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);

        /* Use the vm area unlocked, assuming the caller
           ensures there isn't another iounmap for the same address
           in parallel. Reuse of the virtual address is prevented by
           leaving it in the global lists until we're done with it.
           cpa takes care of the direct mappings. */
        read_lock(&vmlist_lock);
        for (p = vmlist; p; p = p->next) {
                if (p->addr == addr)
                        break;
        }
        read_unlock(&vmlist_lock);

        if (!p) {
                printk("iounmap: bad address %p\n", addr);
                dump_stack();
                return;
        }

        /* Reset the direct mapping. Can block */
        if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
                /* p->size includes the guard page, but cpa doesn't like that */
                change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
                                 (p->size - PAGE_SIZE) >> PAGE_SHIFT,
                                 PAGE_KERNEL);
                global_flush_tlb();
        }

        /* Finally remove it */
        o = remove_vm_area((void *)addr);
        BUG_ON(p != o || o == NULL);
        kfree(p);
}
EXPORT_SYMBOL(iounmap);

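/*
 * Boot-time ioremap: map 'size' bytes at 'phys_addr' through the fixed
 * FIX_BTMAP fixmap slots, for use before the normal ioremap()/vmalloc
 * machinery is available.  Undo with bt_iounmap().
 */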
void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
{
        unsigned long offset, last_addr;
        unsigned int nrpages;
        enum fixed_addresses idx;

        /* Don't allow wraparound or zero size */
        last_addr = phys_addr + size - 1;
        if (!size || last_addr < phys_addr)
                return NULL;

        /*
         * Don't remap the low PCI/ISA area, it's always mapped..
         */
        if (xen_start_info->flags & SIF_PRIVILEGED &&
            phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
                return isa_bus_to_virt(phys_addr);

        /*
         * Mappings have to be page-aligned
         */
        offset = phys_addr & ~PAGE_MASK;
        phys_addr &= PAGE_MASK;
        size = PAGE_ALIGN(last_addr) - phys_addr;

        /*
         * Mappings have to fit in the FIX_BTMAP area.
         */
        nrpages = size >> PAGE_SHIFT;
        if (nrpages > NR_FIX_BTMAPS)
                return NULL;

        /*
         * Ok, go for it..
         */
        idx = FIX_BTMAP_BEGIN;
        while (nrpages > 0) {
                set_fixmap(idx, phys_addr);
                phys_addr += PAGE_SIZE;
                --idx;
                --nrpages;
        }
        return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN));
}

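/*
 * Release a boot-time mapping established with bt_ioremap() by clearing the
 * FIX_BTMAP fixmap slots that cover it.
 */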
void __init bt_iounmap(void *addr, unsigned long size)
{
        unsigned long virt_addr;
        unsigned long offset;
        unsigned int nrpages;
        enum fixed_addresses idx;

        virt_addr = (unsigned long)addr;
        if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))
                return;
        if (virt_addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
                return;
        offset = virt_addr & ~PAGE_MASK;
        nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;

        idx = FIX_BTMAP_BEGIN;
        while (nrpages > 0) {
                clear_fixmap(idx);
                --idx;
                --nrpages;
        }
}