vserver 1.9.5.x5

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 16e6f88..e6516c2 100644
 #include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
 
-rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(vmlist_lock);
 struct vm_struct *vmlist;
 
 static void unmap_area_pte(pmd_t *pmd, unsigned long address,
@@ -57,25 +56,25 @@ static void unmap_area_pte(pmd_t *pmd, unsigned long address,
        } while (address < end);
 }
 
-static void unmap_area_pmd(pgd_t *dir, unsigned long address,
+static void unmap_area_pmd(pud_t *pud, unsigned long address,
                                  unsigned long size)
 {
        unsigned long end;
        pmd_t *pmd;
 
-       if (pgd_none(*dir))
+       if (pud_none(*pud))
                return;
-       if (pgd_bad(*dir)) {
-               pgd_ERROR(*dir);
-               pgd_clear(dir);
+       if (pud_bad(*pud)) {
+               pud_ERROR(*pud);
+               pud_clear(pud);
                return;
        }
 
-       pmd = pmd_offset(dir, address);
-       address &= ~PGDIR_MASK;
+       pmd = pmd_offset(pud, address);
+       address &= ~PUD_MASK;
        end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
+       if (end > PUD_SIZE)
+               end = PUD_SIZE;
 
        do {
                unmap_area_pte(pmd, address, end - address);
@@ -84,6 +83,33 @@ static void unmap_area_pmd(pgd_t *dir, unsigned long address,
        } while (address < end);
 }
 
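+/*
+ * Walk the puds covered by one pgd entry.  The "address &&" test in the
+ * loop below guards against the address wrapping to zero at the top of
+ * the kernel address space.
+ */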
+static void unmap_area_pud(pgd_t *pgd, unsigned long address,
+                          unsigned long size)
+{
+       pud_t *pud;
+       unsigned long end;
+
+       if (pgd_none(*pgd))
+               return;
+       if (pgd_bad(*pgd)) {
+               pgd_ERROR(*pgd);
+               pgd_clear(pgd);
+               return;
+       }
+
+       pud = pud_offset(pgd, address);
+       address &= ~PGDIR_MASK;
+       end = address + size;
+       if (end > PGDIR_SIZE)
+               end = PGDIR_SIZE;
+
+       do {
+               unmap_area_pmd(pud, address, end - address);
+               address = (address + PUD_SIZE) & PUD_MASK;
+               pud++;
+       } while (address && (address < end));
+}
+
 static int map_area_pte(pte_t *pte, unsigned long address,
                               unsigned long size, pgprot_t prot,
                               struct page ***pages)
@@ -97,7 +123,6 @@ static int map_area_pte(pte_t *pte, unsigned long address,
 
        do {
                struct page *page = **pages;
-
                WARN_ON(!pte_none(*pte));
                if (!page)
                        return -ENOMEM;
@@ -116,11 +141,11 @@ static int map_area_pmd(pmd_t *pmd, unsigned long address,
 {
        unsigned long base, end;
 
-       base = address & PGDIR_MASK;
-       address &= ~PGDIR_MASK;
+       base = address & PUD_MASK;
+       address &= ~PUD_MASK;
        end = address + size;
-       if (end > PGDIR_SIZE)
-               end = PGDIR_SIZE;
+       if (end > PUD_SIZE)
+               end = PUD_SIZE;
 
        do {
                pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
@@ -135,19 +160,41 @@ static int map_area_pmd(pmd_t *pmd, unsigned long address,
        return 0;
 }
 
+static int map_area_pud(pud_t *pud, unsigned long address,
+                              unsigned long end, pgprot_t prot,
+                              struct page ***pages)
+{
+       do {
+               pmd_t *pmd = pmd_alloc(&init_mm, pud, address);
+               if (!pmd)
+                       return -ENOMEM;
+               if (map_area_pmd(pmd, address, end - address, prot, pages))
+                       return -ENOMEM;
+               address = (address + PUD_SIZE) & PUD_MASK;
+               pud++;
+       } while (address && address < end);
+
+       return 0;
+}
+
 void unmap_vm_area(struct vm_struct *area)
 {
        unsigned long address = (unsigned long) area->addr;
        unsigned long end = (address + area->size);
-       pgd_t *dir;
+       unsigned long next;
+       pgd_t *pgd;
+       int i;
 
-       dir = pgd_offset_k(address);
+       pgd = pgd_offset_k(address);
        flush_cache_vunmap(address, end);
-       do {
-               unmap_area_pmd(dir, address, end - address);
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-       } while (address && (address < end));
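+       /*
+        * One iteration per pgd entry spanned by the area; next is clamped
+        * to end so the final, partial entry is handled too.
+        */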
+       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
+               next = (address + PGDIR_SIZE) & PGDIR_MASK;
+               if (next <= address || next > end)
+                       next = end;
+               unmap_area_pud(pgd, address, next - address);
+               address = next;
+               pgd++;
+       }
        flush_tlb_kernel_range((unsigned long) area->addr, end);
 }
 
@@ -155,36 +202,56 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 {
        unsigned long address = (unsigned long) area->addr;
        unsigned long end = address + (area->size-PAGE_SIZE);
-       pgd_t *dir;
+       unsigned long next;
+       pgd_t *pgd;
        int err = 0;
+       int i;
 
-       dir = pgd_offset_k(address);
+       pgd = pgd_offset_k(address);
        spin_lock(&init_mm.page_table_lock);
-       do {
-               pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
-               if (!pmd) {
+       for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
+               pud_t *pud = pud_alloc(&init_mm, pgd, address);
+               if (!pud) {
                        err = -ENOMEM;
                        break;
                }
-               if (map_area_pmd(pmd, address, end - address, prot, pages)) {
+               next = (address + PGDIR_SIZE) & PGDIR_MASK;
+               if (next < address || next > end)
+                       next = end;
+               if (map_area_pud(pud, address, next, prot, pages)) {
                        err = -ENOMEM;
                        break;
                }
 
-               address = (address + PGDIR_SIZE) & PGDIR_MASK;
-               dir++;
-       } while (address && (address < end));
+               address = next;
+               pgd++;
+       }
 
        spin_unlock(&init_mm.page_table_lock);
        flush_cache_vmap((unsigned long) area->addr, end);
        return err;
 }
 
+#define IOREMAP_MAX_ORDER      (7 + PAGE_SHIFT)        /* 128 pages */
+
 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
                                unsigned long start, unsigned long end)
 {
        struct vm_struct **p, *tmp, *area;
-       unsigned long addr = start;
+       unsigned long align = 1;
+       unsigned long addr;
+
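+       /*
+        * Give ioremap areas a power-of-two alignment matching their size
+        * (capped at IOREMAP_MAX_ORDER) so that architectures may map them
+        * with large pages where the hardware supports it.
+        */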
+       if (flags & VM_IOREMAP) {
+               int bit = fls(size);
+
+               if (bit > IOREMAP_MAX_ORDER)
+                       bit = IOREMAP_MAX_ORDER;
+               else if (bit < PAGE_SHIFT)
+                       bit = PAGE_SHIFT;
+
+               align = 1ul << bit;
+       }
+       addr = ALIGN(start, align);
 
        area = kmalloc(sizeof(*area), GFP_KERNEL);
        if (unlikely(!area))
@@ -200,14 +267,18 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
        }
 
        write_lock(&vmlist_lock);
-       for (p = &vmlist; (tmp = *p) ;p = &tmp->next) {
-               if ((unsigned long)tmp->addr < addr)
+       for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
+               if ((unsigned long)tmp->addr < addr) {
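+                       /*
+                        * An existing area reaches our tentative start:
+                        * push addr past its end, keeping the alignment.
+                        */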
+                       if ((unsigned long)tmp->addr + tmp->size >= addr)
+                               addr = ALIGN(tmp->size +
+                                            (unsigned long)tmp->addr, align);
                        continue;
+               }
                if ((size + addr) < addr)
                        goto out;
                if (size + addr <= (unsigned long)tmp->addr)
                        goto found;
-               addr = tmp->size + (unsigned long)tmp->addr;
+               addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
                if (addr > end - size)
                        goto out;
        }
@@ -229,6 +300,8 @@ found:
 out:
        write_unlock(&vmlist_lock);
        kfree(area);
+       if (printk_ratelimit())
+               printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
        return NULL;
 }
 
@@ -261,7 +334,7 @@ struct vm_struct *remove_vm_area(void *addr)
        struct vm_struct **p, *tmp;
 
        write_lock(&vmlist_lock);
-       for (p = &vmlist ; (tmp = *p) ;p = &tmp->next) {
+       for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
                 if (tmp->addr == addr)
                         goto found;
        }
@@ -305,7 +378,10 @@ void __vunmap(void *addr, int deallocate_pages)
                        __free_page(area->pages[i]);
                }
 
-               kfree(area->pages);
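+               /*
+                * The pages array itself may have been vmalloc()ed when it
+                * did not fit in a single page (see __vmalloc), so release
+                * it with the matching free routine.
+                */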
+               if (area->nr_pages > PAGE_SIZE/sizeof(struct page *))
+                       vfree(area->pages);
+               else
+                       kfree(area->pages);
        }
 
        kfree(area);
@@ -409,7 +485,12 @@ void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
        array_size = (nr_pages * sizeof(struct page *));
 
        area->nr_pages = nr_pages;
-       area->pages = pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
+       /* Please note that the recursion is strictly bounded. */
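+       /*
+        * Each nested call only needs the page-pointer array for the level
+        * above, which shrinks by roughly PAGE_SIZE/sizeof(struct page *)
+        * per level, so the depth stays small.
+        */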
+       if (array_size > PAGE_SIZE)
+               pages = __vmalloc(array_size, gfp_mask, PAGE_KERNEL);
+       else
+               pages = kmalloc(array_size, (gfp_mask & ~__GFP_HIGHMEM));
+       area->pages = pages;
        if (!area->pages) {
                remove_vm_area(area->addr);
                kfree(area);
@@ -455,6 +536,28 @@ void *vmalloc(unsigned long size)
 
 EXPORT_SYMBOL(vmalloc);
 
+/**
+ *     vmalloc_exec  -  allocate virtually contiguous, executable memory
+ *
+ *     @size:          allocation size
+ *
+ *     Kernel-internal function to allocate enough pages to cover @size
+ *     from the page level allocator and map them into contiguous and
+ *     executable kernel virtual space.
+ *
+ *     For tight control over page level allocator and protection flags
+ *     use __vmalloc() instead.
+ */
+
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
+void *vmalloc_exec(unsigned long size)
+{
+       return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
+}
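+
+/*
+ * A minimal usage sketch (image and image_size are placeholders, not
+ * taken from this file):
+ *
+ *	void *buf = vmalloc_exec(image_size);
+ *	if (!buf)
+ *		return -ENOMEM;
+ *	memcpy(buf, image, image_size);
+ *	flush_icache_range((unsigned long)buf,
+ *			   (unsigned long)buf + image_size);
+ */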
+
 /**
  *     vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
  *