Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch / i386 / mm / pageattr.c
index 4cfc303..92c3d9f 100644 (file)
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
 
-static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(cpa_lock);
 static struct list_head df_list = LIST_HEAD_INIT(df_list);
 
 
 pte_t *lookup_address(unsigned long address) 
 { 
-       pgd_t *pgd = pgd_offset_k(address); 
+       pgd_t *pgd = pgd_offset_k(address);
+       pud_t *pud;
        pmd_t *pmd;
        if (pgd_none(*pgd))
                return NULL;
-       pmd = pmd_offset(pgd, address);                
+       pud = pud_offset(pgd, address);
+       if (pud_none(*pud))
+               return NULL;
+       pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;
        if (pmd_large(*pmd))
@@ -31,7 +37,8 @@ pte_t *lookup_address(unsigned long address)
         return pte_offset_kernel(pmd, address);
 } 
 
-static struct page *split_large_page(unsigned long address, pgprot_t prot)
+static struct page *split_large_page(unsigned long address, pgprot_t prot,
+                                       pgprot_t ref_prot)
 { 
        int i; 
        unsigned long addr;
@@ -44,12 +51,19 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot)
        if (!base) 
                return NULL;
 
+       /*
+        * page_private is used to track the number of entries in
+        * the page table page that have non standard attributes.
+        */
+       SetPagePrivate(base);
+       page_private(base) = 0;
+
        address = __pa(address);
        addr = address & LARGE_PAGE_MASK; 
        pbase = (pte_t *)page_address(base);
        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-               pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
-                                  addr == address ? prot : PAGE_KERNEL);
+               set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
+                                          addr == address ? prot : ref_prot));
        }
        return base;
 } 
@@ -58,7 +72,7 @@ static void flush_kernel_map(void *dummy)
 { 
        /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
        if (boot_cpu_data.x86_model >= 4) 
-               asm volatile("wbinvd":::"memory"); 
+               wbinvd();
        /* Flush all to work around Errata in early athlons regarding 
         * large page flushing. 
         */
@@ -77,9 +91,11 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
        spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index) {
                pgd_t *pgd;
+               pud_t *pud;
                pmd_t *pmd;
                pgd = (pgd_t *)page_address(page) + pgd_index(address);
-               pmd = pmd_offset(pgd, address);
+               pud = pud_offset(pgd, address);
+               pmd = pmd_offset(pud, address);
                set_pte_atomic((pte_t *)pmd, pte);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);
@@ -91,11 +107,18 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
  */
 static inline void revert_page(struct page *kpte_page, unsigned long address)
 {
-       pte_t *linear = (pte_t *) 
-               pmd_offset(pgd_offset(&init_mm, address), address);
+       pgprot_t ref_prot;
+       pte_t *linear;
+
+       ref_prot =
+       ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
+               ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
+
+       linear = (pte_t *)
+               pmd_offset(pud_offset(pgd_offset_k(address), address), address);
        set_pmd_pte(linear,  address,
                    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
-                           PAGE_KERNEL_LARGE));
+                           ref_prot));
 }
 
 static int
@@ -105,10 +128,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
        unsigned long address;
        struct page *kpte_page;
 
-#ifdef CONFIG_HIGHMEM
-       if (page >= highmem_start_page) 
-               BUG(); 
-#endif
+       BUG_ON(PageHighMem(page));
        address = (unsigned long)page_address(page);
 
        kpte = lookup_address(address);
@@ -117,27 +137,40 @@ __change_page_attr(struct page *page, pgprot_t prot)
        kpte_page = virt_to_page(kpte);
        if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
                if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
-                       pte_t old = *kpte;
-                       pte_t standard = mk_pte(page, PAGE_KERNEL); 
                        set_pte_atomic(kpte, mk_pte(page, prot)); 
-                       if (pte_same(old,standard))
-                               get_page(kpte_page);
                } else {
-                       struct page *split = split_large_page(address, prot); 
+                       pgprot_t ref_prot;
+                       struct page *split;
+
+                       ref_prot =
+                       ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
+                               ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
+                       split = split_large_page(address, prot, ref_prot);
                        if (!split)
                                return -ENOMEM;
-                       get_page(kpte_page);
-                       set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
-               }       
+                       set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
+                       kpte_page = split;
+               }
+               page_private(kpte_page)++;
        } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
                set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-               __put_page(kpte_page);
+               BUG_ON(page_private(kpte_page) == 0);
+               page_private(kpte_page)--;
+       } else
+               BUG();
+
+       /*
+        * If the pte was reserved, it means it was created at boot
+        * time (not via split_large_page) and in turn we must not
+        * replace it with a largepage.
+        */
+       if (!PageReserved(kpte_page)) {
+               if (cpu_has_pse && (page_private(kpte_page) == 0)) {
+                       ClearPagePrivate(kpte_page);
+                       list_add(&kpte_page->lru, &df_list);
+                       revert_page(kpte_page, address);
+               }
        }
-
-       if (cpu_has_pse && (page_count(kpte_page) == 1)) {
-               list_add(&kpte_page->lru, &df_list);
-               revert_page(kpte_page, address);
-       } 
        return 0;
 } 
 
@@ -178,7 +211,7 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 void global_flush_tlb(void)
 { 
        LIST_HEAD(l);
-       struct list_head* n;
+       struct page *pg, *next;
 
        BUG_ON(irqs_disabled());
 
@@ -186,12 +219,8 @@ void global_flush_tlb(void)
        list_splice_init(&df_list, &l);
        spin_unlock_irq(&cpa_lock);
        flush_map();
-       n = l.next;
-       while (n != &l) {
-               struct page *pg = list_entry(n, struct page, lru);
-               n = n->next;
+       list_for_each_entry_safe(pg, next, &l, lru)
                __free_page(pg);
-       }
 } 
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
@@ -199,6 +228,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 {
        if (PageHighMem(page))
                return;
+       if (!enable)
+               mutex_debug_check_no_locks_freed(page_address(page),
+                                                numpages * PAGE_SIZE);
+
        /* the return value is ignored - the calls cannot fail,
         * large pages are disabled at boot time.
         */
@@ -208,7 +241,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
         */
        __flush_tlb_all();
 }
-EXPORT_SYMBOL(kernel_map_pages);
 #endif
 
 EXPORT_SYMBOL(change_page_attr);