X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fmm%2Fhugetlbpage.c;h=b51bb28c054bbb5e14007af0d9efee7be0806a3a;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=266b8b2ceac947071d225e4de981d2229d499fc2;hpb=f7ed79d23a47594e7834d66a8f14449796d4f3e6;p=linux-2.6.git

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 266b8b2ce..b51bb28c0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,66 +30,13 @@
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
-#ifdef CONFIG_PPC_64K_PAGES
-#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
-#else
-#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
-#endif
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
-
-#define HUGEPD_SHIFT	(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
-#define HUGEPD_SIZE	(1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK	(~(HUGEPD_SIZE-1))
-
-#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
-
-/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
-#define HUGEPD_OK	0x1
-
-typedef struct { unsigned long pd; } hugepd_t;
-
-#define hugepd_none(hpd)	((hpd).pd == 0)
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
-	BUG_ON(!(hpd.pd & HUGEPD_OK));
-	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
-{
-	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
-	pte_t *dir = hugepd_page(*hpdp);
-
-	return dir + idx;
-}
-
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address)
-{
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
-				      GFP_KERNEL|__GFP_REPEAT);
-
-	if (! new)
-		return -ENOMEM;
-
-	spin_lock(&mm->page_table_lock);
-	if (!hugepd_none(*hpdp))
-		kmem_cache_free(huge_pgtable_cache, new);
-	else
-		hpdp->pd = (unsigned long)new | HUGEPD_OK;
-	spin_unlock(&mm->page_table_lock);
-	return 0;
-}
-
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -99,14 +46,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-#ifdef CONFIG_PPC_64K_PAGES
-			pmd_t *pm;
 			pm = pmd_offset(pu, addr);
-			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr);
-#else
-			return hugepte_offset((hugepd_t *)pu, addr);
-#endif
+#ifdef CONFIG_PPC_64K_PAGES
+			/* Currently, we use the normal PTE offset within full
+			 * size PTE pages, thus our huge PTEs are scattered in
+			 * the PTE page and we do waste some. We may change
+			 * that in the future, but the current mechanism keeps
+			 * things much simpler
+			 */
+			if (!pmd_none(*pm)) {
+				/* Note: pte_offset_* are all equivalent on
+				 * ppc64 as we don't have HIGHMEM
+				 */
+				pt = pte_offset_kernel(pm, addr);
+				return pt;
+			}
+#else /* CONFIG_PPC_64K_PAGES */
+			/* On 4k pages, we put huge PTEs in the PMD page */
+			pt = (pte_t *)pm;
+			return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
 		}
 	}
 
@@ -117,7 +76,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	hugepd_t *hpdp = NULL;
+	pmd_t *pm;
+	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -127,182 +87,23 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
-#ifdef CONFIG_PPC_64K_PAGES
-		pmd_t *pm;
 		pm = pmd_alloc(mm, pu, addr);
-		if (pm)
-			hpdp = (hugepd_t *)pm;
-#else
-		hpdp = (hugepd_t *)pu;
-#endif
-	}
-
-	if (! hpdp)
-		return NULL;
-
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
-		return NULL;
-
-	return hugepte_offset(hpdp, addr);
-}
-
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
-{
-	pte_t *hugepte = hugepd_page(*hpdp);
-
-	hpdp->pd = 0;
-	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
-						 HUGEPTE_TABLE_SIZE-1));
-}
-
-#ifdef CONFIG_PPC_64K_PAGES
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
-				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
-{
-	pmd_t *pmd;
-	unsigned long next;
-	unsigned long start;
-
-	start = addr;
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(*pmd))
-			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd);
-	} while (pmd++, addr = next, addr != end);
-
-	start &= PUD_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PUD_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		return;
-
-	pmd = pmd_offset(pud, start);
-	pud_clear(pud);
-	pmd_free_tlb(tlb, pmd);
-}
-#endif
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
-				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
-{
-	pud_t *pud;
-	unsigned long next;
-	unsigned long start;
-
-	start = addr;
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
+		if (pm) {
 #ifdef CONFIG_PPC_64K_PAGES
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
-#else
-		if (pud_none(*pud))
-			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pud);
-#endif
-	} while (pud++, addr = next, addr != end);
-
-	start &= PGDIR_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PGDIR_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		return;
-
-	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud);
-}
-
-/*
- * This function frees user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
-			    unsigned long addr, unsigned long end,
-			    unsigned long floor, unsigned long ceiling)
-{
-	pgd_t *pgd;
-	unsigned long next;
-	unsigned long start;
-
-	/*
-	 * Comments below take from the normal free_pgd_range().  They
-	 * apply here too.  The tests against HUGEPD_MASK below are
-	 * essential, because we *don't* test for this at the bottom
-	 * level.  Without them we'll attempt to free a hugepte table
-	 * when we unmap just part of it, even if there are other
-	 * active mappings using it.
-	 *
-	 * The next few lines have given us lots of grief...
-	 *
-	 * Why are we testing HUGEPD* at this top level?  Because
-	 * often there will be no work to do at all, and we'd prefer
-	 * not to go all the way down to the bottom just to discover
-	 * that.
-	 *
-	 * Why all these "- 1"s?  Because 0 represents both the bottom
-	 * of the address space and the top of it (using -1 for the
-	 * top wouldn't help much: the masks would do the wrong thing).
-	 * The rule is that addr 0 and floor 0 refer to the bottom of
-	 * the address space, but end 0 and ceiling 0 refer to the top
-	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
-	 * that end 0 case should be mythical).
-	 *
-	 * Wherever addr is brought up or ceiling brought down, we
-	 * must be careful to reject "the opposite 0" before it
-	 * confuses the subsequent tests.  But what about where end is
-	 * brought down by HUGEPD_SIZE below?  no, end can't go down to
-	 * 0 there.
-	 *
-	 * Whereas we round start (addr) and ceiling down, by different
-	 * masks at different levels, in order to test whether a table
-	 * now has no other vmas using it, so can be freed, we don't
-	 * bother to round floor or end up - the tests don't need that.
-	 */
-
-	addr &= HUGEPD_MASK;
-	if (addr < floor) {
-		addr += HUGEPD_SIZE;
-		if (!addr)
-			return;
-	}
-	if (ceiling) {
-		ceiling &= HUGEPD_MASK;
-		if (!ceiling)
-			return;
+			/* See comment in huge_pte_offset. Note that if we ever
+			 * want to put the page size in the PMD, we would have
+			 * to open code our own pte_alloc* function in order
+			 * to populate and set the size atomically
+			 */
+			pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
+			pt = (pte_t *)pm;
+#endif /* CONFIG_PPC_64K_PAGES */
+			return pt;
+		}
 	}
-	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE;
-	if (addr > end - 1)
-		return;
 
-	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
-	do {
-		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
-	} while (pgd++, addr = next, addr != end);
+
+	return NULL;
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -332,6 +133,21 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return __pte(old);
 }
 
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	if (! (within_hugepage_low_range(addr, len)
+	       || within_hugepage_high_range(addr, len)) )
+		return -EINVAL;
+	return 0;
+}
+
 struct slb_flush_info {
 	struct mm_struct *mm;
 	u16 newareas;
@@ -1040,27 +856,3 @@ repeat:
  out:
 	return err;
 }
-
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
-{
-	memset(addr, 0, kmem_cache_size(cache));
-}
-
-static int __init hugetlbpage_init(void)
-{
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
-		return -ENODEV;
-
-	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-					       HUGEPTE_TABLE_SIZE,
-					       HUGEPTE_TABLE_SIZE,
-					       SLAB_HWCACHE_ALIGN |
-					       SLAB_MUST_HWCACHE_ALIGN,
-					       zero_ctor, NULL);
-	if (! huge_pgtable_cache)
-		panic("hugetlbpage_init(): could not create hugepte cache\n");
-
-	return 0;
-}
-
-module_init(hugetlbpage_init);
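Note on the is_aligned_hugepage_range() helper added in the hunk at +133 above: both tests use the usual power-of-two mask idiom. HPAGE_MASK clears the offset bits within a huge page, so "x & ~HPAGE_MASK" isolates exactly those offset bits, and a non-zero result means x is not huge-page aligned. Below is a minimal userspace sketch of just that idiom, for illustration only: it assumes the common ppc64 16MB huge page (HPAGE_SHIFT == 24), the function name hpage_aligned() is hypothetical, and the kernel-only range checks (within_hugepage_low_range/within_hugepage_high_range) are omitted.

#include <stdio.h>

/* Assumed geometry: 16MB huge pages, the usual size on ppc64
 * (cf. the CPU_FTR_16M_PAGE test removed at the end of the patch). */
#define HPAGE_SHIFT	24
#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
#define HPAGE_MASK	(~(HPAGE_SIZE - 1))

/* Hypothetical stand-in for the mask tests in is_aligned_hugepage_range():
 * a value is huge-page aligned iff its low HPAGE_SHIFT bits are all zero. */
static int hpage_aligned(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)		/* len not a multiple of 16MB */
		return 0;
	if (addr & ~HPAGE_MASK)		/* addr not on a 16MB boundary */
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", hpage_aligned(0x10000000UL, 0x1000000UL));	/* 1: both aligned */
	printf("%d\n", hpage_aligned(0x10001000UL, 0x1000000UL));	/* 0: addr off by 4KB */
	return 0;
}

The kernel version returns -EINVAL rather than 0 on failure, and additionally requires the range to fall entirely within the low or high hugepage areas of the address space, a per-mm context check with no userspace equivalent.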