X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fpowerpc%2Fmm%2Fhugetlbpage.c;h=b51bb28c054bbb5e14007af0d9efee7be0806a3a;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=266b8b2ceac947071d225e4de981d2229d499fc2;hpb=f7ed79d23a47594e7834d66a8f14449796d4f3e6;p=linux-2.6.git

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 266b8b2ce..b51bb28c0 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,66 +30,13 @@
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
-#ifdef CONFIG_PPC_64K_PAGES
-#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
-#else
-#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
-#endif
-#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
-#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
-
-#define HUGEPD_SHIFT	(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
-#define HUGEPD_SIZE	(1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK	(~(HUGEPD_SIZE-1))
-
-#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
-
-/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
-#define HUGEPD_OK	0x1
-
-typedef struct { unsigned long pd; } hugepd_t;
-
-#define hugepd_none(hpd)	((hpd).pd == 0)
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
-	BUG_ON(!(hpd.pd & HUGEPD_OK));
-	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
-{
-	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
-	pte_t *dir = hugepd_page(*hpdp);
-
-	return dir + idx;
-}
-
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
-			   unsigned long address)
-{
-	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
-				      GFP_KERNEL|__GFP_REPEAT);
-
-	if (! new)
-		return -ENOMEM;
-
-	spin_lock(&mm->page_table_lock);
-	if (!hugepd_none(*hpdp))
-		kmem_cache_free(huge_pgtable_cache, new);
-	else
-		hpdp->pd = (unsigned long)new | HUGEPD_OK;
-	spin_unlock(&mm->page_table_lock);
-	return 0;
-}
-
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
+	pmd_t *pm;
+	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -99,14 +46,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-#ifdef CONFIG_PPC_64K_PAGES
-			pmd_t *pm;
 			pm = pmd_offset(pu, addr);
-			if (!pmd_none(*pm))
-				return hugepte_offset((hugepd_t *)pm, addr);
-#else
-			return hugepte_offset((hugepd_t *)pu, addr);
-#endif
+#ifdef CONFIG_PPC_64K_PAGES
+			/* Currently, we use the normal PTE offset within full
+			 * size PTE pages, thus our huge PTEs are scattered in
+			 * the PTE page and we do waste some. We may change
+			 * that in the future, but the current mechanism keeps
+			 * things much simpler
+			 */
+			if (!pmd_none(*pm)) {
+				/* Note: pte_offset_* are all equivalent on
+				 * ppc64 as we don't have HIGHMEM
+				 */
+				pt = pte_offset_kernel(pm, addr);
+				return pt;
+			}
+#else /* CONFIG_PPC_64K_PAGES */
+			/* On 4k pages, we put huge PTEs in the PMD page */
+			pt = (pte_t *)pm;
+			return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
 		}
 	}
 
@@ -117,7 +76,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	hugepd_t *hpdp = NULL;
+	pmd_t *pm;
+	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -127,182 +87,23 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
-#ifdef CONFIG_PPC_64K_PAGES
-		pmd_t *pm;
 		pm = pmd_alloc(mm, pu, addr);
-		if (pm)
-			hpdp = (hugepd_t *)pm;
-#else
-		hpdp = (hugepd_t *)pu;
-#endif
-	}
-
-	if (! hpdp)
-		return NULL;
-
-	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
-		return NULL;
-
-	return hugepte_offset(hpdp, addr);
-}
-
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
-{
-	pte_t *hugepte = hugepd_page(*hpdp);
-
-	hpdp->pd = 0;
-	tlb->need_flush = 1;
-	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
-						 HUGEPTE_TABLE_SIZE-1));
-}
-
-#ifdef CONFIG_PPC_64K_PAGES
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
-				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
-{
-	pmd_t *pmd;
-	unsigned long next;
-	unsigned long start;
-
-	start = addr;
-	pmd = pmd_offset(pud, addr);
-	do {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none(*pmd))
-			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pmd);
-	} while (pmd++, addr = next, addr != end);
-
-	start &= PUD_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PUD_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		return;
-
-	pmd = pmd_offset(pud, start);
-	pud_clear(pud);
-	pmd_free_tlb(tlb, pmd);
-}
-#endif
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
-				   unsigned long addr, unsigned long end,
-				   unsigned long floor, unsigned long ceiling)
-{
-	pud_t *pud;
-	unsigned long next;
-	unsigned long start;
-
-	start = addr;
-	pud = pud_offset(pgd, addr);
-	do {
-		next = pud_addr_end(addr, end);
+		if (pm) {
 #ifdef CONFIG_PPC_64K_PAGES
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
-#else
-		if (pud_none(*pud))
-			continue;
-		free_hugepte_range(tlb, (hugepd_t *)pud);
-#endif
-	} while (pud++, addr = next, addr != end);
-
-	start &= PGDIR_MASK;
-	if (start < floor)
-		return;
-	if (ceiling) {
-		ceiling &= PGDIR_MASK;
-		if (!ceiling)
-			return;
-	}
-	if (end - 1 > ceiling - 1)
-		return;
-
-	pud = pud_offset(pgd, start);
-	pgd_clear(pgd);
-	pud_free_tlb(tlb, pud);
-}
-
-/*
- * This function frees user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
-			    unsigned long addr, unsigned long end,
-			    unsigned long floor, unsigned long ceiling)
-{
-	pgd_t *pgd;
-	unsigned long next;
-	unsigned long start;
-
-	/*
-	 * Comments below take from the normal free_pgd_range().  They
-	 * apply here too.  The tests against HUGEPD_MASK below are
-	 * essential, because we *don't* test for this at the bottom
-	 * level.  Without them we'll attempt to free a hugepte table
-	 * when we unmap just part of it, even if there are other
-	 * active mappings using it.
-	 *
-	 * The next few lines have given us lots of grief...
-	 *
-	 * Why are we testing HUGEPD* at this top level?  Because
-	 * often there will be no work to do at all, and we'd prefer
-	 * not to go all the way down to the bottom just to discover
-	 * that.
-	 *
-	 * Why all these "- 1"s?  Because 0 represents both the bottom
-	 * of the address space and the top of it (using -1 for the
-	 * top wouldn't help much: the masks would do the wrong thing).
-	 * The rule is that addr 0 and floor 0 refer to the bottom of
-	 * the address space, but end 0 and ceiling 0 refer to the top
-	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
-	 * that end 0 case should be mythical).
-	 *
-	 * Wherever addr is brought up or ceiling brought down, we
-	 * must be careful to reject "the opposite 0" before it
-	 * confuses the subsequent tests.  But what about where end is
-	 * brought down by HUGEPD_SIZE below?  no, end can't go down to
-	 * 0 there.
-	 *
-	 * Whereas we round start (addr) and ceiling down, by different
-	 * masks at different levels, in order to test whether a table
-	 * now has no other vmas using it, so can be freed, we don't
-	 * bother to round floor or end up - the tests don't need that.
-	 */
-
-	addr &= HUGEPD_MASK;
-	if (addr < floor) {
-		addr += HUGEPD_SIZE;
-		if (!addr)
-			return;
-	}
-	if (ceiling) {
-		ceiling &= HUGEPD_MASK;
-		if (!ceiling)
-			return;
+			/* See comment in huge_pte_offset. Note that if we ever
+			 * want to put the page size in the PMD, we would have
+			 * to open code our own pte_alloc* function in order
+			 * to populate and set the size atomically
+			 */
+			pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
+			pt = (pte_t *)pm;
+#endif /* CONFIG_PPC_64K_PAGES */
+			return pt;
+		}
 	}
-	if (end - 1 > ceiling - 1)
-		end -= HUGEPD_SIZE;
-	if (addr > end - 1)
-		return;
 
-	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
-	do {
-		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
-	} while (pgd++, addr = next, addr != end);
+
+	return NULL;
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -332,6 +133,21 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return __pte(old);
 }
 
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	if (! (within_hugepage_low_range(addr, len)
+	       || within_hugepage_high_range(addr, len)) )
+		return -EINVAL;
+	return 0;
+}
+
 struct slb_flush_info {
 	struct mm_struct *mm;
 	u16 newareas;
@@ -1040,27 +856,3 @@ repeat:
  out:
 	return err;
 }
-
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
-{
-	memset(addr, 0, kmem_cache_size(cache));
-}
-
-static int __init hugetlbpage_init(void)
-{
-	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
-		return -ENODEV;
-
-	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
-					       HUGEPTE_TABLE_SIZE,
-					       HUGEPTE_TABLE_SIZE,
-					       SLAB_HWCACHE_ALIGN |
-					       SLAB_MUST_HWCACHE_ALIGN,
-					       zero_ctor, NULL);
-	if (! huge_pgtable_cache)
-		panic("hugetlbpage_init(): could not create hugepte cache\n");
-
-	return 0;
-}
-
-module_init(hugetlbpage_init);
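Note on the is_aligned_hugepage_range() helper added in the hunk at +133 above: both tests use the usual power-of-two mask idiom. HPAGE_MASK clears the offset bits within a huge page, so "x & ~HPAGE_MASK" isolates exactly those offset bits, and a non-zero result means x is not huge-page aligned. Below is a minimal userspace sketch of just that idiom, for illustration only: it assumes the common ppc64 16MB huge page (HPAGE_SHIFT == 24), the function name hpage_aligned() is hypothetical, and the kernel-only range checks (within_hugepage_low_range/within_hugepage_high_range) are omitted.

#include <stdio.h>

/* Assumed geometry: 16MB huge pages, the usual size on ppc64
 * (cf. the CPU_FTR_16M_PAGE test removed at the end of the patch). */
#define HPAGE_SHIFT	24
#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
#define HPAGE_MASK	(~(HPAGE_SIZE - 1))

/* Hypothetical stand-in for the mask tests in is_aligned_hugepage_range():
 * a value is huge-page aligned iff its low HPAGE_SHIFT bits are all zero. */
static int hpage_aligned(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)		/* len not a multiple of 16MB */
		return 0;
	if (addr & ~HPAGE_MASK)		/* addr not on a 16MB boundary */
		return 0;
	return 1;
}

int main(void)
{
	printf("%d\n", hpage_aligned(0x10000000UL, 0x1000000UL));	/* 1: both aligned */
	printf("%d\n", hpage_aligned(0x10001000UL, 0x1000000UL));	/* 0: addr off by 4KB */
	return 0;
}

The kernel version returns -EINVAL rather than 0 on failure, and additionally requires the range to fall entirely within the low or high hugepage areas of the address space, a per-mm context check with no userspace equivalent.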