#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
-#ifdef CONFIG_PPC_64K_PAGES
-#define HUGEPTE_INDEX_SIZE (PMD_SHIFT-HPAGE_SHIFT)
-#else
-#define HUGEPTE_INDEX_SIZE (PUD_SHIFT-HPAGE_SHIFT)
-#endif
-#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE)
-#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
-
-#define HUGEPD_SHIFT (HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
-#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT)
-#define HUGEPD_MASK (~(HUGEPD_SIZE-1))
-
-#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM])
-
-/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad()
- * will choke on pointers to hugepte tables, which is handy for
- * catching screwups early. */
-#define HUGEPD_OK 0x1
-
-typedef struct { unsigned long pd; } hugepd_t;
-
-#define hugepd_none(hpd) ((hpd).pd == 0)
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!(hpd.pd & HUGEPD_OK));
- return (pte_t *)(hpd.pd & ~HUGEPD_OK);
-}
-
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
-{
- unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
- pte_t *dir = hugepd_page(*hpdp);
-
- return dir + idx;
-}
-
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
- unsigned long address)
-{
- pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
- GFP_KERNEL|__GFP_REPEAT);
-
- if (! new)
- return -ENOMEM;
-
- spin_lock(&mm->page_table_lock);
- if (!hugepd_none(*hpdp))
- kmem_cache_free(huge_pgtable_cache, new);
- else
- hpdp->pd = (unsigned long)new | HUGEPD_OK;
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-
/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pg;
pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt;
BUG_ON(! in_hugepage_area(mm->context, addr));
if (!pgd_none(*pg)) {
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
-#ifdef CONFIG_PPC_64K_PAGES
- pmd_t *pm;
pm = pmd_offset(pu, addr);
- if (!pmd_none(*pm))
- return hugepte_offset((hugepd_t *)pm, addr);
-#else
- return hugepte_offset((hugepd_t *)pu, addr);
-#endif
+#ifdef CONFIG_PPC_64K_PAGES
+ /* Currently, we use the normal PTE offset within full
+ * size PTE pages, thus our huge PTEs are scattered in
+ * the PTE page and we do waste some. We may change
+ * that in the future, but the current mecanism keeps
+ * things much simpler
+ */
+ if (!pmd_none(*pm)) {
+ /* Note: pte_offset_* are all equivalent on
+ * ppc64 as we don't have HIGHMEM
+ */
+ pt = pte_offset_kernel(pm, addr);
+ return pt;
+ }
+#else /* CONFIG_PPC_64K_PAGES */
+ /* On 4k pages, we put huge PTEs in the PMD page */
+ pt = (pte_t *)pm;
+ return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
}
}
{
pgd_t *pg;
pud_t *pu;
- hugepd_t *hpdp = NULL;
+ pmd_t *pm;
+ pte_t *pt;
BUG_ON(! in_hugepage_area(mm->context, addr));
pu = pud_alloc(mm, pg, addr);
if (pu) {
-#ifdef CONFIG_PPC_64K_PAGES
- pmd_t *pm;
pm = pmd_alloc(mm, pu, addr);
- if (pm)
- hpdp = (hugepd_t *)pm;
-#else
- hpdp = (hugepd_t *)pu;
-#endif
- }
-
- if (! hpdp)
- return NULL;
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
- return NULL;
-
- return hugepte_offset(hpdp, addr);
-}
-
-static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
-{
- pte_t *hugepte = hugepd_page(*hpdp);
-
- hpdp->pd = 0;
- tlb->need_flush = 1;
- pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
- PGF_CACHENUM_MASK));
-}
-
-#ifdef CONFIG_PPC_64K_PAGES
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd))
- continue;
- free_hugepte_range(tlb, (hugepd_t *)pmd);
- } while (pmd++, addr = next, addr != end);
-
- start &= PUD_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PUD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pmd = pmd_offset(pud, start);
- pud_clear(pud);
- pmd_free_tlb(tlb, pmd);
-}
-#endif
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
+ if (pm) {
#ifdef CONFIG_PPC_64K_PAGES
- if (pud_none_or_clear_bad(pud))
- continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
-#else
- if (pud_none(*pud))
- continue;
- free_hugepte_range(tlb, (hugepd_t *)pud);
-#endif
- } while (pud++, addr = next, addr != end);
-
- start &= PGDIR_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PGDIR_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pud = pud_offset(pgd, start);
- pgd_clear(pgd);
- pud_free_tlb(tlb, pud);
-}
-
-/*
- * This function frees user-level page tables of a process.
- *
- * Must be called with pagetable lock held.
- */
-void hugetlb_free_pgd_range(struct mmu_gather **tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pgd_t *pgd;
- unsigned long next;
- unsigned long start;
-
- /*
- * Comments below take from the normal free_pgd_range(). They
- * apply here too. The tests against HUGEPD_MASK below are
- * essential, because we *don't* test for this at the bottom
- * level. Without them we'll attempt to free a hugepte table
- * when we unmap just part of it, even if there are other
- * active mappings using it.
- *
- * The next few lines have given us lots of grief...
- *
- * Why are we testing HUGEPD* at this top level? Because
- * often there will be no work to do at all, and we'd prefer
- * not to go all the way down to the bottom just to discover
- * that.
- *
- * Why all these "- 1"s? Because 0 represents both the bottom
- * of the address space and the top of it (using -1 for the
- * top wouldn't help much: the masks would do the wrong thing).
- * The rule is that addr 0 and floor 0 refer to the bottom of
- * the address space, but end 0 and ceiling 0 refer to the top
- * Comparisons need to use "end - 1" and "ceiling - 1" (though
- * that end 0 case should be mythical).
- *
- * Wherever addr is brought up or ceiling brought down, we
- * must be careful to reject "the opposite 0" before it
- * confuses the subsequent tests. But what about where end is
- * brought down by HUGEPD_SIZE below? no, end can't go down to
- * 0 there.
- *
- * Whereas we round start (addr) and ceiling down, by different
- * masks at different levels, in order to test whether a table
- * now has no other vmas using it, so can be freed, we don't
- * bother to round floor or end up - the tests don't need that.
- */
-
- addr &= HUGEPD_MASK;
- if (addr < floor) {
- addr += HUGEPD_SIZE;
- if (!addr)
- return;
- }
- if (ceiling) {
- ceiling &= HUGEPD_MASK;
- if (!ceiling)
- return;
+ /* See comment in huge_pte_offset. Note that if we ever
+ * want to put the page size in the PMD, we would have
+ * to open code our own pte_alloc* function in order
+ * to populate and set the size atomically
+ */
+ pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
+ pt = (pte_t *)pm;
+#endif /* CONFIG_PPC_64K_PAGES */
+ return pt;
+ }
}
- if (end - 1 > ceiling - 1)
- end -= HUGEPD_SIZE;
- if (addr > end - 1)
- return;
- start = addr;
- pgd = pgd_offset((*tlb)->mm, addr);
- do {
- BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
- } while (pgd++, addr = next, addr != end);
+ return NULL;
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
return __pte(old);
}
+/*
+ * This function checks for proper alignment of input addr and len parameters.
+ */
+int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+{
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ if (addr & ~HPAGE_MASK)
+ return -EINVAL;
+ if (! (within_hugepage_low_range(addr, len)
+ || within_hugepage_high_range(addr, len)) )
+ return -EINVAL;
+ return 0;
+}
+
struct slb_flush_info {
struct mm_struct *mm;
u16 newareas;
out:
return err;
}
-
-static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
-{
- memset(addr, 0, kmem_cache_size(cache));
-}
-
-static int __init hugetlbpage_init(void)
-{
- if (!cpu_has_feature(CPU_FTR_16M_PAGE))
- return -ENODEV;
-
- huge_pgtable_cache = kmem_cache_create("hugepte_cache",
- HUGEPTE_TABLE_SIZE,
- HUGEPTE_TABLE_SIZE,
- SLAB_HWCACHE_ALIGN |
- SLAB_MUST_HWCACHE_ALIGN,
- zero_ctor, NULL);
- if (! huge_pgtable_cache)
- panic("hugetlbpage_init(): could not create hugepte cache\n");
-
- return 0;
-}
-
-module_init(hugetlbpage_init);