#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
+#include <linux/bitops.h>
#include <asm/a.out.h>
-#include <asm/bitops.h>
#include <asm/dma.h>
#include <asm/ia32.h>
#include <asm/io.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+DEFINE_PER_CPU(long, __pgtable_quicklist_size);
+
extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
EXPORT_SYMBOL(vmem_map);
#endif
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr; /* map entry for zero page */
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);
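+
+/*
+ * Per-cpu page table quicklist tuning: keep at least MIN_PGT_PAGES pages
+ * cached, never let the cache grow beyond 1/PGT_FRACTION_OF_NODE_MEM of
+ * the node's free memory, and free at most MAX_PGT_FREES_PER_PASS pages
+ * per pass through check_pgt_cache().
+ */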
+#define MIN_PGT_PAGES 25UL
+#define MAX_PGT_FREES_PER_PASS 16L
+#define PGT_FRACTION_OF_NODE_MEM 16
+
+static inline long
+max_pgt_pages(void)
+{
+ u64 node_free_pages, max_pgt_pages;
+
+#ifndef CONFIG_NUMA
+ node_free_pages = nr_free_pages();
+#else
+ node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#endif
+ max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
+ max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
+ return max_pgt_pages;
+}
+
+static inline long
+min_pages_to_free(void)
+{
+ long pages_to_free;
+
+ pages_to_free = pgtable_quicklist_size - max_pgt_pages();
+ pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
+ return pages_to_free;
+}
+
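+/*
+ * Trim the per-cpu page table quicklist back below max_pgt_pages().
+ * Preemption is disabled while the per-cpu list is manipulated and is
+ * briefly re-enabled between passes so that trimming does not keep
+ * preemption off for too long.
+ */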
void
-check_pgt_cache (void)
+check_pgt_cache(void)
{
- int low, high;
-
- low = pgt_cache_water[0];
- high = pgt_cache_water[1];
-
- if (pgtable_cache_size > (u64) high) {
- do {
- if (pgd_quicklist)
- free_page((unsigned long)pgd_alloc_one_fast(0));
- if (pmd_quicklist)
- free_page((unsigned long)pmd_alloc_one_fast(0, 0));
- } while (pgtable_cache_size > (u64) low);
+ long pages_to_free;
+
+ if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+ return;
+
+ preempt_disable();
+ while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
+ while (pages_to_free--) {
+ free_page((unsigned long)pgtable_quicklist_alloc());
+ }
+ preempt_enable();
+ preempt_disable();
}
+ preempt_enable();
}
void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+lazy_mmu_prot_update (pte_t pte)
{
unsigned long addr;
struct page *page;
return; /* not an executable page... */
page = pte_page(pte);
- /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
addr = (unsigned long) page_address(page);
if (test_bit(PG_arch_1, &page->flags))
inline void
ia64_set_rbs_bot (void)
{
- unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16;
+ unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
if (stack_size > MAX_USER_STACK_SIZE)
stack_size = MAX_USER_STACK_SIZE;
*/
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma) {
+ memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
vma->vm_end = vma->vm_start + PAGE_SIZE;
vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
- vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP;
- vma->vm_ops = NULL;
- vma->vm_pgoff = 0;
- vma->vm_file = NULL;
- vma->vm_private_data = NULL;
- insert_vm_struct(current->mm, vma);
+ vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
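+ /* insert_vm_struct() can fail; undo the allocation if it does. */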
+ down_write(&current->mm->mmap_sem);
+ if (insert_vm_struct(current->mm, vma)) {
+ up_write(&current->mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return;
+ }
+ up_write(&current->mm->mmap_sem);
}
/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
vma->vm_end = PAGE_SIZE;
vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
- insert_vm_struct(current->mm, vma);
+ down_write(&current->mm->mmap_sem);
+ if (insert_vm_struct(current->mm, vma)) {
+ up_write(&current->mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return;
+ }
+ up_write(&current->mm->mmap_sem);
}
}
}
{
struct page *page;
/*
- * EFI uses 4KB pages while the kernel can use 4KB or bigger.
+ * EFI uses 4KB pages while the kernel can use 4KB or bigger.
* Thus EFI and the kernel may have different page sizes. It is
* therefore possible to have the initrd share the same page as
* the end of the kernel (given current setup).
}
/*
- * This is like put_dirty_page() but installs a clean page in the kernel's page table.
+ * This installs a clean page in the kernel's page table.
*/
struct page *
put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
spin_lock(&init_mm.page_table_lock);
{
- pmd = pmd_alloc(&init_mm, pgd, address);
+ pud = pud_alloc(&init_mm, pgd, address);
+ if (!pud)
+ goto out;
+
+ pmd = pmd_alloc(&init_mm, pud, address);
if (!pmd)
goto out;
pte = pte_alloc_map(&init_mm, pmd, address);
struct page *page;
/*
- * Map the gate page twice: once read-only to export the ELF headers etc. and once
- * execute-only page to enable privilege-promotion via "epc":
+ * Map the gate page twice: once read-only to export the ELF
+ * headers etc. and once execute-only page to enable
+ * privilege-promotion via "epc":
*/
page = virt_to_page(ia64_imva(__start_gate_section));
put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
#else
put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
+ /* Fill in the holes (if any) with read-only zero pages: */
+ {
+ unsigned long addr;
+
+ for (addr = GATE_ADDR + PAGE_SIZE;
+ addr < GATE_ADDR + PERCPU_PAGE_SIZE;
+ addr += PAGE_SIZE)
+ {
+ put_kernel_page(ZERO_PAGE(0), addr,
+ PAGE_READONLY);
+ put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
+ PAGE_READONLY);
+ }
+ }
#endif
ia64_patch_gate();
}
-void __init
+void __devinit
ia64_mmu_init (void *my_cpu_data)
{
unsigned long psr, pta, impl_va_bits;
- extern void __init tlb_init (void);
- int cpu;
+ extern void __devinit tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
# define VHPT_ENABLE_BIT 0
#ifdef CONFIG_HUGETLB_PAGE
ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
+ ia64_srlz_d();
#endif
-
- cpu = smp_processor_id();
-
- /* mca handler uses cr.lid as key to pick the right entry */
- ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
-
- /* insert this percpu data information into our list for MCA recovery purposes */
- ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
- /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */
- ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
- ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
- ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
- ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
- ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
}
#ifdef CONFIG_VIRTUAL_MEM_MAP
struct page *map_start, *map_end;
int node;
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset_k(address);
if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
- pmd = pmd_offset(pgd, address);
+ pud = pud_offset(pgd, address);
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
struct page *map_start, *map_end;
args = (struct memmap_init_callback_data *) arg;
-
map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
/ sizeof(struct page));
if (map_start < map_end)
- memmap_init_zone(map_start, (unsigned long) (map_end - map_start),
+ memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start));
return 0;
}
void
-memmap_init (struct page *start, unsigned long size, int nid,
- unsigned long zone, unsigned long start_pfn)
+memmap_init (unsigned long size, int nid, unsigned long zone,
+ unsigned long start_pfn)
{
if (!vmem_map)
- memmap_init_zone(start, size, nid, zone, start_pfn);
+ memmap_init_zone(size, nid, zone, start_pfn);
else {
+ struct page *start;
struct memmap_init_callback_data args;
+ start = pfn_to_page(start_pfn);
args.start = start;
args.end = start + size;
args.nid = nid;
char byte;
struct page *pg = pfn_to_page(pfn);
- return (__get_user(byte, (char *) pg) == 0)
+ return (__get_user(byte, (char __user *) pg) == 0)
&& ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
- || (__get_user(byte, (char *) (pg + 1) - 1) == 0));
+ || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
}
EXPORT_SYMBOL(ia64_pfn_valid);
mem_init (void)
{
long reserved_pages, codesize, datasize, initsize;
- unsigned long num_pgt_pages;
pg_data_t *pgdat;
int i;
static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
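+ /* Sanity check: each page table level must occupy exactly one page. */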
+ BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+ BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+ BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
#ifdef CONFIG_PCI
/*
* This needs to be called _after_ the command line has been parsed but _before_
num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
- /*
- * Allow for enough (cached) page table pages so that we can map the entire memory
- * at least once. Each task also needs a couple of page tables pages, so add in a
- * fudge factor for that (don't use "threads-max" here; that would be wrong!).
- * Don't allow the cache to be more than 10% of total memory, though.
- */
-# define NUM_TASKS 500 /* typical number of tasks */
- num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
- if (num_pgt_pages > nr_free_pages() / 10)
- num_pgt_pages = nr_free_pages() / 10;
- if (num_pgt_pages > (u64) pgt_cache_water[1])
- pgt_cache_water[1] = num_pgt_pages;
/*
 * For fsyscall entrypoints with no light-weight handler, use the ordinary
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
}
- setup_gate(); /* setup gate pages before we free up boot memory... */
+ setup_gate();
#ifdef CONFIG_IA32_SUPPORT
- ia32_boot_gdt_init();
+ ia32_mem_init();
#endif
}