* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
+DEFINE_PER_CPU(long, __pgtable_quicklist_size);
+
extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
EXPORT_SYMBOL(vmem_map);
#endif
-static int pgt_cache_water[2] = { 25, 50 };
-
-struct page *zero_page_memmap_ptr; /* map entry for zero page */
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);
+#define MIN_PGT_PAGES 25UL
+#define MAX_PGT_FREES_PER_PASS 16L
+#define PGT_FRACTION_OF_NODE_MEM 16
+
+/*
+ * Upper bound on the number of pages the page-table quicklist may hold:
+ * the free-page count (nr_free_pages(), or nr_free_pages_pgdat() for the
+ * current node when CONFIG_NUMA is set) divided by
+ * PGT_FRACTION_OF_NODE_MEM, but never less than MIN_PGT_PAGES.
+ */
+static inline long
+max_pgt_pages(void)
+{
+	u64 free_pages, limit;
+
+#ifdef CONFIG_NUMA
+	free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
+#else
+	free_pages = nr_free_pages();
+#endif
+	limit = free_pages / PGT_FRACTION_OF_NODE_MEM;
+	if (limit < MIN_PGT_PAGES)
+		limit = MIN_PGT_PAGES;
+	return limit;
+}
+
+/*
+ * Number of quicklist pages to release in one pass: the amount by which
+ * pgtable_quicklist_size exceeds max_pgt_pages(), capped at
+ * MAX_PGT_FREES_PER_PASS.  The result is zero or negative when the
+ * quicklist is not over its limit.
+ */
+static inline long
+min_pages_to_free(void)
+{
+	long excess = pgtable_quicklist_size - max_pgt_pages();
+
+	return (excess < MAX_PGT_FREES_PER_PASS) ? excess : MAX_PGT_FREES_PER_PASS;
+}
+
+/*
+ * Trim the page-table quicklist: while min_pages_to_free() reports an
+ * excess, release that many pages with free_page().
+ */
void
-check_pgt_cache (void)
+check_pgt_cache(void)
{
- int low, high;
+	long batch;
- low = pgt_cache_water[0];
- high = pgt_cache_water[1];
+	/* Nothing to trim while the quicklist is at or below MIN_PGT_PAGES. */
+	if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
+		return;
preempt_disable();
- if (pgtable_cache_size > (u64) high) {
- do {
- if (pgd_quicklist)
- free_page((unsigned long)pgd_alloc_one_fast(NULL));
- if (pmd_quicklist)
- free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
- } while (pgtable_cache_size > (u64) low);
+	/*
+	 * Release the excess one batch at a time; preemption is re-enabled
+	 * and disabled again between batches.
+	 */
+	while (unlikely((batch = min_pages_to_free()) > 0)) {
+		for (; batch != 0; batch--)
+			free_page((unsigned long)pgtable_quicklist_alloc());
+		preempt_enable();
+		preempt_disable();
}
preempt_enable();
}
+/*
+ * For an executable pte whose page does not yet have PG_arch_1 set, flush
+ * the i-cache over the page's kernel address range and then set PG_arch_1.
+ * For a compound page the range covers (1 << order) pages, with the order
+ * read from page[1].lru.prev; otherwise it covers a single page.
+ */
void
-update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte)
+lazy_mmu_prot_update (pte_t pte)
{
unsigned long addr;
struct page *page;
+	unsigned long nbytes;
if (!pte_exec(pte))
return; /* not an executable page... */
page = pte_page(pte);
- /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */
addr = (unsigned long) page_address(page);
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
- flush_icache_range(addr, addr + PAGE_SIZE);
+	nbytes = PAGE_SIZE;
+	if (PageCompound(page))
+		nbytes = 1UL << (unsigned long) (page[1].lru.prev) << PAGE_SHIFT;
+	flush_icache_range(addr, addr + nbytes);
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
* the problem. When the process attempts to write to the register backing store
* for the first time, it will get a SEGFAULT in this case.
*/
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma) {
memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
vma->vm_end = vma->vm_start + PAGE_SIZE;
vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
- vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+ vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
down_write(¤t->mm->mmap_sem);
if (insert_vm_struct(current->mm, vma)) {
up_write(¤t->mm->mmap_sem);
/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
if (!(current->personality & MMAP_PAGE_ZERO)) {
- vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma) {
memset(vma, 0, sizeof(*vma));
vma->vm_mm = current->mm;
eaddr = (unsigned long) ia64_imva(__init_end);
while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
++totalram_pages;
addr += PAGE_SIZE;
(__init_end - __init_begin) >> 10);
}
-void
+void __init
free_initrd_mem (unsigned long start, unsigned long end)
{
struct page *page;
continue;
page = virt_to_page(start);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(start);
++totalram_pages;
}
}
-struct curr_mem_request {
- unsigned long requested;
- unsigned long min_physaddr;
- int found;
-};
-
-/*
- * Check whether a physical address fits within the memory descriptor
- * block sent from efi_mmap_walk(). If it fits, set found.
- */
-static int
-verify_physaddr (unsigned long start, unsigned long end, void *arg)
-{
- struct curr_mem_request *cr = arg;
-
- start = __pa(start);
- end = __pa(end);
-
- if ((cr->requested >= start) && (cr->requested + PAGE_SIZE) <= end) {
- cr->found = 1;
- return -1;
- }
-
- return 0;
-}
-
-/*
- * If physical page 'nr' is valid RAM then return 1. Otherwise return 0.
- */
-
-int
-page_is_ram (unsigned long pagenr)
-{
- struct curr_mem_request cr;
-
- if (!pfn_valid(pagenr))
- return 0;
-
- cr.requested = pagenr << PAGE_SHIFT;
- cr.found = 0;
-
- efi_memmap_walk(verify_physaddr, &cr);
-
- return cr.found;
-}
-EXPORT_SYMBOL_GPL(page_is_ram);
-
-static int
-find_next (unsigned long start, unsigned long end, void *arg)
-{
- struct curr_mem_request *cr = (struct curr_mem_request *)arg;
-
- start = __pa(start);
- end = __pa(end);
-
- if ((cr->requested >= start) && (cr->requested + PAGE_SIZE) <= end) {
- cr->min_physaddr = cr->requested;
- cr->found = 1;
- return -1;
- }
- if ((cr->requested < start) && (start + PAGE_SIZE) <= end)
- if (start < cr->min_physaddr) {
- cr->min_physaddr = start;
- cr->found = 1;
- }
-
- return 0;
-}
-
-unsigned long
-next_ram_page (unsigned long pagenr)
+/*
+ * Placeholder for the RAM check: returns 0 when pfn_valid() rejects
+ * 'pagenr' and 1 for every other page frame.  The EFI memory-map walk
+ * that would tell RAM from non-RAM is still missing (see FIXME), so a
+ * debug-level message is logged on each call that reaches the stub.
+ */
+int page_is_ram(unsigned long pagenr)
{
- struct curr_mem_request cr;
-
- pagenr++;
-
- cr.requested = pagenr << PAGE_SHIFT;
- cr.found = 0;
- cr.min_physaddr = ULONG_MAX;
-
- efi_memmap_walk(find_next, &cr);
-
- if (cr.found)
- return cr.min_physaddr >> PAGE_SHIFT;
- else
- return ULONG_MAX;
+	//FIXME: implement w/efi walk
+	/* A frame without a valid memory-map entry cannot be RAM. */
+	if (!pfn_valid(pagenr))
+		return 0;
+	printk(KERN_DEBUG "page is ram is called!!!!!\n");
+	return 1;
}
-EXPORT_SYMBOL_GPL(next_ram_page);
/*
* This installs a clean page in the kernel's page table.
*/
-struct page *
+static struct page * __init
put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
- spin_lock(&init_mm.page_table_lock);
{
- pmd = pmd_alloc(&init_mm, pgd, address);
+ pud = pud_alloc(&init_mm, pgd, address); /* get the pud, then pmd, then pte level for 'address' in init_mm */
+ if (!pud)
+ goto out;
+ pmd = pmd_alloc(&init_mm, pud, address);
if (!pmd)
goto out;
- pte = pte_alloc_map(&init_mm, pmd, address);
+ pte = pte_alloc_kernel(pmd, address);
if (!pte)
goto out;
- if (!pte_none(*pte)) {
- pte_unmap(pte);
+ if (!pte_none(*pte)) /* entry already in use: leave the existing mapping untouched */
goto out;
- }
set_pte(pte, mk_pte(page, pgprot));
- pte_unmap(pte);
}
- out: spin_unlock(&init_mm.page_table_lock);
+ out: /* reached on success and on every failure; 'page' is returned either way */
/* no need for flush_tlb */
return page;
}
-static void
+static void __init
setup_gate (void)
{
struct page *page;
/*
- * Map the gate page twice: once read-only to export the ELF headers etc. and once
- * execute-only page to enable privilege-promotion via "epc":
+ * Map the gate page twice: once read-only to export the ELF
+ * headers etc. and once execute-only page to enable
+ * privilege-promotion via "epc":
*/
page = virt_to_page(ia64_imva(__start_gate_section));
put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
#else
put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
+ /* Fill in the holes (if any) with read-only zero pages: */
+ {
+ unsigned long addr;
+
+ for (addr = GATE_ADDR + PAGE_SIZE;
+ addr < GATE_ADDR + PERCPU_PAGE_SIZE;
+ addr += PAGE_SIZE)
+ {
+ put_kernel_page(ZERO_PAGE(0), addr,
+ PAGE_READONLY);
+ put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
+ PAGE_READONLY);
+ }
+ }
#endif
ia64_patch_gate();
}
{
unsigned long psr, pta, impl_va_bits;
extern void __devinit tlb_init (void);
- int cpu;
#ifdef CONFIG_DISABLE_VHPT
# define VHPT_ENABLE_BIT 0
if (impl_va_bits < 51 || impl_va_bits > 61)
panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
+ /*
+ * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
+ * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
+ * the test makes sure that our mapped space doesn't overlap the
+ * unimplemented hole in the middle of the region.
+ */
+ if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
+ (mapped_space_bits > impl_va_bits - 1))
+ panic("Cannot build a big enough virtual-linear page table"
+ " to cover mapped address space.\n"
+ " Try using a smaller page size.\n");
+
/* place the VMLPT at the end of each page-table mapped region: */
pta = POW2(61) - POW2(vmlpt_bits);
- if (POW2(mapped_space_bits) >= pta)
- panic("mm/init: overlap between virtually mapped linear page table and "
- "mapped kernel space!");
/*
* Set the (virtually mapped linear) page table address. Bit
* 8 selects between the short and long format, bits 2-7 the
ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
ia64_srlz_d();
#endif
+}
- cpu = smp_processor_id();
+#ifdef CONFIG_VIRTUAL_MEM_MAP
+int vmemmap_find_next_valid_pfn(int node, int i) /* returns a pfn index relative to the node's node_start_pfn */
+{
+ unsigned long end_address, hole_next_pfn;
+ unsigned long stop_address;
+ pg_data_t *pgdat = NODE_DATA(node);
- /* mca handler uses cr.lid as key to pick the right entry */
- ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID);
+ end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
+ end_address = PAGE_ALIGN(end_address); /* start the scan on a page boundary of the vmem_map array */
- /* insert this percpu data information into our list for MCA recovery purposes */
- ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL));
- /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */
- ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base;
- ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0];
- ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1];
- ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0];
- ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1];
-}
+ stop_address = (unsigned long) &vmem_map[
+ pgdat->node_start_pfn + pgdat->node_spanned_pages]; /* address of the vmem_map entry just past this node's span */
-#ifdef CONFIG_VIRTUAL_MEM_MAP
+ do {
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
-int
+ pgd = pgd_offset_k(end_address);
+ if (pgd_none(*pgd)) {
+ end_address += PGDIR_SIZE; /* nothing populated at this level: advance by one pgd entry's span */
+ continue;
+ }
+
+ pud = pud_offset(pgd, end_address);
+ if (pud_none(*pud)) {
+ end_address += PUD_SIZE;
+ continue;
+ }
+
+ pmd = pmd_offset(pud, end_address);
+ if (pmd_none(*pmd)) {
+ end_address += PMD_SIZE;
+ continue;
+ }
+
+ pte = pte_offset_kernel(pmd, end_address);
+retry_pte:
+ if (pte_none(*pte)) {
+ end_address += PAGE_SIZE;
+ pte++;
+ if ((end_address < stop_address) &&
+ (end_address != ALIGN(end_address, 1UL << PMD_SHIFT))) /* keep stepping through ptes until the limit or a PMD-size boundary */
+ goto retry_pte;
+ continue;
+ }
+ /* Found next valid vmem_map page */
+ break;
+ } while (end_address < stop_address);
+
+ end_address = min(end_address, stop_address); /* clamp: the skips above may have stepped past stop_address */
+ end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1; /* byte offset into vmem_map, biased so the division below rounds up */
+ hole_next_pfn = end_address / sizeof(struct page);
+ return hole_next_pfn - pgdat->node_start_pfn;
+}
+
+int __init
create_mem_map_page_table (u64 start, u64 end, void *arg)
{
unsigned long address, start_page, end_page;
struct page *map_start, *map_end;
int node;
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset_k(address);
if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
- pmd = pmd_offset(pgd, address);
+ pud = pud_offset(pgd, address);
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
+ pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
struct page *map_start, *map_end;
args = (struct memmap_init_callback_data *) arg;
-
map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
- args->nid, args->zone, page_to_pfn(map_start));
+ args->nid, args->zone, page_to_pfn(map_start),
+ MEMMAP_EARLY);
return 0;
}
unsigned long start_pfn)
{
if (!vmem_map)
- memmap_init_zone(size, nid, zone, start_pfn);
+ memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
else {
struct page *start;
struct memmap_init_callback_data args;
}
EXPORT_SYMBOL(ia64_pfn_valid);
-int
+int __init
find_largest_hole (u64 start, u64 end, void *arg)
{
u64 *max_gap = arg;
last_end = end;
return 0;
}
+
+/*
+ * Range callback: converts 'start' and 'end' to page frame numbers with
+ * __pa() and passes them to add_active_range() with 0 as its first
+ * argument.  'arg' is unused; the return value is always 0.
+ */
+int __init
+register_active_ranges(u64 start, u64 end, void *arg)
+{
+	unsigned long first_pfn = __pa(start) >> PAGE_SHIFT;
+	unsigned long last_pfn = __pa(end) >> PAGE_SHIFT;
+
+	add_active_range(0, first_pfn, last_pfn);
+	return 0;
+}
#endif /* CONFIG_VIRTUAL_MEM_MAP */
-static int
+static int __init
count_reserved_pages (u64 start, u64 end, void *arg)
{
unsigned long num_reserved = 0;
* purposes.
*/
-static int nolwsys;
+static int nolwsys __initdata;
static int __init
nolwsys_setup (char *s)
__setup("nolwsys", nolwsys_setup);
-void
+void __init
mem_init (void)
{
long reserved_pages, codesize, datasize, initsize;
- unsigned long num_pgt_pages;
pg_data_t *pgdat;
int i;
static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
+ BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
+ BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
+ BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);
+
#ifdef CONFIG_PCI
/*
* This needs to be called _after_ the command line has been parsed but _before_
platform_dma_init();
#endif
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
if (!mem_map)
BUG();
max_mapnr = max_low_pfn;
kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
kclist_add(&kcore_kernel, _stext, _end - _stext);
- for_each_pgdat(pgdat)
- totalram_pages += free_all_bootmem_node(pgdat);
+ for_each_online_pgdat(pgdat)
+ if (pgdat->bdata->node_bootmem_map)
+ totalram_pages += free_all_bootmem_node(pgdat);
reserved_pages = 0;
efi_memmap_walk(count_reserved_pages, &reserved_pages);
num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
- /*
- * Allow for enough (cached) page table pages so that we can map the entire memory
- * at least once. Each task also needs a couple of page tables pages, so add in a
- * fudge factor for that (don't use "threads-max" here; that would be wrong!).
- * Don't allow the cache to be more than 10% of total memory, though.
- */
-# define NUM_TASKS 500 /* typical number of tasks */
- num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
- if (num_pgt_pages > nr_free_pages() / 10)
- num_pgt_pages = nr_free_pages() / 10;
- if (num_pgt_pages > (u64) pgt_cache_water[1])
- pgt_cache_water[1] = num_pgt_pages;
/*
* For fsyscall entrpoints with no light-weight handler, use the ordinary
ia32_mem_init();
#endif
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Bring one page online: clear its reserved flag, call init_page_count()
+ * on it, release it with __free_page(), and bump both totalram_pages and
+ * num_physpages.
+ */
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	init_page_count(page);
+	__free_page(page);
+
+	++num_physpages;
+	++totalram_pages;
+}
+
+/*
+ * Add 'size' bytes of memory starting at address 'start' to ZONE_NORMAL
+ * of node 'nid' by calling __add_pages() with the matching page frame
+ * range.
+ *
+ * Returns whatever __add_pages() returns; a non-zero result is also
+ * reported in the kernel log at error level.
+ */
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+	struct zone *zone = NODE_DATA(nid)->node_zones + ZONE_NORMAL;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __add_pages(zone, start_pfn, nr_pages);
+	if (ret)
+		printk(KERN_ERR "%s: Problem encountered in __add_pages() as ret=%d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL; /* stub: both arguments are ignored and every call fails with -EINVAL */
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+#endif