X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fppc64%2Fmm%2Finit.c;h=4cb05a070c0109056db541ebb6295909fe5278d2;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=84ceb2db2ab4418e68928eeeb764e996433b546a;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git

diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index 84ceb2db2..4cb05a070 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -36,6 +36,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -51,7 +53,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -62,8 +63,6 @@
 #include
 #include
 
-
-struct mmu_context_queue_t mmu_context_queue;
 int mem_init_done;
 unsigned long ioremap_bot = IMALLOC_BASE;
 static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -85,19 +84,18 @@ unsigned long __max_memory;
 
 /* info on what we think the IO hole is */
 unsigned long	io_hole_start;
 unsigned long	io_hole_size;
-unsigned long	top_of_ram;
 
 void show_mem(void)
 {
-	int total = 0, reserved = 0;
-	int shared = 0, cached = 0;
+	unsigned long total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
 	struct page *page;
 	pg_data_t *pgdat;
 	unsigned long i;
 
 	printk("Mem-info:\n");
 	show_free_areas();
-	printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+	printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat->node_mem_map + i;
@@ -110,26 +108,26 @@ void show_mem(void)
 				shared += page_count(page) - 1;
 		}
 	}
-	printk("%d pages of RAM\n",total);
-	printk("%d reserved pages\n",reserved);
-	printk("%d pages shared\n",shared);
-	printk("%d pages swap cached\n",cached);
+	printk("%ld pages of RAM\n", total);
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
 }
 
 #ifdef CONFIG_PPC_ISERIES
 
-void *ioremap(unsigned long addr, unsigned long size)
+void __iomem *ioremap(unsigned long addr, unsigned long size)
 {
-	return (void *)addr;
+	return (void __iomem *)addr;
 }
 
-extern void *__ioremap(unsigned long addr, unsigned long size,
+extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
 		       unsigned long flags)
 {
-	return (void *)addr;
+	return (void __iomem *)addr;
 }
 
-void iounmap(void *addr)
+void iounmap(volatile void __iomem *addr)
 {
 	return;
 }
@@ -171,7 +169,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
 
 		hash = hpt_hash(vpn, 0);
 
-		hpteg = ((hash & htab_data.htab_hash_mask)*HPTES_PER_GROUP);
+		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
 		/* Panic if a pte grpup is full */
 		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, 0,
@@ -183,7 +181,7 @@ static void map_io_page(unsigned long ea, unsigned long pa, int flags)
 }
 
 
-static void * __ioremap_com(unsigned long addr, unsigned long pa,
+static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
 			    unsigned long ea, unsigned long size,
 			    unsigned long flags)
 {
@@ -198,20 +196,17 @@ static void * __ioremap_com(unsigned long addr, unsigned long pa,
 			map_io_page(ea+i, pa+i, flags);
 	}
 
-	return (void *) (ea + (addr & ~PAGE_MASK));
+	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
 }
 
-
-void *
+void __iomem *
 ioremap(unsigned long addr, unsigned long size)
 {
-	void *ret = __ioremap(addr, size, _PAGE_NO_CACHE);
-	if(mem_init_done)
-		return eeh_ioremap(addr, ret);	/* may remap the addr */
-	return ret;
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
 }
 
-void *
+void __iomem *
 __ioremap(unsigned long addr, unsigned long size, unsigned long flags)
 {
 	unsigned long pa, ea;
@@ -268,9 +263,10 @@ int __ioremap_explicit(unsigned long pa, unsigned long ea,
 		 */
 		;
 	} else {
-		area = im_get_area(ea, size, IM_REGION_UNUSED|IM_REGION_SUBSET);
+		area = im_get_area(ea, size,
+			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
 		if (area == NULL) {
-			printk(KERN_ERR "could not obtain imalloc area for ea 0x%lx\n", ea);
+			/* Expected when PHB-dlpar is in play */
 			return 1;
 		}
 		if (ea != (unsigned long) area->addr) {
@@ -354,19 +350,18 @@ static void unmap_im_area_pmd(pgd_t *dir, unsigned long address,
  *
  * XXX	what about calls before mem_init_done (ie python_countermeasures())
  */
-void iounmap(void *addr)
+void iounmap(volatile void __iomem *token)
 {
 	unsigned long address, start, end, size;
 	struct mm_struct *mm;
 	pgd_t *dir;
+	void *addr;
 
 	if (!mem_init_done) {
 		return;
 	}
 
-	/* addr could be in EEH or IO region, map it to IO region regardless.
-	 */
-	addr = (void *) (IO_TOKEN_TO_ADDR(addr) & PAGE_MASK);
+	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
 
 	if ((size = im_free(addr)) == 0) {
 		return;
@@ -392,27 +387,55 @@ void iounmap(void *addr)
 	return;
 }
 
-int iounmap_explicit(void *addr, unsigned long size)
+static int iounmap_subset_regions(unsigned long addr, unsigned long size)
 {
 	struct vm_struct *area;
+
+	/* Check whether subsets of this region exist */
+	area = im_get_area(addr, size, IM_REGION_SUPERSET);
+	if (area == NULL)
+		return 1;
+
+	while (area) {
+		iounmap((void __iomem *) area->addr);
+		area = im_get_area(addr, size,
+				IM_REGION_SUPERSET);
+	}
+
+	return 0;
+}
+
+int iounmap_explicit(volatile void __iomem *start, unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+	int rc;
 
-	/* addr could be in EEH or IO region, map it to IO region regardless.
-	 */
-	addr = (void *) (IO_TOKEN_TO_ADDR(addr) & PAGE_MASK);
+	addr = (unsigned long __force) start & PAGE_MASK;
 
 	/* Verify that the region either exists or is a subset of an existing
 	 * region.  In the latter case, split the parent region to create
 	 * the exact region
 	 */
-	area = im_get_area((unsigned long) addr, size,
+	area = im_get_area(addr, size,
 			    IM_REGION_EXISTS | IM_REGION_SUBSET);
 	if (area == NULL) {
-		printk(KERN_ERR "%s() cannot unmap nonexistent range 0x%lx\n",
-				__FUNCTION__, (unsigned long) addr);
-		return 1;
+		/* Determine whether subset regions exist.  If so, unmap */
+		rc = iounmap_subset_regions(addr, size);
+		if (rc) {
+			printk(KERN_ERR
+			       "%s() cannot unmap nonexistent range 0x%lx\n",
+			       __FUNCTION__, addr);
+			return 1;
+		}
+	} else {
+		iounmap((void __iomem *) area->addr);
 	}
-
+	/*
+	 * FIXME! This can't be right:
 	iounmap(area->addr);
+	 * Maybe it should be "iounmap(area);"
+	 */
 	return 0;
 }
@@ -447,41 +470,76 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 }
 #endif
 
+static DEFINE_SPINLOCK(mmu_context_lock);
+static DEFINE_IDR(mmu_context_idr);
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int index;
+	int err;
+
+#ifdef CONFIG_HUGETLB_PAGE
+	/* We leave htlb_segs as it was, but for a fork, we need to
+	 * clear the huge_pgdir. */
+	mm->context.huge_pgdir = NULL;
+#endif
+
+again:
+	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&mmu_context_lock);
+	err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
+	spin_unlock(&mmu_context_lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > MAX_CONTEXT) {
+		idr_remove(&mmu_context_idr, index);
+		return -ENOMEM;
+	}
+
+	mm->context.id = index;
+
+	return 0;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+	spin_lock(&mmu_context_lock);
+	idr_remove(&mmu_context_idr, mm->context.id);
+	spin_unlock(&mmu_context_lock);
+
+	mm->context.id = NO_CONTEXT;
+
+	hugetlb_mm_free_pgd(mm);
+}
+
 /*
  * Do very early mm setup.
  */
 void __init mm_init_ppc64(void)
 {
+#ifndef CONFIG_PPC_ISERIES
 	unsigned long i;
+#endif
 
 	ppc64_boot_msg(0x100, "MM Init");
 
-	/* Reserve all contexts < FIRST_USER_CONTEXT for kernel use.
-	 * The range of contexts [FIRST_USER_CONTEXT, NUM_USER_CONTEXT)
-	 * are stored on a stack/queue for easy allocation and deallocation.
-	 */
-	mmu_context_queue.lock = SPIN_LOCK_UNLOCKED;
-	mmu_context_queue.head = 0;
-	mmu_context_queue.tail = NUM_USER_CONTEXT-1;
-	mmu_context_queue.size = NUM_USER_CONTEXT;
-	for (i = 0; i < NUM_USER_CONTEXT; i++)
-		mmu_context_queue.elements[i] = i + FIRST_USER_CONTEXT;
-
 	/* This is the story of the IO hole... please, keep seated,
 	 * unfortunately, we are out of oxygen masks at the moment.
 	 * So we need some rough way to tell where your big IO hole
 	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
 	 * that area as well, on POWER4 we don't have one, etc...
-	 * We need that to implement something approx. decent for
-	 * page_is_ram() so that /dev/mem doesn't map cacheable IO space
-	 * when XFree resquest some IO regions witout using O_SYNC, we
-	 * also need that as a "hint" when sizing the TCE table on POWER3
+	 * We need that as a "hint" when sizing the TCE table on POWER3
 	 * So far, the simplest way that seem work well enough for us it
 	 * to just assume that the first discontinuity in our physical
 	 * RAM layout is the IO hole. That may not be correct in the future
 	 * (and isn't on iSeries but then we don't care ;)
 	 */
-	top_of_ram = lmb_end_of_DRAM();
 
 #ifndef CONFIG_PPC_ISERIES
 	for (i = 1; i < lmb.memory.cnt; i++) {
@@ -504,22 +562,32 @@ void __init mm_init_ppc64(void)
 	ppc64_boot_msg(0x100, "MM Init Done");
 }
 
-
 /*
  * This is called by /dev/mem to know if a given address has to
  * be mapped non-cacheable or not
  */
-int page_is_ram(unsigned long physaddr)
+int page_is_ram(unsigned long pfn)
 {
-#ifdef CONFIG_PPC_ISERIES
-	return 1;
+	int i;
+	unsigned long paddr = (pfn << PAGE_SHIFT);
+
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base;
+
+#ifdef CONFIG_MSCHUNKS
+		base = lmb.memory.region[i].physbase;
+#else
+		base = lmb.memory.region[i].base;
 #endif
-	if (physaddr >= top_of_ram)
-		return 0;
-	return io_hole_start == 0 || physaddr < io_hole_start ||
-		physaddr >= (io_hole_start + io_hole_size);
-}
+
+		if ((paddr >= base) &&
+			(paddr < (base + lmb.memory.region[i].size))) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(page_is_ram);
 
 /*
  * Initialize the bootmem system and give it all the memory we
@@ -545,6 +613,8 @@ void __init do_init_bootmem(void)
 
 	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
 
+	max_pfn = max_low_pfn;
+
 	/* add all physical memory to the bootmem map. Also find the first */
 	for (i=0; i < lmb.memory.cnt; i++) {
 		unsigned long physbase, size;
@@ -571,6 +641,7 @@ void __init paging_init(void)
 	unsigned long zones_size[MAX_NR_ZONES];
 	unsigned long zholes_size[MAX_NR_ZONES];
 	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = lmb_end_of_DRAM();
 
 	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
 	       top_of_ram, total_ram);
@@ -585,7 +656,7 @@ void __init paging_init(void)
 	zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
 	zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
 
-	free_area_init_node(0, &contig_page_data, NULL, zones_size,
+	free_area_init_node(0, &contig_page_data, zones_size,
 			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
 	mem_map = contig_page_data.node_mem_map;
 }
@@ -620,60 +691,53 @@ module_init(setup_kcore);
 
 void __init mem_init(void)
 {
-#ifndef CONFIG_DISCONTIGMEM
-	unsigned long addr;
+#ifdef CONFIG_DISCONTIGMEM
+	int nid;
 #endif
-	int codepages = 0;
-	int datapages = 0;
-	int initpages = 0;
+	pg_data_t *pgdat;
+	unsigned long i;
+	struct page *page;
+	unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
 
 	num_physpages = max_low_pfn;	/* RAM is assumed contiguous */
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-	max_pfn = max_low_pfn;
 
 #ifdef CONFIG_DISCONTIGMEM
-{
-	int nid;
-
-	for (nid = 0; nid < numnodes; nid++) {
-		if (node_data[nid].node_spanned_pages != 0) {
+	for_each_online_node(nid) {
+		if (NODE_DATA(nid)->node_spanned_pages != 0) {
 			printk("freeing bootmem node %x\n", nid);
 			totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
 		}
 	}
-
-	printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08lx,%08lx]\n",
-	       (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
-	       codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
-	       initpages<< (PAGE_SHIFT-10),
-	       PAGE_OFFSET, (unsigned long)__va(lmb_end_of_DRAM()));
-}
 #else
 	max_mapnr = num_physpages;
-
 	totalram_pages += free_all_bootmem();
+#endif
 
-	for (addr = KERNELBASE; addr <= (unsigned long)__va(lmb_end_of_DRAM());
-	     addr += PAGE_SIZE) {
-		if (!PageReserved(virt_to_page(addr)))
-			continue;
-		if (addr < (unsigned long)_etext)
-			codepages++;
-
-		else if (addr >= (unsigned long)__init_begin
-			 && addr < (unsigned long)__init_end)
-			initpages++;
-		else if (addr < klimit)
-			datapages++;
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat->node_mem_map + i;
+			if (PageReserved(page))
+				reservedpages++;
		}
 	}
 
-	printk("Memory: %luk available (%dk kernel code, %dk data, %dk init) [%08lx,%08lx]\n",
-	       (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
-	       codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
-	       initpages<< (PAGE_SHIFT-10),
-	       PAGE_OFFSET, (unsigned long)__va(lmb_end_of_DRAM()));
-#endif
+	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
+	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
+	datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
+	bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
+	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		bsssize >> 10,
+		initsize >> 10);
+
 	mem_init_done = 1;
 
 #ifdef CONFIG_PPC_ISERIES
@@ -765,6 +829,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 	pte_t *ptep;
 	int local = 0;
 	cpumask_t tmp;
+	unsigned long flags;
 
 	/* handle i-cache coherency */
 	if (!(cur_cpu_spec->cpu_features & CPU_FTR_COHERENT_ICACHE) &&
@@ -794,22 +859,24 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 
 	vsid = get_vsid(vma->vm_mm->context.id, ea);
 
+	local_irq_save(flags);
 	tmp = cpumask_of_cpu(smp_processor_id());
 	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
 		local = 1;
 
 	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
 		    0x300, local);
+	local_irq_restore(flags);
 }
 
-void * reserve_phb_iospace(unsigned long size)
+void __iomem * reserve_phb_iospace(unsigned long size)
 {
-	void *virt_addr;
+	void __iomem *virt_addr;
 
 	if (phbs_io_bot >= IMALLOC_BASE)
 		panic("reserve_phb_iospace(): phb io space overflow\n");
 
-	virt_addr = (void *) phbs_io_bot;
+	virt_addr = (void __iomem *) phbs_io_bot;
 	phbs_io_bot += size;
 
 	return virt_addr;