X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fkernel%2Fefi.c;h=bb8770a177b5f3c6eac55a11e83ac9cbc9d389e9;hb=16c70f8c1b54b61c3b951b6fb220df250fe09b32;hp=3789a515a601b41aef52e26de8bb24b3f4002202;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git

diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 3789a515a..bb8770a17 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -8,6 +8,8 @@
 * Copyright (C) 1999-2003 Hewlett-Packard Co.
 * David Mosberger-Tang
 * Stephane Eranian
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas
 *
 * All EFI Runtime Services are not implemented yet as EFI only
 * supports physical mode addressing on SoftSDV. This is to be fixed
@@ -18,7 +20,6 @@
 * Goutham Rao:
 * Skip non-WB memory and ignore empty memory ranges.
 */
-#include <linux/config.h>
 #include
 #include
 #include
@@ -28,6 +29,7 @@
 #include
 #include
 
+#include
 #include
 #include
 #include
@@ -43,18 +45,20 @@ static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
 
 #define efi_call_virt(f, args...) (*(f))(args)
 
-#define STUB_GET_TIME(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), \
- adjust_arg(tc)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_time_cap_t *atc = NULL; \
+ efi_status_t ret; \
+ \
+ if (tc) \
+ atc = adjust_arg(tc); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
 }
 
 #define STUB_SET_TIME(prefix, adjust_arg) \
@@ -89,11 +93,14 @@ static efi_status_t \
 prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
 { \
 struct ia64_fpreg fr[6]; \
+ efi_time_t *atm = NULL; \
 efi_status_t ret; \
 \
+ if (tm) \
+ atm = adjust_arg(tm); \
 ia64_save_scratch_fpregs(fr); \
 ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
- enabled, adjust_arg(tm)); \
+ enabled, atm); \
 ia64_load_scratch_fpregs(fr); \
 return ret; \
 }
@@ -104,11 +111,14 @@ prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
 unsigned long *data_size, void *data) \
 { \
 struct ia64_fpreg fr[6]; \
+ u32 *aattr = NULL; \
 efi_status_t ret; \
 \
+ if (attr) \
+ aattr = adjust_arg(attr); \
 ia64_save_scratch_fpregs(fr); \
 ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
- adjust_arg(name), adjust_arg(vendor), adjust_arg(attr), \
+ adjust_arg(name), adjust_arg(vendor), aattr, \
 adjust_arg(data_size), adjust_arg(data)); \
 ia64_load_scratch_fpregs(fr); \
 return ret; \
@@ -164,33 +174,41 @@ prefix##_reset_system (int reset_type, efi_status_t status, \
 unsigned long data_size, efi_char16_t *data) \
 { \
 struct ia64_fpreg fr[6]; \
+ efi_char16_t *adata = NULL; \
+ \
+ if (data) \
+ adata = adjust_arg(data); \
 \
 ia64_save_scratch_fpregs(fr); \
 efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
- reset_type, status, data_size, adjust_arg(data)); \
+ reset_type, status, data_size, adata); \
 /* should not return, but just in case... */ \
 ia64_load_scratch_fpregs(fr); \
 }
 
-STUB_GET_TIME(phys, __pa)
-STUB_SET_TIME(phys, __pa)
-STUB_GET_WAKEUP_TIME(phys, __pa)
-STUB_SET_WAKEUP_TIME(phys, __pa)
-STUB_GET_VARIABLE(phys, __pa)
-STUB_GET_NEXT_VARIABLE(phys, __pa)
-STUB_SET_VARIABLE(phys, __pa)
-STUB_GET_NEXT_HIGH_MONO_COUNT(phys, __pa)
-STUB_RESET_SYSTEM(phys, __pa)
-
-STUB_GET_TIME(virt, )
-STUB_SET_TIME(virt, )
-STUB_GET_WAKEUP_TIME(virt, )
-STUB_SET_WAKEUP_TIME(virt, )
-STUB_GET_VARIABLE(virt, )
-STUB_GET_NEXT_VARIABLE(virt, )
-STUB_SET_VARIABLE(virt, )
-STUB_GET_NEXT_HIGH_MONO_COUNT(virt, )
-STUB_RESET_SYSTEM(virt, )
+#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
+
+STUB_GET_TIME(phys, phys_ptr)
+STUB_SET_TIME(phys, phys_ptr)
+STUB_GET_WAKEUP_TIME(phys, phys_ptr)
+STUB_SET_WAKEUP_TIME(phys, phys_ptr)
+STUB_GET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
+STUB_SET_VARIABLE(phys, phys_ptr)
+STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
+STUB_RESET_SYSTEM(phys, phys_ptr)
+
+#define id(arg) arg
+
+STUB_GET_TIME(virt, id)
+STUB_SET_TIME(virt, id)
+STUB_GET_WAKEUP_TIME(virt, id)
+STUB_SET_WAKEUP_TIME(virt, id)
+STUB_GET_VARIABLE(virt, id)
+STUB_GET_NEXT_VARIABLE(virt, id)
+STUB_SET_VARIABLE(virt, id)
+STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
+STUB_RESET_SYSTEM(virt, id)
 
 void
 efi_gettimeofday (struct timespec *ts)
@@ -198,7 +216,7 @@ efi_gettimeofday (struct timespec *ts)
 efi_time_t tm;
 
 memset(ts, 0, sizeof(*ts));
- if ((*efi.get_time)(&tm, 0) != EFI_SUCCESS)
+ if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
 return;
 
 ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
@@ -222,57 +240,56 @@ is_available_memory (efi_memory_desc_t *md)
 return 0;
 }
 
-/*
- * Trim descriptor MD so it starts at address START_ADDR. If the descriptor covers
- * memory that is normally available to the kernel, issue a warning that some memory
- * is being ignored.
- */
-static void
-trim_bottom (efi_memory_desc_t *md, u64 start_addr)
-{
- u64 num_skipped_pages;
+typedef struct kern_memdesc {
+ u64 attribute;
+ u64 start;
+ u64 num_pages;
+} kern_memdesc_t;
 
- if (md->phys_addr >= start_addr || !md->num_pages)
- return;
+static kern_memdesc_t *kern_memmap;
 
- num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
- if (num_skipped_pages > md->num_pages)
- num_skipped_pages = md->num_pages;
+#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT)
 
- if (is_available_memory(md))
- printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
- "at 0x%lx\n", __FUNCTION__,
- (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
- md->phys_addr, start_addr - IA64_GRANULE_SIZE);
- /*
- * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
- * descriptor list to become unsorted. In such a case, md->num_pages will be
- * zero, so the Right Thing will happen.
- */
- md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
- md->num_pages -= num_skipped_pages;
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+ return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
 }
 
-static void
-trim_top (efi_memory_desc_t *md, u64 end_addr)
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
 {
- u64 num_dropped_pages, md_end_addr;
+ return (md->phys_addr + efi_md_size(md));
+}
 
- md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+ return (md->attribute & EFI_MEMORY_WB);
+}
 
- if (md_end_addr <= end_addr || !md->num_pages)
- return;
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+ return (md->attribute & EFI_MEMORY_UC);
+}
 
- num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
- if (num_dropped_pages > md->num_pages)
- num_dropped_pages = md->num_pages;
+static void
+walk (efi_freemem_callback_t callback, void *arg, u64 attr)
+{
+ kern_memdesc_t *k;
+ u64 start, end, voff;
 
- if (is_available_memory(md))
- printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
- "at 0x%lx\n", __FUNCTION__,
- (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
- md->phys_addr, end_addr);
- md->num_pages -= num_dropped_pages;
+ voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET;
+ for (k = kern_memmap; k->start != ~0UL; k++) {
+ if (k->attribute != attr)
+ continue;
+ start = PAGE_ALIGN(k->start);
+ end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK;
+ if (start < end)
+ if ((*callback)(start + voff, end + voff, arg) < 0)
+ return;
+ }
 }
 
 /*
@@ -282,100 +299,17 @@ trim_top (efi_memory_desc_t *md, u64 end_addr)
 void
 efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 {
- int prev_valid = 0;
- struct range {
- u64 start;
- u64 end;
- } prev, curr;
- void *efi_map_start, *efi_map_end, *p, *q;
- efi_memory_desc_t *md, *check_md;
- u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
- unsigned long total_mem = 0;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
-
- /* skip over non-WB memory descriptors; that's all we're interested in... */
- if (!(md->attribute & EFI_MEMORY_WB))
- continue;
-
- /*
- * granule_addr is the base of md's first granule.
- * [granule_addr - first_non_wb_addr) is guaranteed to
- * be contiguous WB memory.
- */
- granule_addr = md->phys_addr & ~(IA64_GRANULE_SIZE - 1);
- first_non_wb_addr = max(first_non_wb_addr, granule_addr);
-
- if (first_non_wb_addr < md->phys_addr) {
- trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
- granule_addr = md->phys_addr & ~(IA64_GRANULE_SIZE - 1);
- first_non_wb_addr = max(first_non_wb_addr, granule_addr);
- }
-
- for (q = p; q < efi_map_end; q += efi_desc_size) {
- check_md = q;
-
- if ((check_md->attribute & EFI_MEMORY_WB) &&
- (check_md->phys_addr == first_non_wb_addr))
- first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
- else
- break; /* non-WB or hole */
- }
-
- last_granule_addr = first_non_wb_addr & ~(IA64_GRANULE_SIZE - 1);
- if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
- trim_top(md, last_granule_addr);
-
- if (is_available_memory(md)) {
- if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) > max_addr) {
- if (md->phys_addr > max_addr)
- continue;
- md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
- }
-
- if (total_mem >= mem_limit)
- continue;
- total_mem += (md->num_pages << EFI_PAGE_SHIFT);
- if (total_mem > mem_limit)
- md->num_pages -= ((total_mem - mem_limit) >> EFI_PAGE_SHIFT);
-
- if (md->num_pages == 0)
- continue;
-
- curr.start = PAGE_OFFSET + md->phys_addr;
- curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
-
- if (!prev_valid) {
- prev = curr;
- prev_valid = 1;
- } else {
- if (curr.start < prev.start)
- printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
+ walk(callback, arg, EFI_MEMORY_WB);
+}
 
- if (prev.end == curr.start) {
- /* merge two consecutive memory ranges */
- prev.end = curr.end;
- } else {
- start = PAGE_ALIGN(prev.start);
- end = prev.end & PAGE_MASK;
- if ((end > start) && (*callback)(start, end, arg) < 0)
- return;
- prev = curr;
- }
- }
- }
- }
- if (prev_valid) {
- start = PAGE_ALIGN(prev.start);
- end = prev.end & PAGE_MASK;
- if (end > start)
- (*callback)(start, end, arg);
- }
+/*
+ * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
+ * has memory that is available for uncached allocator.
+ */
+void
+efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
+{
+ walk(callback, arg, EFI_MEMORY_UC);
 }
 
 /*
@@ -383,16 +317,15 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
 * Abstraction Layer chapter 11 in ADAG
 */
-void
-efi_map_pal_code (void)
+
+void *
+efi_get_pal_addr (void)
 {
 void *efi_map_start, *efi_map_end, *p;
 efi_memory_desc_t *md;
 u64 efi_desc_size;
 int pal_code_count = 0;
- u64 mask, psr;
- u64 vaddr;
- int cpu;
+ u64 vaddr, mask;
 
 efi_map_start = __va(ia64_boot_param->efi_memmap);
 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
@@ -436,30 +369,39 @@ efi_map_pal_code (void)
 if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
 panic("Woah! PAL code size bigger than a granule!");
PAL code size bigger than a granule!"); - mask = ~((1 << IA64_GRANULE_SHIFT) - 1); #if EFI_DEBUG + mask = ~((1 << IA64_GRANULE_SHIFT) - 1); + printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", - smp_processor_id(), md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); + smp_processor_id(), md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); #endif - - /* - * Cannot write to CRx with PSR.ic=1 - */ - psr = ia64_clear_ic(); - ia64_itr(0x1, IA64_TR_PALCODE, vaddr & mask, - pte_val(pfn_pte(md->phys_addr >> PAGE_SHIFT, PAGE_KERNEL)), - IA64_GRANULE_SHIFT); - ia64_set_psr(psr); /* restore psr */ - ia64_srlz_i(); - - cpu = smp_processor_id(); - - /* insert this TR into our list for MCA recovery purposes */ - ia64_mca_tlb_list[cpu].pal_base = vaddr & mask; - ia64_mca_tlb_list[cpu].pal_paddr = pte_val(mk_pte_phys(md->phys_addr, PAGE_KERNEL)); + return __va(md->phys_addr); } + printk(KERN_WARNING "%s: no PAL-code memory-descriptor found", + __FUNCTION__); + return NULL; +} + +void +efi_map_pal_code (void) +{ + void *pal_vaddr = efi_get_pal_addr (); + u64 psr; + + if (!pal_vaddr) + return; + + /* + * Cannot write to CRx with PSR.ic=1 + */ + psr = ia64_clear_ic(); + ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr), + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), + IA64_GRANULE_SHIFT); + ia64_set_psr(psr); /* restore psr */ + ia64_srlz_i(); } void __init @@ -469,24 +411,16 @@ efi_init (void) efi_config_table_t *config_tables; efi_char16_t *c16; u64 efi_desc_size; - char *cp, *end, vendor[100] = "unknown"; + char *cp, vendor[100] = "unknown"; extern char saved_command_line[]; int i; /* it's too early to be able to use the standard kernel command line support... 
 for (cp = saved_command_line; *cp; ) {
 if (memcmp(cp, "mem=", 4) == 0) {
- cp += 4;
- mem_limit = memparse(cp, &end) - 2;
- if (end != cp)
- break;
- cp = end;
+ mem_limit = memparse(cp + 4, &cp);
 } else if (memcmp(cp, "max_addr=", 9) == 0) {
- cp += 9;
- max_addr = memparse(cp, &end) - 1;
- if (end != cp)
- break;
- cp = end;
+ max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
 } else {
 while (*cp != ' ' && *cp)
 ++cp;
@@ -517,7 +451,7 @@ efi_init (void)
 /* Show what we know for posterity */
 c16 = __va(efi.systab->fw_vendor);
 if (c16) {
- for (i = 0;i < (int) sizeof(vendor) && *c16; ++i)
+ for (i = 0;i < (int) sizeof(vendor) - 1 && *c16; ++i)
 vendor[i] = *c16++;
 vendor[i] = '\0';
 }
@@ -525,24 +459,33 @@ efi_init (void)
 printk(KERN_INFO "EFI v%u.%.02u by %s:",
 efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
 
+ efi.mps = EFI_INVALID_TABLE_ADDR;
+ efi.acpi = EFI_INVALID_TABLE_ADDR;
+ efi.acpi20 = EFI_INVALID_TABLE_ADDR;
+ efi.smbios = EFI_INVALID_TABLE_ADDR;
+ efi.sal_systab = EFI_INVALID_TABLE_ADDR;
+ efi.boot_info = EFI_INVALID_TABLE_ADDR;
+ efi.hcdp = EFI_INVALID_TABLE_ADDR;
+ efi.uga = EFI_INVALID_TABLE_ADDR;
+
 for (i = 0; i < (int) efi.systab->nr_tables; i++) {
 if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = __va(config_tables[i].table);
+ efi.mps = config_tables[i].table;
 printk(" MPS=0x%lx", config_tables[i].table);
 } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = __va(config_tables[i].table);
+ efi.acpi20 = config_tables[i].table;
 printk(" ACPI 2.0=0x%lx", config_tables[i].table);
 } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = __va(config_tables[i].table);
+ efi.acpi = config_tables[i].table;
 printk(" ACPI=0x%lx", config_tables[i].table);
 } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = __va(config_tables[i].table);
+ efi.smbios = config_tables[i].table;
 printk(" SMBIOS=0x%lx", config_tables[i].table);
 } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
- efi.sal_systab = __va(config_tables[i].table);
+ efi.sal_systab = config_tables[i].table;
 printk(" SALsystab=0x%lx", config_tables[i].table);
 } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = __va(config_tables[i].table);
+ efi.hcdp = config_tables[i].table;
 printk(" HCDP=0x%lx", config_tables[i].table);
 }
 }
@@ -680,8 +623,20 @@ efi_get_iobase (void)
 return 0;
 }
 
-u32
-efi_mem_type (unsigned long phys_addr)
+static struct kern_memdesc *
+kern_memory_descriptor (unsigned long phys_addr)
+{
+ struct kern_memdesc *md;
+
+ for (md = kern_memmap; md->start != ~0UL; md++) {
+ if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT))
+ return md;
+ }
+ return NULL;
+}
+
+static efi_memory_desc_t *
+efi_memory_descriptor (unsigned long phys_addr)
 {
 void *efi_map_start, *efi_map_end, *p;
 efi_memory_desc_t *md;
@@ -695,55 +650,151 @@ efi_mem_type (unsigned long phys_addr)
 md = p;
 
 if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
- return md->type;
+ return md;
 }
+ return NULL;
+}
+
+u32
+efi_mem_type (unsigned long phys_addr)
+{
+ efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+ if (md)
+ return md->type;
 return 0;
 }
 
 u64
 efi_mem_attributes (unsigned long phys_addr)
 {
- void *efi_map_start, *efi_map_end, *p;
- efi_memory_desc_t *md;
- u64 efi_desc_size;
+ efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
 
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
+ if (md)
+ return md->attribute;
+ return 0;
+}
+EXPORT_SYMBOL(efi_mem_attributes);
 
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
+u64
+efi_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+ unsigned long end = phys_addr + size;
+ efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+ u64 attr;
 
- if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
- return md->attribute;
+ if (!md)
+ return 0;
+
+ /*
+ * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells
+ * the kernel that firmware needs this region mapped.
+ */
+ attr = md->attribute & ~EFI_MEMORY_RUNTIME;
+ do {
+ unsigned long md_end = efi_md_end(md);
+
+ if (end <= md_end)
+ return attr;
+
+ md = efi_memory_descriptor(md_end);
+ if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
+ return 0;
+ } while (md);
+ return 0;
+}
+
+u64
+kern_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+ unsigned long end = phys_addr + size;
+ struct kern_memdesc *md;
+ u64 attr;
+
+ /*
+ * This is a hack for ioremap calls before we set up kern_memmap.
+ * Maybe we should do efi_memmap_init() earlier instead.
+ */
+ if (!kern_memmap) {
+ attr = efi_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB)
+ return EFI_MEMORY_WB;
+ return 0;
 }
+
+ md = kern_memory_descriptor(phys_addr);
+ if (!md)
+ return 0;
+
+ attr = md->attribute;
+ do {
+ unsigned long md_end = kmd_end(md);
+
+ if (end <= md_end)
+ return attr;
+
+ md = kern_memory_descriptor(md_end);
+ if (!md || md->attribute != attr)
+ return 0;
+ } while (md);
 return 0;
 }
+EXPORT_SYMBOL(kern_mem_attribute);
 
 int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
 {
- void *efi_map_start, *efi_map_end, *p;
- efi_memory_desc_t *md;
- u64 efi_desc_size;
+ u64 attr;
 
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
+ /*
+ * /dev/mem reads and writes use copy_to_user(), which implicitly
+ * uses a granule-sized kernel identity mapping. It's really
+ * only safe to do this for regions in kern_memmap. For more
+ * details, see Documentation/ia64/aliasing.txt.
+ */
+ attr = kern_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+ return 1;
+ return 0;
+}
 
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
+int
+valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size)
+{
+ /*
+ * MMIO regions are often missing from the EFI memory map.
+ * We must allow mmap of them for programs like X, so we
+ * currently can't do any useful validation.
+ */
+ return 1;
+}
 
- if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
- if (!(md->attribute & EFI_MEMORY_WB))
- return 0;
+pgprot_t
+phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size,
+ pgprot_t vma_prot)
+{
+ unsigned long phys_addr = pfn << PAGE_SHIFT;
+ u64 attr;
 
- if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
- *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
- return 1;
- }
- }
- return 0;
+ /*
+ * For /dev/mem mmap, we use user mappings, but if the region is
+ * in kern_memmap (and hence may be covered by a kernel mapping),
+ * we must use the same attribute as the kernel mapping.
+ */
+ attr = kern_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB)
+ return pgprot_cacheable(vma_prot);
+ else if (attr & EFI_MEMORY_UC)
+ return pgprot_noncached(vma_prot);
+
+ /*
+ * Some chipsets don't support UC access to memory. If
+ * WB is supported, we prefer that.
+ */
+ if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+ return pgprot_cacheable(vma_prot);
+
+ return pgprot_noncached(vma_prot);
 }
 
 int __init
@@ -790,3 +841,281 @@ efi_uart_console_only(void)
 printk(KERN_ERR "Malformed %s value\n", name);
 return 0;
 }
+
+/*
+ * Look for the first granule-aligned memory descriptor that is big
+ * enough to hold the EFI memory map. Make sure this descriptor is at
+ * least granule sized so it does not get trimmed.
+ */
+struct kern_memdesc *
+find_memmap_space (void)
+{
+ u64 contig_low=0, contig_high=0;
+ u64 as = 0, ae;
+ void *efi_map_start, *efi_map_end, *p, *q;
+ efi_memory_desc_t *md, *pmd = NULL, *check_md;
+ u64 space_needed, efi_desc_size;
+ unsigned long total_mem = 0;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ /*
+ * Worst case: we need 3 kernel descriptors for each efi descriptor
+ * (if every entry has a WB part in the middle, and UC head and tail),
+ * plus one for the end marker.
+ */
+ space_needed = sizeof(kern_memdesc_t) *
+ (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1);
+
+ for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md)) {
+ continue;
+ }
+ if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ contig_low = GRANULEROUNDUP(md->phys_addr);
+ contig_high = efi_md_end(md);
+ for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ check_md = q;
+ if (!efi_wb(check_md))
+ break;
+ if (contig_high != check_md->phys_addr)
+ break;
+ contig_high = efi_md_end(check_md);
+ }
+ contig_high = GRANULEROUNDDOWN(contig_high);
+ }
+ if (!is_available_memory(md) || md->type == EFI_LOADER_DATA)
+ continue;
+
+ /* Round ends inward to granule boundaries */
+ as = max(contig_low, md->phys_addr);
+ ae = min(contig_high, efi_md_end(md));
+
+ /* keep within max_addr= command line arg */
+ ae = min(ae, max_addr);
+ if (ae <= as)
+ continue;
+
+ /* avoid going over mem= command line arg */
+ if (total_mem + (ae - as) > mem_limit)
+ ae -= total_mem + (ae - as) - mem_limit;
+
+ if (ae <= as)
+ continue;
+
+ if (ae - as > space_needed)
+ break;
+ }
+ if (p >= efi_map_end)
+ panic("Can't allocate space for kernel memory descriptors");
+
+ return __va(as);
+}
+
+/*
+ * Walk the EFI memory map and gather all memory available for the kernel
+ * to use. We can allocate partial granules only if the unavailable
+ * parts exist, and are WB.
+ */
+void
+efi_memmap_init(unsigned long *s, unsigned long *e)
+{
+ struct kern_memdesc *k, *prev = NULL;
+ u64 contig_low=0, contig_high=0;
+ u64 as, ae, lim;
+ void *efi_map_start, *efi_map_end, *p, *q;
+ efi_memory_desc_t *md, *pmd = NULL, *check_md;
+ u64 efi_desc_size;
+ unsigned long total_mem = 0;
+
+ k = kern_memmap = find_memmap_space();
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md)) {
+ if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
+ md->type == EFI_BOOT_SERVICES_DATA)) {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = md->phys_addr;
+ k->num_pages = md->num_pages;
+ k++;
+ }
+ continue;
+ }
+ if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ contig_low = GRANULEROUNDUP(md->phys_addr);
+ contig_high = efi_md_end(md);
+ for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ check_md = q;
+ if (!efi_wb(check_md))
+ break;
+ if (contig_high != check_md->phys_addr)
+ break;
+ contig_high = efi_md_end(check_md);
+ }
+ contig_high = GRANULEROUNDDOWN(contig_high);
+ }
+ if (!is_available_memory(md))
+ continue;
+
+ /*
+ * Round ends inward to granule boundaries
+ * Give trimmings to uncached allocator
+ */
+ if (md->phys_addr < contig_low) {
+ lim = min(efi_md_end(md), contig_low);
+ if (efi_uc(md)) {
+ if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+ kmd_end(k-1) == md->phys_addr) {
+ (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ } else {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = md->phys_addr;
+ k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ k++;
+ }
+ }
+ as = contig_low;
+ } else
+ as = md->phys_addr;
+
+ if (efi_md_end(md) > contig_high) {
+ lim = max(md->phys_addr, contig_high);
+ if (efi_uc(md)) {
+ if (lim == md->phys_addr && k > kern_memmap &&
+ (k-1)->attribute == EFI_MEMORY_UC &&
+ kmd_end(k-1) == md->phys_addr) {
+ (k-1)->num_pages += md->num_pages;
+ } else {
+ k->attribute = EFI_MEMORY_UC;
+ k->start = lim;
+ k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+ k++;
+ }
+ }
+ ae = contig_high;
+ } else
+ ae = efi_md_end(md);
+
+ /* keep within max_addr= command line arg */
+ ae = min(ae, max_addr);
+ if (ae <= as)
+ continue;
+
+ /* avoid going over mem= command line arg */
+ if (total_mem + (ae - as) > mem_limit)
+ ae -= total_mem + (ae - as) - mem_limit;
+
+ if (ae <= as)
+ continue;
+ if (prev && kmd_end(prev) == md->phys_addr) {
+ prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT;
+ total_mem += ae - as;
+ continue;
+ }
+ k->attribute = EFI_MEMORY_WB;
+ k->start = as;
+ k->num_pages = (ae - as) >> EFI_PAGE_SHIFT;
+ total_mem += ae - as;
+ prev = k++;
+ }
+ k->start = ~0L; /* end-marker */
+
+ /* reserve the memory we are using for kern_memmap */
+ *s = (u64)kern_memmap;
+ *e = (u64)++k;
+}
+
+void
+efi_initialize_iomem_resources(struct resource *code_resource,
+ struct resource *data_resource)
+{
+ struct resource *res;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+ char *name;
+ unsigned long flags;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ res = NULL;
+
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+
+ if (md->num_pages == 0) /* should not happen */
+ continue;
+
+ flags = IORESOURCE_MEM;
+ switch (md->type) {
+
+ case EFI_MEMORY_MAPPED_IO:
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ continue;
+
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_CONVENTIONAL_MEMORY:
+ if (md->attribute & EFI_MEMORY_WP) {
+ name = "System ROM";
+ flags |= IORESOURCE_READONLY;
+ } else {
+ name = "System RAM";
+ }
+ break;
+
+ case EFI_ACPI_MEMORY_NVS:
+ name = "ACPI Non-volatile Storage";
+ flags |= IORESOURCE_BUSY;
+ break;
+
+ case EFI_UNUSABLE_MEMORY:
+ name = "reserved";
+ flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED;
+ break;
+
+ case EFI_RESERVED_TYPE:
+ case EFI_RUNTIME_SERVICES_CODE:
+ case EFI_RUNTIME_SERVICES_DATA:
+ case EFI_ACPI_RECLAIM_MEMORY:
+ default:
+ name = "reserved";
+ flags |= IORESOURCE_BUSY;
+ break;
+ }
+
+ if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
+ printk(KERN_ERR "failed to allocate resource for iomem\n");
+ return;
+ }
+
+ res->name = name;
+ res->start = md->phys_addr;
+ res->end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+ res->flags = flags;
+
+ if (insert_resource(&iomem_resource, res) < 0)
+ kfree(res);
+ else {
+ /*
+ * We don't know which region contains
+ * kernel data so we try it repeatedly and
+ * let the resource manager test it.
+ */
+ insert_resource(res, code_resource);
+ insert_resource(res, data_resource);
+ }
+ }
+}
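
Editor's note: the hunks above replace the old trim_bottom()/trim_top() scheme with a kernel-private table (kern_memmap) that efi_memmap_init() builds once and walk() consumes. The sketch below shows how a boot-time caller might fit those pieces together. It is illustrative only, and is placed after the diff so the patch itself still applies: example_count_wb_range() and example_setup_memory() are hypothetical names, the extern declarations simply restate the signatures introduced by this patch, and efi_freemem_callback_t is assumed to be the usual int (*)(u64 start, u64 end, void *arg) callback from the EFI headers of this era.

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>

/* Provided by the patched efi.c; repeated here to keep the sketch self-contained. */
extern void efi_memmap_init(unsigned long *s, unsigned long *e);
extern void efi_memmap_walk(efi_freemem_callback_t callback, void *arg);

static unsigned long example_wb_bytes;	/* hypothetical accumulator */

/* Invoked once per coalesced, page-aligned WB range (virtual addresses). */
static int __init
example_count_wb_range(u64 start, u64 end, void *arg)
{
	example_wb_bytes += end - start;
	return 0;	/* a negative return would stop the walk early */
}

static void __init
example_setup_memory(void)
{
	unsigned long kmd_start, kmd_end;

	/*
	 * Build kern_memmap first: it trims each EFI descriptor inward to
	 * granule boundaries, records WB ranges (and UC ranges for the
	 * uncached allocator), and honors the mem=/max_addr= limits.
	 * The returned [kmd_start, kmd_end) range holds the descriptor
	 * table itself and must be reserved by the platform setup code.
	 */
	efi_memmap_init(&kmd_start, &kmd_end);

	/* Walk only the cacheable (WB) ranges, e.g. to total them up. */
	efi_memmap_walk(example_count_wb_range, NULL);
	printk(KERN_INFO "WB memory available: %lu bytes\n", example_wb_bytes);
}

One design point worth noting: because walk() iterates the precomputed kern_memmap rather than the raw firmware map, every caller now sees the same granule-trimmed, mem=-limited view of memory, which is what makes the simpler efi_mem_attribute()/kern_mem_attribute() checks above safe for /dev/mem.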