X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fsetup.c;h=af425a8049fb5620596a0bc61bf005d399df3e32;hb=refs%2Fheads%2Fvserver;hp=96e4ca9b79ac8e8f753f6a8f9cd4490d3dcbedde;hpb=6a77f38946aaee1cd85eeec6cf4229b204c15071;p=linux-2.6.git diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 96e4ca9b7..af425a804 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -5,8 +5,6 @@ * * Nov 2001 Dave Jones * Forked from i386 setup code. - * - * $Id$ */ /* @@ -23,10 +21,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include @@ -35,11 +32,19 @@ #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include + #include #include #include @@ -57,56 +62,43 @@ #include #include #include +#include +#include /* * Machine setup.. */ -struct cpuinfo_x86 boot_cpu_data; +struct cpuinfo_x86 boot_cpu_data __read_mostly; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -EXPORT_SYMBOL_GPL(mmu_cr4_features); - -int acpi_disabled; -EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT -extern int __initdata acpi_ht; -extern acpi_interrupt_flags acpi_sci_flags; -int __initdata acpi_force = 0; -#endif - -int acpi_numa __initdata; - -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; unsigned long saved_video_mode; -#ifdef CONFIG_SWIOTLB -int swiotlb; -EXPORT_SYMBOL(swiotlb); -#endif +/* + * Early DMI memory + */ +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; /* * Setup options */ -struct drive_info_struct { char dummy[32]; } drive_info; struct screen_info screen_info; +EXPORT_SYMBOL(screen_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; }; struct edid_info edid_info; -struct e820map e820; - -unsigned char aux_device_present; +EXPORT_SYMBOL_GPL(edid_info); extern int root_mountflags; -extern char _text, _etext, _edata, _end; char command_line[COMMAND_LINE_SIZE]; @@ -131,9 +123,6 @@ struct resource standard_io_resources[] = { .flags = IORESOURCE_BUSY | IORESOURCE_IO } }; -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) struct resource data_resource = { @@ -180,9 +169,6 @@ static struct resource adapter_rom_resources[] = { .flags = IORESOURCE_ROM } }; -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - static struct resource video_rom_resource = { .name = "Video ROM", .start = 0xc0000, @@ -253,7 +239,8 @@ static void __init probe_roms(void) } /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; + start += 2048) { rom = isa_bus_to_virt(start); if (!romsignature(rom)) continue; @@ -273,191 +260,38 @@ static void __init probe_roms(void) } } -static __init void parse_cmdline_early (char ** cmdline_p) +#ifdef CONFIG_PROC_VMCORE +/* elfcorehdr= specifies the location of elf core header + * stored by the crashed kernel. This option will be passed + * by kexec loader to the capture kernel. + */ +static int __init setup_elfcorehdr(char *arg) { - char c = ' ', *to = command_line, *from = COMMAND_LINE; - int len = 0; - - /* Save unparsed command line copy for /proc/cmdline */ - memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; - - for (;;) { - if (c != ' ') - goto next_char; - -#ifdef CONFIG_SMP - /* - * If the BIOS enumerates physical processors before logical, - * maxcpus=N at enumeration-time can be used to disable HT. - */ - else if (!memcmp(from, "maxcpus=", 8)) { - extern unsigned int maxcpus; - - maxcpus = simple_strtoul(from + 8, NULL, 0); - } -#endif -#ifdef CONFIG_ACPI_BOOT - /* "acpi=off" disables both ACPI table parsing and interpreter init */ - if (!memcmp(from, "acpi=off", 8)) - disable_acpi(); - - if (!memcmp(from, "acpi=force", 10)) { - /* add later when we do DMI horrors: */ - acpi_force = 1; - acpi_disabled = 0; - } - - /* acpi=ht just means: do ACPI MADT parsing - at bootup, but don't enable the full ACPI interpreter */ - if (!memcmp(from, "acpi=ht", 7)) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - else if (!memcmp(from, "pci=noacpi", 10)) - acpi_disable_pci(); - else if (!memcmp(from, "acpi=noirq", 10)) - acpi_noirq_set(); - - else if (!memcmp(from, "acpi_sci=edge", 13)) - acpi_sci_flags.trigger = 1; - else if (!memcmp(from, "acpi_sci=level", 14)) - acpi_sci_flags.trigger = 3; - else if (!memcmp(from, "acpi_sci=high", 13)) - acpi_sci_flags.polarity = 1; - else if (!memcmp(from, "acpi_sci=low", 12)) - acpi_sci_flags.polarity = 3; - - /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { - acpi_strict = 1; - } -#endif - - if (!memcmp(from, "nolapic", 7) || - !memcmp(from, "disableapic", 11)) - disable_apic = 1; - - if (!memcmp(from, "noapic", 6)) - skip_ioapic_setup = 1; - - if (!memcmp(from, "apic", 4)) { - skip_ioapic_setup = 0; - ioapic_force = 1; - } - - if (!memcmp(from, "mem=", 4)) - parse_memopt(from+4, &from); - -#ifdef CONFIG_DISCONTIGMEM - if (!memcmp(from, "numa=", 5)) - numa_setup(from+5); -#endif - -#ifdef CONFIG_GART_IOMMU - if (!memcmp(from,"iommu=",6)) { - iommu_setup(from+6); - } -#endif - - if (!memcmp(from,"oops=panic", 10)) - panic_on_oops = 1; - - if (!memcmp(from, "noexec=", 7)) - nonx_setup(from + 7); - - next_char: - c = *(from++); - if (!c) - break; - if (COMMAND_LINE_SIZE <= ++len) - break; - *(to++) = c; - } - *to = '\0'; - *cmdline_p = command_line; + char *end; + if (!arg) + return -EINVAL; + elfcorehdr_addr = memparse(arg, &end); + return end > arg ? 0 : -EINVAL; } - -#ifndef CONFIG_DISCONTIGMEM -static void __init contig_initmem_init(void) -{ - unsigned long bootmap_size, bootmap; - bootmap_size = bootmem_bootmap_pages(end_pfn)<> PAGE_SHIFT, end_pfn); - e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT); - reserve_bootmem(bootmap, bootmap_size); -} +early_param("elfcorehdr", setup_elfcorehdr); #endif -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); - -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - In this case boot with "noreplacement". */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - - BUG_ON(a->replacementlen > a->instrlen); - __inline_memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - __inline_memcpy(a->instr + i, k8_nops[k], k); - } - } -} - -static int no_replacement __initdata = 0; - -void __init alternative_instructions(void) +#ifndef CONFIG_NUMA +static void __init +contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - if (no_replacement) - return; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - -static int __init noreplacement_setup(char *s) -{ - no_replacement = 1; - return 0; + unsigned long bootmap_size, bootmap; + + bootmap_size = bootmem_bootmap_pages(end_pfn)<> PAGE_SHIFT, end_pfn); + e820_register_active_regions(0, start_pfn, end_pfn); + free_bootmem_with_active_regions(0, end_pfn); + reserve_bootmem(bootmap, bootmap_size); } - -__setup("noreplacement", noreplacement_setup); +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -483,29 +317,37 @@ static inline void copy_edd(void) #endif #define EBDA_ADDR_POINTER 0x40E -static void __init reserve_ebda_region(void) + +unsigned __initdata ebda_addr; +unsigned __initdata ebda_size; + +static void discover_ebda(void) { - unsigned int addr; - /** + /* * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E */ - addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); - addr <<= 4; - if (addr) - reserve_bootmem_generic(addr, PAGE_SIZE); + ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; + ebda_addr <<= 4; + + ebda_size = *(unsigned short *)(unsigned long)ebda_addr; + + /* Round EBDA up to pages */ + if (ebda_size == 0) + ebda_size = 1; + ebda_size <<= 10; + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); + if (ebda_size > 64*1024) + ebda_size = 64*1024; } void __init setup_arch(char **cmdline_p) { - unsigned long low_mem_size; - unsigned long kernel_end; + printk(KERN_INFO "Command line: %s\n", saved_command_line); ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; edid_info = EDID_INFO; - aux_device_present = AUX_DEVICE_INFO; saved_video_mode = SAVED_VIDEO_MODE; bootloader_type = LOADER_TYPE; @@ -529,21 +371,34 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; - parse_cmdline_early(cmdline_p); - early_identify_cpu(&boot_cpu_data); + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + parse_early_param(); + + finish_e820_parsing(); + + e820_register_active_regions(0, 0, -1UL); /* * partially used pages are not usable - thus * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + num_physpages = end_pfn; check_efer(); + discover_ebda(); + init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); -#ifdef CONFIG_ACPI_BOOT + dmi_scan_machine(); + + zap_low_mappings(0); + +#ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). * Call this early for SRAT node setup. @@ -551,6 +406,14 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif + /* How many end-of-memory variables you have, grandma! */ + max_low_pfn = end_pfn; + max_pfn = end_pfn; + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; + + /* Remove active ranges so rediscovery with NUMA-awareness happens */ + remove_all_active_ranges(); + #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. @@ -558,10 +421,10 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA numa_initmem_init(0, end_pfn); #else - contig_initmem_init(); + contig_initmem_init(0, end_pfn); #endif /* Reserve direct mapping */ @@ -569,8 +432,8 @@ void __init setup_arch(char **cmdline_p) (table_end - table_start) << PAGE_SHIFT); /* reserve kernel */ - kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE); - reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY); + reserve_bootmem_generic(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); /* * reserve physical page 0 - it's a special BIOS page on many boxes, @@ -579,7 +442,8 @@ void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(0, PAGE_SIZE); /* reserve ebda region */ - reserve_ebda_region(); + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); #ifdef CONFIG_SMP /* @@ -599,18 +463,15 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_X86_LOCAL_APIC /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); -#endif #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; + initrd_start = INITRD_START + PAGE_OFFSET; initrd_end = initrd_start+INITRD_SIZE; } else { @@ -622,25 +483,40 @@ void __init setup_arch(char **cmdline_p) } } #endif +#ifdef CONFIG_KEXEC + if (crashk_res.start != crashk_res.end) { + reserve_bootmem_generic(crashk_res.start, + crashk_res.end - crashk_res.start + 1); + } +#endif + paging_init(); - check_ioapic(); +#ifdef CONFIG_PCI + early_quirks(); +#endif -#ifdef CONFIG_ACPI_BOOT + /* + * set this early, so we dont allocate cpu0 + * if MADT list doesnt list BSP first + * mpparse.c/MP_processor_info() allocates logical cpu numbers. + */ + cpu_set(0, cpu_present_map); +#ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. */ acpi_boot_init(); #endif -#ifdef CONFIG_X86_LOCAL_APIC + init_cpu_to_node(); + /* * get boot-time SMP configuration: */ if (smp_found_config) get_smp_config(); init_apic_mappings(); -#endif /* * Request address space for all standard RAM and ROM resources @@ -648,27 +524,18 @@ void __init setup_arch(char **cmdline_p) */ probe_roms(); e820_reserve_resources(); + e820_mark_nosave_regions(); request_resource(&iomem_resource, &video_ram_resource); { unsigned i; /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < STANDARD_IO_RESOURCES; i++) + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) request_resource(&ioport_resource, &standard_io_resources[i]); } - /* Will likely break when you have unassigned resources with more - than 4GB memory and bridges that don't support more than 4GB. - Doing it properly would require to use pci_alloc_consistent - in this case. */ - low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff; - if (low_mem_size > pci_mem_start) - pci_mem_start = low_mem_size; - -#ifdef CONFIG_GART_IOMMU - iommu_hole_init(); -#endif + e820_setup_gap(); #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -679,11 +546,11 @@ void __init setup_arch(char **cmdline_p) #endif } -static int __init get_model_name(struct cpuinfo_x86 *c) +static int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; - if (c->x86_cpuid_level < 0x80000004) + if (c->extended_cpuid_level < 0x80000004) return 0; v = (unsigned int *) c->x86_model_id; @@ -695,11 +562,11 @@ static int __init get_model_name(struct cpuinfo_x86 *c) } -static void __init display_cacheinfo(struct cpuinfo_x86 *c) +static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, eax, ebx, ecx, edx; - n = c->x86_cpuid_level; + n = c->extended_cpuid_level; if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); @@ -729,26 +596,120 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_NUMA +static int nearby_node(int apicid) +{ + int i; + for (i = apicid - 1; i >= 0; i--) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif -static int __init init_amd(struct cpuinfo_x86 *c) +/* + * On a AMD dual core setup the lower bits of the APIC id distingush the cores. + * Assumes number of cores is a power of two. + */ +static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { - int r; - int level; +#ifdef CONFIG_SMP + unsigned bits; +#ifdef CONFIG_NUMA + int cpu = smp_processor_id(); + int node = 0; + unsigned apicid = hard_smp_processor_id(); +#endif + unsigned ecx = cpuid_ecx(0x80000008); + + c->x86_max_cores = (ecx & 0xff) + 1; + + /* CPU telling us the core id bits shift? */ + bits = (ecx >> 12) & 0xF; + + /* Otherwise recompute */ + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + + /* Low order bits define the core id (index of core in socket) */ + c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1); + /* Convert the APIC ID into the socket ID */ + c->phys_proc_id = phys_pkg_id(bits); + #ifdef CONFIG_NUMA - int cpu; + node = c->phys_proc_id; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); +#endif +#endif +} + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + unsigned level; + +#ifdef CONFIG_SMP + unsigned long value; + + /* + * Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K8_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K8_HWCR, value); + } #endif /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ clear_bit(0*32+31, &c->x86_capability); - /* C-stepping K8? */ + /* On C+ stepping K8 rep microcode works well for copy/memset */ level = cpuid_eax(1); - if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) - set_bit(X86_FEATURE_K8_C, &c->x86_capability); + if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); - r = get_model_name(c); - if (!r) { + level = get_model_name(c); + if (!level) { switch (c->x86) { case 15: /* Should distinguish Models here, but this is only @@ -759,109 +720,155 @@ static int __init init_amd(struct cpuinfo_x86 *c) } display_cacheinfo(c); - if (c->x86_cpuid_level >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); -#ifdef CONFIG_NUMA - /* On a dual core setup the lower bits of apic id - distingush the cores. Fix up the CPU<->node mappings - here based on that. - Assumes number of cores is a power of two. - When using SRAT use mapping from SRAT. */ - cpu = c->x86_apicid; - if (acpi_numa <= 0 && c->x86_num_cores > 1) { - cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1); - if (!node_online(cpu_to_node[cpu])) - cpu_to_node[cpu] = first_node(node_online_map); - } - printk(KERN_INFO "CPU %d(%d) -> Node %d\n", - cpu, c->x86_num_cores, cpu_to_node[cpu]); -#endif - } + /* Multi core CPU? */ + if (c->extended_cpuid_level >= 0x80000008) + amd_detect_cmp(c); + + /* Fix cpuid4 emulation for more */ + num_cache_leaves = 3; - return r; + /* RDTSC can be speculated around */ + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); } -static void __init detect_ht(struct cpuinfo_x86 *c) +static void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; - int index_lsb, index_msb, tmp; - int cpu = smp_processor_id(); - + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT)) return; + if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) + goto out; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; - + if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - index_lsb = 0; - index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. - */ + } else if (smp_num_siblings > 1 ) { + if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 1) == 0) { - tmp >>=1 ; - index_lsb++; - } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (index_lsb != index_msb ) - index_msb++; - phys_proc_id[cpu] = phys_pkg_id(index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id(index_msb); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + } +out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); + printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); } + #endif } -static void __init sched_cmp_hack(struct cpuinfo_x86 *c) +/* + * find out the number of processor cores on the die + */ +static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP - /* AMD dual core looks like HT but isn't really. Hide it from the - scheduler. This works around problems with the domain scheduler. - Also probably gives slightly better scheduling and disables - SMT nice which is harmful on dual core. - TBD tune the domain scheduler for dual core. */ - if (c->x86_vendor == X86_VENDOR_AMD && cpu_has(c, X86_FEATURE_CMP_LEGACY)) - smp_num_siblings = 1; + unsigned int eax, t; + + if (c->cpuid_level < 4) + return 1; + + cpuid_count(4, 0, &eax, &t, &t, &t); + + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void srat_detect_node(void) +{ +#ifdef CONFIG_NUMA + unsigned node; + int cpu = smp_processor_id(); + int apicid = hard_smp_processor_id(); + + /* Don't do the funky fallback heuristics the AMD version employs + for now. */ + node = apicid_to_node[apicid]; + if (node == NUMA_NO_NODE) + node = first_node(node_online_map); + numa_set_node(cpu, node); + + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); #endif } - -static void __init init_intel(struct cpuinfo_x86 *c) + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned n; init_intel_cacheinfo(c); - n = c->x86_cpuid_level; + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); + } + + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; + /* CPUID workaround for Intel 0F34 CPU */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 0xF && c->x86_model == 0x3 && + c->x86_mask == 0x4) + c->x86_phys_bits = 36; } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + if (c->x86 == 6) + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + if (c->x86 == 15) + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + else + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + c->x86_max_cores = intel_num_cpu_cores(c); + + srat_detect_node(); } -void __init get_cpu_vendor(struct cpuinfo_x86 *c) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -882,7 +889,7 @@ struct cpu_model_info { /* Do some early cpuid on the boot CPU to get some parameter that are needed before check_bugs. Everything advanced is in identify_cpu below. */ -void __init early_identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) { u32 tfms; @@ -894,9 +901,8 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_clflush_size = 64; c->x86_cache_alignment = c->x86_clflush_size; - c->x86_num_cores = 1; - c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data; - c->x86_cpuid_level = 0; + c->x86_max_cores = 1; + c->extended_cpuid_level = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); /* Get vendor name */ @@ -918,23 +924,26 @@ void __init early_identify_cpu(struct cpuinfo_x86 *c) c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } if (c->x86_capability[0] & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; - c->x86_apicid = misc >> 24; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; } + +#ifdef CONFIG_SMP + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; +#endif } /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; u32 xlvl; @@ -943,11 +952,11 @@ void __init identify_cpu(struct cpuinfo_x86 *c) /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); - c->x86_cpuid_level = xlvl; + c->extended_cpuid_level = xlvl; if ((xlvl & 0xffff0000) == 0x80000000) { if (xlvl >= 0x80000001) { c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[5] = cpuid_ecx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); } if (xlvl >= 0x80000004) get_model_name(c); /* Default name */ @@ -961,6 +970,8 @@ void __init identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are @@ -988,7 +999,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c) select_idle_routine(c); detect_ht(c); - sched_cmp_hack(c); /* * On SMP, boot_cpu_data holds the common feature set between @@ -1005,14 +1015,17 @@ void __init identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); #ifdef CONFIG_NUMA - if (c != &boot_cpu_data) - numa_add_cpu(c - cpu_data); + numa_add_cpu(smp_processor_id()); #endif } -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) printk("%s", c->x86_model_id); @@ -1047,10 +1060,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL, /* AMD-defined */ - "pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", + NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", /* Transmeta-defined */ "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, @@ -1059,28 +1072,39 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL, + "constant_tsc", NULL, NULL, + "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; static char *x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ "ttp", /* thermal trip */ + "tm", + "stc", + NULL, + /* nothing */ /* constant_tsc - moved to flags */ }; @@ -1106,8 +1130,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ @@ -1115,8 +1142,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_SMP - seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]); - seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings); + if (smp_num_siblings * c->x86_max_cores > 1) { + int cpu = c - cpu_data; + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } #endif seq_printf(m, @@ -1130,8 +1162,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { int i; for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); } @@ -1152,18 +1183,17 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned i; for (i = 0; i < 32; i++) if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags)) - seq_printf(m, " %s", x86_power_flags[i]); + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); else seq_printf(m, " [%d]", i); } } - seq_printf(m, "\n"); - - if (c->x86_num_cores > 1) - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); - seq_printf(m, "\n\n"); + seq_printf(m, "\n\n"); return 0; } @@ -1189,3 +1219,23 @@ struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) +#include +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); +#endif