X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fapic.c;h=fdbf536c8adcf4ddaa90d72dbd8e2839bc17487d;hb=refs%2Fheads%2Fvserver;hp=6fb84e2c7cec08d867c7070a65a58035149174d9;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6fb84e2c7..fdbf536c8 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -14,11 +14,9 @@ * Mikael Pettersson : PM converted to driver model. */ -#include #include #include -#include #include #include #include @@ -26,44 +24,82 @@ #include #include #include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include + +int apic_mapped; +int apic_verbosity; +int apic_runs_main_timer; +int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; -/* Using APIC to generate smp_local_timer_interrupt? */ -int using_apic_timer = 0; +static struct resource *ioapic_resources; +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; +/* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_interrupt_broadcast_ipi_mask; + +/* Using APIC to generate smp_local_timer_interrupt? */ +int using_apic_timer __read_mostly = 0; static void apic_pm_activate(void); void enable_NMI_through_LVT0 (void * dummy) { - unsigned int v, ver; + unsigned int v; - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); v = APIC_DM_NMI; /* unmask and set to NMI */ - apic_write_around(APIC_LVT0, v); + apic_write(APIC_LVT0, v); } int get_maxlvt(void) { - unsigned int v, ver, maxlvt; + unsigned int v, maxlvt; v = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(v); maxlvt = GET_APIC_MAXLVT(v); return maxlvt; } +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But don't ack when the APIC is disabled. -AK + */ + if (!disable_apic) + ack_APIC_irq(); +} + void clear_local_APIC(void) { int maxlvt; @@ -72,76 +108,76 @@ void clear_local_APIC(void) maxlvt = get_maxlvt(); /* - * Masking an LVT entry on a P6 can trigger a local APIC error + * Masking an LVT entry can trigger a local APIC error * if the vector is zero. Mask LVTERR first to prevent this. */ if (maxlvt >= 3) { v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); } /* * Careful: we have to set masks only first to deassert * any level-triggered sources. */ v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } /* * Clean APIC state for other OSs: */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); + apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); - v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ - if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); } -void __init connect_bsp_APIC(void) +void disconnect_bsp_APIC(int virt_wire_setup) { - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. - * connect BSP's local APIC to INT and NMI lines. - */ - printk(KERN_INFO "leaving PIC mode, enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -} + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; -void disconnect_bsp_APIC(void) -{ - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect - * only on certain older boards). Note that APIC - * interrupts, including IPIs, won't work beyond - * this point! The only exception are INIT IPIs. - */ - printk(KERN_INFO "disabling APIC mode, entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* For LVT0 make it edge triggered, active high, external and enabled */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); } + + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); } void disable_local_APIC(void) @@ -156,7 +192,7 @@ void disable_local_APIC(void) */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); } /* @@ -172,10 +208,10 @@ int __init verify_local_APIC(void) * The version register is read-only in a real APIC. */ reg0 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); reg1 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); /* * The two version reads above should print the same @@ -199,10 +235,10 @@ int __init verify_local_APIC(void) * The ID register is read/write in a real APIC. */ reg0 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); reg1 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) return 0; @@ -213,33 +249,36 @@ int __init verify_local_APIC(void) * compatibility mode, but most boxes are anymore. */ reg0 = apic_read(APIC_LVT0); - Dprintk("Getting LVT0: %x\n", reg0); + apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); - Dprintk("Getting LVT1: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); return 1; } void __init sync_Arb_IDs(void) { + /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (ver >= 0x14) /* P4 or higher */ + return; + /* * Wait for idle. */ apic_wait_icr_idle(); - Dprintk("Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG + apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } -extern void __error_in_apic_c (void); - /* * An initial setup of the virtual wire mode. */ void __init init_bsp_APIC(void) { - unsigned int value, ver; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -249,7 +288,6 @@ void __init init_bsp_APIC(void) return; value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); /* * Do not trust the local APIC being empty at bootup. @@ -264,42 +302,30 @@ void __init init_bsp_APIC(void) value |= APIC_SPIV_APIC_ENABLED; value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); /* * Set up the virtual wire mode. */ - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); + apic_write(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __cpuinit setup_local_APIC (void) { - unsigned int value, ver, maxlvt; - - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } + unsigned int value, maxlvt; + int i, j; value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); - if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) - __error_in_apic_c(); + BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!clustered_apic_mode && - !physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map)) + if (!apic_id_registered()) BUG(); /* @@ -307,23 +333,7 @@ void __init setup_local_APIC (void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - - if (!clustered_apic_mode) { - /* - * In clustered apic mode, the firmware does this for us - * Put the APIC into flat delivery mode. - * Must be "all ones" explicitly for 82489DX. - */ - apic_write_around(APIC_DFR, 0xffffffff); - - /* - * Set up the logical destination ID. - */ - value = apic_read(APIC_LDR); - value &= ~APIC_LDR_MASK; - value |= (1<<(smp_processor_id()+24)); - apic_write_around(APIC_LDR, value); - } + init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this @@ -331,7 +341,26 @@ void __init setup_local_APIC (void) */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); + apic_write(APIC_TASKPRI, value); + + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - Dprintk("ESR value before enabling vector: %08x\n", value); - + oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors - apic_write_around(APIC_LVTERR, value); + apic_write(APIC_LVTERR, value); /* * spec says clear errors after enabling vector. */ if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - Dprintk("ESR value after enabling vector: %08x\n", value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk("Leaving ESR disabled.\n"); - else - printk("No ESR for 82489DX.\n"); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, + "ESR value after enabling vector: %08x, after %08x\n", + oldvalue, value); } nmi_watchdog_default(); - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); + setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } @@ -464,28 +455,34 @@ static struct { unsigned int apic_thmr; } apic_pm_state; -static int lapic_suspend(struct sys_device *dev, u32 state) +static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; + maxlvt = get_maxlvt(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); apic_pm_state.apic_spiv = apic_read(APIC_SPIV); apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + local_irq_save(flags); disable_local_APIC(); local_irq_restore(flags); return 0; @@ -495,17 +492,17 @@ static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; + int maxlvt; if (!apic_pm_state.active) return 0; - /* XXX: Pavel needs this for S3 resume, but can't explain why */ - set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); + maxlvt = get_maxlvt(); local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -515,8 +512,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +#ifdef CONFIG_X86_MCE_INTEL + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); apic_write(APIC_TMICT, apic_pm_state.apic_tmict); @@ -540,7 +541,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -564,6 +565,27 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +static int __init apic_set_verbosity(char *str) +{ + if (str == NULL) { + skip_ioapic_setup = 0; + ioapic_force = 1; + return 0; + } + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", str); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -583,6 +605,64 @@ static int __init detect_init_APIC (void) return 0; } +#ifdef CONFIG_X86_IO_APIC +static struct resource * __init ioapic_setup_resources(void) +{ +#define IOAPIC_RESOURCE_NAME_SIZE 11 + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + memset(mem, 0, n); + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} + +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk("IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif + void __init init_apic_mappings(void) { unsigned long apic_phys; @@ -599,20 +679,26 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - Dprintk("mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + apic_mapped = 1; + apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_id == -1U) - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); -#ifdef CONFIG_X86_IO_APIC { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; @@ -621,12 +707,17 @@ void __init init_apic_mappings(void) ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - Dprintk("mapped IOAPIC to %016lx (%016lx)\n", + apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; + + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } } } -#endif } /* @@ -642,25 +733,27 @@ void __init init_apic_mappings(void) #define APIC_DIVISOR 16 -void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks) { - unsigned int lvtt_value, tmp_value, ver; + unsigned int lvtt_value, tmp_value; + int cpu = smp_processor_id(); - ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - if (!APIC_INTEGRATED(ver)) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - apic_write_around(APIC_LVTT, lvtt_value); + + if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value |= APIC_LVT_MASKED; + + apic_write(APIC_LVTT, lvtt_value); /* * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value + apic_write(APIC_TDCR, (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + apic_write(APIC_TMICT, clocks/APIC_DIVISOR); } static void setup_APIC_timer(unsigned int clocks) @@ -669,14 +762,8 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); - /* For some reasons this doesn't work on Simics, so fake it for now */ - if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { - __setup_APIC_LVTT(clocks); - return; - } - /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; @@ -687,16 +774,24 @@ static void setup_APIC_timer(unsigned int clocks) outb_p(0x00, 0x43); c2 = inb_p(0x40); c2 |= inb_p(0x40) << 8; - do { + do { c1 = c2; outb_p(0x00, 0x43); c2 = inb_p(0x40); c2 |= inb_p(0x40) << 8; } while (c2 - c1 < 300); } - __setup_APIC_LVTT(clocks); - + /* Turn off PIT interrupt if we use APIC timer as main timer. + Only works with the PM timer right now + TBD fix it for HPET too. */ + if (vxtime.mode == VXTIME_PMTMR && + smp_processor_id() == boot_cpu_id && + apic_runs_main_timer == 1 && + !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { + stop_timer_interrupt(); + apic_runs_main_timer++; + } local_irq_restore(flags); } @@ -715,7 +810,7 @@ static void setup_APIC_timer(unsigned int clocks) #define TICK_COUNT 100000000 -int __init calibrate_APIC_clock(void) +static int __init calibrate_APIC_clock(void) { int apic, apic_start, tsc, tsc_start; int result; @@ -727,14 +822,27 @@ int __init calibrate_APIC_clock(void) __setup_APIC_LVTT(1000000000); apic_start = apic_read(APIC_TMCCT); - rdtscl(tsc_start); - - do { +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ apic = apic_read(APIC_TMCCT); - rdtscl(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && (apic - apic_start) < TICK_COUNT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscl(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscl(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic - apic_start) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * cpu_khz / + (tsc - tsc_start); + } + printk("result %d\n", result); - result = (apic_start - apic) * 1000L * cpu_khz / (tsc - tsc_start); printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); @@ -765,59 +873,94 @@ void __init setup_boot_APIC_clock (void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); local_irq_enable(); } -void __init disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + /* + * When an illegal vector value (0-15) is written to an LVT + * entry and delivery mode is Fixed, the APIC may signal an + * illegal vector error, with out regard to whether the mask + * bit is set or whether an interrupt is actually seen on input. + * + * Boot sequence might call this function when the LVTT has + * '0' vector value. So make sure vector field is set to + * valid value. + */ + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); } } void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { unsigned long v; v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED); } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + disable_APIC_timer(); + cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; +void smp_send_timer_broadcast_ipi(void) +{ + cpumask_t mask; - return 0; + cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); + if (!cpus_empty(mask)) { + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + } +} + +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); + enable_APIC_timer(); + } +} +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, + unsigned char msg_type, unsigned char mask) +{ + unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + apic_write(reg, v); } #undef APIC_DIVISOR @@ -832,40 +975,19 @@ int setup_profiling_timer(unsigned int multiplier) * value into /proc/profile. */ -void smp_local_timer_interrupt(struct pt_regs *regs) +void smp_local_timer_interrupt(void) { - int cpu = smp_processor_id(); - - x86_do_profile(regs); - - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT(calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - + profile_tick(CPU_PROFILING); #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif - } - + if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id) + main_timer_handler(); /* * We take the 'long' return path, and there every subsystem * grabs the appropriate locks (kernel lock/ irq lock). * - * we might want to decouple profiling from the 'long path', + * We might want to decouple profiling from the 'long path', * and do the profiling totally in assembly. * * Currently this isn't too much of an issue (performance wise), @@ -883,6 +1005,8 @@ void smp_local_timer_interrupt(struct pt_regs *regs) */ void smp_apic_timer_interrupt(struct pt_regs *regs) { + struct pt_regs *old_regs = set_irq_regs(regs); + /* * the NMI deadlock-detector uses this. */ @@ -898,9 +1022,58 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ + exit_idle(); irq_enter(); - smp_local_timer_interrupt(regs); + smp_local_timer_interrupt(); irq_exit(); + set_irq_regs(old_regs); +} + +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int apic_is_clustered_box(void) +{ + int i, clusters, zeros; + unsigned id; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + id = bios_cpu_apicid[i]; + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since + * clusters are allocated sequentially, count zeros only if they are + * bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); } /* @@ -909,6 +1082,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) asmlinkage void smp_spurious_interrupt(void) { unsigned int v; + exit_idle(); irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -944,6 +1118,7 @@ asmlinkage void smp_error_interrupt(void) { unsigned int v, v1; + exit_idle(); irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); @@ -962,7 +1137,8 @@ asmlinkage void smp_error_interrupt(void) 6: Received illegal vector 7: Illegal register address */ - printk (KERN_INFO "APIC error on CPU%d: %02x(%02x)\n", + if (num_online_cpus() > 1) + printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } @@ -987,47 +1163,65 @@ int __init APIC_init_uniprocessor (void) verify_local_APIC(); - connect_bsp_APIC(); - - phys_cpu_present_map = physid_mask_of_physid(0); - apic_write_around(APIC_ID, boot_cpu_id); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); setup_local_APIC(); -#ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + setup_IO_APIC(); else nr_ioapics = 0; -#endif setup_boot_APIC_clock(); - + check_nmi_watchdog(); return 0; } static __init int setup_disableapic(char *str) { disable_apic = 1; + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); return 0; -} +} +early_param("disableapic", setup_disableapic); +/* same as disableapic, for compatibility */ static __init int setup_nolapic(char *str) { - disable_apic = 1; - return 0; + return setup_disableapic(str); } +early_param("nolapic", setup_nolapic); static __init int setup_noapictimer(char *str) { + if (str[0] != ' ' && str[0] != 0) + return 0; disable_apic_timer = 1; - return 0; + return 1; } -/* dummy parsing: see setup.c */ +static __init int setup_apicmaintimer(char *str) +{ + apic_runs_main_timer = 1; + nohpet = 1; + return 1; +} +__setup("apicmaintimer", setup_apicmaintimer); -__setup("disableapic", setup_disableapic); -__setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */ +static __init int setup_noapicmaintimer(char *str) +{ + apic_runs_main_timer = -1; + return 1; +} +__setup("noapicmaintimer", setup_noapicmaintimer); + +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return setup_apicmaintimer(NULL); +} +__setup("apicpmtimer", setup_apicpmtimer); __setup("noapictimer", setup_noapictimer); -/* no "lapic" flag - we only use the lapic when the BIOS tells us so. */