X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fx86_64%2Fkernel%2Fapic.c;h=357c8fd2f0fc2f6002c7362894fcf67f04eb6717;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=6fb84e2c7cec08d867c7070a65a58035149174d9;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 6fb84e2c7..357c8fd2f 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -26,44 +25,73 @@ #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include + +int apic_verbosity; +int apic_runs_main_timer; +int apic_calibrate_pmtmr __initdata; int disable_apic_timer __initdata; +/* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_interrupt_broadcast_ipi_mask; + /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static void apic_pm_activate(void); void enable_NMI_through_LVT0 (void * dummy) { - unsigned int v, ver; + unsigned int v; - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); v = APIC_DM_NMI; /* unmask and set to NMI */ - apic_write_around(APIC_LVT0, v); + apic_write(APIC_LVT0, v); } int get_maxlvt(void) { - unsigned int v, ver, maxlvt; + unsigned int v, maxlvt; v = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(v); maxlvt = GET_APIC_MAXLVT(v); return maxlvt; } +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But don't ack when the APIC is disabled. -AK + */ + if (!disable_apic) + ack_APIC_irq(); +} + void clear_local_APIC(void) { int maxlvt; @@ -77,39 +105,36 @@ void clear_local_APIC(void) */ if (maxlvt >= 3) { v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); } /* * Careful: we have to set masks only first to deassert * any level-triggered sources. */ v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); } /* * Clean APIC state for other OSs: */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); + apic_write(APIC_LVTERR, APIC_LVT_MASKED); if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); + apic_write(APIC_LVTPC, APIC_LVT_MASKED); v = GET_APIC_VERSION(apic_read(APIC_LVR)); - if (APIC_INTEGRATED(v)) { /* !82489DX */ - if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); } void __init connect_bsp_APIC(void) @@ -123,13 +148,13 @@ void __init connect_bsp_APIC(void) * PIC mode, enable APIC mode in the IMCR, i.e. * connect BSP's local APIC to INT and NMI lines. */ - printk(KERN_INFO "leaving PIC mode, enabling APIC mode.\n"); + apic_printk(APIC_VERBOSE, "leaving PIC mode, enabling APIC mode.\n"); outb(0x70, 0x22); outb(0x01, 0x23); } } -void disconnect_bsp_APIC(void) +void disconnect_bsp_APIC(int virt_wire_setup) { if (pic_mode) { /* @@ -138,10 +163,46 @@ void disconnect_bsp_APIC(void) * interrupts, including IPIs, won't work beyond * this point! The only exception are INIT IPIs. */ - printk(KERN_INFO "disabling APIC mode, entering PIC mode.\n"); + apic_printk(APIC_QUIET, "disabling APIC mode, entering PIC mode.\n"); outb(0x70, 0x22); outb(0x00, 0x23); } + else { + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* For LVT0 make it edge triggered, active high, external and enabled */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } + else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } + + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value &= ~( + APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); + } } void disable_local_APIC(void) @@ -156,7 +217,7 @@ void disable_local_APIC(void) */ value = apic_read(APIC_SPIV); value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); } /* @@ -172,10 +233,10 @@ int __init verify_local_APIC(void) * The version register is read-only in a real APIC. */ reg0 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); reg1 = apic_read(APIC_LVR); - Dprintk("Getting VERSION: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); /* * The two version reads above should print the same @@ -199,10 +260,10 @@ int __init verify_local_APIC(void) * The ID register is read/write in a real APIC. */ reg0 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg0); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); reg1 = apic_read(APIC_ID); - Dprintk("Getting ID: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) return 0; @@ -213,22 +274,27 @@ int __init verify_local_APIC(void) * compatibility mode, but most boxes are anymore. */ reg0 = apic_read(APIC_LVT0); - Dprintk("Getting LVT0: %x\n", reg0); + apic_printk(APIC_DEBUG,"Getting LVT0: %x\n", reg0); reg1 = apic_read(APIC_LVT1); - Dprintk("Getting LVT1: %x\n", reg1); + apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); return 1; } void __init sync_Arb_IDs(void) { + /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (ver >= 0x14) /* P4 or higher */ + return; + /* * Wait for idle. */ apic_wait_icr_idle(); - Dprintk("Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG + apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); + apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); } @@ -239,7 +305,7 @@ extern void __error_in_apic_c (void); */ void __init init_bsp_APIC(void) { - unsigned int value, ver; + unsigned int value; /* * Don't do the setup now if we have a SMP BIOS as the @@ -249,7 +315,6 @@ void __init init_bsp_APIC(void) return; value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); /* * Do not trust the local APIC being empty at bootup. @@ -264,32 +329,22 @@ void __init init_bsp_APIC(void) value |= APIC_SPIV_APIC_ENABLED; value |= APIC_SPIV_FOCUS_DISABLED; value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); + apic_write(APIC_SPIV, value); /* * Set up the virtual wire mode. */ - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); + apic_write(APIC_LVT0, APIC_DM_EXTINT); value = APIC_DM_NMI; - if (!APIC_INTEGRATED(ver)) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); + apic_write(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __cpuinit setup_local_APIC (void) { - unsigned int value, ver, maxlvt; - - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } + unsigned int value, maxlvt; + int i, j; value = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(value); if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) __error_in_apic_c(); @@ -298,8 +353,7 @@ void __init setup_local_APIC (void) * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!clustered_apic_mode && - !physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map)) + if (!apic_id_registered()) BUG(); /* @@ -307,23 +361,7 @@ void __init setup_local_APIC (void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - - if (!clustered_apic_mode) { - /* - * In clustered apic mode, the firmware does this for us - * Put the APIC into flat delivery mode. - * Must be "all ones" explicitly for 82489DX. - */ - apic_write_around(APIC_DFR, 0xffffffff); - - /* - * Set up the logical destination ID. - */ - value = apic_read(APIC_LDR); - value &= ~APIC_LDR_MASK; - value |= (1<<(smp_processor_id()+24)); - apic_write_around(APIC_LDR, value); - } + init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this @@ -331,7 +369,26 @@ void __init setup_local_APIC (void) */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); + apic_write(APIC_TASKPRI, value); + + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - Dprintk("ESR value before enabling vector: %08x\n", value); - + oldvalue = apic_read(APIC_ESR); value = ERROR_APIC_VECTOR; // enables sending errors - apic_write_around(APIC_LVTERR, value); + apic_write(APIC_LVTERR, value); /* * spec says clear errors after enabling vector. */ if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - Dprintk("ESR value after enabling vector: %08x\n", value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk("Leaving ESR disabled.\n"); - else - printk("No ESR for 82489DX.\n"); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, + "ESR value after enabling vector: %08x, after %08x\n", + oldvalue, value); } nmi_watchdog_default(); @@ -464,7 +508,7 @@ static struct { unsigned int apic_thmr; } apic_pm_state; -static int lapic_suspend(struct sys_device *dev, u32 state) +static int lapic_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; @@ -499,13 +543,10 @@ static int lapic_resume(struct sys_device *dev) if (!apic_pm_state.active) return 0; - /* XXX: Pavel needs this for S3 resume, but can't explain why */ - set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); @@ -540,7 +581,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __init apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } @@ -564,6 +605,21 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +static int __init apic_set_verbosity(char *str) +{ + if (strcmp("debug", str) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", str) == 0) + apic_verbosity = APIC_VERBOSE; + else + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug", str); + + return 1; +} + +__setup("apic=", apic_set_verbosity); + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -599,14 +655,13 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - Dprintk("mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); + apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys); /* * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_id == -1U) - boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { @@ -621,7 +676,7 @@ void __init init_apic_mappings(void) ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - Dprintk("mapped IOAPIC to %016lx (%016lx)\n", + apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; } @@ -642,25 +697,28 @@ void __init init_apic_mappings(void) #define APIC_DIVISOR 16 -void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - if (!APIC_INTEGRATED(ver)) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - apic_write_around(APIC_LVTT, lvtt_value); + + if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value |= APIC_LVT_MASKED; + + apic_write(APIC_LVTT, lvtt_value); /* * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value + apic_write(APIC_TDCR, (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + apic_write(APIC_TMICT, clocks/APIC_DIVISOR); } static void setup_APIC_timer(unsigned int clocks) @@ -669,14 +727,8 @@ static void setup_APIC_timer(unsigned int clocks) local_irq_save(flags); - /* For some reasons this doesn't work on Simics, so fake it for now */ - if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { - __setup_APIC_LVTT(clocks); - return; - } - /* wait for irq slice */ - if (vxtime.hpet_address) { + if (vxtime.hpet_address && hpet_use_timer) { int trigger = hpet_readl(HPET_T0_CMP); while (hpet_readl(HPET_COUNTER) >= trigger) /* do nothing */ ; @@ -687,16 +739,24 @@ static void setup_APIC_timer(unsigned int clocks) outb_p(0x00, 0x43); c2 = inb_p(0x40); c2 |= inb_p(0x40) << 8; - do { + do { c1 = c2; outb_p(0x00, 0x43); c2 = inb_p(0x40); c2 |= inb_p(0x40) << 8; } while (c2 - c1 < 300); } - __setup_APIC_LVTT(clocks); - + /* Turn off PIT interrupt if we use APIC timer as main timer. + Only works with the PM timer right now + TBD fix it for HPET too. */ + if (vxtime.mode == VXTIME_PMTMR && + smp_processor_id() == boot_cpu_id && + apic_runs_main_timer == 1 && + !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { + stop_timer_interrupt(); + apic_runs_main_timer++; + } local_irq_restore(flags); } @@ -715,7 +775,7 @@ static void setup_APIC_timer(unsigned int clocks) #define TICK_COUNT 100000000 -int __init calibrate_APIC_clock(void) +static int __init calibrate_APIC_clock(void) { int apic, apic_start, tsc, tsc_start; int result; @@ -727,14 +787,27 @@ int __init calibrate_APIC_clock(void) __setup_APIC_LVTT(1000000000); apic_start = apic_read(APIC_TMCCT); - rdtscl(tsc_start); - - do { +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ apic = apic_read(APIC_TMCCT); - rdtscl(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && (apic - apic_start) < TICK_COUNT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscl(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscl(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic - apic_start) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * cpu_khz / + (tsc - tsc_start); + } + printk("result %d\n", result); - result = (apic_start - apic) * 1000L * cpu_khz / (tsc - tsc_start); printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", result / 1000 / 1000, result / 1000 % 1000); @@ -765,60 +838,86 @@ void __init setup_boot_APIC_clock (void) local_irq_enable(); } -void __init setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); local_irq_enable(); } -void __init disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); } } void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { unsigned long v; v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED); } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + disable_APIC_timer(); + cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - return 0; +void smp_send_timer_broadcast_ipi(void) +{ + cpumask_t mask; + + cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); + if (!cpus_empty(mask)) { + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + } +} + +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); + enable_APIC_timer(); + } +} +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +#ifdef CONFIG_X86_MCE_AMD +void setup_threshold_lvt(unsigned long lvt_off) +{ + unsigned int v = 0; + unsigned long reg = (lvt_off << 4) + 0x500; + v |= THRESHOLD_APIC_VECTOR; + apic_write(reg, v); } +#endif /* CONFIG_X86_MCE_AMD */ #undef APIC_DIVISOR @@ -834,33 +933,12 @@ int setup_profiling_timer(unsigned int multiplier) void smp_local_timer_interrupt(struct pt_regs *regs) { - int cpu = smp_processor_id(); - - x86_do_profile(regs); - - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT(calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - + profile_tick(CPU_PROFILING, regs); #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(regs)); #endif - } - + if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id) + main_timer_handler(regs); /* * We take the 'long' return path, and there every subsystem * grabs the appropriate locks (kernel lock/ irq lock). @@ -898,17 +976,67 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ + exit_idle(); irq_enter(); smp_local_timer_interrupt(regs); irq_exit(); } +/* + * oem_force_hpet_timer -- force HPET mode for some boxes. + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int oem_force_hpet_timer(void) +{ + int i, clusters, zeros; + unsigned id; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + id = bios_cpu_apicid[i]; + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * bios_cpu_apicid entries, thus causing zeroes in the bitmap. Since + * clusters are allocated sequentially, count zeros only if they are + * bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* + * If clusters > 2, then should be multi-chassis. Return 1 for HPET. + * Else return 0 to use TSC. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); +} + /* * This interrupt should _never_ happen with our APIC/SMP architecture */ asmlinkage void smp_spurious_interrupt(void) { unsigned int v; + exit_idle(); irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -944,6 +1072,7 @@ asmlinkage void smp_error_interrupt(void) { unsigned int v, v1; + exit_idle(); irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); @@ -962,7 +1091,8 @@ asmlinkage void smp_error_interrupt(void) 6: Received illegal vector 7: Illegal register address */ - printk (KERN_INFO "APIC error on CPU%d: %02x(%02x)\n", + if (num_online_cpus() > 1) + printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } @@ -989,8 +1119,8 @@ int __init APIC_init_uniprocessor (void) connect_bsp_APIC(); - phys_cpu_present_map = physid_mask_of_physid(0); - apic_write_around(APIC_ID, boot_cpu_id); + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); setup_local_APIC(); @@ -1001,28 +1131,53 @@ int __init APIC_init_uniprocessor (void) nr_ioapics = 0; #endif setup_boot_APIC_clock(); - + check_nmi_watchdog(); return 0; } static __init int setup_disableapic(char *str) { disable_apic = 1; - return 0; + return 1; } static __init int setup_nolapic(char *str) { disable_apic = 1; - return 0; + return 1; } static __init int setup_noapictimer(char *str) { + if (str[0] != ' ' && str[0] != 0) + return 0; disable_apic_timer = 1; - return 0; + return 1; } +static __init int setup_apicmaintimer(char *str) +{ + apic_runs_main_timer = 1; + nohpet = 1; + return 1; +} +__setup("apicmaintimer", setup_apicmaintimer); + +static __init int setup_noapicmaintimer(char *str) +{ + apic_runs_main_timer = -1; + return 1; +} +__setup("noapicmaintimer", setup_noapicmaintimer); + +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return setup_apicmaintimer(NULL); +} +__setup("apicpmtimer", setup_apicpmtimer); + /* dummy parsing: see setup.c */ __setup("disableapic", setup_disableapic);