X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fkernel%2Fapic.c;h=3d4b2f3d116a796ffad8ef911353b6a71a2f43b7;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=64c175ff4a9228c1cd68564ad335eaf55e0f61ec;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 64c175ff4..3d4b2f3d1 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -34,11 +35,25 @@ #include #include #include +#include #include +#include +#include #include "io_ports.h" +/* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_bcast_ipi; + +/* + * Knob to control our willingness to enable the local APIC. + */ +int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + /* * Debug level */ @@ -47,6 +62,38 @@ int apic_verbosity; static void apic_pm_activate(void); +int modern_apic(void) +{ + unsigned int lvr, version; + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + lvr = apic_read(APIC_LVR); + version = GET_APIC_VERSION(lvr); + return version >= 0x14; +} + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. 
+ * But only ack when the APIC is enabled -AK + */ + if (cpu_has_apic) + ack_APIC_irq(); +} + void __init apic_intr_init(void) { #ifdef CONFIG_SMP @@ -68,10 +115,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -88,10 +131,7 @@ void enable_NMI_through_LVT0 (void * dummy) int get_physical_broadcast(void) { - unsigned int lvr, version; - lvr = apic_read(APIC_LVR); - version = GET_APIC_VERSION(lvr); - if (!APIC_INTEGRATED(version) || version >= 0x14) + if (modern_apic()) return 0xff; else return 0xf; @@ -187,7 +227,7 @@ void __init connect_bsp_APIC(void) enable_apic_mode(); } -void disconnect_bsp_APIC(void) +void disconnect_bsp_APIC(int virt_wire_setup) { if (pic_mode) { /* @@ -201,6 +241,42 @@ void disconnect_bsp_APIC(void) outb(0x70, 0x22); outb(0x00, 0x23); } + else { + /* Go back to Virtual Wire compatibility mode */ + unsigned long value; + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write_around(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* For LVT0 make it edge triggered, active high, external and enabled */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write_around(APIC_LVT0, value); + } + else { + /* Disable LVT0 */ + apic_write_around(APIC_LVT0, APIC_LVT_MASKED); + } + + /* For LVT1 make it edge triggered, active high, nmi and enabled */ + value = apic_read(APIC_LVT1); + value 
&= ~( + APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write_around(APIC_LVT1, value); + } } void disable_local_APIC(void) @@ -282,6 +358,10 @@ int __init verify_local_APIC(void) void __init sync_Arb_IDs(void) { + /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 + And not needed on AMD */ + if (modern_apic()) + return; /* * Wait for idle. */ @@ -341,9 +421,10 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __init setup_local_APIC (void) +void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; + int i, j; /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { @@ -380,6 +461,25 @@ void __init setup_local_APIC (void) value &= ~APIC_TPRI_MASK; apic_write_around(APIC_TASKPRI, value); + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 1). + * software for Intel P6 or later and AMD K7 + * (Model > 1) or later. 
*/ rdmsr(MSR_IA32_APICBASE, l, h); if (!(l & MSR_IA32_APICBASE_ENABLE)) { - apic_printk(APIC_VERBOSE, "Local APIC disabled " - "by BIOS -- reenabling.\n"); + printk("Local APIC disabled by BIOS -- reenabling.\n"); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; wrmsr(MSR_IA32_APICBASE, l, h); @@ -732,7 +830,7 @@ static int __init detect_init_APIC (void) if (nmi_watchdog != NMI_NONE) nmi_watchdog = NMI_LOCAL_APIC; - apic_printk(APIC_VERBOSE, "Found and enabled local APIC!\n"); + printk("Found and enabled local APIC!\n"); apic_pm_activate(); @@ -759,8 +857,8 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_DEBUG, "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); + printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, + apic_phys); /* * Fetch the APIC ID of the BSP in case we have a @@ -778,21 +876,23 @@ void __init init_apic_mappings(void) if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; if (!ioapic_phys) { - printk(KERN_ERR "WARNING: bogus zero IO-APIC address found in MPTABLE, disabling IO/APIC support!\n"); - + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); smp_found_config = 0; skip_ioapic_setup = 1; goto fake_ioapic_page; } } else { fake_ioapic_page: - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_DEBUG, "mapped IOAPIC to " - "%08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); idx++; } } @@ -813,9 +913,8 @@ fake_ioapic_page: * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. 
*/ -static unsigned int __init get_8254_timer_count(void) +static unsigned int __devinit get_8254_timer_count(void) { - extern spinlock_t i8253_lock; unsigned long flags; unsigned int count; @@ -832,32 +931,27 @@ static unsigned int __init get_8254_timer_count(void) } /* next tick in 8254 can be caught by catching timer wraparound */ -static void __init wait_8254_wraparound(void) +static void __devinit wait_8254_wraparound(void) { - unsigned int curr_count, prev_count=~0; - int delta; + unsigned int curr_count, prev_count; curr_count = get_8254_timer_count(); - do { prev_count = curr_count; curr_count = get_8254_timer_count(); - delta = curr_count-prev_count; - /* - * This limit for delta seems arbitrary, but it isn't, it's - * slightly above the level of error a buggy Mercury/Neptune - * chipset timer can cause. - */ + /* workaround for broken Mercury/Neptune */ + if (prev_count >= curr_count + 0x100) + curr_count = get_8254_timer_count(); - } while (delta < 300); + } while (prev_count >= curr_count); } /* * Default initialization for 8254 timers. 
If we use other timers like HPET, * we override this later */ -void (*wait_timer_tick)(void) = wait_8254_wraparound; +void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; /* * This function sets up the local APIC timer, with a timeout of @@ -872,14 +966,19 @@ void (*wait_timer_tick)(void) = wait_8254_wraparound; #define APIC_DIVISOR 16 -void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + + if (cpu_isset(cpu, timer_bcast_ipi)) + lvtt_value |= APIC_LVT_MASKED; + apic_write_around(APIC_LVTT, lvtt_value); /* @@ -893,7 +992,7 @@ void __setup_APIC_LVTT(unsigned int clocks) apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void setup_APIC_timer(unsigned int clocks) +static void __devinit setup_APIC_timer(unsigned int clocks) { unsigned long flags; @@ -922,7 +1021,7 @@ static void setup_APIC_timer(unsigned int clocks) * APIC irq that way. */ -int __init calibrate_APIC_clock(void) +static int __init calibrate_APIC_clock(void) { unsigned long long t1 = 0, t2 = 0; long tt1, tt2; @@ -992,10 +1091,11 @@ static unsigned int calibration_result; void __init setup_boot_APIC_clock(void) { + unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* @@ -1003,17 +1103,15 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + local_irq_restore(flags); } -void __init setup_secondary_APIC_clock(void) +void __devinit setup_secondary_APIC_clock(void) { - local_irq_disable(); /* FIXME: Do we need this? 
--RR */ setup_APIC_timer(calibration_result); - local_irq_enable(); } -void __init disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; @@ -1025,7 +1123,10 @@ void __init disable_APIC_timer(void) void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_bcast_ipi)) { unsigned long v; v = apic_read(APIC_LVTT); @@ -1033,33 +1134,31 @@ void enable_APIC_timer(void) } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. [at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. 
- */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_bcast_ipi)) { + disable_APIC_timer(); + cpu_set(cpu, timer_bcast_ipi); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - return 0; +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_bcast_ipi)) { + cpu_clear(cpu, timer_bcast_ipi); + enable_APIC_timer(); + } } +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); #undef APIC_DIVISOR @@ -1075,32 +1174,10 @@ int setup_profiling_timer(unsigned int multiplier) inline void smp_local_timer_interrupt(struct pt_regs * regs) { - int cpu = smp_processor_id(); - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); #endif - } /* * We take the 'long' return path, and there every subsystem @@ -1123,14 +1200,14 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) * interrupt as well. Thus we cannot inline the local irq ... ] */ -void smp_apic_timer_interrupt(struct pt_regs regs) +fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); /* * the NMI deadlock-detector uses this. */ - irq_stat[cpu].apic_timer_irqs++; + per_cpu(irq_stat, cpu).apic_timer_irqs++; /* * NOTE! 
We'd better ACK the irq immediately, @@ -1143,14 +1220,51 @@ void smp_apic_timer_interrupt(struct pt_regs regs) * interrupt lock, which is the WrongThing (tm) to do. */ irq_enter(); - smp_local_timer_interrupt(&regs); + smp_local_timer_interrupt(regs); irq_exit(); } +#ifndef CONFIG_SMP +static void up_apic_timer_interrupt_call(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + /* + * the NMI deadlock-detector uses this. + */ + per_cpu(irq_stat, cpu).apic_timer_irqs++; + + smp_local_timer_interrupt(regs); +} +#endif + +void smp_send_timer_broadcast_ipi(struct pt_regs *regs) +{ + cpumask_t mask; + + cpus_and(mask, cpu_online_map, timer_bcast_ipi); + if (!cpus_empty(mask)) { +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#else + /* + * We can directly call the apic timer interrupt handler + * in UP case. Minus all irq related functions + */ + up_apic_timer_interrupt_call(regs); +#endif + } +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -asmlinkage void smp_spurious_interrupt(void) +fastcall void smp_spurious_interrupt(struct pt_regs *regs) { unsigned long v; @@ -1174,7 +1288,7 @@ asmlinkage void smp_spurious_interrupt(void) * This interrupt should never happen with our APIC/SMP architecture */ -asmlinkage void smp_error_interrupt(void) +fastcall void smp_error_interrupt(struct pt_regs *regs) { unsigned long v, v1; @@ -1219,6 +1333,7 @@ int __init APIC_init_uniprocessor (void) if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); return -1; } @@ -1226,12 +1341,18 @@ int __init APIC_init_uniprocessor (void) connect_bsp_APIC(); + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. 
But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +#ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); +#endif phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); setup_local_APIC(); - if (nmi_watchdog == NMI_LOCAL_APIC) - check_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config) if (!skip_ioapic_setup && nr_ioapics)