X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fsn%2Fkernel%2Fsn2%2Fsn2_smp.c;h=d9d306c79f2d7a1743369380e637c4118d012c22;hb=refs%2Fheads%2Fvserver;hp=3ee7c10afe003c8cea77ee56d1f9faefd923bc0b;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git

diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 3ee7c10af..d9d306c79 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -5,7 +5,7 @@
  * License. See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include
@@ -18,10 +18,13 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -29,48 +32,98 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
-#include
+#include
 #include
 #include
-void sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1);
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DECLARE_PER_CPU(struct ptc_stats, ptcstats);
+static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
-static spinlock_t sn2_global_ptc_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
-
-static unsigned long sn2_ptc_deadlock_count;
-
+extern unsigned long
+sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
+ volatile unsigned long *, unsigned long,
+ volatile unsigned long *, unsigned long);
+void
+sn2_ptc_deadlock_recovery(short *, short, short, int,
+ volatile unsigned long *, unsigned long,
+ volatile unsigned long *, unsigned long);
-static inline unsigned long
-wait_piowc(void)
+/*
+ * Note: some of the following is captured here to make debugging easier
+ * (the macros make more sense if you see the debug patch - not posted)
+ */
+#define sn2_ptctest 0
+#define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0)
+#define max_active_pio(sh1) ((sh1) ? 32 : 7)
+#define reset_max_active_on_deadlock() 1
+#define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
+
+struct ptc_stats {
+ unsigned long ptc_l;
+ unsigned long change_rid;
+ unsigned long shub_ptc_flushes;
+ unsigned long nodes_flushed;
+ unsigned long deadlocks;
+ unsigned long deadlocks2;
+ unsigned long lock_itc_clocks;
+ unsigned long shub_itc_clocks;
+ unsigned long shub_itc_clocks_max;
+ unsigned long shub_ptc_flushes_not_my_mm;
+};
+
+#define sn2_ptctest 0
+
+static inline unsigned long wait_piowc(void)
 {
 volatile unsigned long *piows;
- unsigned long ws;
+ unsigned long zeroval, ws;
 piows = pda->pio_write_status_addr;
+ zeroval = pda->pio_write_status_val;
 do {
- ia64_mfa();
- } while (((ws = *piows) & SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT_MASK) !=
- SH_PIO_WRITE_STATUS_0_PENDING_WRITE_COUNT_MASK);
- return ws;
+ cpu_relax();
+ } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
+ return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
 }
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+ pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+ volatile unsigned long *adr = last_pda->pio_write_status_addr;
+ unsigned long val = last_pda->pio_write_status_val;
+
+ /* Drain PIO writes from old CPU's Shub */
+ while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+ != val))
+ cpu_relax();
+}
-void
-sn_tlb_migrate_finish(struct mm_struct *mm)
+void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
- if (mm == current->mm)
+ /* flush_tlb_mm is inefficient if more than one user of mm */
+ if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
 flush_tlb_mm(mm);
 }
-
 /**
  * sn2_global_tlb_purge - globally purge translation cache of virtual address range
+ * @mm: mm_struct containing virtual address range
  * @start: start of virtual address range
  * @end: end of virtual address range
  * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
@@ -82,96 +135,149 @@ sn_tlb_migrate_finish(struct mm_struct *mm)
  * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
  * - cpu_vm_mask is converted into a nodemask of the nodes containing the
  *   cpus in cpu_vm_mask.
- * - if only one bit is set in cpu_vm_mask & it is the current cpu,
- *   then only the local TLB needs to be flushed. This flushing can be done
- *   using ptc.l. This is the common case & avoids the global spinlock.
+ * - if only one bit is set in cpu_vm_mask & it is the current cpu & the
+ *   process is purging its own virtual address range, then only the
+ *   local TLB needs to be flushed. This flushing can be done using
+ *   ptc.l. This is the common case & avoids the global spinlock.
  * - if multiple cpus have loaded the context, then flushing has to be
  *   done with ptc.g/MMRs under protection of the global ptc_lock.
 */
 void
-sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
+sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long nbits)
 {
- int i, cnode, mynasid, cpu, lcpu=0, nasid, flushed=0;
- volatile unsigned long *ptc0, *ptc1;
- unsigned long flags=0, data0, data1;
- struct mm_struct *mm=current->active_mm;
- short nasids[NR_NODES], nix;
- DECLARE_BITMAP(nodes_flushed, NR_NODES);
-
- bitmap_zero(nodes_flushed, NR_NODES);
-
+ int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
+ int mymm = (mm == current->active_mm && mm == current->mm);
+ int use_cpu_ptcga;
+ volatile unsigned long *ptc0, *ptc1;
+ unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
+ short nasids[MAX_NUMNODES], nix;
+ nodemask_t nodes_flushed;
+ int active, max_active, deadlock;
+
+ nodes_clear(nodes_flushed);
 i = 0;
 for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
 cnode = cpu_to_node(cpu);
- __set_bit(cnode, nodes_flushed);
+ node_set(cnode, nodes_flushed);
 lcpu = cpu;
 i++;
 }
+
+ if (i == 0)
+ return;
+
 preempt_disable();
- if (likely(i == 1 && lcpu == smp_processor_id())) {
+ if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
 do {
- ia64_ptcl(start, nbits<<2);
+ ia64_ptcl(start, nbits << 2);
 start += (1UL << nbits);
 } while (start < end);
 ia64_srlz_i();
+ __get_cpu_var(ptcstats).ptc_l++;
 preempt_enable();
 return;
 }
- if (atomic_read(&mm->mm_users) == 1) {
+ if (atomic_read(&mm->mm_users) == 1 && mymm) {
 flush_tlb_mm(mm);
+ __get_cpu_var(ptcstats).change_rid++;
 preempt_enable();
 return;
 }
-
+ itc = ia64_get_itc();
 nix = 0;
- for (cnode=find_first_bit(&nodes_flushed, NR_NODES); cnode < NR_NODES;
- cnode=find_next_bit(&nodes_flushed, NR_NODES, ++cnode))
+ for_each_node_mask(cnode, nodes_flushed)
 nasids[nix++] = cnodeid_to_nasid(cnode);
+ rr_value = (mm->context << 3) | REGION_NUMBER(start);
+
+ shub1 = is_shub1();
+ if (shub1) {
+ data0 = (1UL << SH1_PTC_0_A_SHFT) |
+ (nbits << SH1_PTC_0_PS_SHFT) |
+ (rr_value << SH1_PTC_0_RID_SHFT) |
+ (1UL << SH1_PTC_0_START_SHFT);
+ ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
+ ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
+ } else {
+ data0 = (1UL << SH2_PTC_A_SHFT) |
+ (nbits << SH2_PTC_PS_SHFT) |
+ (1UL << SH2_PTC_START_SHFT);
+ ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
+ (rr_value << SH2_PTC_RID_SHFT));
+ ptc1 = NULL;
+ }
+
+
+ mynasid = get_nasid();
+ use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
+ max_active = max_active_pio(shub1);
+
+ itc = ia64_get_itc();
+ spin_lock_irqsave(PTC_LOCK(shub1), flags);
+ itc2 = ia64_get_itc();
+
+ __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
+ __get_cpu_var(ptcstats).shub_ptc_flushes++;
+ __get_cpu_var(ptcstats).nodes_flushed += nix;
+ if (!mymm)
+ __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
+
+ if (use_cpu_ptcga && !mymm) {
+ old_rr = ia64_get_rr(start);
+ ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
+ ia64_srlz_d();
+ }
- data0 = (1UL<>8)<= max_active || i == (nix - 1)) {
+ if ((deadlock = wait_piowc())) {
+ sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
+ if (reset_max_active_on_deadlock())
+ max_active = 1;
+ }
+ active = 0;
+ ibegin = i + 1;
+ }
 }
 }
-
- if (flushed && (wait_piowc() & SH_PIO_WRITE_STATUS_0_WRITE_DEADLOCK_MASK)) {
- sn2_ptc_deadlock_recovery(data0, data1);
- }
- start += (1UL << nbits);
- } while (start < end);
- spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+ itc2 = ia64_get_itc() - itc2;
+ __get_cpu_var(ptcstats).shub_itc_clocks += itc2;
+ if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
+ __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
+
+ if (old_rr) {
+ ia64_set_rr(start, old_rr);
+ ia64_srlz_d();
+ }
+
+ spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
 preempt_enable();
 }
@@ -183,33 +289,38 @@ sn2_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbit
  * TLB flush transaction. The recovery sequence is somewhat tricky & is
  * coded in assembly language.
  */
+
 void
-sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1)
+sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
+ volatile unsigned long *ptc0, unsigned long data0,
+ volatile unsigned long *ptc1, unsigned long data1)
 {
- extern void sn2_ptc_deadlock_recovery_core(long*, long, long*, long, long*);
- int cnode, mycnode, nasid;
- long *ptc0, *ptc1, *piows;
+ short nasid, i;
+ unsigned long *piows, zeroval, n;
- sn2_ptc_deadlock_count++;
+ __get_cpu_var(ptcstats).deadlocks++;
- ptc0 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_0);
- ptc1 = (long*)GLOBAL_MMR_PHYS_ADDR(0, SH_PTC_1);
- piows = (long*)pda->pio_write_status_addr;
+ piows = (unsigned long *) pda->pio_write_status_addr;
+ zeroval = pda->pio_write_status_val;
- mycnode = numa_node_id();
- for (cnode = 0; cnode < numnodes; cnode++) {
- if (is_headless_node(cnode) || cnode == mycnode)
+ for (i=ib; i <= ie; i++) {
+ nasid = nasids[i];
+ if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
 continue;
- nasid = cnodeid_to_nasid(cnode);
 ptc0 = CHANGE_NASID(nasid, ptc0);
- ptc1 = CHANGE_NASID(nasid, ptc1);
- sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows);
+ if (ptc1)
+ ptc1 = CHANGE_NASID(nasid, ptc1);
+
+ n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
+ __get_cpu_var(ptcstats).deadlocks2 += n;
 }
+
 }
 /**
  * sn_send_IPI_phys - send an IPI to a Nasid and slice
+ * @nasid: nasid to receive the interrupt (may be outside partition)
  * @physid: physical cpuid to receive the interrupt.
 * @vector: command to send
 * @delivery_mode: delivery mechanism
@@ -224,34 +335,31 @@ sn2_ptc_deadlock_recovery(unsigned long data0, unsigned long data1)
  * %IA64_IPI_DM_NMI - pend an NMI
  * %IA64_IPI_DM_INIT - pend an INIT interrupt
  */
-void
-sn_send_IPI_phys(long physid, int vector, int delivery_mode)
+void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
 {
- long nasid, slice, val;
- unsigned long flags=0;
- volatile long *p;
+ long val;
+ unsigned long flags = 0;
+ volatile long *p;
- nasid = cpu_physical_id_to_nasid(physid);
- slice = cpu_physical_id_to_slice(physid);
-
- p = (long*)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
- val = (1UL<ptc_l,
+ stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
+ stat->deadlocks,
+ 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+ 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+ 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec,
+ stat->shub_ptc_flushes_not_my_mm,
+ stat->deadlocks2);
+ }
+ return 0;
+}
+
+static struct seq_operations sn2_ptc_seq_ops = {
+ .start = sn2_ptc_seq_start,
+ .next = sn2_ptc_seq_next,
+ .stop = sn2_ptc_seq_stop,
+ .show = sn2_ptc_seq_show
+};
+
+static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &sn2_ptc_seq_ops);
+}
+
+static struct file_operations proc_sn2_ptc_operations = {
+ .open = sn2_ptc_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct proc_dir_entry *proc_sn2_ptc;
+
+static int __init sn2_ptc_init(void)
+{
+ if (!ia64_platform_is("sn2"))
+ return 0;
+
+ if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+ printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
+ return -EINVAL;
+ }
+ proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
+ spin_lock_init(&sn2_global_ptc_lock);
+ return 0;
+}
+
+static void __exit sn2_ptc_exit(void)
+{
+ remove_proc_entry(PTC_BASENAME, NULL);
+}
+
+module_init(sn2_ptc_init);
+module_exit(sn2_ptc_exit);
+#endif /* CONFIG_PROC_FS */
+
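The hunks above also register a seq_file interface, sn2_ptc_init(), which publishes the per-cpu counters from struct ptc_stats under PTC_BASENAME ("sgi_sn/ptc_statistics") in procfs. As a minimal user-space sketch of how those statistics could be read back, the following program simply dumps the seq_file output; the /proc path comes from the patch, while the program itself is illustrative only and assumes an SN2 (ia64) system running a kernel with this patch and CONFIG_PROC_FS enabled:

```c
#include <stdio.h>
#include <stdlib.h>

/* Path created by sn2_ptc_init() in the patch; present only on SN2 systems. */
#define PTC_STATS_PATH "/proc/sgi_sn/ptc_statistics"

int main(void)
{
	FILE *fp = fopen(PTC_STATS_PATH, "r");
	char line[512];

	if (!fp) {
		perror(PTC_STATS_PATH);
		return EXIT_FAILURE;
	}

	/* The seq_file emits a header line followed by one line per cpu. */
	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);

	fclose(fp);
	return EXIT_SUCCESS;
}
```

Each data row corresponds to one cpu and reports the counters kept in struct ptc_stats: ptc.l flushes, region-ID changes, shub flushes, nodes flushed, deadlocks, and the lock/flush ITC clock counts, which sn2_ptc_seq_show() scales by cyc_per_usec into nanoseconds.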