X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Fasm-sparc64%2Fsystem.h;h=a18ec87a52c1b2530c55f5dd2452133c5ce4703c;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=fd12ca386f486047b141926aa41890f2d129d095;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git

diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index fd12ca386..a18ec87a5 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,6 +28,49 @@ enum sparc_cpu {
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
+/* membar_safe() and the barrier macros built on it are an effort to more
+ * fully work around Spitfire Errata #51.  If a memory barrier occurs soon
+ * after a mispredicted branch, the chip can stop executing instructions
+ * until a trap occurs.  So with interrupts disabled, it can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into the delay slot of a "branch always,
+ * predicted taken" (ba,pt to the following label) to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+#define mb()	\
+	membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#define rmb()	\
+	membar_safe("#LoadLoad")
+#define wmb()	\
+	membar_safe("#StoreStore")
+#define membar_storeload() \
+	membar_safe("#StoreLoad")
+#define membar_storeload_storestore() \
+	membar_safe("#StoreLoad | #StoreStore")
+#define membar_storeload_loadload() \
+	membar_safe("#StoreLoad | #LoadLoad")
+#define membar_storestore_loadstore() \
+	membar_safe("#StoreStore | #LoadStore")
+
 #endif
 
 #define setipl(__new_ipl) \
@@ -78,16 +121,11 @@ enum sparc_cpu {
 
 #define nop() 		__asm__ __volatile__ ("nop")
 
-#define membar(type)	__asm__ __volatile__ ("membar " type : : : "memory")
-#define mb()		\
-	membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()		membar("#LoadLoad")
-#define wmb()		membar("#StoreStore")
 #define read_barrier_depends()		do { } while(0)
 #define set_mb(__var, __value) \
-	do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0)
+	do { __var = __value; membar_storeload_storestore(); } while(0)
 #define set_wmb(__var, __value) \
-	do { __var = __value; membar("#StoreStore"); } while(0)
+	do { __var = __value; wmb(); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
@@ -139,19 +177,13 @@ extern void __flushw_user(void);
 #define flush_user_windows flushw_user
 #define flush_register_windows flushw_all
 
-#define prepare_arch_switch(rq, next)		\
-do {	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
+/* Don't hold the runqueue lock over context switch; just flush the windows. */
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define prepare_arch_switch(next)		\
+do {						\
 	flushw_all();				\
 } while (0)
 
-#define finish_arch_switch(rq, prev)		\
-do {	spin_unlock_irq(&(prev)->switch_lock);	\
-} while (0)
-
-#define task_running(rq, p) \
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-
 	/* See what happens when you design the chip correctly?
 	 *
 	 * We tell gcc we clobber all non-fixed-usage registers except
@@ -161,11 +193,7 @@ do {	spin_unlock_irq(&(prev)->switch_lock);	\
 	 * not preserve it's value.  Hairy, but it lets us remove 2 loads
 	 * and 2 stores in this critical code path.  -DaveM
 	 */
-#if __GNUC__ >= 3
 #define EXTRA_CLOBBER ,"%l1"
-#else
-#define EXTRA_CLOBBER
-#endif
 #define switch_to(prev, next, last)					\
 do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 		unsigned long __tmp;					\
@@ -180,10 +208,11 @@ do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 	/* If you are tempted to conditionalize the following */	\
 	/* so that ASI is only written if it changes, think again. */	\
 	__asm__ __volatile__("wr %%g0, %0, %%asi"			\
-	: : "r" (__thread_flag_byte_ptr(next->thread_info)[TI_FLAG_BYTE_CURRENT_DS]));\
+	: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+	trap_block[current_thread_info()->cpu].thread =			\
+		task_thread_info(next);					\
 	__asm__ __volatile__(						\
 	"mov	%%g4, %%g7\n\t"						\
-	"wrpr	%%g0, 0x95, %%pstate\n\t"				\
 	"stx	%%i6, [%%sp + 2047 + 0x70]\n\t"				\
 	"stx	%%i7, [%%sp + 2047 + 0x78]\n\t"				\
 	"rdpr	%%wstate, %%o5\n\t"					\
@@ -196,24 +225,19 @@ do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 	"wrpr	%%g1, %%cwp\n\t"					\
 	"ldx	[%%g6 + %3], %%o6\n\t"					\
 	"ldub	[%%g6 + %2], %%o5\n\t"					\
-	"ldx	[%%g6 + %4], %%o7\n\t"					\
-	"mov	%%g6, %%l2\n\t"						\
+	"ldub	[%%g6 + %4], %%o7\n\t"					\
 	"wrpr	%%o5, 0x0, %%wstate\n\t"				\
 	"ldx	[%%sp + 2047 + 0x70], %%i6\n\t"				\
 	"ldx	[%%sp + 2047 + 0x78], %%i7\n\t"				\
-	"wrpr	%%g0, 0x94, %%pstate\n\t"				\
-	"mov	%%l2, %%g6\n\t"						\
-	"ldx	[%%g6 + %7], %%g4\n\t"					\
-	"wrpr	%%g0, 0x96, %%pstate\n\t"				\
-	"andcc	%%o7, %6, %%g0\n\t"					\
-	"beq,pt %%icc, 1f\n\t"						\
+	"ldx	[%%g6 + %6], %%g4\n\t"					\
+	"brz,pt %%o7, 1f\n\t"						\
 	" mov	%%g7, %0\n\t"						\
 	"b,a ret_from_syscall\n\t"					\
 	"1:\n\t"							\
 	: "=&r" (last)							\
-	: "0" (next->thread_info),					\
-	  "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_FLAGS), "i" (TI_CWP),	\
-	  "i" (_TIF_NEWCHILD), "i" (TI_TASK)				\
+	: "0" (task_thread_info(next)),					\
+	  "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_NEW_CHILD),            \
+	  "i" (TI_CWP), "i" (TI_TASK)					\
 	: "cc",								\
 	        "g1", "g2", "g3",                   "g7",		\
 	              "l2", "l3", "l4", "l5", "l6", "l7",		\
@@ -226,6 +250,16 @@ do {	if (test_thread_flag(TIF_PERFCTR)) {				\
 	}								\
 } while(0)
 
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible.
+ *
+ * TODO: fill this in!  Until then this is an empty stub that flushes nothing.
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
 static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
 {
 	unsigned long tmp1, tmp2;