VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
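The change visible throughout this diff is a rework of the ppc64 exception entry path: instead of building an exception frame on the per-paca exception stack (the old PACAEXCSP / EXC_FRAME_SIZE scheme), each prolog now saves r9-r12 and the interrupted r13 into a small fixed save area inside the paca (PACA_EXGEN, PACA_EXMC or PACA_EXSLB, reached through SPRG3/r13), leaving CR in r9 and SRR0/SRR1 in r11/r12 for the common prolog. A rough C picture of one such save area, reconstructed from the EX_* offsets defined below, is sketched here; the struct and field names are illustrative, not taken from the kernel headers.

        /* Hypothetical layout of one per-exception paca save area,
         * mirroring the EX_* offsets in this patch. */
        struct ex_save_area {
                unsigned long r9;       /* EX_R9    =  0 */
                unsigned long r10;      /* EX_R10   =  8 */
                unsigned long r11;      /* EX_R11   = 16 */
                unsigned long r12;      /* EX_R12   = 24 */
                unsigned long r13;      /* EX_R13   = 32 */
                unsigned long srr0;     /* EX_SRR0  = 40 */
                unsigned long dar;      /* EX_DAR   = 48; EX_LR reuses this slot on SLB miss */
                unsigned int  dsisr;    /* EX_DSISR = 56 */
                unsigned int  ccr;      /* EX_CCR   = 60 */
        };
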
diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S
index e868fe7..a5d67f5 100644
 #include <asm/offsets.h>
 #include <asm/bug.h>
 #include <asm/cputable.h>
+#include <asm/setup.h>
 
 #ifdef CONFIG_PPC_ISERIES
 #define DO_SOFT_DISABLE
 #endif
 
-/* copy saved SOFTE bit or EE bit from saved MSR depending
- * if we are doing soft-disable or not
- */
-#ifdef DO_SOFT_DISABLE
-#define DO_COPY_EE()   ld      r20,SOFTE(r1)
-#else
-#define DO_COPY_EE()   rldicl  r20,r23,49,63
-#endif
-
 /*
  * hcall interface to pSeries LPAR
  */
-#define HVSC .long 0x44000022
-#define H_SET_ASR              0x30
+#define HVSC           .long 0x44000022
+#define H_SET_ASR      0x30
 
 /*
  * We layout physical memory as follows:
  * 0x3000 - 0x3fff : Interrupt support
  * 0x4000 - 0x4fff : NACA
  * 0x5000 - 0x5fff : SystemCfg
- * 0x6000          : iSeries and common interrupt prologs
+ * 0x6000         : iSeries and common interrupt prologs
  * 0x9000 - 0x9fff : Initial segment table
  */
 
 /*
  *   SPRG Usage
  *
- *   Register          Definition
+ *   Register  Definition
  *
- *   SPRG0             reserved for hypervisor
- *   SPRG1             temp - used to save gpr
- *   SPRG2             temp - used to save gpr
- *   SPRG3             virt addr of paca
+ *   SPRG0     reserved for hypervisor
+ *   SPRG1     temp - used to save gpr
+ *   SPRG2     temp - used to save gpr
+ *   SPRG3     virt addr of paca
  */
 
 /*
@@ -115,7 +107,7 @@ END_FTR_SECTION(0, 1)
         * to the pidhash table (also used by the debugger)
         */
        .llong msChunks-KERNELBASE
-       .llong 0 /* pidhash-KERNELBASE SFRXXX */
+       .llong 0        /* pidhash-KERNELBASE SFRXXX */
 
        /* Offset 0x38 - Pointer to start of embedded System.map */
        .globl  embedded_sysmap_start
@@ -130,13 +122,13 @@ embedded_sysmap_end:
        /* Secondary processors spin on this value until it goes to 1. */
        .globl  __secondary_hold_spinloop
 __secondary_hold_spinloop:
-       .llong  0x0
+       .llong  0x0
 
        /* Secondary processors write this value with their cpu # */
-       /* after they enter the spin loop immediately below.       */
-       .globl  __secondary_hold_acknowledge
+       /* after they enter the spin loop immediately below.      */
+       .globl  __secondary_hold_acknowledge
 __secondary_hold_acknowledge:
-       .llong  0x0
+       .llong  0x0
 
        . = 0x60
 /*
@@ -152,36 +144,43 @@ _GLOBAL(__secondary_hold)
        mtmsrd  r24                     /* RI on */
 
        /* Grab our linux cpu number */
-       mr      r24,r3
+       mr      r24,r3
 
        /* Tell the master cpu we're here */
        /* Relocation is off & we are located at an address less */
        /* than 0x100, so only need to grab low order offset.    */
-       std     r24,__secondary_hold_acknowledge@l(0)
+       std     r24,__secondary_hold_acknowledge@l(0)
        sync
 
        /* All secondary cpu's wait here until told to start. */
-100:    ld      r4,__secondary_hold_spinloop@l(0)
-       cmpdi   0,r4,1
-       bne     100b
+100:   ld      r4,__secondary_hold_spinloop@l(0)
+       cmpdi   0,r4,1
+       bne     100b
 
 #ifdef CONFIG_HMT
        b       .hmt_init
 #else
 #ifdef CONFIG_SMP
-       mr      r3,r24
-       b       .pseries_secondary_smp_init
+       mr      r3,r24
+       b       .pseries_secondary_smp_init
 #else
        BUG_OPCODE
 #endif
 #endif
 #endif
 
+/* This value is used to mark exception frames on the stack. */
+       .section ".toc","aw"
+exception_marker:
+       .tc     ID_72656773_68657265[TC],0x7265677368657265
+       .text
+
 /*
  * The following macros define the code that appears as
  * the prologue to each of the exception handlers.  They
  * are split into two parts to allow a single kernel binary
- * to be used for pSeries, and iSeries.
+ * to be used for pSeries and iSeries.
+ * LOL.  One day... - paulus
  */
 
 /*
@@ -194,81 +193,57 @@ _GLOBAL(__secondary_hold)
  * This is the start of the interrupt handlers for pSeries
  * This code runs with relocation off.
  */
-#define EX_SRR0                0
-#define EX_SRR1                8
-#define EX_R20         16
-#define EX_R21         24
-#define EX_R22         32
-#define EX_R23         40
+#define EX_R9          0
+#define EX_R10         8
+#define EX_R11         16
+#define EX_R12         24
+#define EX_R13         32
+#define EX_SRR0                40
 #define EX_DAR         48
+#define EX_LR          48      /* SLB miss saves LR, but not DAR */
 #define EX_DSISR       56
-#define EX_CCR         60
-#define EX_TRAP        60
-
-#define EXCEPTION_PROLOG_PSERIES(n,label)                                \
-       mtspr   SPRG2,r20;              /* use SPRG2 as scratch reg   */ \
-       mtspr   SPRG1,r21;              /* save r21                   */ \
-       mfspr   r20,SPRG3;              /* get paca virt addr         */ \
-       ld      r21,PACAEXCSP(r20);     /* get exception stack ptr    */ \
-       addi    r21,r21,EXC_FRAME_SIZE; /* make exception frame       */ \
-       std     r22,EX_R22(r21);        /* Save r22 in exc. frame     */ \
-       li      r22,n;                  /* Save the ex # in exc. frame*/ \
-       stw     r22,EX_TRAP(r21);       /*                            */ \
-       std     r23,EX_R23(r21);        /* Save r23 in exc. frame     */ \
-       mfspr   r22,SRR0;               /* EA of interrupted instr    */ \
-       std     r22,EX_SRR0(r21);       /* Save SRR0 in exc. frame    */ \
-       mfspr   r23,SRR1;               /* machine state at interrupt */ \
-       std     r23,EX_SRR1(r21);       /* Save SRR1 in exc. frame    */ \
-                                                                         \
-       mfspr   r23,DAR;                /* Save DAR in exc. frame      */ \
-       std     r23,EX_DAR(r21);                                          \
-       mfspr   r23,DSISR;              /* Save DSISR in exc. frame    */ \
-       stw     r23,EX_DSISR(r21);                                        \
-       mfspr   r23,SPRG2;              /* Save r20 in exc. frame      */ \
-       std     r23,EX_R20(r21);                                          \
-                                                                         \
-       clrrdi  r22,r20,60;             /* Get 0xc part of the vaddr  */ \
-       ori     r22,r22,(label)@l;      /* add in the vaddr offset    */ \
-                                       /*   assumes *_common < 16b   */ \
-       mfmsr   r23;                                                     \
-       rotldi  r23,r23,4;                                               \
-       ori     r23,r23,0x32B;          /* Set IR, DR, RI, SF, ISF, HV*/ \
-       rotldi  r23,r23,60;             /* for generic handlers       */ \
-       mtspr   SRR0,r22;                                                \
-       mtspr   SRR1,r23;                                                \
-       mfcr    r23;                    /* save CR in r23             */ \
-       rfid
+#define EX_CCR         60
+
+#define EXCEPTION_PROLOG_PSERIES(area, label)                          \
+       mfspr   r13,SPRG3;              /* get paca address into r13 */ \
+       std     r9,area+EX_R9(r13);     /* save r9 - r12 */             \
+       std     r10,area+EX_R10(r13);                                   \
+       std     r11,area+EX_R11(r13);                                   \
+       std     r12,area+EX_R12(r13);                                   \
+       mfspr   r9,SPRG1;                                               \
+       std     r9,area+EX_R13(r13);                                    \
+       mfcr    r9;                                                     \
+       clrrdi  r12,r13,32;             /* get high part of &label */   \
+       mfmsr   r10;                                                    \
+       mfspr   r11,SRR0;               /* save SRR0 */                 \
+       ori     r12,r12,(label)@l;      /* virt addr of handler */      \
+       ori     r10,r10,MSR_IR|MSR_DR|MSR_RI;                           \
+       mtspr   SRR0,r12;                                               \
+       mfspr   r12,SRR1;               /* and SRR1 */                  \
+       mtspr   SRR1,r10;                                               \
+       rfid;                                                           \
+       b       .       /* prevent speculative execution */
 
 /*
  * This is the start of the interrupt handlers for iSeries
  * This code runs with relocation on.
  */
-#define EXCEPTION_PROLOG_ISERIES(n)                                          \
-       mtspr   SPRG2,r20;                  /* use SPRG2 as scratch reg    */ \
-       mtspr   SPRG1,r21;                  /* save r21                    */ \
-       mfspr   r20,SPRG3;                  /* get paca                    */ \
-       ld      r21,PACAEXCSP(r20);         /* get exception stack ptr     */ \
-       addi    r21,r21,EXC_FRAME_SIZE;     /* make exception frame        */ \
-       std     r22,EX_R22(r21);            /* save r22 on exception frame */ \
-       li      r22,n;                      /* Save the ex # in exc. frame */ \
-       stw     r22,EX_TRAP(r21);           /*                             */ \
-       std     r23,EX_R23(r21);            /* Save r23 in exc. frame      */ \
-       ld      r22,LPPACA+LPPACASRR0(r20); /* Get SRR0 from ItLpPaca      */ \
-       std     r22,EX_SRR0(r21);           /* save SRR0 in exc. frame     */ \
-       ld      r23,LPPACA+LPPACASRR1(r20); /* Get SRR1 from ItLpPaca      */ \
-       std     r23,EX_SRR1(r21);           /* save SRR1 in exc. frame     */ \
-                                                                         \
-       mfspr   r23,DAR;                /* Save DAR in exc. frame      */ \
-       std     r23,EX_DAR(r21);                                          \
-       mfspr   r23,DSISR;              /* Save DSISR in exc. frame    */ \
-       stw     r23,EX_DSISR(r21);                                        \
-       mfspr   r23,SPRG2;              /* Save r20 in exc. frame      */ \
-       std     r23,EX_R20(r21);                                          \
-                                                                         \
-       mfmsr   r22;                        /* set MSR.RI                  */ \
-       ori     r22,r22,MSR_RI;                                               \
-       mtmsrd  r22,1;                                                        \
-       mfcr    r23;                        /* save CR in r23              */
+#define EXCEPTION_PROLOG_ISERIES_1(area)                               \
+       mfspr   r13,SPRG3;              /* get paca address into r13 */ \
+       std     r9,area+EX_R9(r13);     /* save r9 - r12 */             \
+       std     r10,area+EX_R10(r13);                                   \
+       std     r11,area+EX_R11(r13);                                   \
+       std     r12,area+EX_R12(r13);                                   \
+       mfspr   r9,SPRG1;                                               \
+       std     r9,area+EX_R13(r13);                                    \
+       mfcr    r9
+
+#define EXCEPTION_PROLOG_ISERIES_2                                     \
+       mfmsr   r10;                                                    \
+       ld      r11,PACALPPACA+LPPACASRR0(r13);                         \
+       ld      r12,PACALPPACA+LPPACASRR1(r13);                         \
+       ori     r10,r10,MSR_RI;                                         \
+       mtmsrd  r10,1
 
 /*
  * The common exception prolog is used for all except a few exceptions
@@ -276,106 +251,156 @@ _GLOBAL(__secondary_hold)
  * to take another exception from the point where we first touch the
  * kernel stack onwards.
  *
- * On entry r20 points to the paca and r21 points to the exception
- * frame on entry, r23 contains the saved CR, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON                                           \
-       mfspr   r22,SPRG1;              /* Save r21 in exc. frame      */ \
-       std     r22,EX_R21(r21);                                          \
-       std     r21,PACAEXCSP(r20);     /* update exception stack ptr  */ \
-       ld      r22,EX_SRR1(r21);       /* Get SRR1 from exc. frame    */ \
-       andi.   r22,r22,MSR_PR;         /* Set CR for later branch     */ \
-       mr      r22,r1;                 /* Save r1                     */ \
-       subi    r1,r1,INT_FRAME_SIZE;   /* alloc frame on kernel stack */ \
-       beq-    1f;                                                       \
-       ld      r1,PACAKSAVE(r20);      /* kernel stack to use         */ \
-1:      cmpdi  cr1,r1,0;               /* check if r1 is in userspace */ \
-       bge     cr1,bad_stack;          /* abort if it is              */ \
-       std     r22,GPR1(r1);           /* save r1 in stackframe       */ \
-       std     r22,0(r1);              /* make stack chain pointer    */ \
-       std     r23,_CCR(r1);           /* save CR in stackframe       */ \
-       ld      r22,EX_R20(r21);        /* move r20 to stackframe      */ \
-       std     r22,GPR20(r1);                                            \
-       ld      r23,EX_R21(r21);        /* move r21 to stackframe      */ \
-       std     r23,GPR21(r1);                                            \
-       ld      r22,EX_R22(r21);        /* move r22 to stackframe      */ \
-       std     r22,GPR22(r1);                                            \
-       ld      r23,EX_R23(r21);        /* move r23 to stackframe      */ \
-       std     r23,GPR23(r1);                                            \
-       mflr    r22;                    /* save LR in stackframe       */ \
-       std     r22,_LINK(r1);                                            \
-       mfctr   r23;                    /* save CTR in stackframe      */ \
-       std     r23,_CTR(r1);                                             \
-       mfspr   r22,XER;                /* save XER in stackframe      */ \
-       std     r22,_XER(r1);                                             \
-       ld      r23,EX_DAR(r21);        /* move DAR to stackframe      */ \
-       std     r23,_DAR(r1);                                             \
-       lwz     r22,EX_DSISR(r21);      /* move DSISR to stackframe    */ \
-       std     r22,_DSISR(r1);                                           \
-       lbz     r22,PACAPROCENABLED(r20);                                 \
-       std     r22,SOFTE(r1);                                            \
-       ld      r22,EX_SRR0(r21);       /* get SRR0 from exc. frame    */ \
-       ld      r23,EX_SRR1(r21);       /* get SRR1 from exc. frame    */ \
-       addi    r21,r21,-EXC_FRAME_SIZE;/* pop off exception frame     */ \
-       std     r21,PACAEXCSP(r20);                                       \
-       SAVE_GPR(0, r1);                /* save r0 in stackframe       */ \
-       SAVE_8GPRS(2, r1);              /* save r2 - r13 in stackframe */ \
-       SAVE_4GPRS(10, r1);                                               \
-       ld      r2,PACATOC(r20);                                          \
-       mr      r13,r20
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r1, r22 (SRR0), and r23 (SRR1).
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
  */
+#define EXCEPTION_PROLOG_COMMON(n, area)                                  \
+       andi.   r10,r12,MSR_PR;         /* See if coming from user      */ \
+       mr      r10,r1;                 /* Save r1                      */ \
+       subi    r1,r1,INT_FRAME_SIZE;   /* alloc frame on kernel stack  */ \
+       beq-    1f;                                                        \
+       ld      r1,PACAKSAVE(r13);      /* kernel stack to use          */ \
+1:     cmpdi   cr1,r1,0;               /* check if r1 is in userspace  */ \
+       bge-    cr1,bad_stack;          /* abort if it is               */ \
+       std     r9,_CCR(r1);            /* save CR in stackframe        */ \
+       std     r11,_NIP(r1);           /* save SRR0 in stackframe      */ \
+       std     r12,_MSR(r1);           /* save SRR1 in stackframe      */ \
+       std     r10,0(r1);              /* make stack chain pointer     */ \
+       std     r0,GPR0(r1);            /* save r0 in stackframe        */ \
+       std     r10,GPR1(r1);           /* save r1 in stackframe        */ \
+       std     r2,GPR2(r1);            /* save r2 in stackframe        */ \
+       SAVE_4GPRS(3, r1);              /* save r3 - r6 in stackframe   */ \
+       SAVE_2GPRS(7, r1);              /* save r7, r8 in stackframe    */ \
+       ld      r9,area+EX_R9(r13);     /* move r9, r10 to stackframe   */ \
+       ld      r10,area+EX_R10(r13);                                      \
+       std     r9,GPR9(r1);                                               \
+       std     r10,GPR10(r1);                                             \
+       ld      r9,area+EX_R11(r13);    /* move r11 - r13 to stackframe */ \
+       ld      r10,area+EX_R12(r13);                                      \
+       ld      r11,area+EX_R13(r13);                                      \
+       std     r9,GPR11(r1);                                              \
+       std     r10,GPR12(r1);                                             \
+       std     r11,GPR13(r1);                                             \
+       ld      r2,PACATOC(r13);        /* get kernel TOC into r2       */ \
+       mflr    r9;                     /* save LR in stackframe        */ \
+       std     r9,_LINK(r1);                                              \
+       mfctr   r10;                    /* save CTR in stackframe       */ \
+       std     r10,_CTR(r1);                                              \
+       mfspr   r11,XER;                /* save XER in stackframe       */ \
+       std     r11,_XER(r1);                                              \
+       li      r9,(n)+1;                                                  \
+       std     r9,_TRAP(r1);           /* set trap number              */ \
+       li      r10,0;                                                     \
+       ld      r11,exception_marker@toc(r2);                              \
+       std     r10,RESULT(r1);         /* clear regs->result           */ \
+       std     r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame      */
 
 /*
  * Exception vectors.
  */
-#define STD_EXCEPTION_PSERIES(n, label )       \
-       . = n;                                  \
-       .globl label##_Pseries;                 \
-label##_Pseries:                               \
-       EXCEPTION_PROLOG_PSERIES( n, label##_common )
-
-#define STD_EXCEPTION_ISERIES( n, label )      \
-       .globl label##_Iseries;                 \
-label##_Iseries:                               \
-       EXCEPTION_PROLOG_ISERIES( n );          \
+#define STD_EXCEPTION_PSERIES(n, label)                        \
+       . = n;                                          \
+       .globl label##_Pseries;                         \
+label##_Pseries:                                       \
+       HMT_MEDIUM;                                     \
+       mtspr   SPRG1,r13;              /* save r13 */  \
+       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+
+#define STD_EXCEPTION_ISERIES(n, label, area)          \
+       .globl label##_Iseries;                         \
+label##_Iseries:                                       \
+       HMT_MEDIUM;                                     \
+       mtspr   SPRG1,r13;              /* save r13 */  \
+       EXCEPTION_PROLOG_ISERIES_1(area);               \
+       EXCEPTION_PROLOG_ISERIES_2;                     \
        b       label##_common
 
-#define MASKABLE_EXCEPTION_ISERIES( n, label ) \
-       .globl label##_Iseries;                 \
-label##_Iseries:                               \
-       EXCEPTION_PROLOG_ISERIES( n );          \
-       lbz     r22,PACAPROFENABLED(r20);       \
-       cmpi    0,r22,0;                        \
-       bne-    label##_Iseries_profile;        \
-label##_Iseries_prof_ret:                      \
-       lbz     r22,PACAPROCENABLED(r20);       \
-       cmpi    0,r22,0;                        \
-       beq-    label##_Iseries_masked;         \
-       b       label##_common;                 \
-label##_Iseries_profile:                       \
-       std     r24,48(r21);                    \
-       std     r25,56(r21);                    \
-       mflr    r24;                            \
-       bl      do_profile;                     \
-       mtlr    r24;                            \
-       ld      r24,48(r21);                    \
-       ld      r25,56(r21);                    \
+#define MASKABLE_EXCEPTION_ISERIES(n, label)                           \
+       .globl label##_Iseries;                                         \
+label##_Iseries:                                                       \
+       HMT_MEDIUM;                                                     \
+       mtspr   SPRG1,r13;              /* save r13 */                  \
+       EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN);                         \
+       lbz     r10,PACAPROFENABLED(r13);                               \
+       cmpwi   r10,0;                                                  \
+       bne-    label##_Iseries_profile;                                \
+label##_Iseries_prof_ret:                                              \
+       lbz     r10,PACAPROCENABLED(r13);                               \
+       cmpwi   0,r10,0;                                                \
+       beq-    label##_Iseries_masked;                                 \
+       EXCEPTION_PROLOG_ISERIES_2;                                     \
+       b       label##_common;                                         \
+label##_Iseries_profile:                                               \
+       ld      r12,PACALPPACA+LPPACASRR1(r13);                         \
+       andi.   r12,r12,MSR_PR;         /* Test if in kernel */         \
+       bne     label##_Iseries_prof_ret;                               \
+       ld      r11,PACALPPACA+LPPACASRR0(r13);                         \
+       ld      r12,PACAPROFSTEXT(r13); /* _stext */                    \
+       subf    r11,r12,r11;            /* offset into kernel */        \
+       lwz     r12,PACAPROFSHIFT(r13);                                 \
+       srd     r11,r11,r12;                                            \
+       lwz     r12,PACAPROFLEN(r13);   /* profile table length - 1 */  \
+       cmpd    r11,r12;                /* off end? */                  \
+       ble     1f;                                                     \
+       mr      r11,r12;                /* force into last entry */     \
+1:     sldi    r11,r11,2;              /* convert to offset */         \
+       ld      r12,PACAPROFBUFFER(r13);/* profile buffer */            \
+       add     r12,r12,r11;                                            \
+2:     lwarx   r11,0,r12;              /* atomically increment */      \
+       addi    r11,r11,1;                                              \
+       stwcx.  r11,0,r12;                                              \
+       bne-    2b;                                                     \
        b       label##_Iseries_prof_ret
 
-#define STD_EXCEPTION_COMMON( trap, label, hdlr )      \
-       .globl label##_common;                  \
-label##_common:                                        \
-       EXCEPTION_PROLOG_COMMON;                \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;     \
-       li      r20,0;                          \
-       li      r6,trap;                        \
-       bl      .save_remaining_regs;           \
-       bl      hdlr;                           \
-       b       .ret_from_except
+#ifdef DO_SOFT_DISABLE
+#define DISABLE_INTS                           \
+       lbz     r10,PACAPROCENABLED(r13);       \
+       li      r11,0;                          \
+       std     r10,SOFTE(r1);                  \
+       mfmsr   r10;                            \
+       stb     r11,PACAPROCENABLED(r13);       \
+       ori     r10,r10,MSR_EE;                 \
+       mtmsrd  r10,1
+
+#define ENABLE_INTS                            \
+       lbz     r10,PACAPROCENABLED(r13);       \
+       mfmsr   r11;                            \
+       std     r10,SOFTE(r1);                  \
+       ori     r11,r11,MSR_EE;                 \
+       mtmsrd  r11,1
+
+#else  /* hard enable/disable interrupts */
+#define DISABLE_INTS
+
+#define ENABLE_INTS                            \
+       ld      r12,_MSR(r1);                   \
+       mfmsr   r11;                            \
+       rlwimi  r11,r12,0,MSR_EE;               \
+       mtmsrd  r11,1
+
+#endif
+
+#define STD_EXCEPTION_COMMON(trap, label, hdlr)                \
+       .align  7;                                      \
+       .globl label##_common;                          \
+label##_common:                                                \
+       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
+       DISABLE_INTS;                                   \
+       bl      .save_nvgprs;                           \
+       addi    r3,r1,STACK_FRAME_OVERHEAD;             \
+       bl      hdlr;                                   \
+       b       .ret_from_except
+
+#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)   \
+       .align  7;                                      \
+       .globl label##_common;                          \
+label##_common:                                                \
+       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
+       DISABLE_INTS;                                   \
+       addi    r3,r1,STACK_FRAME_OVERHEAD;             \
+       bl      hdlr;                                   \
+       b       .ret_from_except_lite
 
 /*
  * Start of pSeries system interrupt routines
@@ -384,22 +409,115 @@ label##_common:                                  \
        .globl __start_interrupts
 __start_interrupts:
 
-       STD_EXCEPTION_PSERIES( 0x100, SystemReset )
-       STD_EXCEPTION_PSERIES( 0x200, MachineCheck )
-       STD_EXCEPTION_PSERIES( 0x300, DataAccess )
-       STD_EXCEPTION_PSERIES( 0x380, DataAccessSLB )
-       STD_EXCEPTION_PSERIES( 0x400, InstructionAccess )
-       STD_EXCEPTION_PSERIES( 0x480, InstructionAccessSLB )
-       STD_EXCEPTION_PSERIES( 0x500, HardwareInterrupt )
-       STD_EXCEPTION_PSERIES( 0x600, Alignment )
-       STD_EXCEPTION_PSERIES( 0x700, ProgramCheck )
-       STD_EXCEPTION_PSERIES( 0x800, FPUnavailable )
-       STD_EXCEPTION_PSERIES( 0x900, Decrementer )
-       STD_EXCEPTION_PSERIES( 0xa00, Trap_0a )
-       STD_EXCEPTION_PSERIES( 0xb00, Trap_0b )
-       STD_EXCEPTION_PSERIES( 0xc00, SystemCall )
-       STD_EXCEPTION_PSERIES( 0xd00, SingleStep )
-       STD_EXCEPTION_PSERIES( 0xe00, Trap_0e )
+       STD_EXCEPTION_PSERIES(0x100, SystemReset)
+
+       . = 0x200
+_MachineCheckPseries:
+       HMT_MEDIUM
+       mtspr   SPRG1,r13               /* save r13 */
+       EXCEPTION_PROLOG_PSERIES(PACA_EXMC, MachineCheck_common)
+
+       . = 0x300
+       .globl DataAccess_Pseries
+DataAccess_Pseries:
+       HMT_MEDIUM
+       mtspr   SPRG1,r13
+BEGIN_FTR_SECTION
+       mtspr   SPRG2,r12
+       mfspr   r13,DAR
+       mfspr   r12,DSISR
+       srdi    r13,r13,60
+       rlwimi  r13,r12,16,0x20
+       mfcr    r12
+       cmpwi   r13,0x2c
+       beq     .do_stab_bolted_Pseries
+       mtcrf   0x80,r12
+       mfspr   r12,SPRG2
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, DataAccess_common)
+
+       . = 0x380
+       .globl DataAccessSLB_Pseries
+DataAccessSLB_Pseries:
+       HMT_MEDIUM
+       mtspr   SPRG1,r13
+       mfspr   r13,SPRG3               /* get paca address into r13 */
+       std     r9,PACA_EXSLB+EX_R9(r13)        /* save r9 - r12 */
+       std     r10,PACA_EXSLB+EX_R10(r13)
+       std     r11,PACA_EXSLB+EX_R11(r13)
+       std     r12,PACA_EXSLB+EX_R12(r13)
+       std     r3,PACASLBR3(r13)
+       mfspr   r9,SPRG1
+       std     r9,PACA_EXSLB+EX_R13(r13)
+       mfcr    r9
+       clrrdi  r12,r13,32              /* get high part of &label */
+       mfmsr   r10
+       mfspr   r11,SRR0                /* save SRR0 */
+       ori     r12,r12,(.do_slb_miss)@l
+       ori     r10,r10,MSR_IR|MSR_DR   /* DON'T set RI for SLB miss */
+       mtspr   SRR0,r12
+       mfspr   r12,SRR1                /* and SRR1 */
+       mtspr   SRR1,r10
+       mfspr   r3,DAR
+       rfid
+       b       .       /* prevent speculative execution */
+
+       STD_EXCEPTION_PSERIES(0x400, InstructionAccess)
+
+       . = 0x480
+       .globl InstructionAccessSLB_Pseries
+InstructionAccessSLB_Pseries:
+       HMT_MEDIUM
+       mtspr   SPRG1,r13
+       mfspr   r13,SPRG3               /* get paca address into r13 */
+       std     r9,PACA_EXSLB+EX_R9(r13)        /* save r9 - r12 */
+       std     r10,PACA_EXSLB+EX_R10(r13)
+       std     r11,PACA_EXSLB+EX_R11(r13)
+       std     r12,PACA_EXSLB+EX_R12(r13)
+       std     r3,PACASLBR3(r13)
+       mfspr   r9,SPRG1
+       std     r9,PACA_EXSLB+EX_R13(r13)
+       mfcr    r9
+       clrrdi  r12,r13,32              /* get high part of &label */
+       mfmsr   r10
+       mfspr   r11,SRR0                /* save SRR0 */
+       ori     r12,r12,(.do_slb_miss)@l
+       ori     r10,r10,MSR_IR|MSR_DR   /* DON'T set RI for SLB miss */
+       mtspr   SRR0,r12
+       mfspr   r12,SRR1                /* and SRR1 */
+       mtspr   SRR1,r10
+       mr      r3,r11                  /* SRR0 is faulting address */
+       rfid
+       b       .       /* prevent speculative execution */
+
+       STD_EXCEPTION_PSERIES(0x500, HardwareInterrupt)
+       STD_EXCEPTION_PSERIES(0x600, Alignment)
+       STD_EXCEPTION_PSERIES(0x700, ProgramCheck)
+       STD_EXCEPTION_PSERIES(0x800, FPUnavailable)
+       STD_EXCEPTION_PSERIES(0x900, Decrementer)
+       STD_EXCEPTION_PSERIES(0xa00, Trap_0a)
+       STD_EXCEPTION_PSERIES(0xb00, Trap_0b)
+
+       . = 0xc00
+       .globl  SystemCall_Pseries
+SystemCall_Pseries:
+       HMT_MEDIUM
+       mr      r9,r13
+       mfmsr   r10
+       mfspr   r13,SPRG3
+       mfspr   r11,SRR0
+       clrrdi  r12,r13,32
+       oris    r12,r12,SystemCall_common@h
+       ori     r12,r12,SystemCall_common@l
+       mtspr   SRR0,r12
+       ori     r10,r10,MSR_IR|MSR_DR|MSR_RI
+       mfspr   r12,SRR1
+       mtspr   SRR1,r10
+       rfid
+       b       .       /* prevent speculative execution */
+
+       STD_EXCEPTION_PSERIES(0xd00, SingleStep)
+       STD_EXCEPTION_PSERIES(0xe00, Trap_0e)
 
        /* We need to deal with the Altivec unavailable exception
         * here which is at 0xf20, thus in the middle of the
@@ -407,27 +525,23 @@ __start_interrupts:
         * trickery is thus necessary
         */
        . = 0xf00
-       b       .PerformanceMonitor_Pseries
-       . = 0xf20
-       b       .AltivecUnavailable_Pseries
+       b       PerformanceMonitor_Pseries
 
-       STD_EXCEPTION_PSERIES( 0x1300, InstructionBreakpoint )
-       STD_EXCEPTION_PSERIES( 0x1700, AltivecAssist )
+       STD_EXCEPTION_PSERIES(0xf20, AltivecUnavailable)
+
+       STD_EXCEPTION_PSERIES(0x1300, InstructionBreakpoint)
+       STD_EXCEPTION_PSERIES(0x1700, AltivecAssist)
+
+       /* moved from 0xf00 */
+       STD_EXCEPTION_PSERIES(0x3000, PerformanceMonitor)
 
-       /* Here are the "moved" performance monitor and
-        * altivec unavailable exceptions
-        */
-       . = 0x3000
-       .globl PerformanceMonitor_Pseries;
-.PerformanceMonitor_Pseries:
-       EXCEPTION_PROLOG_PSERIES(0xf00, PerformanceMonitor_common)
-       
        . = 0x3100
-       .globl AltivecUnavailable_Pseries;
-.AltivecUnavailable_Pseries:
-       EXCEPTION_PROLOG_PSERIES(0xf20, AltivecUnavailable_common)
+_GLOBAL(do_stab_bolted_Pseries)
+       mtcrf   0x80,r12
+       mfspr   r12,SPRG2
+       EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
+
        
-               
        /* Space for the naca.  Architected to be located at real address
         * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
         * The first dword of the naca is required by iSeries LPAR to
@@ -466,16 +580,16 @@ __end_systemcfg:
         * VSID generation algorithm.  See include/asm/mmu_context.h.
         */
 
-       .llong  1               /* # ESIDs to be mapped by hypervisor         */
+       .llong  1               /* # ESIDs to be mapped by hypervisor    */
        .llong  1               /* # memory ranges to be mapped by hypervisor */
-       .llong  STAB0_PAGE      /* Page # of segment table within load area   */
+       .llong  STAB0_PAGE      /* Page # of segment table within load area     */
+       .llong  0               /* Reserved */
+       .llong  0               /* Reserved */
        .llong  0               /* Reserved */
-       .llong  0               /* Reserved */
-       .llong  0               /* Reserved */
        .llong  0               /* Reserved */
        .llong  0               /* Reserved */
        .llong  0x0c00000000    /* ESID to map (Kernel at EA = 0xC000000000000000) */
-       .llong  0x06a99b4b14    /* VSID to map (Kernel at VA = 0x6a99b4b140000000) */
+       .llong  0x06a99b4b14    /* VSID to map (Kernel at VA = 0x6a99b4b140000000) */
        .llong  8192            /* # pages to map (32 MB) */
        .llong  0               /* Offset from start of loadarea to start of map */
        .llong  0x0006a99b4b140000      /* VPN of first page to map */
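
An aside on the DISABLE_INTS / ENABLE_INTS macros introduced earlier in this patch: under DO_SOFT_DISABLE they record the previous soft-enable state in the SOFTE word of the stack frame, mark the paca soft-disabled, and then set MSR_EE, so external interrupts stay hard-enabled but are only noted and replayed later. A minimal C sketch of DISABLE_INTS under that reading follows; the struct and helper names are placeholders, not kernel symbols.

        /* Placeholder types with just the fields the sketch touches. */
        struct paca_sketch  { unsigned char proc_enabled; };   /* PACAPROCENABLED */
        struct frame_sketch { unsigned long softe; };          /* SOFTE(r1) */

        static void hard_enable_ee(void) { /* mfmsr; ori ...,MSR_EE; mtmsrd */ }

        /* C rendering of DISABLE_INTS in the DO_SOFT_DISABLE case. */
        static void disable_ints(struct paca_sketch *paca, struct frame_sketch *frame)
        {
                frame->softe = paca->proc_enabled;  /* remember old soft state */
                paca->proc_enabled = 0;             /* soft-disable */
                hard_enable_ee();                   /* interrupts are deferred, not lost */
        }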
@@ -484,22 +598,74 @@ __end_systemcfg:
 
 /***  ISeries-LPAR interrupt handlers ***/
 
-       STD_EXCEPTION_ISERIES( 0x200, MachineCheck )
-       STD_EXCEPTION_ISERIES( 0x300, DataAccess )
-       STD_EXCEPTION_ISERIES( 0x380, DataAccessSLB )
-       STD_EXCEPTION_ISERIES( 0x400, InstructionAccess )
-       STD_EXCEPTION_ISERIES( 0x480, InstructionAccessSLB )
-       MASKABLE_EXCEPTION_ISERIES( 0x500, HardwareInterrupt )
-       STD_EXCEPTION_ISERIES( 0x600, Alignment )
-       STD_EXCEPTION_ISERIES( 0x700, ProgramCheck )
-       STD_EXCEPTION_ISERIES( 0x800, FPUnavailable )
-       MASKABLE_EXCEPTION_ISERIES( 0x900, Decrementer )
-       STD_EXCEPTION_ISERIES( 0xa00, Trap_0a )
-       STD_EXCEPTION_ISERIES( 0xb00, Trap_0b )
-       STD_EXCEPTION_ISERIES( 0xc00, SystemCall )
-       STD_EXCEPTION_ISERIES( 0xd00, SingleStep )
-       STD_EXCEPTION_ISERIES( 0xe00, Trap_0e )
-       STD_EXCEPTION_ISERIES( 0xf00, PerformanceMonitor )
+       STD_EXCEPTION_ISERIES(0x200, MachineCheck, PACA_EXMC)
+
+       .globl DataAccess_Iseries
+DataAccess_Iseries:
+       mtspr   SPRG1,r13
+BEGIN_FTR_SECTION
+       mtspr   SPRG2,r12
+       mfspr   r13,DAR
+       mfspr   r12,DSISR
+       srdi    r13,r13,60
+       rlwimi  r13,r12,16,0x20
+       mfcr    r12
+       cmpwi   r13,0x2c
+       beq     .do_stab_bolted_Iseries
+       mtcrf   0x80,r12
+       mfspr   r12,SPRG2
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+       EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
+       EXCEPTION_PROLOG_ISERIES_2
+       b       DataAccess_common
+
+.do_stab_bolted_Iseries:
+       mtcrf   0x80,r12
+       mfspr   r12,SPRG2
+       EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+       EXCEPTION_PROLOG_ISERIES_2
+       b       .do_stab_bolted
+
+       .globl  DataAccessSLB_Iseries
+DataAccessSLB_Iseries:
+       mtspr   SPRG1,r13               /* save r13 */
+       EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+       std     r3,PACASLBR3(r13)
+       ld      r11,PACALPPACA+LPPACASRR0(r13)
+       ld      r12,PACALPPACA+LPPACASRR1(r13)
+       mfspr   r3,DAR
+       b       .do_slb_miss
+
+       STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN)
+
+       .globl  InstructionAccessSLB_Iseries
+InstructionAccessSLB_Iseries:
+       mtspr   SPRG1,r13               /* save r13 */
+       EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+       std     r3,PACASLBR3(r13)
+       ld      r11,PACALPPACA+LPPACASRR0(r13)
+       ld      r12,PACALPPACA+LPPACASRR1(r13)
+       mr      r3,r11
+       b       .do_slb_miss
+
+       MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt)
+       STD_EXCEPTION_ISERIES(0x600, Alignment, PACA_EXGEN)
+       STD_EXCEPTION_ISERIES(0x700, ProgramCheck, PACA_EXGEN)
+       STD_EXCEPTION_ISERIES(0x800, FPUnavailable, PACA_EXGEN)
+       MASKABLE_EXCEPTION_ISERIES(0x900, Decrementer)
+       STD_EXCEPTION_ISERIES(0xa00, Trap_0a, PACA_EXGEN)
+       STD_EXCEPTION_ISERIES(0xb00, Trap_0b, PACA_EXGEN)
+
+       .globl  SystemCall_Iseries
+SystemCall_Iseries:
+       mr      r9,r13
+       mfspr   r13,SPRG3
+       EXCEPTION_PROLOG_ISERIES_2
+       b       SystemCall_common
+
+       STD_EXCEPTION_ISERIES( 0xd00, SingleStep, PACA_EXGEN)
+       STD_EXCEPTION_ISERIES( 0xe00, Trap_0e, PACA_EXGEN)
+       STD_EXCEPTION_ISERIES( 0xf00, PerformanceMonitor, PACA_EXGEN)
 
        .globl SystemReset_Iseries
 SystemReset_Iseries:
@@ -508,7 +674,7 @@ SystemReset_Iseries:
        ori     r24,r24,MSR_RI
        mtmsrd  r24                     /* RI on */
        lhz     r24,PACAPACAINDEX(r13)  /* Get processor # */
-       cmp   0,r24,0                 /* Are we processor 0? */
+       cmpwi   0,r24,0                 /* Are we processor 0? */
        beq     .__start_initialization_iSeries /* Start up the first processor */
        mfspr   r4,CTRLF
        li      r5,RUNLATCH             /* Turn off the run light */
@@ -527,7 +693,7 @@ SystemReset_Iseries:
        addi    r1,r3,THREAD_SIZE
        subi    r1,r1,STACK_FRAME_OVERHEAD
 
-       cmp   0,r23,0
+       cmpwi   0,r23,0
        beq     iseries_secondary_smp_loop      /* Loop until told to go */
 #ifdef SECONDARY_PROCESSORS
        bne     .__secondary_start              /* Loop until told to go */
@@ -539,7 +705,7 @@ iseries_secondary_smp_loop:
        rldicr  r3,r3,32,15             /* r0 = (r3 << 32) & 0xffff000000000000 */
 #else /* CONFIG_SMP */
        /* Yield the processor.  This is required for non-SMP kernels
-          which are running on multi-threaded machines. */
+               which are running on multi-threaded machines. */
        lis     r3,0x8000
        rldicr  r3,r3,32,15             /* r3 = (r3 << 32) & 0xffff000000000000 */
        addi    r3,r3,18                /* r3 = 0x8000000000000012 which is "yield" */
@@ -552,32 +718,34 @@ iseries_secondary_smp_loop:
        b       1b                      /* If SMP not configured, secondaries
                                         * loop forever */
 
-       .globl HardwareInterrupt_Iseries_masked
-HardwareInterrupt_Iseries_masked:
-       b       maskable_exception_exit
-
        .globl Decrementer_Iseries_masked
 Decrementer_Iseries_masked:
-       li      r22,1
-       stb     r22,PACALPPACA+LPPACADECRINT(r20)
-       lwz     r22,PACADEFAULTDECR(r20)
-       mtspr   DEC,r22
-maskable_exception_exit:
-       mtcrf   0xff,r23                /* Restore regs and free exception frame */
-       ld      r22,EX_SRR0(r21)
-       ld      r23,EX_SRR1(r21)
-       mtspr   SRR0,r22
-       mtspr   SRR1,r23
-       ld      r22,EX_R22(r21)
-       ld      r23,EX_R23(r21)
-       mfspr   r21,SPRG1
-       mfspr   r20,SPRG2
+       li      r11,1
+       stb     r11,PACALPPACA+LPPACADECRINT(r13)
+       lwz     r12,PACADEFAULTDECR(r13)
+       mtspr   SPRN_DEC,r12
+       /* fall through */
+
+       .globl HardwareInterrupt_Iseries_masked
+HardwareInterrupt_Iseries_masked:
+       mtcrf   0x80,r9         /* Restore regs */
+       ld      r11,PACALPPACA+LPPACASRR0(r13)
+       ld      r12,PACALPPACA+LPPACASRR1(r13)
+       mtspr   SRR0,r11
+       mtspr   SRR1,r12
+       ld      r9,PACA_EXGEN+EX_R9(r13)
+       ld      r10,PACA_EXGEN+EX_R10(r13)
+       ld      r11,PACA_EXGEN+EX_R11(r13)
+       ld      r12,PACA_EXGEN+EX_R12(r13)
+       ld      r13,PACA_EXGEN+EX_R13(r13)
        rfid
+       b       .       /* prevent speculative execution */
 #endif
+
 /*
  * Data area reserved for FWNMI option.
  */
-        .= 0x7000
+       .= 0x7000
        .globl fwnmi_data_area
 fwnmi_data_area:
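
A note on the Decrementer_Iseries_masked / HardwareInterrupt_Iseries_masked exit a few lines up: a decrementer that fires while soft-disabled is recorded in the lppaca (LPPACADECRINT) and the decrementer is re-armed from PACADEFAULTDECR, after which the code falls through to the common exit that restores CR and r9-r13 from PACA_EXGEN and returns with rfid. A small C sketch of the decrementer half, with invented field names, might look like this:

        struct lppaca_sketch   { unsigned char decr_int; };    /* LPPACADECRINT */
        struct dec_paca_sketch { unsigned int default_decr; }; /* PACADEFAULTDECR */

        static void set_dec(unsigned int val) { (void)val; /* mtspr SPRN_DEC,val */ }

        /* Masked decrementer: note that a tick is pending and push the next
         * one out; the event is picked up once interrupts are soft-enabled. */
        static void decrementer_masked(struct lppaca_sketch *lp, struct dec_paca_sketch *paca)
        {
                lp->decr_int = 1;
                set_dec(paca->default_decr);
                /* then restore r9-r13/CR from PACA_EXGEN and rfid */
        }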
 
@@ -587,10 +755,14 @@ fwnmi_data_area:
        . = 0x8000
        .globl SystemReset_FWNMI
 SystemReset_FWNMI:
-       EXCEPTION_PROLOG_PSERIES(0x100, SystemReset_common)
+       HMT_MEDIUM
+       mtspr   SPRG1,r13               /* save r13 */
+       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, SystemReset_common)
        .globl MachineCheck_FWNMI
 MachineCheck_FWNMI:
-       EXCEPTION_PROLOG_PSERIES(0x200, MachineCheck_common)
+       HMT_MEDIUM
+       mtspr   SPRG1,r13               /* save r13 */
+       EXCEPTION_PROLOG_PSERIES(PACA_EXMC, MachineCheck_common)
 
        /*
         * Space for the initial segment table
@@ -608,327 +780,238 @@ __end_stab:
 
 /*** Common interrupt handlers ***/
 
-       STD_EXCEPTION_COMMON( 0x100, SystemReset, .SystemResetException )
-       STD_EXCEPTION_COMMON( 0x200, MachineCheck, .MachineCheckException )
-       STD_EXCEPTION_COMMON( 0x900, Decrementer, .timer_interrupt )
-       STD_EXCEPTION_COMMON( 0xa00, Trap_0a, .UnknownException )
-       STD_EXCEPTION_COMMON( 0xb00, Trap_0b, .UnknownException )
-       STD_EXCEPTION_COMMON( 0xd00, SingleStep, .SingleStepException )
-       STD_EXCEPTION_COMMON( 0xe00, Trap_0e, .UnknownException )
-       STD_EXCEPTION_COMMON( 0xf00, PerformanceMonitor, .PerformanceMonitorException )
-       STD_EXCEPTION_COMMON(0x1300, InstructionBreakpoint, .InstructionBreakpointException )
+       STD_EXCEPTION_COMMON(0x100, SystemReset, .SystemResetException)
+
+       /*
+        * Machine check is different because we use a different
+        * save area: PACA_EXMC instead of PACA_EXGEN.
+        */
+       .align  7
+       .globl MachineCheck_common
+MachineCheck_common:
+       EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+       DISABLE_INTS
+       bl      .save_nvgprs
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      .MachineCheckException
+       b       .ret_from_except
+
+       STD_EXCEPTION_COMMON_LITE(0x900, Decrementer, .timer_interrupt)
+       STD_EXCEPTION_COMMON(0xa00, Trap_0a, .UnknownException)
+       STD_EXCEPTION_COMMON(0xb00, Trap_0b, .UnknownException)
+       STD_EXCEPTION_COMMON(0xd00, SingleStep, .SingleStepException)
+       STD_EXCEPTION_COMMON(0xe00, Trap_0e, .UnknownException)
+       STD_EXCEPTION_COMMON(0xf00, PerformanceMonitor, .PerformanceMonitorException)
+       STD_EXCEPTION_COMMON(0x1300, InstructionBreakpoint, .InstructionBreakpointException)
 #ifdef CONFIG_ALTIVEC
-       STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .AltivecAssistException )
+       STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .AltivecAssistException)
 #else
-       STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .UnknownException )
+       STD_EXCEPTION_COMMON(0x1700, AltivecAssist, .UnknownException)
 #endif
 
 /*
- * Here the exception frame is filled out and we have detected that
- * the kernel stack pointer is bad.  R23 contains the saved CR, r20
- * points to the paca, r21 points to the exception frame, and r22
- * contains the (bad) kernel stack pointer.
+ * Here we have detected that the kernel stack pointer is bad.
+ * R9 contains the saved CR, r13 points to the paca,
+ * r10 contains the (bad) kernel stack pointer,
+ * r11 and r12 contain the saved SRR0 and SRR1.
  * We switch to using the paca guard page as an emergency stack,
- * save the registers on there, and call kernel_bad_stack(),
- * which panics.
+ * save the registers there, and call kernel_bad_stack(), which panics.
  */
 bad_stack:
-       addi    r1,r20,8192-64-INT_FRAME_SIZE
-       std     r22,GPR1(r1)
-       std     r23,_CCR(r1)
-       ld      r22,EX_R20(r21)
-       std     r22,GPR20(r1)
-       ld      r23,EX_R21(r21)
-       std     r23,GPR21(r1)
-       ld      r22,EX_R22(r21)
-       std     r22,GPR22(r1)
-       ld      r23,EX_R23(r21)
-       std     r23,GPR23(r1)
-       ld      r23,EX_DAR(r21)
-       std     r23,_DAR(r1)
-       lwz     r22,EX_DSISR(r21)
-       std     r22,_DSISR(r1)
-       lwz     r23,EX_TRAP(r21)
-       std     r23,TRAP(r1)
-       ld      r22,EX_SRR0(r21)
-       ld      r23,EX_SRR1(r21)
-       std     r22,_NIP(r1)
-       std     r23,_MSR(r1)
-       addi    r21,r21,-EXC_FRAME_SIZE
-       std     r21,PACAEXCSP(r20)
-       mflr    r22
-       std     r22,_LINK(r1)
-       mfctr   r23
-       std     r23,_CTR(r1)
-       mfspr   r22,XER
-       std     r22,_XER(r1)
-       SAVE_GPR(0, r1)
-       SAVE_10GPRS(2, r1)
-       SAVE_8GPRS(12, r1)
-       SAVE_8GPRS(24, r1)
-       addi    r21,r1,INT_FRAME_SIZE
-       std     r21,0(r1)
-       li      r22,0
-       std     r22,0(r21)
-       ld      r2,PACATOC(r20)
-       mr      r13,r20
+       ld      r1,PACAEMERGSP(r13)
+       subi    r1,r1,64+INT_FRAME_SIZE
+       std     r9,_CCR(r1)
+       std     r10,GPR1(r1)
+       std     r11,_NIP(r1)
+       std     r12,_MSR(r1)
+       mfspr   r11,DAR
+       mfspr   r12,DSISR
+       std     r11,_DAR(r1)
+       std     r12,_DSISR(r1)
+       mflr    r10
+       mfctr   r11
+       mfxer   r12
+       std     r10,_LINK(r1)
+       std     r11,_CTR(r1)
+       std     r12,_XER(r1)
+       SAVE_GPR(0,r1)
+       SAVE_GPR(2,r1)
+       SAVE_4GPRS(3,r1)
+       SAVE_2GPRS(7,r1)
+       SAVE_10GPRS(12,r1)
+       SAVE_10GPRS(22,r1)
+       addi    r11,r1,INT_FRAME_SIZE
+       std     r11,0(r1)
+       li      r12,0
+       std     r12,0(r11)
+       ld      r2,PACATOC(r13)
 1:     addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .kernel_bad_stack
        b       1b
 
 /*
- * Return from an exception which is handled without calling
- * save_remaining_regs.  The caller is assumed to have done
- * EXCEPTION_PROLOG_COMMON.
+ * Return from an exception with minimal checks.
+ * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
+ * If interrupts have been enabled, or anything has been
+ * done that might have changed the scheduling status of
+ * any task or sent any task a signal, you should use
+ * ret_from_except or ret_from_except_lite instead of this.
  */
 fast_exception_return:
-       andi.   r3,r23,MSR_RI           /* check if RI is set */
+       ld      r12,_MSR(r1)
+       ld      r11,_NIP(r1)
+       andi.   r3,r12,MSR_RI           /* check if RI is set */
        beq-    unrecov_fer
-       ld      r3,_CCR(r1)
-       ld      r4,_LINK(r1)
-       ld      r5,_CTR(r1)
-       ld      r6,_XER(r1)
-       mtcr    r3
-       mtlr    r4
-       mtctr   r5
-       mtspr   XER,r6
+       ld      r3,_CCR(r1)
+       ld      r4,_LINK(r1)
+       ld      r5,_CTR(r1)
+       ld      r6,_XER(r1)
+       mtcr    r3
+       mtlr    r4
+       mtctr   r5
+       mtxer   r6
        REST_GPR(0, r1)
        REST_8GPRS(2, r1)
-       REST_4GPRS(10, r1)
 
-       mfmsr   r20
-       li      r21, MSR_RI
-       andc    r20,r20,r21
-       mtmsrd  r20,1
+       mfmsr   r10
+       clrrdi  r10,r10,2               /* clear RI (LE is 0 already) */
+       mtmsrd  r10,1
 
-       mtspr   SRR1,r23
-       mtspr   SRR0,r22
-       REST_4GPRS(20, r1)
-       ld      r1,GPR1(r1)
+       mtspr   SRR1,r12
+       mtspr   SRR0,r11
+       REST_4GPRS(10, r1)
+       ld      r1,GPR1(r1)
        rfid
+       b       .       /* prevent speculative execution */
 
 unrecov_fer:
-       li      r6,0x4000
-       li      r20,0
-       bl      .save_remaining_regs
+       bl      .save_nvgprs
 1:     addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      .unrecoverable_exception
        b       1b
 
 /*
- * Here r20 points to the PACA, r21 to the exception frame,
- * r23 contains the saved CR.
- * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
+ * Here r13 points to the paca, r9 contains the saved CR,
+ * SRR0 and SRR1 are saved in r11 and r12,
+ * r9 - r13 are saved in paca->exgen.
  */
+       .align  7
        .globl DataAccess_common
 DataAccess_common:
-BEGIN_FTR_SECTION
-       mfspr   r22,DAR
-       srdi    r22,r22,60
-       cmpi    0,r22,0xc
-
-       /* Segment fault on a bolted segment. Go off and map that segment. */
-       beq-    .do_stab_bolted
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-stab_bolted_user_return:
-       EXCEPTION_PROLOG_COMMON
-       ld      r3,_DSISR(r1)
-       andis.  r0,r3,0xa450            /* weird error? */
-       bne     1f                      /* if not, try to put a PTE */
-       andis.  r0,r3,0x0020            /* Is it a page table fault? */
-       rlwinm  r4,r3,32-23,29,29       /* DSISR_STORE -> _PAGE_RW */
-       ld      r3,_DAR(r1)             /* into the hash table */
-
-BEGIN_FTR_SECTION
-       beq+    2f                      /* If so handle it */
-       li      r4,0x300                /* Trap number */
-       bl      .do_stab_SI
-       b       1f
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-
-2:     li      r5,0x300
-       bl      .do_hash_page_DSI       /* Try to handle as hpte fault */
-1:
-       ld      r4,_DAR(r1)
-       ld      r5,_DSISR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0x300
-       bl      .save_remaining_regs
-       bl      .do_page_fault
-       b       .ret_from_except
-
-       .globl DataAccessSLB_common
-DataAccessSLB_common:
-       mfspr   r22,DAR
-       srdi    r22,r22,60
-       cmpi    0,r22,0xc
-
-       /* Segment fault on a bolted segment. Go off and map that segment. */
-       beq     .do_slb_bolted
-
-       EXCEPTION_PROLOG_COMMON
-       ld      r3,_DAR(r1)
-       li      r4,0x380                /* Exception vector  */
-       bl      .slb_allocate
-       or.     r3,r3,r3                /* Check return code */
-       beq     fast_exception_return   /* Return if we succeeded */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       ld      r4,_DAR(r1)
-       li      r6,0x380
-       li      r5,0
-       bl      .save_remaining_regs
-       bl      .do_page_fault
-       b       .ret_from_except
-
+       mfspr   r10,DAR
+       std     r10,PACA_EXGEN+EX_DAR(r13)
+       mfspr   r10,DSISR
+       stw     r10,PACA_EXGEN+EX_DSISR(r13)
+       EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+       ld      r3,PACA_EXGEN+EX_DAR(r13)
+       lwz     r4,PACA_EXGEN+EX_DSISR(r13)
+       li      r5,0x300
+       b       .do_hash_page           /* Try to handle as hpte fault */
+
+       .align  7
        .globl InstructionAccess_common
 InstructionAccess_common:
-       EXCEPTION_PROLOG_COMMON
-
-BEGIN_FTR_SECTION
-       andis.  r0,r23,0x0020           /* no ste found? */
-       beq+    2f
-       mr      r3,r22                  /* SRR0 at interrupt */
-       li      r4,0x400                /* Trap number       */
-       bl      .do_stab_SI
-       b       1f
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-
-2:     mr      r3,r22
+       EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+       ld      r3,_NIP(r1)
+       andis.  r4,r12,0x5820
        li      r5,0x400
-       bl      .do_hash_page_ISI       /* Try to handle as hpte fault */
-1:
-       mr      r4,r22
-       rlwinm  r5,r23,0,4,4            /* We only care about PR in error_code */
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0x400
-       bl      .save_remaining_regs
-       bl      .do_page_fault
-       b       .ret_from_except
-
-       .globl InstructionAccessSLB_common
-InstructionAccessSLB_common:
-       EXCEPTION_PROLOG_COMMON
-       mr      r3,r22                  /* SRR0 = NIA        */
-       li      r4,0x480                /* Exception vector  */
-       bl      .slb_allocate
-       or.     r3,r3,r3                /* Check return code */
-       beq+    fast_exception_return   /* Return if we succeeded */
-
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       mr      r4,r22                  /* SRR0 = NIA        */
-       li      r6,0x480
-       li      r5,0
-       bl      .save_remaining_regs
-       bl      .do_page_fault
-       b       .ret_from_except
+       b       .do_hash_page           /* Try to handle as hpte fault */
 
+       .align  7
        .globl HardwareInterrupt_common
+       .globl HardwareInterrupt_entry
 HardwareInterrupt_common:
-       EXCEPTION_PROLOG_COMMON
+       EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
 HardwareInterrupt_entry:
+       DISABLE_INTS
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       li      r20,0
-       li      r6,0x500
-       bl      .save_remaining_regs
-       bl      .do_IRQ
-       b       .ret_from_except
+       bl      .do_IRQ
+       b       .ret_from_except_lite
 
+       .align  7
        .globl Alignment_common
 Alignment_common:
-       EXCEPTION_PROLOG_COMMON
+       mfspr   r10,DAR
+       std     r10,PACA_EXGEN+EX_DAR(r13)
+       mfspr   r10,DSISR
+       stw     r10,PACA_EXGEN+EX_DSISR(r13)
+       EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
+       ld      r3,PACA_EXGEN+EX_DAR(r13)
+       lwz     r4,PACA_EXGEN+EX_DSISR(r13)
+       std     r3,_DAR(r1)
+       std     r4,_DSISR(r1)
+       bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0x600
-       bl      .save_remaining_regs
-       bl      .AlignmentException
-       b       .ret_from_except
+       ENABLE_INTS
+       bl      .AlignmentException
+       b       .ret_from_except
 
+       .align  7
        .globl ProgramCheck_common
 ProgramCheck_common:
-       EXCEPTION_PROLOG_COMMON
+       EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+       bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0x700
-       bl      .save_remaining_regs
-       bl      .ProgramCheckException
-       b       .ret_from_except
+       ENABLE_INTS
+       bl      .ProgramCheckException
+       b       .ret_from_except
 
+       .align  7
        .globl FPUnavailable_common
 FPUnavailable_common:
-       EXCEPTION_PROLOG_COMMON
+       EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
        bne     .load_up_fpu            /* if from user, just load it up */
+       bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0x800
-       bl      .save_remaining_regs
-       bl      .KernelFPUnavailableException
+       ENABLE_INTS
+       bl      .KernelFPUnavailableException
        BUG_OPCODE
 
+       .align  7
        .globl AltivecUnavailable_common
 AltivecUnavailable_common:
-       EXCEPTION_PROLOG_COMMON
+       EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
 #ifdef CONFIG_ALTIVEC
-       bne     .load_up_altivec                /* if from user, just load it up */
+       bne     .load_up_altivec        /* if from user, just load it up */
 #endif
+       bl      .save_nvgprs
        addi    r3,r1,STACK_FRAME_OVERHEAD
-       DO_COPY_EE()
-       li      r6,0xf20
-       bl      .save_remaining_regs
-#ifdef CONFIG_ALTIVEC
-       bl      .KernelAltivecUnavailableException
-#else
-       bl      .UnknownException
-#endif
-       BUG_OPCODE
-               
-       .globl SystemCall_common
-SystemCall_common:
-       EXCEPTION_PROLOG_COMMON
-#ifdef CONFIG_PPC_ISERIES
-       cmpi    0,r0,0x5555             /* Special syscall to handle pending */
-       bne+    1f                      /* interrupts */
-       andi.   r6,r23,MSR_PR           /* Only allowed from kernel */
-       beq+    HardwareInterrupt_entry
-1:
-#endif
-       DO_COPY_EE()
-       li      r6,0xC00
-       bl      .save_remaining_regs
-       bl      .DoSyscall
-       b       .ret_from_except
-
-_GLOBAL(do_hash_page_ISI)
-       li      r4,0
-_GLOBAL(do_hash_page_DSI)
+       ENABLE_INTS
+       bl      .AltivecUnavailableException
+       b       .ret_from_except
+
+/*
+ * Hash table stuff
+ */
+       .align  7
+_GLOBAL(do_hash_page)
+       std     r3,_DAR(r1)
+       std     r4,_DSISR(r1)
+
+       andis.  r0,r4,0xa450            /* weird error? */
+       bne-    .handle_page_fault      /* if not, try to insert a HPTE */
+BEGIN_FTR_SECTION
+       andis.  r0,r4,0x0020            /* Is it a segment table fault? */
+       bne-    .do_ste_alloc           /* If so handle it */
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+
        /*
         * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
         * accessing a userspace segment (even from the kernel). We assume
         * kernel addresses always have the high bit set.
         */
-       rotldi  r0,r3,15                /* Move high bit into MSR_PR position */
-       orc     r0,r23,r0
-       rlwimi  r4,r0,32-13,30,30       /* Insert into _PAGE_USER */
+       rlwinm  r4,r4,32-23,29,29       /* DSISR_STORE -> _PAGE_RW */
+       rotldi  r0,r3,15                /* Move high bit into MSR_PR posn */
+       orc     r0,r12,r0               /* MSR_PR | ~high_bit */
+       rlwimi  r4,r0,32-13,30,30       /* becomes _PAGE_USER access bit */
        ori     r4,r4,1                 /* add _PAGE_PRESENT */
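
For reference, the access word assembled in r4 above corresponds roughly to the C sketch below. The flag values match the bit positions the rotate/insert instructions target (_PAGE_PRESENT = 0x1, _PAGE_USER = 0x2, _PAGE_RW = 0x4); the helper name is illustrative, not a kernel function.

    #include <stdint.h>

    /* Sketch of the r4 access-flag computation above; not kernel code. */
    static inline unsigned long hash_access_flags(uint64_t dar, uint32_t dsisr,
                                                  uint64_t msr)
    {
            unsigned long access = 0x1;             /* _PAGE_PRESENT */

            if (dsisr & 0x02000000)                 /* DSISR "store" bit */
                    access |= 0x4;                  /* _PAGE_RW */

            /*
             * User access if the fault came from user mode (MSR_PR, 0x4000)
             * or the address is not a kernel address (kernel EAs have the
             * top bit set) -- what the rotldi/orc/rlwimi sequence does.
             */
            if ((msr & 0x4000) || !(dar >> 63))
                    access |= 0x2;                  /* _PAGE_USER */

            return access;
    }
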
 
-       mflr    r21                     /* Save LR in r21 */
-
-#ifdef DO_SOFT_DISABLE
        /*
-        * We hard enable here (but first soft disable) so that the hash_page
-        * code can spin on the hash_table_lock with problem on a shared
-        * processor.
+        * On iSeries, we soft-disable interrupts here, then
+        * hard-enable interrupts so that the hash_page code can spin on
+        * the hash_table_lock without problems on a shared processor.
         */
-       li      r0,0
-       stb     r0,PACAPROCENABLED(r20) /* Soft Disabled */
-
-       mfmsr   r0
-       ori     r0,r0,MSR_EE
-       mtmsrd  r0,1                    /* Hard Enable */
-#endif
+       DISABLE_INTS
 
        /*
         * r3 contains the faulting address
@@ -937,413 +1020,202 @@ _GLOBAL(do_hash_page_DSI)
         *
         * at return r3 = 0 for success
         */
-
        bl      .hash_page              /* build HPTE if possible */
+       cmpdi   r3,0                    /* see if hash_page succeeded */
 
 #ifdef DO_SOFT_DISABLE
        /*
-        * Now go back to hard disabled.
+        * If we had interrupts soft-enabled at the point where the
+        * DSI/ISI occurred, and an interrupt came in during hash_page,
+        * handle it now.
+        * We jump to ret_from_except_lite rather than fast_exception_return
+        * because ret_from_except_lite will check for and handle pending
+        * interrupts if necessary.
         */
-       mfmsr   r0
-       li      r4,0
-       ori     r4,r4,MSR_EE
-       andc    r0,r0,r4
-       mtmsrd  r0,1                    /* Hard Disable */
-
-       ld      r0,SOFTE(r1)
-       cmpdi   0,r0,0                  /* See if we will soft enable in */
-                                       /* save_remaining_regs */
-       beq     5f
-       CHECKANYINT(r4,r5)
-       bne-    HardwareInterrupt_entry /* Convert this DSI into an External */
-                                       /* to process interrupts which occurred */
-                                       /* during hash_page */
-5:
-       stb     r0,PACAPROCENABLED(r20) /* Restore soft enable/disable status */
+       beq     .ret_from_except_lite
+       /*
+        * hash_page couldn't handle it, set soft interrupt enable back
+        * to what it was before the trap.  Note that .local_irq_restore
+        * handles any interrupts pending at this point.
+        */
+       ld      r3,SOFTE(r1)
+       bl      .local_irq_restore
+       b       11f
+#else
+       beq     fast_exception_return   /* Return from exception on success */
+       /* fall through */
 #endif
-       or.     r3,r3,r3                /* Check return code */
-       beq     fast_exception_return   /* Return from exception on success */
 
-       mtlr    r21                     /* restore LR */
-       blr                             /* Return to DSI or ISI on failure */
+/* Here we have a page fault that hash_page can't handle. */
+_GLOBAL(handle_page_fault)
+       ENABLE_INTS
+11:    ld      r4,_DAR(r1)
+       ld      r5,_DSISR(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      .do_page_fault
+       cmpdi   r3,0
+       beq+    .ret_from_except_lite
+       bl      .save_nvgprs
+       mr      r5,r3
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       lwz     r4,_DAR(r1)
+       bl      .bad_page_fault
+       b       .ret_from_except
+
+       /* here we have a segment miss */
+_GLOBAL(do_ste_alloc)
+       bl      .ste_allocate           /* try to insert stab entry */
+       cmpdi   r3,0
+       beq+    fast_exception_return
+       b       .handle_page_fault
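
Taken together, the three paths above (hash_page success, do_page_fault fallback, bad_page_fault) amount to roughly the outline below, written as pseudo-C; the call signatures are approximations of the 2.6-era handlers, not copied from them.

    /* Pseudo-C outline of the fault dispatch above; signatures approximate. */
    struct pt_regs;
    extern long hash_page(unsigned long ea, unsigned long access);
    extern long do_page_fault(struct pt_regs *regs, unsigned long addr,
                              unsigned long error_code);
    extern void bad_page_fault(struct pt_regs *regs, unsigned long addr, long sig);

    static void hash_fault_path(struct pt_regs *regs, unsigned long dar,
                                unsigned long dsisr, unsigned long access)
    {
            long err;

            if (hash_page(dar, access) == 0)
                    return;                 /* HPTE inserted: plain exception return */

            err = do_page_fault(regs, dar, dsisr);
            if (err == 0)
                    return;                 /* Linux page tables fixed it up */

            bad_page_fault(regs, dar, err); /* neither could: oops or signal */
    }
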
 
 /*
- * r20 points to the PACA, r21 to the exception frame,
- * r23 contains the saved CR.
- * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r9 - r13 are saved in paca->exslb.
  * We assume we aren't going to take any exceptions during this procedure.
+ * We assume (DAR >> 60) == 0xc.
  */
+       .align  7
 _GLOBAL(do_stab_bolted)
-       stw     r23,EX_CCR(r21) /* save CR in exc. frame */
+       stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
+       std     r11,PACA_EXSLB+EX_SRR0(r13)     /* save SRR0 in exc. frame */
 
-       mfspr   r22,DSISR
-       andis.  r22,r22,0x0020
-       beq-    stab_bolted_user_return
+       /* Hash to the primary group */
+       ld      r10,PACASTABVIRT(r13)
+       mfspr   r11,DAR
+       srdi    r11,r11,28
+       rldimi  r10,r11,7,52    /* r10 = first ste of the group */
 
+       /* Calculate VSID */
        /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-       mfspr   r21,DAR
-       rldicl  r20,r21,36,51
-       sldi    r20,r20,15
-       srdi    r21,r21,60
-       or      r20,r20,r21
+       rldic   r11,r11,15,36
+       ori     r11,r11,0xc
 
        /* VSID_RANDOMIZER */
-       li      r21,9
-       sldi    r21,r21,32
-       oris    r21,r21,58231
-       ori     r21,r21,39831
-
-       mulld   r20,r20,r21
-       clrldi  r20,r20,28      /* r20 = vsid */
+       li      r9,9
+       sldi    r9,r9,32
+       oris    r9,r9,58231
+       ori     r9,r9,39831
 
-       mfsprg  r21,3
-       ld      r21,PACASTABVIRT(r21)
-
-       /* Hash to the primary group */
-       mfspr   r22,DAR
-       rldicl  r22,r22,36,59
-       rldicr  r22,r22,7,56
-       or      r21,r21,r22     /* r21 = first ste of the group */
+       mulld   r9,r11,r9
+       rldic   r9,r9,12,16     /* r9 = vsid << 12 */
 
        /* Search the primary group for a free entry */
-       li      r22,0
-1:
-       ld      r23,0(r21)      /* Test valid bit of the current ste   */
-       rldicl  r23,r23,57,63
-       cmpwi   r23,0
-       bne     2f
-       li      r23,0
-       rldimi  r23,r20,12,0    /* Insert the new vsid value            */
-       std     r23,8(r21)      /* Put new entry back into the stab     */
-       eieio                  /* Order vsid update                    */
-       li      r23,0
-       mfspr   r20,DAR        /* Get the new esid                     */
-       rldicl  r20,r20,36,28  /* Permits a full 36b of ESID           */
-       rldimi  r23,r20,28,0    /* Insert the new esid value            */
-       ori     r23,r23,144      /* Turn on valid and kp                 */
-       std     r23,0(r21)      /* Put new entry back into the stab     */
-       sync                   /* Order the update                     */
-       b       3f
-2:
-       addi    r22,r22,1
-       addi    r21,r21,16
-       cmpldi  r22,7
-       ble     1b
-
-       /* Stick for only searching the primary group for now.          */
+1:     ld      r11,0(r10)      /* Test valid bit of the current ste    */
+       andi.   r11,r11,0x80
+       beq     2f
+       addi    r10,r10,16
+       andi.   r11,r10,0x70
+       bne     1b
+
+       /* Stick for only searching the primary group for now.          */
        /* At least for now, we use a very simple random castout scheme */
-       /* Use the TB as a random number ;  OR in 1 to avoid entry 0    */
-       mftb    r22
-       andi.   r22,r22,7
-       ori     r22,r22,1
-       sldi    r22,r22,4
-
-       /* r21 currently points to and ste one past the group of interest */
-       /* make it point to the randomly selected entry                   */
-       subi    r21,r21,128
-       or      r21,r21,r22      /* r21 is the entry to invalidate        */
-
-       isync                    /* mark the entry invalid                */
-       ld      r23,0(r21)
-       li      r22,-129
-       and     r23,r23,r22
-       std     r23,0(r21)
+       /* Use the TB as a random number ;  OR in 1 to avoid entry 0    */
+       mftb    r11
+       rldic   r11,r11,4,57    /* r11 = (r11 << 4) & 0x70 */
+       ori     r11,r11,0x10
+
+       /* r10 currently points to an ste one past the group of interest */
+       /* make it point to the randomly selected entry                 */
+       subi    r10,r10,128
+       or      r10,r10,r11     /* r10 is the entry to invalidate       */
+
+       isync                   /* mark the entry invalid               */
+       ld      r11,0(r10)
+       rldicl  r11,r11,56,1    /* clear the valid bit */
+       rotldi  r11,r11,8
+       std     r11,0(r10)
        sync
 
-       li      r23,0
-       rldimi  r23,r20,12,0
-       std     r23,8(r21)
+       clrrdi  r11,r11,28      /* Get the esid part of the ste         */
+       slbie   r11
+
+2:     std     r9,8(r10)       /* Store the vsid part of the ste       */
        eieio
 
-       ld      r22,0(r21)      /* Get the esid part of the ste         */
-       li      r23,0
-       mfspr   r20,DAR         /* Get the new esid                     */
-       rldicl  r20,r20,36,28   /* Permits a full 32b of ESID           */
-       rldimi  r23,r20,28,0    /* Insert the new esid value            */
-       ori     r23,r23,144     /* Turn on valid and kp                 */
-       std     r23,0(r21)      /* Put new entry back into the stab     */
-
-       rldicl  r22,r22,36,28
-       rldicr  r22,r22,28,35
-       slbie   r22
+       mfspr   r11,DAR         /* Get the new esid                     */
+       clrrdi  r11,r11,28      /* Permits a full 32b of ESID           */
+       ori     r11,r11,0x90    /* Turn on valid and kp                 */
+       std     r11,0(r10)      /* Put new entry back into the stab     */
+
        sync
 
-3:
        /* All done -- return from exception. */
-       mfsprg  r20,3                   /* Load the PACA pointer  */
-       ld      r21,PACAEXCSP(r20)      /* Get the exception frame pointer */
-       addi    r21,r21,EXC_FRAME_SIZE
-       lwz     r23,EX_CCR(r21)         /* get saved CR */
-
-       ld      r22,EX_SRR1(r21)
-       andi.   r22,r22,MSR_RI
-       beq-    unrecov_stab
-
-       /* note that this is almost identical to maskable_exception_exit */
-       mtcr    r23                     /* restore CR */
-
-       mfmsr   r22
-       li      r23, MSR_RI
-       andc    r22,r22,r23
-       mtmsrd  r22,1
-
-       ld      r22,EX_SRR0(r21)        /* Get SRR0 from exc. frame */
-       ld      r23,EX_SRR1(r21)        /* Get SRR1 from exc. frame */
-       mtspr   SRR0,r22
-       mtspr   SRR1,r23
-       ld      r22,EX_R22(r21)         /* restore r22 and r23 */
-       ld      r23,EX_R23(r21)
-       mfspr   r20,SPRG2
-       mfspr   r21,SPRG1
-       rfid
-
-unrecov_stab:
-       EXCEPTION_PROLOG_COMMON
-       li      r6,0x4100
-       li      r20,0
-       bl      .save_remaining_regs
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      .unrecoverable_exception
-       b       1b
-
-/*
- * r20 points to the PACA, r21 to the exception frame,
- * r23 contains the saved CR.
- * r20 - r23, SRR0 and SRR1 are saved in the exception frame.
- * We assume we aren't going to take any exceptions during this procedure.
- */
-/* XXX note fix masking in get_kernel_vsid to match */
-_GLOBAL(do_slb_bolted)
-       stw     r23,EX_CCR(r21)         /* save CR in exc. frame */
-
-       /*
-        * We take the next entry, round robin. Previously we tried
-        * to find a free slot first but that took too long. Unfortunately
-        * we dont have any LRU information to help us choose a slot.
-        */
-
-       /* r20 = paca */
-1:     ld      r22,PACASTABRR(r20)
-       addi    r21,r22,1
-       cmpdi   r21,SLB_NUM_ENTRIES
-       blt+    2f
-       li      r21,2                   /* dont touch slot 0 or 1 */
-2:     std     r21,PACASTABRR(r20)
-
-       /* r20 = paca, r22 = entry */
-
-       /* 
-        * Never cast out the segment for our kernel stack. Since we
-        * dont invalidate the ERAT we could have a valid translation
-        * for the kernel stack during the first part of exception exit 
-        * which gets invalidated due to a tlbie from another cpu at a
-        * non recoverable point (after setting srr0/1) - Anton
-        */
-       slbmfee r21,r22
-       srdi    r21,r21,27
-       /*
-        * Use paca->ksave as the value of the kernel stack pointer,
-        * because this is valid at all times.
-        * The >> 27 (rather than >> 28) is so that the LSB is the
-        * valid bit - this way we check valid and ESID in one compare.
-        * In order to completely close the tiny race in the context
-        * switch (between updating r1 and updating paca->ksave),
-        * we check against both r1 and paca->ksave.
-        */
-       srdi    r23,r1,27
-       ori     r23,r23,1
-       cmpd    r23,r21
-       beq-    1b
-       ld      r23,PACAKSAVE(r20)
-       srdi    r23,r23,27
-       ori     r23,r23,1
-       cmpd    r23,r21
-       beq-    1b
-
-       /* r20 = paca, r22 = entry */
-
-       /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-       mfspr   r21,DAR
-       rldicl  r23,r21,36,51
-       sldi    r23,r23,15
-       srdi    r21,r21,60
-       or      r23,r23,r21
-
-       /* VSID_RANDOMIZER */
-       li      r21,9
-       sldi    r21,r21,32
-       oris    r21,r21,58231
-       ori     r21,r21,39831
-
-       /* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
-       mulld   r23,r23,r21
-       clrldi  r23,r23,28
+       lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
+       ld      r11,PACA_EXSLB+EX_SRR0(r13)     /* get saved SRR0 */
 
-       /* r20 = paca, r22 = entry, r23 = vsid */
+       andi.   r10,r12,MSR_RI
+       beq-    unrecov_slb
 
-       /* Put together slb word1 */
-       sldi    r23,r23,12
+       mtcrf   0x80,r9                 /* restore CR */
 
-BEGIN_FTR_SECTION
-       /* set kp and c bits */
-       ori     r23,r23,0x480
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
-BEGIN_FTR_SECTION
-       /* set kp, l and c bits */
-       ori     r23,r23,0x580
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-
-       /* r20 = paca, r22 = entry, r23 = slb word1 */
-
-       /* Put together slb word0 */
-       mfspr   r21,DAR
-       rldicr  r21,r21,0,35    /* get the new esid */
-       oris    r21,r21,2048    /* set valid bit */
-       rldimi  r21,r22,0,52    /* insert entry */
-
-       /* r20 = paca, r21 = slb word0, r23 = slb word1 */
-
-       /* 
-        * No need for an isync before or after this slbmte. The exception
-        * we enter with and the rfid we exit with are context synchronizing .
-        */
-       slbmte  r23,r21
+       mfmsr   r10
+       clrrdi  r10,r10,2
+       mtmsrd  r10,1
 
-       /* All done -- return from exception. */
-       ld      r21,PACAEXCSP(r20)      /* Get the exception frame pointer */
-       addi    r21,r21,EXC_FRAME_SIZE
-       lwz     r23,EX_CCR(r21)         /* get saved CR */
-       /* note that this is almost identical to maskable_exception_exit */
-
-       ld      r22,EX_SRR1(r21)
-       andi.   r22,r22,MSR_RI
-       beq-    unrecov_stab
-
-       /*
-        * Until everyone updates binutils hardwire the POWER4 optimised
-        * single field mtcrf
-        */
-#if 0
-       .machine        push
-       .machine        "power4"
-       mtcrf   0x80,r23
-       .machine        pop
-#else
-       .long 0x7ef80120
-#endif
-
-       mfmsr   r22
-       li      r23, MSR_RI
-       andc    r22,r22,r23
-       mtmsrd  r22,1
-
-       ld      r22,EX_SRR0(r21)        /* Get SRR0 from exc. frame */
-       ld      r23,EX_SRR1(r21)        /* Get SRR1 from exc. frame */
-       mtspr   SRR0,r22
-       mtspr   SRR1,r23
-       ld      r22,EX_R22(r21)         /* restore r22 and r23 */
-       ld      r23,EX_R23(r21)
-       ld      r20,EX_R20(r21)
-       mfspr   r21,SPRG1
+       mtspr   SRR0,r11
+       mtspr   SRR1,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
        rfid
-
-_GLOBAL(do_stab_SI)
-       mflr    r21                     /* Save LR in r21 */
-
-       /*
-        * r3 contains the faulting address
-        * r4 contains the required access permissions
-        *
-        * at return r3 = 0 for success
-        */
-
-       bl      .ste_allocate           /* build STE if possible */
-       or.     r3,r3,r3                /* Check return code */
-       beq     fast_exception_return   /* Return from exception on success */
-       mtlr    r21                     /* restore LR */
-       blr                             /* Return to DSI or ISI on failure */
+       b       .       /* prevent speculative execution */
 
 /*
- * This code finishes saving the registers to the exception frame.
- * Address translation is already on.
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
  */
-_GLOBAL(save_remaining_regs)
-       /*
-        * Save the rest of the registers into the pt_regs structure
-        */
-       std     r22,_NIP(r1)
-       std     r23,_MSR(r1)
-       std     r6,TRAP(r1)
-       ld      r6,GPR6(r1)
-       SAVE_2GPRS(14, r1)
-       SAVE_4GPRS(16, r1)
-       SAVE_8GPRS(24, r1)
-
-       /* Set the marker value "regshere" just before the reg values */
-       SET_REG_TO_CONST(r22, 0x7265677368657265)
-       std     r22,STACK_FRAME_OVERHEAD-16(r1)
+_GLOBAL(do_slb_miss)
+       mflr    r10
 
-       /*
-        * Clear the RESULT field
-        */
-       li      r22,0
-       std     r22,RESULT(r1)
+       stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
+       std     r11,PACA_EXSLB+EX_SRR0(r13)     /* save SRR0 in exc. frame */
+       std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
 
-       /*
-        * Test if from user state; result will be tested later
-        */
-       andi.   r23,r23,MSR_PR          /* Set CR for later branch */
+       bl      .slb_allocate                   /* handle it */
 
-       /*
-        * Indicate that r1 contains the kernel stack and
-        * get the Kernel TOC pointer from the paca
-        */
-       ld      r2,PACATOC(r13)         /* Get Kernel TOC pointer */
-
-       /*
-        * If from user state, update THREAD.regs
-        */
-       beq     2f                      /* Modify THREAD.regs if from user */
-       addi    r23,r1,STACK_FRAME_OVERHEAD
-       ld      r22, PACACURRENT(r13)
-       std     r23,THREAD+PT_REGS(r22)
-2:
-       SET_REG_TO_CONST(r22, MSR_KERNEL)
-
-#ifdef DO_SOFT_DISABLE
-       stb     r20,PACAPROCENABLED(r13) /* possibly soft enable */
-       ori     r22,r22,MSR_EE          /* always hard enable */
-#else
-       rldimi  r22,r20,15,48           /* Insert desired EE value */
-#endif
+       /* All done -- return from exception. */
 
-       mtmsrd  r22,1
-       blr
+       ld      r10,PACA_EXSLB+EX_LR(r13)
+       ld      r3,PACASLBR3(r13)
+       lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
+       ld      r11,PACA_EXSLB+EX_SRR0(r13)     /* get saved SRR0 */
+
+       mtlr    r10
+
+       andi.   r10,r12,MSR_RI  /* check for unrecoverable exception */
+       beq-    unrecov_slb
+
+.machine       push
+.machine       "power4"
+       mtcrf   0x80,r9
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+.machine       pop
+
+       mtspr   SRR0,r11
+       mtspr   SRR1,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       rfid
+       b       .       /* prevent speculative execution */
 
-/*
- * Kernel profiling with soft disable on iSeries
- */
-do_profile:
-       ld      r22,8(r21)              /* Get SRR1 */
-       andi.   r22,r22,MSR_PR          /* Test if in kernel */
-       bnelr                           /* return if not in kernel */
-       ld      r22,0(r21)              /* Get SRR0 */
-       ld      r25,PACAPROFSTEXT(r20)  /* _stext */
-       subf    r22,r25,r22             /* offset into kernel */
-       lwz     r25,PACAPROFSHIFT(r20)
-       srd     r22,r22,r25
-       lwz     r25,PACAPROFLEN(r20)    /* length of profile table (-1) */
-       cmp     0,r22,r25               /* off end? */
-       ble     1f
-       mr      r22,r25                 /* force into last entry */
-1:     sldi    r22,r22,2               /* convert to offset into buffer */
-       ld      r25,PACAPROFBUFFER(r20) /* profile buffer */
-       add     r25,r25,r22
-2:     lwarx   r22,0,r25               /* atomically increment */
-       addi    r22,r22,1
-       stwcx.  r22,0,r25
-       bne-    2b
-       blr
+unrecov_slb:
+       EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+       DISABLE_INTS
+       bl      .save_nvgprs
+1:     addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      .unrecoverable_exception
+       b       1b
 
 
 /*
@@ -1356,32 +1228,30 @@ _GLOBAL(pseries_secondary_smp_init)
        isync
 
        /* Set up a paca value for this processor. */
-       LOADADDR(r24, paca)              /* Get base vaddr of paca array  */
-       mulli   r13,r3,PACA_SIZE         /* Calculate vaddr of right paca */
-       add     r13,r13,r24              /* for this processor.           */
+       LOADADDR(r24, paca)             /* Get base vaddr of paca array  */
+       mulli   r13,r3,PACA_SIZE        /* Calculate vaddr of right paca */
+       add     r13,r13,r24             /* for this processor.           */
 
-       mtspr   SPRG3,r13                /* Save vaddr of paca in SPRG3   */
-       mr      r24,r3                   /* __secondary_start needs cpu#  */
+       mtspr   SPRG3,r13               /* Save vaddr of paca in SPRG3   */
+       mr      r24,r3                  /* __secondary_start needs cpu#  */
 
 1:
        HMT_LOW
-       lbz     r23,PACAPROCSTART(r13)   /* Test if this processor should */
-                                        /* start.                        */
+       lbz     r23,PACAPROCSTART(r13)  /* Test if this processor should */
+                                       /* start.                        */
        sync
 
-        /* Create a temp kernel stack for use before relocation is on.    */
-        mr      r1,r13
-        addi    r1,r1,PACAGUARD
-        addi    r1,r1,0x1000
-        subi    r1,r1,STACK_FRAME_OVERHEAD
+       /* Create a temp kernel stack for use before relocation is on.  */
+       ld      r1,PACAEMERGSP(r13)
+       subi    r1,r1,STACK_FRAME_OVERHEAD
 
-       cmp   0,r23,0
+       cmpwi   0,r23,0
 #ifdef CONFIG_SMP
 #ifdef SECONDARY_PROCESSORS
        bne     .__secondary_start
 #endif
 #endif
-       b       1b                       /* Loop until told to go         */
+       b       1b                      /* Loop until told to go         */
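
In outline, the hold loop above does the following (illustrative C; the paca field and helper names are stand-ins, not the real structure members).

    struct paca_sketch {
            unsigned char cpu_start;        /* corresponds to PACAPROCSTART */
    };

    extern struct paca_sketch paca[];
    extern void hmt_low(void);                              /* HMT_LOW */
    extern void secondary_start(int cpu, struct paca_sketch *p);

    /* Sketch: secondary CPUs spin here until the boot CPU releases them. */
    static void pseries_secondary_hold(int cpu)
    {
            struct paca_sketch *my_paca = &paca[cpu];       /* SPRG3 holds this */

            for (;;) {
                    hmt_low();                              /* low thread priority */
                    /* the real code also picks up a temporary stack from
                     * the paca (PACAEMERGSP) before branching */
                    if (my_paca->cpu_start)
                            secondary_start(cpu, my_paca);  /* never returns */
            }
    }
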
 #ifdef CONFIG_PPC_ISERIES
 _GLOBAL(__start_initialization_iSeries)
        /* Clear out the BSS */
@@ -1389,13 +1259,13 @@ _GLOBAL(__start_initialization_iSeries)
 
        LOADADDR(r8,__bss_start)
 
-       sub     r11,r11,r8        /* bss size                        */
-       addi    r11,r11,7         /* round up to an even double word */
-       rldicl. r11,r11,61,3      /* shift right by 3                */
+       sub     r11,r11,r8              /* bss size                     */
+       addi    r11,r11,7               /* round up to an even double word */
+       rldicl. r11,r11,61,3            /* shift right by 3             */
        beq     4f
        addi    r8,r8,-8
        li      r0,0
-       mtctr   r11               /* zero this many doublewords      */
+       mtctr   r11                     /* zero this many doublewords   */
 3:     stdu    r0,8(r8)
        bdnz    3b
 4:
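
The BSS-clearing loop above is the usual doubleword memset, roughly as sketched below; __bss_start is the real linker symbol, while bss_end stands for whatever end-of-BSS address was loaded into r11 just before this hunk.

    extern char __bss_start[];

    /* Sketch of the doubleword BSS clear above. */
    static void clear_bss(char *bss_end)
    {
            unsigned long *p = (unsigned long *)__bss_start;
            unsigned long ndw = ((unsigned long)(bss_end - __bss_start) + 7) >> 3;

            while (ndw--)                   /* the mtctr/bdnz loop */
                    *p++ = 0;
    }
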
@@ -1422,10 +1292,10 @@ _GLOBAL(__start_initialization_iSeries)
        std     r4,0(r9)                /* set the naca pointer */
 
        /* Get the pointer to the segment table */
-       ld      r6,PACA(r4)             /* Get the base paca pointer       */
+       ld      r6,PACA(r4)             /* Get the base paca pointer    */
        ld      r4,PACASTABVIRT(r6)
 
-       bl      .iSeries_fixup_klimit
+       bl      .iSeries_fixup_klimit
 
        /* relocation is on at this point */
 
@@ -1443,6 +1313,7 @@ _STATIC(mmu_off)
        mtspr   SRR1,r3
        sync
        rfid
+       b       .       /* prevent speculative execution */
 _GLOBAL(__start_initialization_pSeries)
        mr      r31,r3                  /* save parameters */
        mr      r30,r4
@@ -1456,8 +1327,8 @@ _GLOBAL(__start_initialization_pSeries)
        bl      .reloc_offset
 
        LOADADDR(r2,__toc_start)
-       addi    r2,r2,0x4000
-       addi    r2,r2,0x4000
+       addi    r2,r2,0x4000
+       addi    r2,r2,0x4000
 
        /* Relocate the TOC from a virt addr to a real addr */
        sub     r2,r2,r3
@@ -1498,33 +1369,33 @@ _STATIC(__after_prom_start)
  * unknown exception placeholders.
  *
  * Note: This process overwrites the OF exception vectors.
- *       r26 == relocation offset
- *       r27 == KERNELBASE
+ *     r26 == relocation offset
+ *     r27 == KERNELBASE
  */
        bl      .reloc_offset
        mr      r26,r3
        SET_REG_TO_CONST(r27,KERNELBASE)
 
-       li      r3,0                    /* target addr */
+       li      r3,0                    /* target addr */
 
        // XXX FIXME: Use phys returned by OF (r23)
-       sub     r4,r27,r26              /* source addr */
-                                       /* current address of _start   */
-                                       /*   i.e. where we are running */
-                                       /*        the source addr      */
+       sub     r4,r27,r26              /* source addr                   */
+                                       /* current address of _start     */
+                                       /*   i.e. where we are running   */
+                                       /*      the source addr          */
 
-       LOADADDR(r5,copy_to_here)       /* # bytes of memory to copy      */
+       LOADADDR(r5,copy_to_here)       /* # bytes of memory to copy     */
        sub     r5,r5,r27
 
-       li      r6,0x100                /* Start offset, the first 0x100  */
-                                       /* bytes were copied earlier.     */
+       li      r6,0x100                /* Start offset, the first 0x100 */
+                                       /* bytes were copied earlier.    */
 
-       bl      .copy_and_flush         /* copy the first n bytes         */
-                                       /* this includes the code being   */
-                                       /* executed here.                 */
+       bl      .copy_and_flush         /* copy the first n bytes        */
+                                       /* this includes the code being  */
+                                       /* executed here.                */
 
-        LOADADDR(r0, 4f)                /* Jump to the copy of this code  */
-       mtctr   r0                      /* that we just made/relocated    */
+       LOADADDR(r0, 4f)                /* Jump to the copy of this code */
+       mtctr   r0                      /* that we just made/relocated   */
        bctr
 
 4:     LOADADDR(r5,klimit)
@@ -1546,23 +1417,23 @@ _STATIC(__after_prom_start)
 _GLOBAL(copy_and_flush)
        addi    r5,r5,-8
        addi    r6,r6,-8
-4:     li      r0,16                   /* Use the least common      */
-                                       /* denominator cache line    */
-                                       /* size.  This results in    */
-                                       /* extra cache line flushes  */
-                                       /* but operation is correct. */
-                                       /* Can't get cache line size */
-                                       /* from NACA as it is being  */
-                                       /* moved too.                */
-
-       mtctr   r0                      /* put # words/line in ctr */
-3:     addi    r6,r6,8                 /* copy a cache line */
+4:     li      r0,16                   /* Use the least common         */
+                                       /* denominator cache line       */
+                                       /* size.  This results in       */
+                                       /* extra cache line flushes     */
+                                       /* but operation is correct.    */
+                                       /* Can't get cache line size    */
+                                       /* from NACA as it is being     */
+                                       /* moved too.                   */
+
+       mtctr   r0                      /* put # words/line in ctr      */
+3:     addi    r6,r6,8                 /* copy a cache line            */
        ldx     r0,r6,r4
        stdx    r0,r6,r3
        bdnz    3b
-       dcbst   r6,r3                   /* write it to memory */
+       dcbst   r6,r3                   /* write it to memory           */
        sync
-       icbi    r6,r3                   /* flush the icache line */
+       icbi    r6,r3                   /* flush the icache line        */
        cmpld   0,r6,r5
        blt     4b
        sync
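
What copy_and_flush does, approximately, in C: the 128-byte step is the 16-doubleword chunk from the li r0,16 above, the cache-maintenance instructions are shown as inline asm, and the offsets and barriers are simplified relative to the assembly.

    /* Sketch of copy_and_flush: copy in 128-byte chunks and push each
     * destination chunk out of the D-cache and I-cache, since the copied
     * kernel text is about to be executed at its new address. */
    static void copy_and_flush_sketch(unsigned long dest, unsigned long src,
                                      unsigned long nbytes, unsigned long offset)
    {
            while (offset < nbytes) {
                    int i;
                    for (i = 0; i < 16; i++, offset += 8)   /* 16 doublewords */
                            *(unsigned long *)(dest + offset) =
                                    *(unsigned long *)(src + offset);
                    asm volatile("dcbst 0,%0 ; sync ; icbi 0,%0"
                                 : : "r"(dest + offset - 8) : "memory");
            }
            asm volatile("sync" : : : "memory");
    }
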
@@ -1583,9 +1454,9 @@ copy_to_here:
  * On entry: r13 == 'current' && last_task_used_math != 'current'
  */
 _STATIC(load_up_fpu)
-       mfmsr   r5                      /* grab the current MSR */
+       mfmsr   r5                      /* grab the current MSR */
        ori     r5,r5,MSR_FP
-       mtmsrd  r5                      /* enable use of fpu now */
+       mtmsrd  r5                      /* enable use of fpu now */
        isync
 /*
  * For SMP, we don't do lazy FPU switching because it just gets too
@@ -1594,9 +1465,9 @@ _STATIC(load_up_fpu)
  *
  */
 #ifndef CONFIG_SMP
-       LOADBASE(r3,last_task_used_math)
-       ld      r4,last_task_used_math@l(r3)
-       cmp   0,r4,0
+       ld      r3,last_task_used_math@got(r2)
+       ld      r4,0(r3)
+       cmpdi   0,r4,0
        beq     1f
        /* Save FP state to last_task_used_math's THREAD struct */
        addi    r4,r4,THREAD
@@ -1606,8 +1477,8 @@ _STATIC(load_up_fpu)
        /* Disable FP for last_task_used_math */
        ld      r5,PT_REGS(r4)
        ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       li      r20,MSR_FP|MSR_FE0|MSR_FE1
-       andc    r4,r4,r20
+       li      r6,MSR_FP|MSR_FE0|MSR_FE1
+       andc    r4,r4,r6
        std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
 #endif /* CONFIG_SMP */
@@ -1615,15 +1486,16 @@ _STATIC(load_up_fpu)
        ld      r4,PACACURRENT(r13)
        addi    r5,r4,THREAD            /* Get THREAD */
        ld      r4,THREAD_FPEXC_MODE(r5)
-       ori     r23,r23,MSR_FP
-       or      r23,r23,r4
+       ori     r12,r12,MSR_FP
+       or      r12,r12,r4
+       std     r12,_MSR(r1)
        lfd     fr0,THREAD_FPSCR(r5)
        mtfsf   0xff,fr0
        REST_32FPRS(0, r5)
 #ifndef CONFIG_SMP
        /* Update last_task_used_math to 'current' */
        subi    r4,r5,THREAD            /* Back to 'current' */
-       std     r4,last_task_used_math@l(r3)
+       std     r4,0(r3)
 #endif /* CONFIG_SMP */
        /* restore registers and return */
        b       fast_exception_return
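
On a UP kernel the sequence above amounts to the usual lazy-FPU hand-off, roughly as below (assuming the usual kernel declarations); save_fp_state/restore_fp_state are illustrative stand-ins for the SAVE_32FPRS/REST_32FPRS plus fpscr handling, and the field names are approximate.

    /* Sketch of load_up_fpu (the !CONFIG_SMP path); names are illustrative. */
    void load_up_fpu_sketch(struct pt_regs *regs)
    {
            struct task_struct *last = last_task_used_math;

            if (last) {
                    /* steal the FPU back: save the old owner's state and
                     * clear MSR_FP in its saved user MSR */
                    save_fp_state(&last->thread);
                    last->thread.regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
            }

            /* let current return to userspace with the FPU enabled */
            regs->msr |= MSR_FP | current->thread.fpexc_mode;
            restore_fp_state(&current->thread);
            last_task_used_math = current;
    }
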
@@ -1633,10 +1505,10 @@ _STATIC(load_up_fpu)
  * Disable the FPU.
  */
 _GLOBAL(disable_kernel_fp)
-       mfmsr   r3
-       rldicl  r0,r3,(63-MSR_FP_LG),1
-       rldicl  r3,r0,(MSR_FP_LG+1),0
-       mtmsrd  r3                      /* disable use of fpu now */
+       mfmsr   r3
+       rldicl  r0,r3,(63-MSR_FP_LG),1
+       rldicl  r3,r0,(MSR_FP_LG+1),0
+       mtmsrd  r3                      /* disable use of fpu now */
        isync
        blr
 
@@ -1651,11 +1523,11 @@ _GLOBAL(giveup_fpu)
        ori     r5,r5,MSR_FP
        mtmsrd  r5                      /* enable use of fpu now */
        isync
-       cmp   0,r3,0
+       cmpdi   0,r3,0
        beqlr-                          /* if no previous owner, done */
        addi    r3,r3,THREAD            /* want THREAD of task */
        ld      r5,PT_REGS(r3)
-       cmp   0,r5,0
+       cmpdi   0,r5,0
        SAVE_32FPRS(0, r3)
        mffs    fr0
        stfd    fr0,THREAD_FPSCR(r3)
@@ -1667,8 +1539,8 @@ _GLOBAL(giveup_fpu)
 1:
 #ifndef CONFIG_SMP
        li      r5,0
-       LOADBASE(r4,last_task_used_math)
-       std     r5,last_task_used_math@l(r4)
+       ld      r4,last_task_used_math@got(r2)
+       std     r5,0(r4)
 #endif /* CONFIG_SMP */
        blr
 
@@ -1685,9 +1557,9 @@ _GLOBAL(giveup_fpu)
  * On entry: r13 == 'current' && last_task_used_altivec != 'current'
  */
 _STATIC(load_up_altivec)
-       mfmsr   r5                      /* grab the current MSR */
+       mfmsr   r5                      /* grab the current MSR */
        oris    r5,r5,MSR_VEC@h
-       mtmsrd  r5                      /* enable use of VMX now */
+       mtmsrd  r5                      /* enable use of VMX now */
        isync
        
 /*
@@ -1699,9 +1571,9 @@ _STATIC(load_up_altivec)
  * avoid saving all of the VREGs here...
  */
 #ifndef CONFIG_SMP
-       LOADBASE(r3,last_task_used_altivec)
-       ld      r4,last_task_used_altivec@l(r3)
-       cmp   0,r4,0
+       ld      r3,last_task_used_altivec@got(r2)
+       ld      r4,0(r3)
+       cmpdi   0,r4,0
        beq     1f
        /* Save VMX state to last_task_used_altivec's THREAD struct */
        addi    r4,r4,THREAD
@@ -1712,8 +1584,8 @@ _STATIC(load_up_altivec)
        /* Disable VMX for last_task_used_altivec */
        ld      r5,PT_REGS(r4)
        ld      r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-       lis     r20,MSR_VEC@h
-       andc    r4,r4,r20
+       lis     r6,MSR_VEC@h
+       andc    r4,r4,r6
        std     r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
 #endif /* CONFIG_SMP */
@@ -1723,7 +1595,7 @@ _STATIC(load_up_altivec)
         * all 1's
         */
        mfspr   r4,SPRN_VRSAVE
-       cmp   0,r4,0
+       cmpdi   0,r4,0
        bne+    1f
        li      r4,-1
        mtspr   SPRN_VRSAVE,r4
@@ -1731,7 +1603,8 @@ _STATIC(load_up_altivec)
        /* enable use of VMX after return */
        ld      r4,PACACURRENT(r13)
        addi    r5,r4,THREAD            /* Get THREAD */
-       oris    r23,r23,MSR_VEC@h
+       oris    r12,r12,MSR_VEC@h
+       std     r12,_MSR(r1)
        li      r4,1
        li      r10,THREAD_VSCR
        stw     r4,THREAD_USED_VR(r5)
@@ -1740,7 +1613,7 @@ _STATIC(load_up_altivec)
 #ifndef CONFIG_SMP
        /* Update last_task_used_math to 'current' */
        subi    r4,r5,THREAD            /* Back to 'current' */
-       std     r4,last_task_used_altivec@l(r3)
+       std     r4,0(r3)
 #endif /* CONFIG_SMP */
        /* restore registers and return */
        b       fast_exception_return
@@ -1750,10 +1623,10 @@ _STATIC(load_up_altivec)
  * Disable the VMX.
  */
 _GLOBAL(disable_kernel_altivec)
-       mfmsr   r3
-       rldicl  r0,r3,(63-MSR_VEC_LG),1
-       rldicl  r3,r0,(MSR_VEC_LG+1),0
-       mtmsrd  r3                      /* disable use of VMX now */
+       mfmsr   r3
+       rldicl  r0,r3,(63-MSR_VEC_LG),1
+       rldicl  r3,r0,(MSR_VEC_LG+1),0
+       mtmsrd  r3                      /* disable use of VMX now */
        isync
        blr
 
@@ -1768,11 +1641,11 @@ _GLOBAL(giveup_altivec)
        oris    r5,r5,MSR_VEC@h
        mtmsrd  r5                      /* enable use of VMX now */
        isync
-       cmp   0,r3,0
+       cmpdi   0,r3,0
        beqlr-                          /* if no previous owner, done */
        addi    r3,r3,THREAD            /* want THREAD of task */
        ld      r5,PT_REGS(r3)
-       cmp   0,r5,0
+       cmpdi   0,r5,0
        SAVE_32VRS(0,r4,r3)
        mfvscr  vr0
        li      r4,THREAD_VSCR
@@ -1785,8 +1658,8 @@ _GLOBAL(giveup_altivec)
 1:
 #ifndef CONFIG_SMP
        li      r5,0
-       LOADBASE(r4,last_task_used_altivec)
-       std     r5,last_task_used_altivec@l(r4)
+       ld      r4,last_task_used_altivec@got(r2)
+       std     r5,0(r4)
 #endif /* CONFIG_SMP */
        blr
 
@@ -1830,16 +1703,14 @@ _GLOBAL(pmac_secondary_start)
        mtmsrd  r3                      /* RI on */
 
        /* Set up a paca value for this processor. */
-       LOADADDR(r4, paca)               /* Get base vaddr of paca array  */
+       LOADADDR(r4, paca)               /* Get base vaddr of paca array        */
        mulli   r13,r24,PACA_SIZE        /* Calculate vaddr of right paca */
-       add     r13,r13,r4               /* for this processor.           */
-       mtspr   SPRG3,r13                /* Save vaddr of paca in SPRG3   */
+       add     r13,r13,r4              /* for this processor.          */
+       mtspr   SPRG3,r13                /* Save vaddr of paca in SPRG3 */
 
-        /* Create a temp kernel stack for use before relocation is on.    */
-        mr      r1,r13
-        addi    r1,r1,PACAGUARD
-        addi    r1,r1,0x1000
-        subi    r1,r1,STACK_FRAME_OVERHEAD
+       /* Create a temp kernel stack for use before relocation is on.  */
+       ld      r1,PACAEMERGSP(r13)
+       subi    r1,r1,STACK_FRAME_OVERHEAD
 
        b       .__secondary_start
 
@@ -1853,7 +1724,7 @@ _GLOBAL(pmac_secondary_start)
  *   1. Processor number
  *   2. Segment table pointer (virtual address)
  * On entry the following are set:
- *   r1    = stack pointer.  vaddr for iSeries, raddr (temp stack) for pSeries
+ *   r1        = stack pointer.  vaddr for iSeries, raddr (temp stack) for pSeries
  *   r24   = cpu# (in Linux terms)
  *   r13   = paca virtual address
  *   SPRG3 = paca virtual address
@@ -1862,35 +1733,29 @@ _GLOBAL(__secondary_start)
 
        HMT_MEDIUM                      /* Set thread priority to MEDIUM */
 
-       /* set up the TOC (virtual address) */
-       LOADADDR(r2,__toc_start)
-       addi    r2,r2,0x4000
-       addi    r2,r2,0x4000
-
-       std     r2,PACATOC(r13)
+       ld      r2,PACATOC(r13)
        li      r6,0
        stb     r6,PACAPROCENABLED(r13)
 
 #ifndef CONFIG_PPC_ISERIES
        /* Initialize the page table pointer register. */
        LOADADDR(r6,_SDR1)
-       ld      r6,0(r6)                /* get the value of _SDR1 */
-       mtspr   SDR1,r6                 /* set the htab location  */
+       ld      r6,0(r6)                /* get the value of _SDR1        */
+       mtspr   SDR1,r6                 /* set the htab location         */
 #endif
-       /* Initialize the first segment table (or SLB) entry                */
-       ld      r3,PACASTABVIRT(r13)    /* get addr of segment table        */
+       /* Initialize the first segment table (or SLB) entry             */
+       ld      r3,PACASTABVIRT(r13)    /* get addr of segment table     */
        bl      .stab_initialize
 
-       /* Initialize the kernel stack.  Just a repeat for iSeries.         */
+       /* Initialize the kernel stack.  Just a repeat for iSeries.      */
        LOADADDR(r3,current_set)
-       sldi    r28,r24,3               /* get current_set[cpu#] */
+       sldi    r28,r24,3               /* get current_set[cpu#]         */
        ldx     r1,r3,r28
-       addi    r1,r1,THREAD_SIZE
-       subi    r1,r1,STACK_FRAME_OVERHEAD
+       addi    r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
        std     r1,PACAKSAVE(r13)
 
-       ld      r3,PACASTABREAL(r13)    /* get raddr of segment table       */
-       ori     r4,r3,1                 /* turn on valid bit                */
+       ld      r3,PACASTABREAL(r13)    /* get raddr of segment table    */
+       ori     r4,r3,1                 /* turn on valid bit             */
 
 #ifdef CONFIG_PPC_ISERIES
        li      r0,-1                   /* hypervisor call */
@@ -1900,23 +1765,23 @@ _GLOBAL(__secondary_start)
        sc                              /* HvCall_setASR */
 #else
        /* set the ASR */
-       li      r3,SYSTEMCFG_PHYS_ADDR  /* r3 = ptr to systemcfg  */
-       lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
+       li      r3,SYSTEMCFG_PHYS_ADDR  /* r3 = ptr to systemcfg         */
+       lwz     r3,PLATFORM(r3)         /* r3 = platform flags           */
        cmpldi  r3,PLATFORM_PSERIES_LPAR
-       bne     98f
+       bne     98f
        mfspr   r3,PVR
        srwi    r3,r3,16
-       cmpwi   r3,0x37         /* SStar  */
+       cmpwi   r3,0x37                 /* SStar  */
        beq     97f
-       cmpwi   r3,0x36         /* IStar  */
+       cmpwi   r3,0x36                 /* IStar  */
        beq     97f
-       cmpwi   r3,0x34         /* Pulsar */
+       cmpwi   r3,0x34                 /* Pulsar */
        bne     98f
-97:    li      r3,H_SET_ASR    /* hcall = H_SET_ASR */
-       HVSC                    /* Invoking hcall */
+97:    li      r3,H_SET_ASR            /* hcall = H_SET_ASR */
+       HVSC                            /* Invoking hcall */
        b       99f
-98:                             /* !(rpa hypervisor) || !(star)  */
-       mtasr   r4              /* set the stab location         */
+98:                                    /* !(rpa hypervisor) || !(star)  */
+       mtasr   r4                      /* set the stab location         */
 99:
 #endif
        li      r7,0
@@ -1931,6 +1796,7 @@ _GLOBAL(__secondary_start)
        mtspr   SRR0,r3
        mtspr   SRR1,r4
        rfid
+       b       .       /* prevent speculative execution */
 
 /* 
  * Running with relocation on at this point.  All we want to do is
@@ -1938,22 +1804,22 @@ _GLOBAL(__secondary_start)
  */
 _GLOBAL(start_secondary_prolog)
        li      r3,0
-       std     r3,0(r1)                /* Zero the stack frame pointer     */
+       std     r3,0(r1)                /* Zero the stack frame pointer */
        bl      .start_secondary
 #endif
 
 /*
- * This subroutine clobbers r11, r12 and the LR
+ * This subroutine clobbers r11 and r12
  */
 _GLOBAL(enable_64b_mode)
-       mfmsr   r11                      /* grab the current MSR */
-       li      r12,1
-       rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
-       or      r11,r11,r12
-       li      r12,1
-       rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
-       or      r11,r11,r12
-       mtmsrd  r11
+       mfmsr   r11                     /* grab the current MSR */
+       li      r12,1
+       rldicr  r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
+       or      r11,r11,r12
+       li      r12,1
+       rldicr  r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+       or      r11,r11,r12
+       mtmsrd  r11
        isync
        blr
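
enable_64b_mode just sets the two 64-bit mode bits in the MSR; in C terms, approximately as below. The bit positions are the ones the rldicr instructions above build: SF is MSR bit 0 and ISF is MSR bit 2 in IBM numbering, i.e. 1UL<<63 and 1UL<<61.

    #define MSR_SF_BIT      (1UL << 63)     /* 64-bit mode */
    #define MSR_ISF_BIT     (1UL << 61)     /* 64-bit mode for interrupts */

    /* Sketch of enable_64b_mode. */
    static void enable_64b_mode_sketch(void)
    {
            unsigned long msr;

            asm volatile("mfmsr %0" : "=r"(msr));
            msr |= MSR_SF_BIT | MSR_ISF_BIT;
            asm volatile("mtmsrd %0 ; isync" : : "r"(msr) : "memory");
    }
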
 
@@ -1970,30 +1836,30 @@ _STATIC(start_here_pSeries)
        ori     r6,r6,MSR_RI
        mtmsrd  r6                      /* RI on */
 
-       /* setup the systemcfg pointer which is needed by *tab_initialize  */
+       /* setup the systemcfg pointer which is needed by *tab_initialize       */
        LOADADDR(r6,systemcfg)
-       sub     r6,r6,r26                /* addr of the variable systemcfg */
+       sub     r6,r6,r26               /* addr of the variable systemcfg */
        li      r27,SYSTEMCFG_PHYS_ADDR
-       std     r27,0(r6)                /* set the value of systemcfg     */
+       std     r27,0(r6)               /* set the value of systemcfg   */
 
-       /* setup the naca pointer which is needed by *tab_initialize       */
+       /* setup the naca pointer which is needed by *tab_initialize    */
        LOADADDR(r6,naca)
-       sub     r6,r6,r26                /* addr of the variable naca      */
+       sub     r6,r6,r26               /* addr of the variable naca    */
        li      r27,NACA_PHYS_ADDR
-       std     r27,0(r6)                /* set the value of naca          */
+       std     r27,0(r6)               /* set the value of naca        */
 
 #ifdef CONFIG_HMT
        /* Start up the second thread on cpu 0 */
        mfspr   r3,PVR
        srwi    r3,r3,16
-       cmpwi   r3,0x34                 /* Pulsar  */
+       cmpwi   r3,0x34                 /* Pulsar  */
        beq     90f
-       cmpwi   r3,0x36                 /* Icestar */
+       cmpwi   r3,0x36                 /* Icestar */
        beq     90f
-       cmpwi   r3,0x37                 /* SStar   */
+       cmpwi   r3,0x37                 /* SStar   */
        beq     90f
-       b       91f                     /* HMT not supported */
-90:    li      r3,0
+       b       91f                     /* HMT not supported */
+90:    li      r3,0
        bl      .hmt_start_secondary
 91:
 #endif
@@ -2008,7 +1874,7 @@ _STATIC(start_here_pSeries)
        li      r3,1
        LOADADDR(r5,__secondary_hold_spinloop)
        tophys(r4,r5)
-       std     r3,0(r4)
+       std     r3,0(r4)
 #endif
 
        /* The following gets the stack and TOC set up with the regs */
@@ -2027,8 +1893,8 @@ _STATIC(start_here_pSeries)
 
                /* set up the TOC (physical address) */
        LOADADDR(r2,__toc_start)
-       addi    r2,r2,0x4000
-       addi    r2,r2,0x4000
+       addi    r2,r2,0x4000
+       addi    r2,r2,0x4000
        sub     r2,r2,r26
 
        LOADADDR(r3,cpu_specs)
@@ -2038,44 +1904,44 @@ _STATIC(start_here_pSeries)
        mr      r5,r26
        bl      .identify_cpu
 
-       /* Get the pointer to the segment table which is used by           */
-       /* stab_initialize                                                 */
+       /* Get the pointer to the segment table which is used by                */
+       /* stab_initialize                                               */
        LOADADDR(r27, boot_cpuid)
        sub     r27,r27,r26
        lwz     r27,0(r27)
 
-       LOADADDR(r24, paca)              /* Get base vaddr of paca array  */
-       mulli   r13,r27,PACA_SIZE        /* Calculate vaddr of right paca */
-       add     r13,r13,r24              /* for this processor.           */
-       sub     r13,r13,r26             /* convert to physical addr         */
+       LOADADDR(r24, paca)             /* Get base vaddr of paca array  */
+       mulli   r13,r27,PACA_SIZE       /* Calculate vaddr of right paca */
+       add     r13,r13,r24             /* for this processor.           */
+       sub     r13,r13,r26             /* convert to physical addr      */
 
        mtspr   SPRG3,r13               /* PPPBBB: Temp... -Peter */
        ld      r3,PACASTABREAL(r13)
-       ori     r4,r3,1                 /* turn on valid bit                */
+       ori     r4,r3,1                 /* turn on valid bit             */
        
        /* set the ASR */
        li      r3,SYSTEMCFG_PHYS_ADDR  /* r3 = ptr to systemcfg */
-       lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
+       lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
        cmpldi  r3,PLATFORM_PSERIES_LPAR
-       bne     98f
+       bne     98f
        mfspr   r3,PVR
        srwi    r3,r3,16
-       cmpwi   r3,0x37         /* SStar */
+       cmpwi   r3,0x37                 /* SStar */
        beq     97f
-       cmpwi   r3,0x36         /* IStar  */
+       cmpwi   r3,0x36                 /* IStar  */
        beq     97f
-       cmpwi   r3,0x34         /* Pulsar */
+       cmpwi   r3,0x34                 /* Pulsar */
        bne     98f
-97:    li      r3,H_SET_ASR    /* hcall = H_SET_ASR */
-       HVSC                    /* Invoking hcall */
-       b       99f
-98:                             /* !(rpa hypervisor) || !(star) */
-       mtasr   r4              /* set the stab location         */
+97:    li      r3,H_SET_ASR            /* hcall = H_SET_ASR */
+       HVSC                            /* Invoking hcall */
+       b       99f
+98:                                    /* !(rpa hypervisor) || !(star) */
+       mtasr   r4                      /* set the stab location        */
 99:
        mfspr   r6,SPRG3
-       ld      r3,PACASTABREAL(r6)     /* restore r3 for stab_initialize */
+       ld      r3,PACASTABREAL(r6)     /* restore r3 for stab_initialize */
 
-       /* Initialize an initial memory mapping and turn on relocation.   */
+       /* Initialize an initial memory mapping and turn on relocation. */
        bl      .stab_initialize
        bl      .htab_initialize
 
@@ -2083,7 +1949,7 @@ _STATIC(start_here_pSeries)
        lwz     r3,PLATFORM(r3)         /* r3 = platform flags */
        /* Test if bit 0 is set (LPAR bit) */
        andi.   r3,r3,0x1
-       bne    98f
+       bne     98f
        LOADADDR(r6,_SDR1)              /* Only if NOT LPAR */
        sub     r6,r6,r26
        ld      r6,0(r6)                /* get the value of _SDR1 */
@@ -2094,6 +1960,7 @@ _STATIC(start_here_pSeries)
        mtspr   SRR0,r3
        mtspr   SRR1,r4
        rfid
+       b       .       /* prevent speculative execution */
 #endif /* CONFIG_PPC_PSERIES */
        
        /* This is where all platforms converge execution */
@@ -2110,11 +1977,6 @@ _STATIC(start_here_common)
        li      r0,0
        stdu    r0,-STACK_FRAME_OVERHEAD(r1)
 
-       /* set up the TOC */
-       LOADADDR(r2,__toc_start)
-       addi    r2,r2,0x4000
-       addi    r2,r2,0x4000
-
        /* Apply the CPUs-specific fixups (nop out sections not relevant
         * to this CPU
         */
@@ -2129,22 +1991,22 @@ _STATIC(start_here_common)
        /* setup the naca pointer */
        LOADADDR(r9,naca)
        SET_REG_TO_CONST(r8, NACA_VIRT_ADDR)
-       std     r8,0(r9)                /* set the value of the naca ptr  */
+       std     r8,0(r9)                /* set the value of the naca ptr */
 
        LOADADDR(r26, boot_cpuid)
        lwz     r26,0(r26)
 
-       LOADADDR(r24, paca)              /* Get base vaddr of paca array  */
-       mulli   r13,r26,PACA_SIZE        /* Calculate vaddr of right paca */
-       add     r13,r13,r24              /* for this processor.           */
+       LOADADDR(r24, paca)             /* Get base vaddr of paca array  */
+       mulli   r13,r26,PACA_SIZE       /* Calculate vaddr of right paca */
+       add     r13,r13,r24             /* for this processor.           */
        mtspr   SPRG3,r13
 
        /* ptr to current */
        LOADADDR(r4,init_task)
        std     r4,PACACURRENT(r13)
 
-       std     r2,PACATOC(r13)
-       li      r5,0
+       /* Load the TOC */
+       ld      r2,PACATOC(r13)
        std     r1,PACAKSAVE(r13)
 
        /* Restore the parms passed in from the bootloader. */
@@ -2176,11 +2038,11 @@ _GLOBAL(hmt_init)
        LOADADDR(r5, hmt_thread_data)
        mfspr   r7,PVR
        srwi    r7,r7,16
-       cmpwi   r7,0x34                 /* Pulsar  */
+       cmpwi   r7,0x34                 /* Pulsar  */
        beq     90f
-       cmpwi   r7,0x36                 /* Icestar */
+       cmpwi   r7,0x36                 /* Icestar */
        beq     91f
-       cmpwi   r7,0x37                 /* SStar   */
+       cmpwi   r7,0x37                 /* SStar   */
        beq     91f
        b       101f
 90:    mfspr   r6,PIR
@@ -2214,32 +2076,32 @@ __hmt_secondary_hold:
 
 104:   addi    r7,r7,4
        lwzx    r9,r5,r7
-       mr      r24,r9
+       mr      r24,r9
 101:
 #endif
-       mr      r3,r24
-       b       .pseries_secondary_smp_init
+       mr      r3,r24
+       b       .pseries_secondary_smp_init
 
 #ifdef CONFIG_HMT
 _GLOBAL(hmt_start_secondary)
        LOADADDR(r4,__hmt_secondary_hold)
        clrldi  r4,r4,4
-       mtspr   NIADORM, r4
-       mfspr   r4, MSRDORM
-       li      r5, -65
-       and     r4, r4, r5
-       mtspr   MSRDORM, r4
+       mtspr   NIADORM, r4
+       mfspr   r4, MSRDORM
+       li      r5, -65
+       and     r4, r4, r5
+       mtspr   MSRDORM, r4
        lis     r4,0xffef
        ori     r4,r4,0x7403
        mtspr   TSC, r4
        li      r4,0x1f4
        mtspr   TST, r4
-       mfspr   r4, HID0
-       ori     r4, r4, 0x1
-       mtspr   HID0, r4
-       mfspr   r4, CTRLF
-       oris    r4, r4, 0x40
-       mtspr   CTRLT, r4
+       mfspr   r4, HID0
+       ori     r4, r4, 0x1
+       mtspr   HID0, r4
+       mfspr   r4, CTRLF
+       oris    r4, r4, 0x40
+       mtspr   CTRLT, r4
        blr
 #endif
 
@@ -2249,7 +2111,7 @@ _GLOBAL(hmt_start_secondary)
  * which is page-aligned.
  */
        .data
-       .align  12
+       .align  12
        .globl  sdata
 sdata:
        .globl  empty_zero_page
@@ -2267,7 +2129,7 @@ ioremap_dir:
 /* 1 page segment table per cpu (max 48, cpu0 allocated at STAB0_PHYS_ADDR) */
        .globl  stab_array
 stab_array:
-        .space 4096 * 48
+       .space  4096 * 48
        
 /*
  * This space gets a copy of optional info passed to us by the bootstrap
@@ -2275,4 +2137,4 @@ stab_array:
  */
        .globl  cmd_line
 cmd_line:
-       .space  512     /* COMMAND_LINE_SIZE */
+       .space  COMMAND_LINE_SIZE