X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;ds=sidebyside;f=arch%2Fsparc64%2Fkernel%2Fdtlb_backend.S;h=b73a3c85877045a3e9c946059cc27b1d5c85414a;hb=f7f1b0f1e2fbadeab12d24236000e778aa9b1ead;hp=e6bc4a26aeb9b034f7ac64501957bb2bfcbf61a8;hpb=e3f6fb6212a7102bdb56ba38fa1e98fe72950475;p=linux-2.6.git

diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
index e6bc4a26a..b73a3c858 100644
--- a/arch/sparc64/kernel/dtlb_backend.S
+++ b/arch/sparc64/kernel/dtlb_backend.S
@@ -7,60 +7,143 @@
  */
 
 #include <asm/pgtable.h>
-#include <asm/mmu_context.h>
+#include <asm/mmu.h>
 
 #if PAGE_SHIFT == 13
-#define FILL_VALID_SZ_BITS1(r1) \
- sllx %g2, 62, r1
-#define FILL_VALID_SZ_BITS2(r1)
-#define FILL_VALID_SZ_BITS_NOP nop
+#define SZ_BITS _PAGE_SZ8K
 #elif PAGE_SHIFT == 16
-#define FILL_VALID_SZ_BITS1(r1) \
- or %g0, 5, r1
-#define FILL_VALID_SZ_BITS2(r1) \
- sllx r1, 61, r1
-#define FILL_VALID_SZ_BITS_NOP
-#else
-#error unsupported PAGE_SIZE
-#endif /* PAGE_SHIFT */
+#define SZ_BITS _PAGE_SZ64K
+#elif PAGE_SHIFT == 19
+#define SZ_BITS _PAGE_SZ512K
+#elif PAGE_SHIFT == 22
+#define SZ_BITS _PAGE_SZ4M
+#endif
+
+#define VALID_SZ_BITS (_PAGE_VALID | SZ_BITS)
 
 #define VPTE_BITS (_PAGE_CP | _PAGE_CV | _PAGE_P )
 #define VPTE_SHIFT (PAGE_SHIFT - 3)
-#define TLB_PMD_SHIFT (PAGE_SHIFT - 3 + 3)
-#define TLB_PGD_SHIFT (PMD_BITS + PAGE_SHIFT - 3 + 3)
-#define TLB_PMD_MASK (((1 << PMD_BITS) - 1) << 1)
-#define TLB_PGD_MASK (((1 << (VA_BITS - PAGE_SHIFT - (PAGE_SHIFT - 3) - PMD_BITS)) - 1) << 2)
 
 /* Ways we can get here:
  *
  * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
 * 2) Nucleus loads and stores to/from user/kernel window save areas.
 * 3) VPTE misses from dtlb_base and itlb_base.
+ *
+ * We need to extract out the PMD and PGDIR indexes from the
+ * linear virtual page table access address. The PTE index
+ * is at the bottom, but we are not concerned with it. Bits
+ * 0 to 2 are clear since each PTE is 8 bytes in size. Each
+ * PMD and PGDIR entry is 4 bytes in size. Thus, this
+ * address looks something like:
+ *
+ * |---------------------------------------------------------------|
+ * | ... | PGDIR index | PMD index | PTE index | |
+ * |---------------------------------------------------------------|
+ * 63 F E D C B A 3 2 0 <- bit nr
+ *
+ * The variable bits above are defined as:
+ * A --> 3 + (PAGE_SHIFT - log2(8))
+ * --> 3 + (PAGE_SHIFT - 3) - 1
+ * (ie. this is "bit 3" + PAGE_SIZE - size of PTE entry in bits - 1)
+ * B --> A + 1
+ * C --> B + (PAGE_SHIFT - log2(4))
+ * --> B + (PAGE_SHIFT - 2) - 1
+ * (ie. this is "bit B" + PAGE_SIZE - size of PMD entry in bits - 1)
+ * D --> C + 1
+ * E --> D + (PAGE_SHIFT - log2(4))
+ * --> D + (PAGE_SHIFT - 2) - 1
+ * (ie. this is "bit D" + PAGE_SIZE - size of PGDIR entry in bits - 1)
+ * F --> E + 1
+ *
+ * (Note how "B" always evaluates to PAGE_SHIFT; all the other constants
+ * cancel out.)
+ *
+ * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
+ * A --> 12
+ * B --> 13
+ * C --> 23
+ * D --> 24
+ * E --> 34
+ * F --> 35
+ *
+ * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
+ * A --> 15
+ * B --> 16
+ * C --> 29
+ * D --> 30
+ * E --> 43
+ * F --> 44
+ *
+ * Because bits both above and below each PGDIR and PMD index need to
+ * be masked out, and the index can be as long as 14 bits (when using a
+ * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
+ * to extract each index out.
+ *
+ * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
+ * we try to avoid using them for the entire operation.
+ * We could set up a mask anywhere from bit 31 down to bit 10 using
+ * the sethi instruction.
+ *
+ * We need a mask covering bits B --> C and one covering D --> E.
+ * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
+ * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
+ * The second in each set cannot be loaded with a single sethi
+ * instruction, because the upper bits are past bit 32. We would
+ * need to use a sethi + a shift.
+ *
+ * For the time being, we use 2 shifts and a simple "and" mask.
+ * We shift left to clear the bits above the index, we shift down
+ * to clear the bits below the index (sans the log2(4 or 8) bits)
+ * and use a mask to clear the log2(4 or 8) bits. We therefore need
+ * to define 4 shift counts, all of which are relative to PAGE_SHIFT.
+ *
+ * Although unsupportable for other reasons, this does mean that
+ * 512K and 4MB page sizes would be generally supported by the
+ * kernel. (ELF binaries would break with > 64K PAGE_SIZE since
+ * the sections are only aligned that strongly).
+ *
+ * The operations performed for extraction are thus:
+ *
+ * ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
+ *
 */
 
+#define A (3 + (PAGE_SHIFT - 3) - 1)
+#define B (A + 1)
+#define C (B + (PAGE_SHIFT - 2) - 1)
+#define D (C + 1)
+#define E (D + (PAGE_SHIFT - 2) - 1)
+#define F (E + 1)
+
+#define PMD_SHIFT_LEFT (64 - D)
+#define PMD_SHIFT_RIGHT (64 - (D - B) - 2)
+#define PGDIR_SHIFT_LEFT (64 - F)
+#define PGDIR_SHIFT_RIGHT (64 - (F - D) - 2)
+#define LOW_MASK_BITS 0x3
+
 /* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss */
 ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
 add %g3, %g3, %g5 ! Compute VPTE base
 cmp %g4, %g5 ! VPTE miss?
 bgeu,pt %xcc, 1f ! Continue here
- andcc %g4, TAG_CONTEXT_BITS, %g5 ! From Nucleus? (for tl0 miss)
- ba,pt %xcc, from_tl1_trap ! Fall to tl0 miss
- rdpr %tl, %g5 ! For tl0 miss TL==3 test
+ andcc %g4, TAG_CONTEXT_BITS, %g5 ! tl0 miss Nucleus test
+ ba,a,pt %xcc, from_tl1_trap ! Fall to tl0 miss
 1: sllx %g6, VPTE_SHIFT, %g4 ! Position TAG_ACCESS
+ or %g4, %g5, %g4 ! Prepare TAG_ACCESS
 
 /* TLB1 ** ICACHE line 2: Quick VPTE miss */
- or %g4, %g5, %g4 ! Prepare TAG_ACCESS
 mov TSB_REG, %g1 ! Grab TSB reg
 ldxa [%g1] ASI_DMMU, %g5 ! Doing PGD caching?
- srlx %g6, (TLB_PMD_SHIFT - 1), %g1 ! Position PMD offset
+ sllx %g6, PMD_SHIFT_LEFT, %g1 ! Position PMD offset
 be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus?
- and %g1, TLB_PMD_MASK, %g1 ! Mask PMD offset bits
+ srlx %g1, PMD_SHIFT_RIGHT, %g1 ! Mask PMD offset bits
 brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke
- add %g1, %g1, %g1 ! Position PMD offset some more
+ andn %g1, LOW_MASK_BITS, %g1 ! Final PMD mask
+ sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset
 
 /* TLB1 ** ICACHE line 3: Quick VPTE miss */
- srlx %g6, (TLB_PGD_SHIFT - 2), %g5 ! Position PGD offset
- and %g5, TLB_PGD_MASK, %g5 ! Mask PGD offset
+ srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits
+ andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask
 lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD
 brz,pn %g5, vpte_noent ! Valid?
 sparc64_kpte_continue:
@@ -71,23 +154,28 @@ sparc64_vpte_continue:
 brz,pn %g5, vpte_noent ! Valid?
 
 /* TLB1 ** ICACHE line 4: Quick VPTE miss */
- FILL_VALID_SZ_BITS1(%g1) ! Put _PAGE_VALID into %g1
- FILL_VALID_SZ_BITS2(%g1) ! Put _PAGE_VALID into %g1
+ mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1
+ sllx %g1, 61, %g1 ! finish calc
 or %g5, VPTE_BITS, %g5 ! Prepare VPTE data
 or %g5, %g1, %g5 ! ...
 mov TLB_SFSR, %g1 ! Restore %g1 value
 stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB
 stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS
 retry ! Load PTE once again
- FILL_VALID_SZ_BITS_NOP
 
+#undef SZ_BITS
+#undef VALID_SZ_BITS
 #undef VPTE_SHIFT
-#undef TLB_PMD_SHIFT
-#undef TLB_PGD_SHIFT
 #undef VPTE_BITS
-#undef TLB_PMD_MASK
-#undef TLB_PGD_MASK
-#undef FILL_VALID_SZ_BITS1
-#undef FILL_VALID_SZ_BITS2
-#undef FILL_VALID_SZ_BITS_NOP
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef PMD_SHIFT_LEFT
+#undef PMD_SHIFT_RIGHT
+#undef PGDIR_SHIFT_LEFT
+#undef PGDIR_SHIFT_RIGHT
+#undef LOW_MASK_BITS
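
The long comment in the new code derives the A through F bit positions and the PMD/PGDIR shift counts arithmetically. As a sanity check, here is a minimal, hypothetical userspace C sketch (not part of this patch or of the kernel) that recomputes those constants for an assumed PAGE_SHIFT and verifies that the documented ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3 sequence really produces the PMD and PGDIR entry byte offsets. The function names, the file name in the build comment, and the example address are illustrative only.

/*
 * Hypothetical sketch; assumes PAGE_SHIFT is 13 (8K pages), change to 16 for
 * 64K pages.  Build with e.g. "gcc -o vpte_idx vpte_idx.c" and run; the
 * asserts should all pass.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 13			/* assumption: 8K pages */

#define A (3 + (PAGE_SHIFT - 3) - 1)
#define B (A + 1)
#define C (B + (PAGE_SHIFT - 2) - 1)
#define D (C + 1)
#define E (D + (PAGE_SHIFT - 2) - 1)
#define F (E + 1)

#define PMD_SHIFT_LEFT		(64 - D)
#define PMD_SHIFT_RIGHT		(64 - (D - B) - 2)
#define PGDIR_SHIFT_LEFT	(64 - F)
#define PGDIR_SHIFT_RIGHT	(64 - (F - D) - 2)

/* Byte offset of the PMD entry: bits B..C of the address, times 4. */
static uint64_t pmd_offset_bytes(uint64_t vaddr)
{
	return ((vaddr << PMD_SHIFT_LEFT) >> PMD_SHIFT_RIGHT) & ~(uint64_t)0x3;
}

/* Byte offset of the PGD entry: bits D..E of the address, times 4. */
static uint64_t pgd_offset_bytes(uint64_t vaddr)
{
	return ((vaddr << PGDIR_SHIFT_LEFT) >> PGDIR_SHIFT_RIGHT) & ~(uint64_t)0x3;
}

int main(void)
{
	/* The bit numbers given in the comment for 8K and 64K pages. */
#if PAGE_SHIFT == 13
	assert(A == 12 && B == 13 && C == 23 && D == 24 && E == 34 && F == 35);
#elif PAGE_SHIFT == 16
	assert(A == 15 && B == 16 && C == 29 && D == 30 && E == 43 && F == 44);
#endif

	/* Arbitrary example VPTE-miss address; only bits B..E matter here. */
	uint64_t vaddr = 0x123456789aULL;
	uint64_t pmd_index = (vaddr >> B) & ((1ULL << (C - B + 1)) - 1);
	uint64_t pgd_index = (vaddr >> D) & ((1ULL << (E - D + 1)) - 1);

	/* The shift/shift/andn sequence must equal "index * 4" (4-byte entries). */
	assert(pmd_offset_bytes(vaddr) == pmd_index * 4);
	assert(pgd_offset_bytes(vaddr) == pgd_index * 4);

	printf("PMD offset 0x%llx, PGD offset 0x%llx\n",
	       (unsigned long long)pmd_offset_bytes(vaddr),
	       (unsigned long long)pgd_offset_bytes(vaddr));
	return 0;
}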
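
Similarly, the new TLB-line-4 sequence (mov (VALID_SZ_BITS >> 61), %g1 followed by sllx %g1, 61, %g1) works because _PAGE_VALID and the page-size field sit entirely in bits 61 to 63 of the PTE, so the whole value shifted down by 61 fits a small mov immediate. The sketch below restates those bit positions from the UltraSPARC TTE layout as an assumption (the patch itself does not spell out the numeric values) and checks the round trip; the resulting immediate of 5 for 64K pages matches the "or %g0, 5, r1" in the old FILL_VALID_SZ_BITS macros.

/* Hypothetical sketch; the PTE bit values below are assumptions, not taken from this patch. */
#include <assert.h>
#include <stdint.h>

#define _PAGE_VALID	((uint64_t)1 << 63)	/* V bit          */
#define _PAGE_SZ8K	((uint64_t)0 << 61)	/* size field = 0 */
#define _PAGE_SZ64K	((uint64_t)1 << 61)	/* size field = 1 */
#define _PAGE_SZ512K	((uint64_t)2 << 61)	/* size field = 2 */
#define _PAGE_SZ4M	((uint64_t)3 << 61)	/* size field = 3 */

int main(void)
{
	const uint64_t sz[] = { _PAGE_SZ8K, _PAGE_SZ64K, _PAGE_SZ512K, _PAGE_SZ4M };

	for (int i = 0; i < 4; i++) {
		uint64_t valid_sz_bits = _PAGE_VALID | sz[i];
		uint64_t imm = valid_sz_bits >> 61;	/* what the "mov" loads       */

		assert(imm >= 4 && imm <= 7);		/* fits a small immediate     */
		assert((imm << 61) == valid_sz_bits);	/* "sllx ..., 61" rebuilds it */
	}
	return 0;
}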