arch/mips/mm/pg-sb1.c (linux-2.6.6)
/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 *
 * Written by Justin Carlson of SiByte, Inc.
 *         and Kip Walker of Broadcom Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/io.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>

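/*
 * MIPS pref hint encodings: 0 = load, 1 = store, 4 = load_streamed,
 * 5 = store_streamed.  With the pass 1 workarounds enabled, the
 * streamed hints are avoided and the plain load/store hints are used
 * instead.
 */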
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif

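/*
 * With CONFIG_SIBYTE_DMA_PAGEOPS, the CPU loops below build as static
 * clear_page_cpu()/copy_page_cpu() fallbacks and the data mover
 * versions at the bottom of this file become the real clear_page()
 * and copy_page().  Otherwise the CPU loops are clear_page() and
 * copy_page() themselves.
 */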
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void clear_page_cpu(void *page)
#else
void clear_page(void *page)
#endif
{
	unsigned char *addr = (unsigned char *) page;
	unsigned char *end = addr + PAGE_SIZE;

	/*
	 * JDCXXX - This should be bottlenecked by the write buffer, but these
	 * things tend to be mildly unpredictable...should check this on the
	 * performance model.
	 *
	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
	 * since we know we're on an SB1, we force the assembler to take
	 * 64-bit operands to speed things up.
	 */
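	/*
	 * Each iteration zeroes one 32-byte cache line with four 64-bit
	 * stores (note the 32-byte stride below), so the pref at 128(%0)
	 * keeps the prefetch stream exactly four lines ahead of the
	 * stores.
	 */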
	do {
		__asm__ __volatile__(
		"	.set	mips4		\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  0(%0)  \n"  /* Prefetch the first 4 lines */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 32(%0)  \n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 64(%0)  \n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 96(%0)  \n"
#endif
		"1:	sd	$0,  0(%0)	\n"  /* Throw out a cacheline of 0's */
		"	sd	$0,  8(%0)	\n"
		"	sd	$0, 16(%0)	\n"
		"	sd	$0, 24(%0)	\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 128(%0) \n"  /* Prefetch 4 lines ahead */
#endif
		"	.set	mips0		\n"
		:
		: "r" (addr)
		: "memory");
		addr += 32;
	} while (addr != end);
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void copy_page_cpu(void *to, void *from)
#else
void copy_page(void *to, void *from)
#endif
{
	unsigned char *src = from;
	unsigned char *dst = to;
	unsigned char *end = src + PAGE_SIZE;

	/*
	 * This should be optimized in assembly...can't use ld/sd, though,
	 * because the top 32 bits could be nuked if we took an interrupt
	 * during the routine.  And this is not a good place to be cli()'ing.
	 *
	 * The prefs used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy
	 * often ends up copying a lot more data than is commonly used, so
	 * this seems to make sense in terms of reducing cache pollution, but
	 * I've no real performance data to back this up.
	 */

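	/*
	 * Each iteration moves one 32-byte line through $2..$9 (eight
	 * 32-bit lw/sw pairs), which is why exactly those registers are
	 * on the clobber list below.
	 */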
	do {
		__asm__ __volatile__(
		"	.set	mips4				\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  0(%0)\n"  /* Prefetch the first 3 lines */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  0(%1)\n"
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 32(%0)\n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 32(%1)\n"
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 64(%0)\n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 64(%1)\n"
#endif
		"1:	lw	$2,  0(%0)	\n"  /* Block copy a cacheline */
		"	lw	$3,  4(%0)	\n"
		"	lw	$4,  8(%0)	\n"
		"	lw	$5, 12(%0)	\n"
		"	lw	$6, 16(%0)	\n"
		"	lw	$7, 20(%0)	\n"
		"	lw	$8, 24(%0)	\n"
		"	lw	$9, 28(%0)	\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 96(%0)  \n"  /* Prefetch ahead */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 96(%1)  \n"
#endif
		"	sw	$2,  0(%1)	\n"
		"	sw	$3,  4(%1)	\n"
		"	sw	$4,  8(%1)	\n"
		"	sw	$5, 12(%1)	\n"
		"	sw	$6, 16(%1)	\n"
		"	sw	$7, 20(%1)	\n"
		"	sw	$8, 24(%1)	\n"
		"	sw	$9, 28(%1)	\n"
		"	.set	mips0		\n"
		:
		: "r" (src), "r" (dst)
		: "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "memory");
		src += 32;
		dst += 32;
	} while (src != end);
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
typedef struct dmadscr_s {
	uint64_t dscr_a;
	uint64_t dscr_b;
	uint64_t pad_a;
	uint64_t pad_b;
} dmadscr_t;

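/*
 * One descriptor per CPU, each programmed as a ring of size 1
 * (V_DM_DSCR_BASE_RINGSZ(1) below), so a page operation just rewrites
 * the owning CPU's descriptor in place and bumps the count register.
 */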
static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES)));

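/*
 * Set up the calling CPU's data mover channel: program the descriptor
 * base, pulse reset, then enable.  This uses smp_processor_id(), so
 * each CPU is expected to run it once for its own channel.
 */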
void sb1_dma_init(void)
{
	int cpu = smp_processor_id();
	uint64_t base_val = PHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1);

	__raw_writeq(base_val,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
	__raw_writeq(base_val | M_DM_DSCR_BASE_RESET,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
	__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void clear_page(void *page)
{
	int cpu = smp_processor_id();

	/* if the page is above Kseg0, use the old way */
	if (KSEGX(page) != CAC_BASE)
		return clear_page_cpu(page);

	page_descr[cpu].dscr_a = PHYSADDR(page) | M_DM_DSCRA_ZERO_MEM |
		M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) &
		 M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
	unsigned long from_phys = PHYSADDR(from);
	unsigned long to_phys = PHYSADDR(to);
	int cpu = smp_processor_id();

	/* if either page is above Kseg0, use the old way */
	if ((KSEGX(to) != CAC_BASE) || (KSEGX(from) != CAC_BASE))
		return copy_page_cpu(to, from);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
		M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) &
		 M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif

EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);
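
/*
 * Note: the usual consumers of these exports are the MIPS
 * clear_user_page()/copy_user_page() wrappers, which (in this kernel's
 * headers) call clear_page()/copy_page() and then handle any cache
 * aliasing; that detail lives in include/asm-mips/page.h, not here.
 */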