5543aaa568e7940a6339bb7218a989d02c59b501
[linux-2.6.git] / arch / ppc64 / kernel / pSeries_iommu.c
1 /*
2  * arch/ppc64/kernel/pSeries_iommu.c
3  *
4  * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
5  *
6  * Rewrite, cleanup: 
7  *
8  * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation
9  *
10  * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
11  *
12  * 
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2 of the License, or
16  * (at your option) any later version.
17  * 
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  * 
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
26  */
27
28 #include <linux/config.h>
29 #include <linux/init.h>
30 #include <linux/types.h>
31 #include <linux/slab.h>
32 #include <linux/mm.h>
33 #include <linux/spinlock.h>
34 #include <linux/string.h>
35 #include <linux/pci.h>
36 #include <linux/dma-mapping.h>
37 #include <asm/io.h>
38 #include <asm/prom.h>
39 #include <asm/rtas.h>
40 #include <asm/ppcdebug.h>
41 #include <asm/iommu.h>
42 #include <asm/pci-bridge.h>
43 #include <asm/machdep.h>
44 #include <asm/abs_addr.h>
45 #include <asm/plpar_wrappers.h>
46 #include <asm/systemcfg.h>
47 #include "pci.h"
48
49 #define DBG(fmt...)
50
51 extern int is_python(struct device_node *);
52
53 static void tce_build_pSeries(struct iommu_table *tbl, long index, 
54                               long npages, unsigned long uaddr, 
55                               enum dma_data_direction direction)
56 {
57         union tce_entry t;
58         union tce_entry *tp;
59
60         t.te_word = 0;
61         t.te_rdwr = 1; // Read allowed 
62
63         if (direction != DMA_TO_DEVICE)
64                 t.te_pciwr = 1;
65
66         tp = ((union tce_entry *)tbl->it_base) + index;
67
68         while (npages--) {
69                 /* can't move this out since we might cross LMB boundary */
70                 t.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
71         
72                 tp->te_word = t.te_word;
73
74                 uaddr += PAGE_SIZE;
75                 tp++;
76         }
77 }
78
79
80 static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
81 {
82         union tce_entry t;
83         union tce_entry *tp;
84
85         t.te_word = 0;
86         tp  = ((union tce_entry *)tbl->it_base) + index;
87                 
88         while (npages--) {
89                 tp->te_word = t.te_word;
90                 
91                 tp++;
92         }
93 }
94
95
96 static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
97                                 long npages, unsigned long uaddr,
98                                 enum dma_data_direction direction)
99 {
100         u64 rc;
101         union tce_entry tce;
102
103         tce.te_word = 0;
104         tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
105         tce.te_rdwr = 1;
106         if (direction != DMA_TO_DEVICE)
107                 tce.te_pciwr = 1;
108
109         while (npages--) {
110                 rc = plpar_tce_put((u64)tbl->it_index, 
111                                    (u64)tcenum << 12, 
112                                    tce.te_word );
113                 
114                 if (rc && printk_ratelimit()) {
115                         printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
116                         printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
117                         printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
118                         printk("\ttce val = 0x%lx\n", tce.te_word );
119                         show_stack(current, (unsigned long *)__get_SP());
120                 }
121                         
122                 tcenum++;
123                 tce.te_rpn++;
124         }
125 }
126
127 static DEFINE_PER_CPU(void *, tce_page) = NULL;
128
129 static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
130                                      long npages, unsigned long uaddr,
131                                      enum dma_data_direction direction)
132 {
133         u64 rc;
134         union tce_entry tce, *tcep;
135         long l, limit;
136
137         if (npages == 1)
138                 return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
139                                            direction);
140
141         tcep = __get_cpu_var(tce_page);
142
143         /* This is safe to do since interrupts are off when we're called
144          * from iommu_alloc{,_sg}()
145          */
146         if (!tcep) {
147                 tcep = (void *)__get_free_page(GFP_ATOMIC);
148                 /* If allocation fails, fall back to the loop implementation */
149                 if (!tcep)
150                         return tce_build_pSeriesLP(tbl, tcenum, npages,
151                                                    uaddr, direction);
152                 __get_cpu_var(tce_page) = tcep;
153         }
154
155         tce.te_word = 0;
156         tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT;
157         tce.te_rdwr = 1;
158         if (direction != DMA_TO_DEVICE)
159                 tce.te_pciwr = 1;
160
161         /* We can map max one pageful of TCEs at a time */
162         do {
163                 /*
164                  * Set up the page with TCE data, looping through and setting
165                  * the values.
166                  */
167                 limit = min_t(long, npages, PAGE_SIZE/sizeof(union tce_entry));
168
169                 for (l = 0; l < limit; l++) {
170                         tcep[l] = tce;
171                         tce.te_rpn++;
172                 }
173
174                 rc = plpar_tce_put_indirect((u64)tbl->it_index,
175                                             (u64)tcenum << 12,
176                                             (u64)virt_to_abs(tcep),
177                                             limit);
178
179                 npages -= limit;
180                 tcenum += limit;
181         } while (npages > 0 && !rc);
182
183         if (rc && printk_ratelimit()) {
184                 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
185                 printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
186                 printk("\tnpages  = 0x%lx\n", (u64)npages);
187                 printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word);
188                 show_stack(current, (unsigned long *)__get_SP());
189         }
190 }
191
192 static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
193 {
194         u64 rc;
195         union tce_entry tce;
196
197         tce.te_word = 0;
198
199         while (npages--) {
200                 rc = plpar_tce_put((u64)tbl->it_index,
201                                    (u64)tcenum << 12,
202                                    tce.te_word);
203
204                 if (rc && printk_ratelimit()) {
205                         printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
206                         printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
207                         printk("\ttcenum  = 0x%lx\n", (u64)tcenum);
208                         printk("\ttce val = 0x%lx\n", tce.te_word );
209                         show_stack(current, (unsigned long *)__get_SP());
210                 }
211
212                 tcenum++;
213         }
214 }
215
216
217 static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
218 {
219         u64 rc;
220         union tce_entry tce;
221
222         tce.te_word = 0;
223
224         rc = plpar_tce_stuff((u64)tbl->it_index,
225                            (u64)tcenum << 12,
226                            tce.te_word,
227                            npages);
228
229         if (rc && printk_ratelimit()) {
230                 printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
231                 printk("\trc      = %ld\n", rc);
232                 printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
233                 printk("\tnpages  = 0x%lx\n", (u64)npages);
234                 printk("\ttce val = 0x%lx\n", tce.te_word );
235                 show_stack(current, (unsigned long *)__get_SP());
236         }
237 }
238
239 static void iommu_table_setparms(struct pci_controller *phb,
240                                  struct device_node *dn,
241                                  struct iommu_table *tbl) 
242 {
243         struct device_node *node;
244         unsigned long *basep;
245         unsigned int *sizep;
246
247         node = (struct device_node *)phb->arch_data;
248
249         basep = (unsigned long *)get_property(node, "linux,tce-base", NULL);
250         sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL);
251         if (basep == NULL || sizep == NULL) {
252                 printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has "
253                                 "missing tce entries !\n", dn->full_name);
254                 return;
255         }
256
257         tbl->it_base = (unsigned long)__va(*basep);
258         memset((void *)tbl->it_base, 0, *sizep);
259
260         tbl->it_busno = phb->bus->number;
261         
262         /* Units of tce entries */
263         tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
264         
265         /* Test if we are going over 2GB of DMA space */
266         if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31))
267                 panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); 
268         
269         phb->dma_window_base_cur += phb->dma_window_size;
270
271         /* Set the tce table size - measured in entries */
272         tbl->it_size = phb->dma_window_size >> PAGE_SHIFT;
273
274         tbl->it_index = 0;
275         tbl->it_blocksize = 16;
276         tbl->it_type = TCE_PCI;
277 }
278
279 /*
280  * iommu_table_setparms_lpar
281  *
282  * Function: On pSeries LPAR systems, return TCE table info, given a pci bus.
283  *
284  * ToDo: properly interpret the ibm,dma-window property.  The definition is:
285  *      logical-bus-number      (1 word)
286  *      phys-address            (#address-cells words)
287  *      size                    (#cell-size words)
288  *
289  * Currently we hard code these sizes (more or less).
290  */
291 static void iommu_table_setparms_lpar(struct pci_controller *phb,
292                                       struct device_node *dn,
293                                       struct iommu_table *tbl,
294                                       unsigned int *dma_window)
295 {
296         tbl->it_busno  = dn->bussubno;
297
298         /* TODO: Parse field size properties properly. */
299         tbl->it_size   = (((unsigned long)dma_window[4] << 32) |
300                            (unsigned long)dma_window[5]) >> PAGE_SHIFT;
301         tbl->it_offset = (((unsigned long)dma_window[2] << 32) |
302                            (unsigned long)dma_window[3]) >> PAGE_SHIFT;
303         tbl->it_base   = 0;
304         tbl->it_index  = dma_window[0];
305         tbl->it_blocksize  = 16;
306         tbl->it_type = TCE_PCI;
307 }
308
309 static void iommu_bus_setup_pSeries(struct pci_bus *bus)
310 {
311         struct device_node *dn, *pdn;
312         struct iommu_table *tbl;
313
314         DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
315
316         /* For each (root) bus, we carve up the available DMA space in 256MB
317          * pieces. Since each piece is used by one (sub) bus/device, that would
318          * give a maximum of 7 devices per PHB. In most cases, this is plenty.
319          *
320          * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS
321          * bridges below the PHB to allocate the sectioned tables to, so instead
322          * we allocate a 1GB table at the PHB level.
323          */
324
325         dn = pci_bus_to_OF_node(bus);
326
327         if (!bus->self) {
328                 /* Root bus */
329                 if (is_python(dn)) {
330                         unsigned int *iohole;
331
332                         DBG("Python root bus %s\n", bus->name);
333
334                         iohole = (unsigned int *)get_property(dn, "io-hole", 0);
335
336                         if (iohole) {
337                                 /* On first bus we need to leave room for the
338                                  * ISA address space. Just skip the first 256MB
339                                  * alltogether. This leaves 768MB for the window.
340                                  */
341                                 DBG("PHB has io-hole, reserving 256MB\n");
342                                 dn->phb->dma_window_size = 3 << 28;
343                                 dn->phb->dma_window_base_cur = 1 << 28;
344                         } else {
345                                 /* 1GB window by default */
346                                 dn->phb->dma_window_size = 1 << 30;
347                                 dn->phb->dma_window_base_cur = 0;
348                         }
349
350                         tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
351
352                         iommu_table_setparms(dn->phb, dn, tbl);
353                         dn->iommu_table = iommu_init_table(tbl);
354                 } else {
355                         /* Do a 128MB table at root. This is used for the IDE
356                          * controller on some SMP-mode POWER4 machines. It
357                          * doesn't hurt to allocate it on other machines
358                          * -- it'll just be unused since new tables are
359                          * allocated on the EADS level.
360                          *
361                          * Allocate at offset 128MB to avoid having to deal
362                          * with ISA holes; 128MB table for IDE is plenty.
363                          */
364                         dn->phb->dma_window_size = 1 << 27;
365                         dn->phb->dma_window_base_cur = 1 << 27;
366
367                         tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
368
369                         iommu_table_setparms(dn->phb, dn, tbl);
370                         dn->iommu_table = iommu_init_table(tbl);
371
372                         /* All child buses have 256MB tables */
373                         dn->phb->dma_window_size = 1 << 28;
374                 }
375         } else {
376                 pdn = pci_bus_to_OF_node(bus->parent);
377
378                 if (!bus->parent->self && !is_python(pdn)) {
379                         struct iommu_table *tbl;
380                         /* First child and not python means this is the EADS
381                          * level. Allocate new table for this slot with 256MB
382                          * window.
383                          */
384
385                         tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
386
387                         iommu_table_setparms(dn->phb, dn, tbl);
388
389                         dn->iommu_table = iommu_init_table(tbl);
390                 } else {
391                         /* Lower than first child or under python, use parent table */
392                         dn->iommu_table = pdn->iommu_table;
393                 }
394         }
395 }
396
397
398 static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
399 {
400         struct iommu_table *tbl;
401         struct device_node *dn, *pdn;
402         unsigned int *dma_window = NULL;
403
404         dn = pci_bus_to_OF_node(bus);
405
406         /* Find nearest ibm,dma-window, walking up the device tree */
407         for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
408                 dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL);
409                 if (dma_window != NULL)
410                         break;
411         }
412
413         if (dma_window == NULL) {
414                 DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name);
415                 return;
416         }
417
418         if (!pdn->iommu_table) {
419                 /* Bussubno hasn't been copied yet.
420                  * Do it now because iommu_table_setparms_lpar needs it.
421                  */
422                 pdn->bussubno = bus->number;
423
424                 tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table),
425                                                     GFP_KERNEL);
426         
427                 iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window);
428
429                 pdn->iommu_table = iommu_init_table(tbl);
430         }
431
432         if (pdn != dn)
433                 dn->iommu_table = pdn->iommu_table;
434 }
435
436
437 static void iommu_dev_setup_pSeries(struct pci_dev *dev)
438 {
439         struct device_node *dn, *mydn;
440
441         DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name);
442         /* Now copy the iommu_table ptr from the bus device down to the
443          * pci device_node.  This means get_iommu_table() won't need to search
444          * up the device tree to find it.
445          */
446         mydn = dn = pci_device_to_OF_node(dev);
447
448         while (dn && dn->iommu_table == NULL)
449                 dn = dn->parent;
450
451         if (dn) {
452                 mydn->iommu_table = dn->iommu_table;
453         } else {
454                 DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name);
455         }
456 }
457
/* Bus setup hook that deliberately does nothing. */
static void iommu_bus_setup_null(struct pci_bus *b)
{
}
/* Device setup hook that deliberately does nothing. */
static void iommu_dev_setup_null(struct pci_dev *d)
{
}
460
461 /* These are called very early. */
462 void iommu_init_early_pSeries(void)
463 {
464         if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) {
465                 /* Direct I/O, IOMMU off */
466                 ppc_md.iommu_dev_setup = iommu_dev_setup_null;
467                 ppc_md.iommu_bus_setup = iommu_bus_setup_null;
468                 pci_direct_iommu_init();
469
470                 return;
471         }
472
473         if (systemcfg->platform & PLATFORM_LPAR) {
474                 if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) {
475                         ppc_md.tce_build = tce_buildmulti_pSeriesLP;
476                         ppc_md.tce_free  = tce_freemulti_pSeriesLP;
477                 } else {
478                         ppc_md.tce_build = tce_build_pSeriesLP;
479                         ppc_md.tce_free  = tce_free_pSeriesLP;
480                 }
481                 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP;
482         } else {
483                 ppc_md.tce_build = tce_build_pSeries;
484                 ppc_md.tce_free  = tce_free_pSeries;
485                 ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries;
486         }
487
488         ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries;
489
490         pci_iommu_init();
491 }
492