X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fppc64%2Fkernel%2FpSeries_iommu.c;h=5f05d6007fab5e7bde55cd7560620dd51ce964a2;hb=8e8ece46a861c84343256819eaec77e608ff9217;hp=367da0eb5b51d607a1ebcb0fe5b75b8befb56964;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index 367da0eb5..5f05d6007 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -42,8 +42,13 @@ #include #include #include +#include +#include #include "pci.h" +#define DBG(fmt...) + +extern int is_python(struct device_node *); static void tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, @@ -88,129 +93,187 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) } -static void iommu_buses_init(void) +static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) { - struct pci_controller* phb; - struct device_node *dn, *first_dn; - int num_slots, num_slots_ilog2; - int first_phb = 1; - unsigned long tcetable_ilog2; - - /* - * We default to a TCE table that maps 2GB (4MB table, 22 bits), - * however some machines have a 3GB IO hole and for these we - * create a table that maps 1GB (2MB table, 21 bits) - */ - if (io_hole_start < 0x80000000UL) - tcetable_ilog2 = 21; - else - tcetable_ilog2 = 22; + u64 rc; + union tce_entry tce; - /* XXX Should we be using pci_root_buses instead? -ojn - */ + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word ); + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } + + tcenum++; + tce.te_rpn++; + } +} + +static DEFINE_PER_CPU(void *, tce_page) = NULL; + +static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction) +{ + u64 rc; + union tce_entry tce, *tcep; + long l, limit; - for (phb=hose_head; phb; phb=phb->next) { - first_dn = ((struct device_node *)phb->arch_data)->child; + if (npages == 1) + return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + direction); - /* Carve 2GB into the largest dma_window_size possible */ - for (dn = first_dn, num_slots = 0; dn != NULL; dn = dn->sibling) - num_slots++; - num_slots_ilog2 = __ilog2(num_slots); + tcep = __get_cpu_var(tce_page); - if ((1<dma_window_size = 1 << (tcetable_ilog2 - num_slots_ilog2); + tce.te_word = 0; + tce.te_rpn = (virt_to_abs(uaddr)) >> PAGE_SHIFT; + tce.te_rdwr = 1; + if (direction != DMA_TO_DEVICE) + tce.te_pciwr = 1; - /* Reserve 16MB of DMA space on the first PHB. - * We should probably be more careful and use firmware props. - * In reality this space is remapped, not lost. But we don't - * want to get that smart to handle it -- too much work. + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. */ - phb->dma_window_base_cur = first_phb ? (1 << 12) : 0; - first_phb = 0; + limit = min_t(long, npages, PAGE_SIZE/sizeof(union tce_entry)); - for (dn = first_dn; dn != NULL; dn = dn->sibling) - iommu_devnode_init(dn); + for (l = 0; l < limit; l++) { + tcep[l] = tce; + tce.te_rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << 12, + (u64)virt_to_abs(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce[0] val = 0x%lx\n", tcep[0].te_word); + show_stack(current, (unsigned long *)__get_SP()); } } - -static void iommu_buses_init_lpar(struct list_head *bus_list) +static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { - struct list_head *ln; - struct pci_bus *bus; - struct device_node *busdn; - unsigned int *dma_window; - - for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); - busdn = PCI_GET_DN(bus); - - dma_window = (unsigned int *)get_property(busdn, "ibm,dma-window", 0); - if (dma_window) { - /* Bussubno hasn't been copied yet. - * Do it now because iommu_table_setparms_lpar needs it. - */ - busdn->bussubno = bus->number; - iommu_devnode_init(busdn); + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + + while (npages--) { + rc = plpar_tce_put((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); } - /* look for a window on a bridge even if the PHB had one */ - iommu_buses_init_lpar(&bus->children); + tcenum++; } } +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + union tce_entry tce; + + tce.te_word = 0; + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << 12, + tce.te_word, + npages); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %ld\n", rc); + printk("\tindex = 0x%lx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\ttce val = 0x%lx\n", tce.te_word ); + show_stack(current, (unsigned long *)__get_SP()); + } +} + static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl) { - phandle node; - unsigned long i; - struct of_tce_table *oft; - - node = ((struct device_node *)(phb->arch_data))->node; - - oft = NULL; - - for (i=0; of_tce_table[i].node; i++) - if(of_tce_table[i].node == node) { - oft = &of_tce_table[i]; - break; - } - - if (!oft) - panic("PCI_DMA: iommu_table_setparms: Can't find phb named '%s' in of_tce_table\n", dn->full_name); + struct device_node *node; + unsigned long *basep; + unsigned int *sizep; + + node = (struct device_node *)phb->arch_data; + + basep = (unsigned long *)get_property(node, "linux,tce-base", NULL); + sizep = (unsigned int *)get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %s has " + "missing tce entries !\n", dn->full_name); + return; + } - memset((void *)oft->base, 0, oft->size); + tbl->it_base = (unsigned long)__va(*basep); + memset((void *)tbl->it_base, 0, *sizep); tbl->it_busno = phb->bus->number; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur; - - /* Adjust the current table offset to the next - * region. Measured in TCE entries. Force an - * alignment to the size allotted per IOA. This - * makes it easier to remove the 1st 16MB. - */ - phb->dma_window_base_cur += (phb->dma_window_size>>3); - phb->dma_window_base_cur &= - ~((phb->dma_window_size>>3)-1); - - /* Set the tce table size - measured in pages */ - tbl->it_size = ((phb->dma_window_base_cur - - tbl->it_offset) << 3) >> PAGE_SHIFT; + tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur > (1 << 19)) + if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31)) panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); - tbl->it_base = oft->base; + phb->dma_window_base_cur += phb->dma_window_size; + + /* Set the tce table size - measured in entries */ + tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; + tbl->it_index = 0; - tbl->it_entrysize = sizeof(union tce_entry); tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; } /* @@ -227,74 +290,255 @@ static void iommu_table_setparms(struct pci_controller *phb, */ static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, - struct iommu_table *tbl) + struct iommu_table *tbl, + unsigned int *dma_window) { - unsigned int *dma_window; - - dma_window = (unsigned int *)get_property(dn, "ibm,dma-window", 0); - - if (!dma_window) - panic("iommu_table_setparms_lpar: device %s has no" - " ibm,dma-window property!\n", dn->full_name); - tbl->it_busno = dn->bussubno; - tbl->it_size = (((((unsigned long)dma_window[4] << 32) | - (unsigned long)dma_window[5]) >> PAGE_SHIFT) << 3) >> PAGE_SHIFT; - tbl->it_offset = ((((unsigned long)dma_window[2] << 32) | - (unsigned long)dma_window[3]) >> 12); + + /* TODO: Parse field size properties properly. */ + tbl->it_size = (((unsigned long)dma_window[4] << 32) | + (unsigned long)dma_window[5]) >> PAGE_SHIFT; + tbl->it_offset = (((unsigned long)dma_window[2] << 32) | + (unsigned long)dma_window[3]) >> PAGE_SHIFT; tbl->it_base = 0; tbl->it_index = dma_window[0]; - tbl->it_entrysize = sizeof(union tce_entry); tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; } +static void iommu_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn, *pdn; + struct iommu_table *tbl; + + DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); + + /* For each (root) bus, we carve up the available DMA space in 256MB + * pieces. Since each piece is used by one (sub) bus/device, that would + * give a maximum of 7 devices per PHB. In most cases, this is plenty. + * + * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS + * bridges below the PHB to allocate the sectioned tables to, so instead + * we allocate a 1GB table at the PHB level. + */ + + dn = pci_bus_to_OF_node(bus); + + if (!bus->self) { + /* Root bus */ + if (is_python(dn)) { + unsigned int *iohole; + + DBG("Python root bus %s\n", bus->name); + + iohole = (unsigned int *)get_property(dn, "io-hole", 0); + + if (iohole) { + /* On first bus we need to leave room for the + * ISA address space. Just skip the first 256MB + * alltogether. This leaves 768MB for the window. + */ + DBG("PHB has io-hole, reserving 256MB\n"); + dn->phb->dma_window_size = 3 << 28; + dn->phb->dma_window_base_cur = 1 << 28; + } else { + /* 1GB window by default */ + dn->phb->dma_window_size = 1 << 30; + dn->phb->dma_window_base_cur = 0; + } + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + dn->iommu_table = iommu_init_table(tbl); + } else { + /* Do a 128MB table at root. This is used for the IDE + * controller on some SMP-mode POWER4 machines. It + * doesn't hurt to allocate it on other machines + * -- it'll just be unused since new tables are + * allocated on the EADS level. + * + * Allocate at offset 128MB to avoid having to deal + * with ISA holes; 128MB table for IDE is plenty. + */ + dn->phb->dma_window_size = 1 << 27; + dn->phb->dma_window_base_cur = 1 << 27; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + dn->iommu_table = iommu_init_table(tbl); -void iommu_devnode_init(struct device_node *dn) + /* All child buses have 256MB tables */ + dn->phb->dma_window_size = 1 << 28; + } + } else { + pdn = pci_bus_to_OF_node(bus->parent); + + if (!bus->parent->self && !is_python(pdn)) { + struct iommu_table *tbl; + /* First child and not python means this is the EADS + * level. Allocate new table for this slot with 256MB + * window. + */ + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(dn->phb, dn, tbl); + + dn->iommu_table = iommu_init_table(tbl); + } else { + /* Lower than first child or under python, use parent table */ + dn->iommu_table = pdn->iommu_table; + } + } +} + + +static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; + struct device_node *dn, *pdn; + unsigned int *dma_window = NULL; - tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), - GFP_KERNEL); - - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) - iommu_table_setparms_lpar(dn->phb, dn, tbl); - else - iommu_table_setparms(dn->phb, dn, tbl); + DBG("iommu_bus_setup_pSeriesLP, bus %p, bus->self %p\n", bus, bus->self); + + dn = pci_bus_to_OF_node(bus); + + /* Find nearest ibm,dma-window, walking up the device tree */ + for (pdn = dn; pdn != NULL; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window != NULL) + break; + } + + if (dma_window == NULL) { + DBG("iommu_bus_setup_pSeriesLP: bus %s seems to have no ibm,dma-window property\n", dn->full_name); + return; + } + + if (!pdn->iommu_table) { + /* Bussubno hasn't been copied yet. + * Do it now because iommu_table_setparms_lpar needs it. + */ + pdn->bussubno = bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); - dn->iommu_table = iommu_init_table(tbl); + iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window); + + pdn->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + dn->iommu_table = pdn->iommu_table; } -void iommu_setup_pSeries(void) +static void iommu_dev_setup_pSeries(struct pci_dev *dev) { - struct pci_dev *dev = NULL; struct device_node *dn, *mydn; - if (systemcfg->platform == PLATFORM_PSERIES_LPAR) - iommu_buses_init_lpar(&pci_root_buses); - else - iommu_buses_init(); - - /* Now copy the iommu_table ptr from the bus devices down to every + DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name); + /* Now copy the iommu_table ptr from the bus device down to the * pci device_node. This means get_iommu_table() won't need to search * up the device tree to find it. */ - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - mydn = dn = PCI_GET_DN(dev); + mydn = dn = pci_device_to_OF_node(dev); + + while (dn && dn->iommu_table == NULL) + dn = dn->parent; + + if (dn) { + mydn->iommu_table = dn->iommu_table; + } else { + DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name); + } +} + +static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) +{ + struct device_node *pdn, *dn; + struct iommu_table *tbl; + int *dma_window = NULL; + + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not neccesarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + + for (pdn = dn; pdn && !pdn->iommu_table; pdn = pdn->parent) { + dma_window = (unsigned int *)get_property(pdn, "ibm,dma-window", NULL); + if (dma_window) + break; + } - while (dn && dn->iommu_table == NULL) - dn = dn->parent; - if (dn) - mydn->iommu_table = dn->iommu_table; + /* Check for parent == NULL so we don't try to setup the empty EADS + * slots on POWER4 machines. + */ + if (dma_window == NULL || pdn->parent == NULL) { + /* Fall back to regular (non-LPAR) dev setup */ + DBG("No dma window for device, falling back to regular setup\n"); + iommu_dev_setup_pSeries(dev); + return; + } else { + DBG("Found DMA window, allocating table\n"); } + + if (!pdn->iommu_table) { + /* iommu_table_setparms_lpar needs bussubno. */ + pdn->bussubno = pdn->phb->bus->number; + + tbl = (struct iommu_table *)kmalloc(sizeof(struct iommu_table), + GFP_KERNEL); + + iommu_table_setparms_lpar(pdn->phb, pdn, tbl, dma_window); + + pdn->iommu_table = iommu_init_table(tbl); + } + + if (pdn != dn) + dn->iommu_table = pdn->iommu_table; } +static void iommu_bus_setup_null(struct pci_bus *b) { } +static void iommu_dev_setup_null(struct pci_dev *d) { } /* These are called very early. */ -void tce_init_pSeries(void) +void iommu_init_early_pSeries(void) { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; + if (of_chosen && get_property(of_chosen, "linux,iommu-off", NULL)) { + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + pci_direct_iommu_init(); + + return; + } + + if (systemcfg->platform & PLATFORM_LPAR) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) { + ppc_md.tce_build = tce_buildmulti_pSeriesLP; + ppc_md.tce_free = tce_freemulti_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeriesLP; + ppc_md.tce_free = tce_free_pSeriesLP; + } + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeriesLP; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeriesLP; + } else { + ppc_md.tce_build = tce_build_pSeries; + ppc_md.tce_free = tce_free_pSeries; + ppc_md.iommu_bus_setup = iommu_bus_setup_pSeries; + ppc_md.iommu_dev_setup = iommu_dev_setup_pSeries; + } + pci_iommu_init(); }