X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fia64%2Fhp%2Fcommon%2Fsba_iommu.c;h=ce49fe3a3b5654f88256d24b8fe6fc2410ccfc0b;hb=refs%2Fheads%2Fvserver;hp=5333e61907bad641b3e11cfc4c8878a87ebd1824;hpb=9bf4aaab3e101692164d49b7ca357651eb691cb6;p=linux-2.6.git
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 5333e6190..ce49fe3a3 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1,9 +1,9 @@
 /*
 ** IA64 System Bus Adapter (SBA) I/O MMU manager
 **
-** (c) Copyright 2002-2004 Alex Williamson
+** (c) Copyright 2002-2005 Alex Williamson
 ** (c) Copyright 2002-2003 Grant Grundler
-** (c) Copyright 2002-2004 Hewlett-Packard Company
+** (c) Copyright 2002-2005 Hewlett-Packard Company
 **
 ** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
 ** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
@@ -19,7 +19,6 @@
 **
 */
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -33,13 +32,14 @@
 #include <linux/seq_file.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
+#include <linux/nodemask.h>
+#include <linux/bitops.h> /* hweight64() */
 
 #include <asm/delay.h> /* ia64_get_itc() */
 #include <asm/io.h>
 #include <asm/page.h> /* PAGE_OFFSET */
 #include <asm/dma.h>
 #include <asm/system.h> /* wmb() */
-#include <asm/bitops.h> /* hweight64() */
 
 #include <asm/acpi-ext.h>
 
@@ -75,7 +75,7 @@
 ** If a device prefetches beyond the end of a valid pdir entry, it will cause
 ** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should
 ** disconnect on 4k boundaries and prevent such issues. If the device is
-** particularly agressive, this option will keep the entire pdir valid such
+** particularly aggressive, this option will keep the entire pdir valid such
 ** that prefetching will hit a valid address. This could severely impact
 ** error containment, and is therefore off by default. The page that is
 ** used for spill-over is poisoned, so that should help debugging somewhat.
@@ -155,9 +155,13 @@
 */
 #define DELAYED_RESOURCE_CNT 64
 
+#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec
+
 #define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
+#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
 #define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
 #define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
+#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)
 
 #define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */
 
@@ -191,7 +195,7 @@ static unsigned long iovp_shift;
 static unsigned long iovp_mask;
 
 struct ioc {
-	void *ioc_hpa;	/* I/O MMU base address */
+	void __iomem *ioc_hpa;	/* I/O MMU base address */
 	char *res_map;	/* resource map, bit == pdir entry */
 	u64 *pdir_base;	/* physical base address */
 	unsigned long ibase;	/* pdir IOV Space base */
@@ -203,6 +207,9 @@ struct ioc {
 	/* clearing pdir to prevent races with allocations. */
 	unsigned int res_bitshift;	/* from the RIGHT! */
 	unsigned int res_size;	/* size of resource map in bytes */
+#ifdef CONFIG_NUMA
+	unsigned int node;	/* node where this IOC lives */
+#endif
 #if DELAYED_RESOURCE_CNT > 0
 	spinlock_t saved_lock;	/* may want to try to get this on a separate cacheline */
 	/* than res_lock for bigger systems. */
@@ -251,10 +258,10 @@ static u64 prefetch_spill_page;
 
 /*
 ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
-** (or rather not merge) DMA's into managable chunks.
+** (or rather not merge) DMAs into manageable chunks.
 ** On parisc, this is more of the software/tuning constraint
-** rather than the HW. I/O MMU allocation alogorithms can be
-** faster with smaller size is (to some degree).
+** rather than the HW. 
I/O MMU allocation algorithms can be +** faster with smaller sizes (to some degree). */ #define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) @@ -454,28 +461,39 @@ get_iovp_order (unsigned long size) * sba_search_bitmap - find free space in IO PDIR resource bitmap * @ioc: IO MMU structure which owns the pdir we are interested in. * @bits_wanted: number of entries we need. + * @use_hint: use res_hint to indicate where to start looking * * Find consecutive free bits in resource bitmap. * Each bit represents one entry in the IO Pdir. * Cool perf optimization: search for log2(size) bits at a time. */ static SBA_INLINE unsigned long -sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) +sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint) { - unsigned long *res_ptr = ioc->res_hint; + unsigned long *res_ptr; unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); - unsigned long pide = ~0UL; + unsigned long flags, pide = ~0UL; ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); + spin_lock_irqsave(&ioc->res_lock, flags); + + /* Allow caller to force a search through the entire resource space */ + if (likely(use_hint)) { + res_ptr = ioc->res_hint; + } else { + res_ptr = (ulong *)ioc->res_map; + ioc->res_bitshift = 0; + } + /* * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts * if a TLB entry is purged while in use. sba_mark_invalid() * purges IOTLB entries in power-of-two sizes, so we also * allocate IOVA space in power-of-two sizes. */ - bits_wanted = 1UL << get_iovp_order(bits_wanted << PAGE_SHIFT); + bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); if (likely(bits_wanted == 1)) { unsigned int bitshiftcnt; @@ -565,10 +583,12 @@ not_found: prefetch(ioc->res_map); ioc->res_hint = (unsigned long *) ioc->res_map; ioc->res_bitshift = 0; + spin_unlock_irqrestore(&ioc->res_lock, flags); return (pide); found_it: ioc->res_hint = res_ptr; + spin_unlock_irqrestore(&ioc->res_lock, flags); return (pide); } @@ -589,36 +609,36 @@ sba_alloc_range(struct ioc *ioc, size_t size) unsigned long itc_start; #endif unsigned long pide; - unsigned long flags; ASSERT(pages_needed); ASSERT(0 == (size & ~iovp_mask)); - spin_lock_irqsave(&ioc->res_lock, flags); - #ifdef PDIR_SEARCH_TIMING itc_start = ia64_get_itc(); #endif /* ** "seek and ye shall find"...praying never hurts either... */ - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 1); if (unlikely(pide >= (ioc->res_size << 3))) { - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) { #if DELAYED_RESOURCE_CNT > 0 + unsigned long flags; + /* ** With delayed resource freeing, we can give this one more shot. We're ** getting close to being in trouble here, so do what we can to make this ** one count. 
*/ - spin_lock(&ioc->saved_lock); + spin_lock_irqsave(&ioc->saved_lock, flags); if (ioc->saved_cnt > 0) { struct sba_dma_pair *d; int cnt = ioc->saved_cnt; - d = &(ioc->saved[ioc->saved_cnt]); + d = &(ioc->saved[ioc->saved_cnt - 1]); + spin_lock(&ioc->res_lock); while (cnt--) { sba_mark_invalid(ioc, d->iova, d->size); sba_free_range(ioc, d->iova, d->size); @@ -626,10 +646,11 @@ sba_alloc_range(struct ioc *ioc, size_t size) } ioc->saved_cnt = 0; READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); } - spin_unlock(&ioc->saved_lock); + spin_unlock_irqrestore(&ioc->saved_lock, flags); - pide = sba_search_bitmap(ioc, pages_needed); + pide = sba_search_bitmap(ioc, pages_needed, 0); if (unlikely(pide >= (ioc->res_size << 3))) panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n", ioc->ioc_hpa); @@ -659,8 +680,6 @@ sba_alloc_range(struct ioc *ioc, size_t size) (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), ioc->res_bitshift ); - spin_unlock_irqrestore(&ioc->res_lock, flags); - return (pide); } @@ -684,7 +703,7 @@ sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) unsigned long m; /* Round up to power-of-two size: see AR2305 note above */ - bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << PAGE_SHIFT); + bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); for (; bits_not_wanted > 0 ; res_ptr++) { if (unlikely(bits_not_wanted > BITS_PER_LONG)) { @@ -757,7 +776,7 @@ sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba) #ifdef ENABLE_MARK_CLEAN /** * Since DMA is i-cache coherent, any (complete) pages that were written via - * DMA can be marked as "clean" so that update_mmu_cache() doesn't have to + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to * flush them when they get mapped into an executable vm-area. */ static void @@ -945,6 +964,30 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) return SBA_IOVA(ioc, iovp, offset); } +#ifdef ENABLE_MARK_CLEAN +static SBA_INLINE void +sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + int off = PDIR_INDEX(iovp); + void *addr; + + if (size <= iovp_size) { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, size); + } else { + do { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, min(size, iovp_size)); + off++; + size -= iovp_size; + } while (size > 0); + } +} +#endif + /** * sba_unmap_single - unmap one IOVA and free resources * @dev: instance of PCI owned by the driver that's asking. 
@@ -990,6 +1033,10 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) size += offset; size = ROUNDUP(size, iovp_size); +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) + sba_mark_clean(ioc, iova, size); +#endif #if DELAYED_RESOURCE_CNT > 0 spin_lock_irqsave(&ioc->saved_lock, flags); @@ -1016,30 +1063,6 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ spin_unlock_irqrestore(&ioc->res_lock, flags); #endif /* DELAYED_RESOURCE_CNT == 0 */ -#ifdef ENABLE_MARK_CLEAN - if (dir == DMA_FROM_DEVICE) { - u32 iovp = (u32) SBA_IOVP(ioc,iova); - int off = PDIR_INDEX(iovp); - void *addr; - - if (size <= iovp_size) { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, size); - } else { - size_t byte_cnt = size; - - do { - addr = phys_to_virt(ioc->pdir_base[off] & - ~0xE000000000000FFFULL); - mark_clean(addr, min(byte_cnt, iovp_size)); - off++; - byte_cnt -= iovp_size; - - } while (byte_cnt > 0); - } - } -#endif } @@ -1052,12 +1075,29 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) * See Documentation/DMA-mapping.txt */ void * -sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags) +sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { struct ioc *ioc; void *addr; + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef CONFIG_NUMA + { + struct page *page; + page = alloc_pages_node(ioc->node == MAX_NUMNODES ? + numa_node_id() : ioc->node, flags, + get_order(size)); + + if (unlikely(!page)) + return NULL; + + addr = page_address(page); + } +#else addr = (void *) __get_free_pages(flags, get_order(size)); +#endif if (unlikely(!addr)) return NULL; @@ -1081,8 +1121,6 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int * If device can't bypass or bypass is disabled, pass the 32bit fake * device to map single to get an iova mapping. */ - ioc = GET_IOC(dev); - ASSERT(ioc); *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); return addr; @@ -1135,7 +1173,7 @@ sba_fill_pdir( { struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ int n_mappings = 0; - u64 *pdirp = 0; + u64 *pdirp = NULL; unsigned long dma_offset = 0; dma_sg--; @@ -1538,7 +1576,7 @@ ioc_iova_init(struct ioc *ioc) ** We program the next pdir index after we stop w/ a key for ** the GART code to handshake on. */ - while ((device = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, device)) != NULL) + for_each_pci_dev(device) agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); if (agp_found && reserve_sba_gart) { @@ -1634,15 +1672,13 @@ ioc_sac_init(struct ioc *ioc) * SAC (single address cycle) addressable, so allocate a * pseudo-device to enforce that. 
*/ - sac = kmalloc(sizeof(*sac), GFP_KERNEL); + sac = kzalloc(sizeof(*sac), GFP_KERNEL); if (!sac) panic(PFX "Couldn't allocate struct pci_dev"); - memset(sac, 0, sizeof(*sac)); - controller = kmalloc(sizeof(*controller), GFP_KERNEL); + controller = kzalloc(sizeof(*controller), GFP_KERNEL); if (!controller) panic(PFX "Couldn't allocate struct pci_controller"); - memset(controller, 0, sizeof(*controller)); controller->iommu = ioc; sac->sysdata = controller; @@ -1688,7 +1724,9 @@ struct ioc_iommu { static struct ioc_iommu ioc_iommu_info[] __initdata = { { ZX1_IOC_ID, "zx1", ioc_zx1_init }, + { ZX2_IOC_ID, "zx2", NULL }, { SX1000_IOC_ID, "sx1000", NULL }, + { SX2000_IOC_ID, "sx2000", NULL }, }; static struct ioc * __init @@ -1697,12 +1735,10 @@ ioc_init(u64 hpa, void *handle) struct ioc *ioc; struct ioc_iommu *info; - ioc = kmalloc(sizeof(*ioc), GFP_KERNEL); + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); if (!ioc) return NULL; - memset(ioc, 0, sizeof(*ioc)); - ioc->next = ioc_list; ioc_list = ioc; @@ -1799,6 +1835,10 @@ ioc_show(struct seq_file *s, void *v) seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); +#ifdef CONFIG_NUMA + if (ioc->node != MAX_NUMNODES) + seq_printf(s, "NUMA node : %d\n", ioc->node); +#endif seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); @@ -1853,7 +1893,7 @@ ioc_proc_init(void) { struct proc_dir_entry *dir, *entry; - dir = proc_mkdir("bus/mckinley", 0); + dir = proc_mkdir("bus/mckinley", NULL); if (!dir) return; @@ -1899,6 +1939,32 @@ sba_connect_bus(struct pci_bus *bus) printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); } +#ifdef CONFIG_NUMA +static void __init +sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) +{ + unsigned int node; + int pxm; + + ioc->node = MAX_NUMNODES; + + pxm = acpi_get_pxm(handle); + + if (pxm < 0) + return; + + node = pxm_to_node(pxm); + + if (node >= MAX_NUMNODES || !node_online(node)) + return; + + ioc->node = node; + return; +} +#else +#define sba_map_ioc_to_node(ioc, handle) +#endif + static int __init acpi_sba_ioc_add(struct acpi_device *device) { @@ -1928,7 +1994,7 @@ acpi_sba_ioc_add(struct acpi_device *device) if (!iovp_shift) iovp_shift = min(PAGE_SHIFT, 16); } - ACPI_MEM_FREE(dev_info); + kfree(dev_info); /* * default anything not caught above or specified on cmdline to 4k @@ -1941,6 +2007,8 @@ acpi_sba_ioc_add(struct acpi_device *device) if (!ioc) return 1; + /* setup NUMA node association */ + sba_map_ioc_to_node(ioc, device->handle); return 0; } @@ -1955,9 +2023,40 @@ static struct acpi_driver acpi_sba_ioc_driver = { static int __init sba_init(void) { + if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb")) + return 0; + acpi_bus_register_driver(&acpi_sba_ioc_driver); - if (!ioc_list) + if (!ioc_list) { +#ifdef CONFIG_IA64_GENERIC + extern int swiotlb_late_init_with_default_size (size_t size); + + /* + * If we didn't find something sba_iommu can claim, we + * need to setup the swiotlb and switch to the dig machvec. 
+ */ + if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) + panic("Unable to find SBA IOMMU or initialize " + "software I/O TLB: Try machvec=dig boot option"); + machvec_init("dig"); +#else + panic("Unable to find SBA IOMMU: Try a generic or DIG kernel"); +#endif return 0; + } + +#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB) + /* + * hpzx1_swiotlb needs to have a fairly small swiotlb bounce + * buffer setup to support devices with smaller DMA masks than + * sba_iommu can handle. + */ + if (ia64_platform_is("hpzx1_swiotlb")) { + extern void hwsw_init(void); + + hwsw_init(); + } +#endif #ifdef CONFIG_PCI { @@ -1975,18 +2074,6 @@ sba_init(void) subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */ -extern void dig_setup(char**); -/* - * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good, - * so we use the platform_setup hook to fix it up. - */ -void __init -sba_setup(char **cmdline_p) -{ - MAX_DMA_ADDRESS = ~0UL; - dig_setup(cmdline_p); -} - static int __init nosbagart(char *str) {