linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / arch / x86_64 / kernel / pci-gart.c
index 6d3e61b..b9dbe3c 100644 (file)
@@ -10,6 +10,7 @@
  * Copyright 2002 Andi Kleen, SuSE Labs.
  */
 
+#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/ctype.h>
 #include <linux/agp_backend.h>
@@ -31,7 +32,6 @@
 #include <asm/kdebug.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
-#include <asm/k8.h>
 
 unsigned long iommu_bus_base;  /* GART remapping area (physical) */
 static unsigned long iommu_size;       /* size of remapping area bytes */
@@ -46,6 +46,8 @@ u32 *iommu_gatt_base;                 /* Remapping table */
    also seen with Qlogic at least). */
 int iommu_fullflush = 1;
 
+#define MAX_NB 8
+
 /* Allocation bitmap for the remapping area */ 
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
 static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -61,6 +63,15 @@ static u32 gart_unmapped_entry;
 #define to_pages(addr,size) \
        (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
+#define for_all_nb(dev) \
+       dev = NULL;     \
+       while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
+            if (dev->bus->number == 0 &&                                    \
+                   (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
+
+static struct pci_dev *northbridges[MAX_NB];
+static u32 northbridge_flush_word[MAX_NB];
+
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
 
 #ifdef CONFIG_AGP
@@ -84,7 +95,7 @@ static unsigned long alloc_iommu(int size)
        offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
        if (offset == -1) {
                need_flush = 1;
-               offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
+               offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
        }
        if (offset != -1) { 
                set_bit_string(iommu_gart_bitmap, offset, size); 
@@ -111,17 +122,41 @@ static void free_iommu(unsigned long offset, int size)
 /* 
  * Use global flush state to avoid races with multiple flushers.
  */
-static void flush_gart(void)
+static void flush_gart(struct device *dev)
 { 
        unsigned long flags;
+       int flushed = 0;
+       int i, max;
+
        spin_lock_irqsave(&iommu_bitmap_lock, flags);
-       if (need_flush) {
-               k8_flush_garts();
+       if (need_flush) { 
+               max = 0;
+               for (i = 0; i < MAX_NB; i++) {
+                       if (!northbridges[i]) 
+                               continue;
+                       pci_write_config_dword(northbridges[i], 0x9c, 
+                                              northbridge_flush_word[i] | 1); 
+                       flushed++;
+                       max = i;
+               }
+               for (i = 0; i <= max; i++) {
+                       u32 w;
+                       if (!northbridges[i])
+                               continue;
+                       /* Make sure the hardware actually executed the flush. */
+                       do { 
+                               pci_read_config_dword(northbridges[i], 0x9c, &w);
+                       } while (w & 1);
+               } 
+               if (!flushed) 
+                       printk(KERN_ERR "PCI-DMA: nothing to flush?\n");
                need_flush = 0;
        } 
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 } 
 
+
+
 #ifdef CONFIG_IOMMU_LEAK
 
 #define SET_LEAK(x) if (iommu_leak_tab) \
@@ -230,7 +265,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
                                 size_t size, int dir)
 {
        dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
-       flush_gart();
+       flush_gart(dev);
        return map;
 }
 
@@ -252,28 +287,6 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
        return bus; 
 }
 
-/*
- * Free a DMA mapping.
- */
-void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
-                     size_t size, int direction)
-{
-       unsigned long iommu_page;
-       int npages;
-       int i;
-
-       if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
-           dma_addr >= iommu_bus_base + iommu_size)
-               return;
-       iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
-       npages = to_pages(dma_addr, size);
-       for (i = 0; i < npages; i++) {
-               iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-               CLEAR_LEAK(iommu_page + i);
-       }
-       free_iommu(iommu_page, npages);
-}
-
 /*
  * Wrapper for pci_unmap_single working with scatterlists.
  */
@@ -285,7 +298,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
                struct scatterlist *s = &sg[i];
                if (!s->dma_length || !s->length)
                        break;
-               gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
+               dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
        }
 }
 
@@ -315,7 +328,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
                s->dma_address = addr;
                s->dma_length = s->length;
        }
-       flush_gart();
+       flush_gart(dev);
        return nents;
 }
 
@@ -422,13 +435,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
        if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
                goto error;
        out++;
-       flush_gart();
+       flush_gart(dev);
        if (out < nents) 
                sg[out].dma_length = 0; 
        return out;
 
 error:
-       flush_gart();
+       flush_gart(NULL);
        gart_unmap_sg(dev, sg, nents, dir);
        /* When it was forced or merged try again in a dumb way */
        if (force_iommu || iommu_merge) {
@@ -444,6 +457,28 @@ error:
        return 0;
 } 
 
+/*
+ * Free a DMA mapping.
+ */ 
+void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
+                     size_t size, int direction)
+{
+       unsigned long iommu_page; 
+       int npages;
+       int i;
+
+       if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || 
+           dma_addr >= iommu_bus_base + iommu_size)
+               return;
+       iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;   
+       npages = to_pages(dma_addr, size);
+       for (i = 0; i < npages; i++) { 
+               iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 
+               CLEAR_LEAK(iommu_page + i);
+       }
+       free_iommu(iommu_page, npages);
+}
+
 static int no_agp;
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -496,13 +531,10 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
        void *gatt;
        unsigned aper_base, new_aper_base;
        unsigned aper_size, gatt_size, new_aper_size;
-       int i;
-
+       
        printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
        aper_size = aper_base = info->aper_size = 0;
-       dev = NULL;
-       for (i = 0; i < num_k8_northbridges; i++) {
-               dev = k8_northbridges[i];
+       for_all_nb(dev) { 
                new_aper_base = read_aperture(dev, &new_aper_size); 
                if (!new_aper_base) 
                        goto nommu; 
@@ -525,12 +557,11 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
                panic("Cannot allocate GATT table"); 
        memset(gatt, 0, gatt_size); 
        agp_gatt_table = gatt;
-
-       for (i = 0; i < num_k8_northbridges; i++) {
+       
+       for_all_nb(dev) { 
                u32 ctl; 
                u32 gatt_reg; 
 
-               dev = k8_northbridges[i];
                gatt_reg = __pa(gatt) >> 12; 
                gatt_reg <<= 4; 
                pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -541,7 +572,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
                pci_write_config_dword(dev, 0x90, ctl); 
        }
-       flush_gart();
+       flush_gart(NULL); 
        
        printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 
        return 0;
@@ -570,19 +601,15 @@ static struct dma_mapping_ops gart_dma_ops = {
        .unmap_sg = gart_unmap_sg,
 };
 
-void __init gart_iommu_init(void)
+static int __init pci_iommu_init(void)
 { 
        struct agp_kern_info info;
        unsigned long aper_size;
        unsigned long iommu_start;
+       struct pci_dev *dev;
        unsigned long scratch;
        long i;
 
-       if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
-               printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
-               return;
-       }
-
 #ifndef CONFIG_AGP_AMD64
        no_agp = 1; 
 #else
@@ -594,11 +621,7 @@ void __init gart_iommu_init(void)
 #endif 
 
        if (swiotlb)
-               return;
-
-       /* Did we detect a different HW IOMMU? */
-       if (iommu_detected && !iommu_aperture)
-               return;
+               return -1; 
 
        if (no_iommu ||
            (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -607,10 +630,12 @@ void __init gart_iommu_init(void)
                printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
                if (end_pfn > MAX_DMA32_PFN) {
                        printk(KERN_ERR "WARNING more than 4GB of memory "
-                                       "but IOMMU not available.\n"
-                              KERN_ERR "WARNING 32bit PCI may malfunction.\n");
+                                       "but IOMMU not compiled in.\n"
+                              KERN_ERR "WARNING 32bit PCI may malfunction.\n"
+                              KERN_ERR "You might want to enable "
+                                       "CONFIG_GART_IOMMU\n");
                }
-               return;
+               return -1;
        }
 
        printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -675,10 +700,26 @@ void __init gart_iommu_init(void)
        for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 
                iommu_gatt_base[i] = gart_unmapped_entry;
 
-       flush_gart();
+       for_all_nb(dev) {
+               u32 flag; 
+               int cpu = PCI_SLOT(dev->devfn) - 24;
+               if (cpu >= MAX_NB)
+                       continue;
+               northbridges[cpu] = dev;
+               pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
+               northbridge_flush_word[cpu] = flag; 
+       }
+                    
+       flush_gart(NULL);
+
        dma_ops = &gart_dma_ops;
+
+       return 0;
 } 
 
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
 void gart_parse_options(char *p)
 {
        int arg;