2 * Copyright 2001-2003 SuSE Labs.
3 * Distributed under the GNU public license, v2.
5 * This is a GART driver for the AMD Opteron/Athlon64 on-CPU northbridge.
6 * It also includes support for the AMD 8151 AGP bridge,
7 * although it doesn't actually do much, as all the real
8 * work is done in the northbridge(s).
11 #include <linux/config.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/init.h>
15 #include <linux/agp_backend.h>
18 /* Will need to be increased if AMD64 ever goes >8-way. */
/* Capacity of hammers[]; cache_nbs() reports an error once this many northbridges were found. */
19 #define MAX_HAMMER_GARTS 8
/*
 * GATT entry flag; amd64_insert_memory() ORs it (together with GPTE_VALID)
 * into every entry it writes.
 * NOTE(review): GPTE_VALID itself is not defined in the visible lines.
 */
23 #define GPTE_COHERENT 2
25 /* Aperture control register bits. */
/* Both bits are cleared by amd64_configure() before it writes AMD64_GARTAPERTURECTL back. */
27 #define DISGARTCPU (1<<4)
28 #define DISGARTIO (1<<5)
30 /* GART cache control register bits. */
31 #define INVGART (1<<0)
32 #define GARTPTEERR (1<<1)
34 /* K8 On-cpu GART registers */
/* PCI config-space offsets, accessed with pci_{read,write}_config_dword() on the northbridge devices. */
35 #define AMD64_GARTAPERTURECTL 0x90
36 #define AMD64_GARTAPERTUREBASE 0x94
37 #define AMD64_GARTTABLEBASE 0x98
38 #define AMD64_GARTCACHECTL 0x9c
/* Bit in AMD64_GARTAPERTURECTL; amd64_cleanup() clears it to disable GART translation. */
39 #define AMD64_GARTEN (1<<0)
41 /* NVIDIA K8 registers */
/*
 * Config-space offsets used by nforce3_agp_init(): the _0_ register lives on
 * the bridge device itself, the _1_ registers on the secondary device.
 */
42 #define NVIDIA_X86_64_0_APBASE 0x10
43 #define NVIDIA_X86_64_1_APBASE1 0x50
44 #define NVIDIA_X86_64_1_APLIMIT1 0x54
45 #define NVIDIA_X86_64_1_APSIZE 0xa8
46 #define NVIDIA_X86_64_1_APBASE2 0xd8
47 #define NVIDIA_X86_64_1_APLIMIT2 0xdc
/* Northbridge devices recorded by cache_nbs(); at most MAX_HAMMER_GARTS entries. */
50 static struct pci_dev * hammers[MAX_HAMMER_GARTS];
/* Region claimed for the aperture in aperture_valid(); released in agp_amd64_cleanup(). */
52 static struct resource *aperture_resource;
/* Consulted by agp_amd64_init() when no supported bridge was found; exported via MODULE_PARM. */
53 static int __initdata agp_try_unsupported;
/*
 * Index shared by every for_each_nb() loop.
 * NOTE(review): because the index is a single file-scope variable, loops
 * built on for_each_nb() cannot nest; nothing in the visible lines serializes
 * concurrent users either -- confirm the callers' locking.
 * nr_garts is not declared in the visible lines.
 */
55 static int gart_iterator;
56 #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
/*
 * Read the GART cache control register of one northbridge and write it back.
 * NOTE(review): the statement between the read and the write is missing from
 * this excerpt; presumably it sets INVGART to request the invalidation --
 * confirm against the full source.
 */
58 static void flush_amd64_tlb(struct pci_dev *dev)
62 pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp);
64 pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp);
/*
 * tlb_flush hook of amd_8151_driver: flushes the GART cache of the
 * northbridge selected by gart_iterator.
 * NOTE(review): the loop that drives gart_iterator is not visible here
 * (presumably for_each_nb()); 'temp' is not used in the visible lines.
 */
67 static void amd64_tlbflush(struct agp_memory *temp)
70 flush_amd64_tlb(hammers[gart_iterator]);
/*
 * insert_memory hook: write one GATT entry per page of 'mem', starting at
 * GATT index 'pg_start'.
 *
 * Visible steps: check that both 'type' and mem->type are 0, check the range
 * against agp_num_entries(), check that the target entries are empty, mark
 * the memory flushed if it was not, then encode and store each entry.
 * NOTE(review): the statements guarded by the checks (presumably error
 * returns), the cache flush and the final return are missing from this
 * excerpt.
 */
73 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
75 int i, j, num_entries;
79 num_entries = agp_num_entries();
81 if (type != 0 || mem->type != 0)
84 /* Make sure we can fit the range in the gatt table. */
85 /* FIXME: could wrap */
86 if (((unsigned long)pg_start + mem->page_count) > num_entries)
91 /* gatt table should be empty. */
/* NOTE(review): the initialisation of j for this scan is not visible. */
92 while (j < (pg_start + mem->page_count)) {
93 if (!PGE_EMPTY(agp_bridge, agp_bridge->gatt_table[j]))
98 if (mem->is_flushed == FALSE) {
100 mem->is_flushed = TRUE;
103 for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
104 tmp = agp_bridge->driver->mask_memory(mem->memory[i], mem->type);
/*
 * Entry encoding: the masked address must have bits 63..40 and 11..2 clear;
 * address bits 39..32 are moved down into entry bits 11..4, address bits
 * 31..12 stay in place, and the valid/coherent flags are ORed in.
 */
106 BUG_ON(tmp & 0xffffff0000000ffcULL);
107 pte = (tmp & 0x000000ff00000000ULL) >> 28;
108 pte |=(tmp & 0x00000000fffff000ULL);
109 pte |= GPTE_VALID | GPTE_COHERENT;
111 agp_bridge->gatt_table[j] = pte;
118 * This hack alters the order element according
119 * to the size of a long. It sucks. I totally disown this, even
120 * though it does appear to work for the most part.
/*
 * Aperture sizes understood by the northbridge, searched by
 * amd64_fetch_size().  The initialisers are positional; struct
 * aper_size_info_32 is not visible here, so the column meanings below are
 * inferred from use and should be confirmed:
 *   1st: size (nforce3_agp_init() multiplies it by 1024*1024, i.e. MB),
 *   2nd: presumably the number of GATT entries (size in MB * 256),
 *   3rd: the "order" element, bumped by one when sizeof(long) is 8,
 *   4th: value compared against the aperture control register contents.
 */
122 static struct aper_size_info_32 amd64_aperture_sizes[7] =
124 {32, 8192, 3+(sizeof(long)/8), 0 },
125 {64, 16384, 4+(sizeof(long)/8), 1<<1 },
126 {128, 32768, 5+(sizeof(long)/8), 1<<2 },
127 {256, 65536, 6+(sizeof(long)/8), 1<<1 | 1<<2 },
128 {512, 131072, 7+(sizeof(long)/8), 1<<3 },
129 {1024, 262144, 8+(sizeof(long)/8), 1<<1 | 1<<3},
130 {2048, 524288, 9+(sizeof(long)/8), 1<<2 | 1<<3}
135 * Get the current Aperture size from the x86-64.
136 * Note that there may be multiple x86-64s, but we just return
137 * the value from the first one we find. The set_size functions
138 * keep the rest coherent anyway. Or at least should do.
/*
 * fetch_size hook: read the aperture control register, find the matching
 * entry in amd64_aperture_sizes, record it in agp_bridge and return its size.
 * NOTE(review): how 'dev' is chosen, any masking of 'temp' before the
 * comparison, and the return value when nothing matches are all on lines
 * missing from this excerpt.
 */
140 static int amd64_fetch_size(void)
145 struct aper_size_info_32 *values;
151 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &temp);
153 values = A_SIZE_32(amd64_aperture_sizes);
155 for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) {
156 if (temp == values[i].size_value) {
/* Remember the match as both the previous and the current size. */
157 agp_bridge->previous_size =
158 agp_bridge->current_size = (void *) (values + i);
160 agp_bridge->aperture_size_idx = i;
161 return values[i].size;
168 * In a multiprocessor x86-64 system, this function gets
169 * called once for each CPU.
/*
 * Program one northbridge ('hammer') for GART use: derive the aperture
 * address from the aperture base register, write the GATT base register,
 * clear DISGARTCPU/DISGARTIO in the aperture control register and flush the
 * GART cache.
 * NOTE(review): the local declarations, the computation of the value written
 * to AMD64_GARTTABLEBASE, any setting of AMD64_GARTEN and the return
 * statement are missing from this excerpt.  amd_8151_configure() stores the
 * return value in agp_bridge->gart_bus_addr.
 */
171 static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
177 /* Address to map to */
178 pci_read_config_dword (hammer, AMD64_GARTAPERTUREBASE, &tmp);
/*
 * The register holds the aperture address divided by 2^25 (32MB units).
 * NOTE(review): if tmp is a 32-bit type the shift is done in 32 bits and
 * drops high address bits before the assignment -- confirm the declarations.
 */
179 aperturebase = tmp << 25;
180 aper_base = (aperturebase & PCI_BASE_ADDRESS_MEM_MASK);
182 /* address of the mappings table */
183 addr = (u64) gatt_table;
187 pci_write_config_dword (hammer, AMD64_GARTTABLEBASE, tmp);
189 /* Enable GART translation for this hammer. */
190 pci_read_config_dword(hammer, AMD64_GARTAPERTURECTL, &tmp);
192 tmp &= ~(DISGARTCPU | DISGARTIO);
193 pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
195 /* keep CPU's coherent. */
196 flush_amd64_tlb (hammer);
/*
 * Aperture size table installed as .aperture_sizes of amd_8151_driver.
 * Listed from largest (2048) to smallest (32); the last column is the
 * register encoding, whose bit pattern is spelled out in the trailing
 * comments.  Column meanings are positional -- see struct aper_size_info_32,
 * which is not visible here.
 */
202 static struct aper_size_info_32 amd_8151_sizes[7] =
204 {2048, 524288, 9, 0x00000000 }, /* 0 0 0 0 0 0 */
205 {1024, 262144, 8, 0x00000400 }, /* 1 0 0 0 0 0 */
206 {512, 131072, 7, 0x00000600 }, /* 1 1 0 0 0 0 */
207 {256, 65536, 6, 0x00000700 }, /* 1 1 1 0 0 0 */
208 {128, 32768, 5, 0x00000720 }, /* 1 1 1 1 0 0 */
209 {64, 16384, 4, 0x00000730 }, /* 1 1 1 1 1 0 */
210 {32, 8192, 3, 0x00000738 } /* 1 1 1 1 1 1 */
/*
 * configure hook: hand the physical address of the GATT to amd64_configure()
 * for the northbridge selected by gart_iterator and keep the returned value
 * as agp_bridge->gart_bus_addr.
 * NOTE(review): the loop over northbridges (presumably for_each_nb()) and the
 * return statement are missing from this excerpt.
 */
213 static int amd_8151_configure(void)
215 unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
217 /* Configure AGP regs in each x86-64 host bridge. */
219 agp_bridge->gart_bus_addr =
220 amd64_configure(hammers[gart_iterator],gatt_bus);
/*
 * cleanup hook: clear AMD64_GARTEN in the aperture control register of the
 * northbridge selected by gart_iterator (read-modify-write).
 * NOTE(review): the surrounding loop (presumably for_each_nb()) is missing
 * from this excerpt.
 */
226 static void amd64_cleanup(void)
231 /* disable gart translation */
232 pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp);
233 tmp &= ~AMD64_GARTEN;
234 pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp);
/*
 * Bridge operations table assigned to bridge->driver in agp_amd64_probe().
 * The amd64-specific hooks are configure, fetch_size, cleanup, tlb_flush and
 * insert_memory; everything else uses the agp_generic_* implementations.
 * NOTE(review): .aperture_sizes points at amd_8151_sizes, while
 * amd64_fetch_size() and agp_amd64_remove() index amd64_aperture_sizes --
 * confirm this is intended.  The object has external linkage (no 'static').
 */
239 struct agp_bridge_driver amd_8151_driver = {
240 .owner = THIS_MODULE,
241 .aperture_sizes = amd_8151_sizes,
242 .size_type = U32_APER_SIZE,
243 .num_aperture_sizes = 7,
244 .configure = amd_8151_configure,
245 .fetch_size = amd64_fetch_size,
246 .cleanup = amd64_cleanup,
247 .tlb_flush = amd64_tlbflush,
248 .mask_memory = agp_generic_mask_memory,
250 .agp_enable = agp_generic_enable,
251 .cache_flush = global_cache_flush,
252 .create_gatt_table = agp_generic_create_gatt_table,
253 .free_gatt_table = agp_generic_free_gatt_table,
254 .insert_memory = amd64_insert_memory,
255 .remove_memory = agp_generic_remove_memory,
256 .alloc_by_type = agp_generic_alloc_by_type,
257 .free_by_type = agp_generic_free_by_type,
258 .agp_alloc_page = agp_generic_alloc_page,
259 .agp_destroy_page = agp_generic_destroy_page,
262 /* Some basic sanity checks for the aperture. */
/*
 * aper: aperture start address; size: aperture length in bytes.
 * Checks, in order: an aperture is present, it is at least 32MB, it ends
 * below 4GB, none of its valid page frames is non-reserved memory, and the
 * range can be claimed with request_mem_region() (skipped when
 * aperture_resource is already set).
 * fix_northbridge() treats a non-zero return as "aperture usable".
 * NOTE(review): the condition for "No aperture" and every return statement
 * are missing from this excerpt.
 */
263 static int __devinit aperture_valid(u64 aper, u32 size)
267 printk(KERN_ERR PFX "No aperture\n");
270 if (size < 32*1024*1024) {
/* NOTE(review): size is u32 but is printed with %d. */
271 printk(KERN_ERR PFX "Aperture too small (%d MB)\n", size>>20);
274 if (aper + size > 0xffffffff) {
275 printk(KERN_ERR PFX "Aperture out of bounds\n");
/* Walk every page frame covered by the aperture. */
278 pfn = aper >> PAGE_SHIFT;
279 for (c = 0; c < size/PAGE_SIZE; c++) {
280 if (!pfn_valid(pfn + c))
282 if (!PageReserved(pfn_to_page(pfn + c))) {
283 printk(KERN_ERR PFX "Aperture pointing to RAM\n");
288 /* Request the Aperture. This catches cases when someone else
289 already put a mapping in there - happens with some very broken BIOS
291 Maybe better to use pci_assign_resource/pci_enable_device instead of
292 trusting the bridges? */
293 if (!aperture_resource &&
294 !(aperture_resource = request_mem_region(aper, size, "aperture"))) {
295 printk(KERN_ERR PFX "Aperture conflicts with PCI mapping.\n");
302 * W*s-centric BIOSes sometimes only set up the aperture in the AGP
303 * bridge, not the northbridge. On AMD64 this is handled early
304 * in aperture.c, but when GART_IOMMU is not enabled or we run
305 * on a 32bit kernel this needs to be redone.
306 * Unfortunately it is impossible to fix the aperture here because it's too late
307 * to allocate that much memory. But at least error out cleanly instead of
/*
 * Make sure northbridge 'nb' has a usable aperture.  First the values already
 * in the northbridge (config offsets 0x90/0x94) are validated; if they are
 * not usable, size and base are read from the AGP bridge 'agp' and, once
 * validated, copied into the northbridge.
 * cache_nbs() treats a negative return as failure.
 * NOTE(review): the remaining parameters, several local declarations and all
 * return statements are missing from this excerpt.
 */
310 static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
313 u32 aper_low, aper_hi;
316 u32 nb_order, nb_base;
/* Aperture order sits in bits 3..1 of register 0x90; size is 32MB << order. */
319 pci_read_config_dword(nb, 0x90, &nb_order);
320 nb_order = (nb_order >> 1) & 7;
321 pci_read_config_dword(nb, 0x94, &nb_base);
322 nb_aper = nb_base << 25;
323 if (aperture_valid(nb_aper, (32*1024*1024)<<nb_order)) {
327 /* Northbridge seems to contain crap. Try the AGP bridge. */
329 pci_read_config_word(agp, cap+0x14, &apsize);
330 if (apsize == 0xffff)
334 /* Some BIOS use weird encodings not in the AGPv3 table. */
/* Each set bit in apsize lowers the order by one; more than 7 set bits makes it negative. */
337 order = 7 - hweight16(apsize);
339 pci_read_config_dword(agp, 0x10, &aper_low);
340 pci_read_config_dword(agp, 0x14, &aper_hi);
341 aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32);
/*
 * NOTE(review): '32 << order' is evaluated here before the 'order < 0' test
 * on the next line, so a negative order reaches a shift first.
 */
342 printk(KERN_INFO PFX "Aperture from AGP @ %Lx size %u MB\n", aper, 32 << order);
343 if (order < 0 || !aperture_valid(aper, (32*1024*1024)<<order))
/* Mirror the validated AGP bridge settings into the northbridge. */
346 pci_write_config_dword(nb, 0x90, order << 1);
347 pci_write_config_dword(nb, 0x94, aper >> 25);
/*
 * Find every AMD device with ID 0x1103, run fix_northbridge() on each and
 * store the usable ones in hammers[].
 * Returns -1 when no northbridge was recorded, 0 otherwise (on the path that
 * reaches the final return).
 * NOTE(review): the declaration of 'i', the statements following the error
 * messages and any update of nr_garts are missing from this excerpt.
 */
352 static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
354 struct pci_dev *loop_dev = NULL;
357 /* cache pci_devs of northbridges. */
358 while ((loop_dev = pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev))
/* hammers[] is full. */
360 if (i == MAX_HAMMER_GARTS) {
361 printk(KERN_ERR PFX "Too many northbridges for AGP\n");
364 if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) {
365 printk(KERN_ERR PFX "No usable aperture found.\n");
367 /* should port this to i386 */
368 printk(KERN_ERR PFX "Consider rebooting with iommu=memaper=2 to get a good aperture.\n");
372 hammers[i++] = loop_dev;
375 return i == 0 ? -1 : 0;
378 /* Handle AMD 8151 quirks */
/*
 * Log the stepping of the AMD 8151 bridge 'pdev' and, for affected chips,
 * override the AGP version recorded in 'bridge' to 3.0.
 * NOTE(review): the switch header, the condition guarding the version
 * override and the local declarations are missing from this excerpt.
 */
379 static void __devinit amd8151_init(struct pci_dev *pdev, struct agp_bridge_data *bridge)
384 pci_read_config_byte(pdev, PCI_REVISION_ID, &rev_id);
/* Translate known revision IDs into stepping names; anything else prints "??". */
386 case 0x01: revstring="A0"; break;
387 case 0x02: revstring="A1"; break;
388 case 0x11: revstring="B0"; break;
389 case 0x12: revstring="B1"; break;
390 case 0x13: revstring="B2"; break;
391 case 0x14: revstring="B3"; break;
392 default: revstring="??"; break;
395 printk (KERN_INFO PFX "Detected AMD 8151 AGP Bridge rev %s\n", revstring);
398 * Work around errata.
399 * Chips before B2 stepping incorrectly reporting v3.5
402 printk (KERN_INFO PFX "Correcting AGP revision (reports 3.5, is really 3.0)\n");
403 bridge->major_version = 3;
404 bridge->minor_version = 0;
/*
 * Aperture sizes supported by the nForce3 shadow registers.
 * nforce3_agp_init() looks up the entry whose .size equals the size reported
 * by amd64_fetch_size() and ORs its .size_value into NVIDIA_X86_64_1_APSIZE.
 */
408 static struct aper_size_info_32 nforce3_sizes[5] =
410 {512, 131072, 7, 0x00000000 },
411 {256, 65536, 6, 0x00000008 },
412 {128, 32768, 5, 0x0000000C },
413 {64, 16384, 4, 0x0000000E },
414 {32, 8192, 3, 0x0000000F }
417 /* Handle shadow device of the Nvidia NForce3 */
418 /* CHECK-ME original 2.4 version set up some IORRs. Check if that is needed. */
/*
 * Copy the aperture size and base configured in the first northbridge
 * (hammers[0]) into the nForce3 registers: the size into the secondary
 * device at slot 11 function 0 on pdev's bus, the base into pdev's BAR and
 * into both base/limit pairs of the secondary device.
 * agp_amd64_probe() uses the return value to decide whether to drop the
 * bridge.
 * NOTE(review): the NULL check on dev1, the loop 'break', the statements
 * that modify 'tmp' and 'apbar' before they are written back, and all return
 * statements are missing from this excerpt.
 */
419 static int __devinit nforce3_agp_init(struct pci_dev *pdev)
421 u32 tmp, apbase, apbar, aplimit;
422 struct pci_dev *dev1;
424 unsigned size = amd64_fetch_size();
426 printk(KERN_INFO PFX "Setting up Nforce3 AGP.\n");
428 dev1 = pci_find_slot((unsigned int)pdev->bus->number, PCI_DEVFN(11, 0));
430 printk(KERN_INFO PFX "agpgart: Detected an NVIDIA "
431 "nForce3 chipset, but could not find "
432 "the secondary device.\n");
/* Find the table entry for the current aperture size. */
436 for (i = 0; i < ARRAY_SIZE(nforce3_sizes); i++)
437 if (nforce3_sizes[i].size == size)
440 if (i == ARRAY_SIZE(nforce3_sizes)) {
/* NOTE(review): size is unsigned but is printed with %d. */
441 printk(KERN_INFO PFX "No NForce3 size found for %d\n", size);
445 pci_read_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, &tmp);
447 tmp |= nforce3_sizes[i].size_value;
448 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
450 /* shadow x86-64 registers into NVIDIA registers */
451 pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase);
453 /* if x86-64 aperture base is beyond 4G, exit here */
/* Non-zero when the register value shifted left by 25 would not fit in 32 bits. */
454 if ( (apbase & 0x7fff) >> (32 - 25) )
457 apbase = (apbase & 0x7fff) << 25;
459 pci_read_config_dword(pdev, NVIDIA_X86_64_0_APBASE, &apbar);
/* Drop the address bits of the BAR, keeping only its low flag bits. */
460 apbar &= ~PCI_BASE_ADDRESS_MEM_MASK;
462 pci_write_config_dword(pdev, NVIDIA_X86_64_0_APBASE, apbar);
/* Inclusive last byte of the aperture; size is in MB. */
464 aplimit = apbase + (size * 1024 * 1024) - 1;
465 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APBASE1, apbase);
466 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APLIMIT1, aplimit);
467 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APBASE2, apbase);
468 pci_write_config_dword(dev1, NVIDIA_X86_64_1_APLIMIT2, aplimit);
/*
 * PCI probe callback (also called directly, with ent == NULL, from
 * agp_amd64_init()).  Locates the AGP capability of 'pdev', allocates a
 * bridge, applies the AMD 8151 quirks when applicable, records the
 * northbridges via cache_nbs(), runs the nForce3 setup for NVIDIA bridges
 * and finally registers the bridge with agp_add_bridge(), whose result is
 * returned.
 * NOTE(review): the checks on cap_ptr and on the allocation result, the
 * 'else' structure around the two detection messages and the error returns
 * are missing from this excerpt; 'ent' is not used in the visible lines.
 */
473 static int __devinit agp_amd64_probe(struct pci_dev *pdev,
474 const struct pci_device_id *ent)
476 struct agp_bridge_data *bridge;
479 cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP);
483 /* Could check for AGPv3 here */
485 bridge = agp_alloc_bridge();
489 if (pdev->vendor == PCI_VENDOR_ID_AMD &&
490 pdev->device == PCI_DEVICE_ID_AMD_8151_0) {
491 amd8151_init(pdev, bridge);
493 printk(KERN_INFO PFX "Detected AGP bridge %x\n", pdev->devfn);
496 bridge->driver = &amd_8151_driver;
498 bridge->capndx = cap_ptr;
500 /* Fill in the mode register */
501 pci_read_config_dword(pdev, bridge->capndx+PCI_AGP_STATUS, &bridge->mode);
/* No usable northbridge: give the bridge back. */
503 if (cache_nbs(pdev, cap_ptr) == -1) {
504 agp_put_bridge(bridge);
508 if (pdev->vendor == PCI_VENDOR_ID_NVIDIA) {
509 int ret = nforce3_agp_init(pdev);
511 agp_put_bridge(bridge);
516 pci_set_drvdata(pdev, bridge);
517 return agp_add_bridge(bridge);
/*
 * PCI remove callback: release a memory region, then unregister and drop the
 * bridge stored in the device's driver data by agp_amd64_probe().
 * NOTE(review): the released region starts at the physical address of
 * gatt_table_real and its length is the .size field of an
 * amd64_aperture_sizes entry, whereas the only request_mem_region() visible
 * in this file (aperture_valid()) claims (aper, size in bytes) -- confirm
 * that start and length match what was actually requested.
 */
520 static void __devexit agp_amd64_remove(struct pci_dev *pdev)
522 struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
524 release_mem_region(virt_to_phys(bridge->gatt_table_real),
525 amd64_aperture_sizes[bridge->aperture_size_idx].size);
526 agp_remove_bridge(bridge);
527 agp_put_bridge(bridge);
/*
 * PCI IDs this driver binds to.  Every entry matches the host-bridge class
 * with a specific vendor/device and any subsystem IDs.
 * NOTE(review): the braces, class masks, most per-entry comments and the
 * terminating entry are missing from this excerpt.  Two visible entries use
 * PCI_DEVICE_ID_VIA_8380_0; confirm whether the second was meant to be a
 * different device.
 */
530 static struct pci_device_id agp_amd64_pci_table[] = {
532 .class = (PCI_CLASS_BRIDGE_HOST << 8),
534 .vendor = PCI_VENDOR_ID_AMD,
535 .device = PCI_DEVICE_ID_AMD_8151_0,
536 .subvendor = PCI_ANY_ID,
537 .subdevice = PCI_ANY_ID,
541 .class = (PCI_CLASS_BRIDGE_HOST << 8),
543 .vendor = PCI_VENDOR_ID_VIA,
544 .device = PCI_DEVICE_ID_VIA_K8T800PRO_0,
545 .subvendor = PCI_ANY_ID,
546 .subdevice = PCI_ANY_ID,
550 .class = (PCI_CLASS_BRIDGE_HOST << 8),
552 .vendor = PCI_VENDOR_ID_VIA,
553 .device = PCI_DEVICE_ID_VIA_8385_0,
554 .subvendor = PCI_ANY_ID,
555 .subdevice = PCI_ANY_ID,
557 /* VIA K8M800 / K8N800 */
559 .class = (PCI_CLASS_BRIDGE_HOST << 8),
561 .vendor = PCI_VENDOR_ID_VIA,
562 .device = PCI_DEVICE_ID_VIA_8380_0,
563 .subvendor = PCI_ANY_ID,
564 .subdevice = PCI_ANY_ID,
567 .class = (PCI_CLASS_BRIDGE_HOST << 8),
569 .vendor = PCI_VENDOR_ID_VIA,
570 .device = PCI_DEVICE_ID_VIA_8380_0,
571 .subvendor = PCI_ANY_ID,
572 .subdevice = PCI_ANY_ID,
576 .class = (PCI_CLASS_BRIDGE_HOST << 8),
578 .vendor = PCI_VENDOR_ID_NVIDIA,
579 .device = PCI_DEVICE_ID_NVIDIA_NFORCE3,
580 .subvendor = PCI_ANY_ID,
581 .subdevice = PCI_ANY_ID,
584 .class = (PCI_CLASS_BRIDGE_HOST << 8),
586 .vendor = PCI_VENDOR_ID_NVIDIA,
587 .device = PCI_DEVICE_ID_NVIDIA_NFORCE3S,
588 .subvendor = PCI_ANY_ID,
589 .subdevice = PCI_ANY_ID,
593 .class = (PCI_CLASS_BRIDGE_HOST << 8),
595 .vendor = PCI_VENDOR_ID_SI,
596 .device = PCI_DEVICE_ID_SI_755,
597 .subvendor = PCI_ANY_ID,
598 .subdevice = PCI_ANY_ID,
/* Export the table so the module can be matched to these devices. */
603 MODULE_DEVICE_TABLE(pci, agp_amd64_pci_table);
/*
 * PCI driver descriptor: binds the ID table above to the probe/remove
 * callbacks.  Registered in agp_amd64_init() and unregistered in
 * agp_amd64_cleanup().
 */
605 static struct pci_driver agp_amd64_pci_driver = {
606 .name = "agpgart-amd64",
607 .id_table = agp_amd64_pci_table,
608 .probe = agp_amd64_probe,
609 .remove = agp_amd64_remove,
613 /* Not static due to IOMMU code calling it early. */
/*
 * Module entry point.  Registers the PCI driver; when pci_module_init()
 * returns a positive value and neither agp_try_unsupported nor
 * agp_try_unsupported_boot is set, it only prints hints.  Otherwise it
 * checks that an AMD northbridge (device 0x1103) exists and calls
 * agp_amd64_probe() directly on PCI devices that expose an AGP capability.
 * NOTE(review): local declarations, the conditional structure around the two
 * "You can ..." hints, the handling after a successful probe and all return
 * statements are missing from this excerpt; agp_try_unsupported_boot is not
 * declared in the visible lines.
 */
614 int __init agp_amd64_init(void)
619 if (pci_module_init(&agp_amd64_pci_driver) > 0) {
621 if (!agp_try_unsupported && !agp_try_unsupported_boot) {
622 printk(KERN_INFO PFX "No supported AGP bridge found.\n");
624 printk(KERN_INFO PFX "You can try agp_try_unsupported=1\n");
626 printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n");
631 /* First check that we have at least one AMD64 NB */
632 if (!pci_find_device(PCI_VENDOR_ID_AMD, 0x1103, NULL))
635 /* Look for any AGP bridge */
638 while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev))) {
/* Devices without an AGP capability are not candidates. */
639 if (!pci_find_capability(dev, PCI_CAP_ID_AGP))
641 /* Only one bridge supported right now */
642 if (agp_amd64_probe(dev, NULL) == 0) {
/*
 * Module exit: give back the aperture region claimed in aperture_valid(), if
 * any, and unregister the PCI driver.
 * NOTE(review): the region was obtained with request_mem_region() but is
 * returned with release_resource(); confirm this pairing is intended (the
 * usual counterpart is release_mem_region()).
 */
651 static void __exit agp_amd64_cleanup(void)
653 if (aperture_resource)
654 release_resource(aperture_resource);
655 pci_unregister_driver(&agp_amd64_pci_driver);
658 /* On AMD64 the PCI driver needs to initialize this driver early
659 for the IOMMU, so it has to be called via a backdoor. */
/*
 * The init/exit hooks are registered only when CONFIG_GART_IOMMU is not
 * defined.
 * NOTE(review): the matching #endif is missing from this excerpt, so it is
 * unclear from here whether the MODULE_* lines below are inside the
 * conditional.
 */
660 #ifndef CONFIG_GART_IOMMU
661 module_init(agp_amd64_init);
662 module_exit(agp_amd64_cleanup);
665 MODULE_AUTHOR("Dave Jones <davej@codemonkey.org.uk>, Andi Kleen");
/* Exposes agp_try_unsupported as a module parameter. */
666 MODULE_PARM(agp_try_unsupported, "1i");
667 MODULE_LICENSE("GPL and additional rights");