From c9aac115683ef9daf506244046d518ae3bfbfe0d Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=2E=C3=87a=C4=9Flar=20Onur?= Date: Thu, 4 Feb 2010 22:14:38 +0000 Subject: [PATCH] merge --- kernel-2.6.spec | 4 + linux-2.6-150-cciss-allow-kexec-to-work.patch | 258 ++++++++++++++++++ 2 files changed, 262 insertions(+) create mode 100644 linux-2.6-150-cciss-allow-kexec-to-work.patch diff --git a/kernel-2.6.spec b/kernel-2.6.spec index 1906557a6..7213ad6e2 100644 --- a/kernel-2.6.spec +++ b/kernel-2.6.spec @@ -154,6 +154,8 @@ Patch040: linux-2.6-040-i_mutex-check.patch # These are patches picked up from Fedora/RHEL Patch100: linux-2.6-100-build-nonintconfig.patch +Patch150: linux-2.6-150-cciss-allow-kexec-to-work.patch + # Linux-VServer Patch200: patch-%{rpmversion}-vs%{vsversion}.diff Patch210: linux-2.6-210-vserver-cpu-sched.patch @@ -376,6 +378,8 @@ KERNEL_PREVIOUS=vanilla %ApplyPatch 100 +%ApplyPatch 150 + %ApplyPatch 200 %ApplyPatch 210 %ApplyPatch 220 diff --git a/linux-2.6-150-cciss-allow-kexec-to-work.patch b/linux-2.6-150-cciss-allow-kexec-to-work.patch new file mode 100644 index 000000000..37b8884f7 --- /dev/null +++ b/linux-2.6-150-cciss-allow-kexec-to-work.patch @@ -0,0 +1,258 @@ +commit 82eb03cfd862a65363fa2826de0dbd5474cfe5e2 +Author: Chip Coldwell +Date: Mon Feb 16 13:11:56 2009 +0100 + + cciss: PCI power management reset for kexec + + The kexec kernel resets the CCISS hardware in three steps: + + 1. Use PCI power management states to reset the controller in the + kexec kernel. + + 2. Clear the MSI/MSI-X bits in PCI configuration space so that MSI + initialization in the kexec kernel doesn't fail. + + 3. Use the CCISS "No-op" message to determine when the controller + firmware has recovered from the PCI PM reset. + + [akpm@linux-foundation.org: cleanups] + Signed-off-by: Mike Miller + Cc: Randy Dunlap + Signed-off-by: Andrew Morton + Signed-off-by: Jens Axboe + +diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c +index 132f76b..fecf524 100644 +--- a/drivers/block/cciss.c ++++ b/drivers/block/cciss.c +@@ -3257,6 +3257,205 @@ static void free_hba(int i) + kfree(p); + } + ++/* Send a message CDB to the firmware. */ ++static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type) ++{ ++ typedef struct { ++ CommandListHeader_struct CommandHeader; ++ RequestBlock_struct Request; ++ ErrDescriptor_struct ErrorDescriptor; ++ } Command; ++ static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct); ++ Command *cmd; ++ dma_addr_t paddr64; ++ uint32_t paddr32, tag; ++ void __iomem *vaddr; ++ int i, err; ++ ++ vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); ++ if (vaddr == NULL) ++ return -ENOMEM; ++ ++ /* The Inbound Post Queue only accepts 32-bit physical addresses for the ++ CCISS commands, so they must be allocated from the lower 4GiB of ++ memory. */ ++ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); ++ if (err) { ++ iounmap(vaddr); ++ return -ENOMEM; ++ } ++ ++ cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64); ++ if (cmd == NULL) { ++ iounmap(vaddr); ++ return -ENOMEM; ++ } ++ ++ /* This must fit, because of the 32-bit consistent DMA mask. Also, ++ although there's no guarantee, we assume that the address is at ++ least 4-byte aligned (most likely, it's page-aligned). */ ++ paddr32 = paddr64; ++ ++ cmd->CommandHeader.ReplyQueue = 0; ++ cmd->CommandHeader.SGList = 0; ++ cmd->CommandHeader.SGTotal = 0; ++ cmd->CommandHeader.Tag.lower = paddr32; ++ cmd->CommandHeader.Tag.upper = 0; ++ memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8); ++ ++ cmd->Request.CDBLen = 16; ++ cmd->Request.Type.Type = TYPE_MSG; ++ cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE; ++ cmd->Request.Type.Direction = XFER_NONE; ++ cmd->Request.Timeout = 0; /* Don't time out */ ++ cmd->Request.CDB[0] = opcode; ++ cmd->Request.CDB[1] = type; ++ memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */ ++ ++ cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command); ++ cmd->ErrorDescriptor.Addr.upper = 0; ++ cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct); ++ ++ writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET); ++ ++ for (i = 0; i < 10; i++) { ++ tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); ++ if ((tag & ~3) == paddr32) ++ break; ++ schedule_timeout_uninterruptible(HZ); ++ } ++ ++ iounmap(vaddr); ++ ++ /* we leak the DMA buffer here ... no choice since the controller could ++ still complete the command. */ ++ if (i == 10) { ++ printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n", ++ opcode, type); ++ return -ETIMEDOUT; ++ } ++ ++ pci_free_consistent(pdev, cmd_sz, cmd, paddr64); ++ ++ if (tag & 2) { ++ printk(KERN_ERR "cciss: controller message %02x:%02x failed\n", ++ opcode, type); ++ return -EIO; ++ } ++ ++ printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n", ++ opcode, type); ++ return 0; ++} ++ ++#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) ++#define cciss_noop(p) cciss_message(p, 3, 0) ++ ++static __devinit int cciss_reset_msi(struct pci_dev *pdev) ++{ ++/* the #defines are stolen from drivers/pci/msi.h. */ ++#define msi_control_reg(base) (base + PCI_MSI_FLAGS) ++#define PCI_MSIX_FLAGS_ENABLE (1 << 15) ++ ++ int pos; ++ u16 control = 0; ++ ++ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); ++ if (pos) { ++ pci_read_config_word(pdev, msi_control_reg(pos), &control); ++ if (control & PCI_MSI_FLAGS_ENABLE) { ++ printk(KERN_INFO "cciss: resetting MSI\n"); ++ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE); ++ } ++ } ++ ++ pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); ++ if (pos) { ++ pci_read_config_word(pdev, msi_control_reg(pos), &control); ++ if (control & PCI_MSIX_FLAGS_ENABLE) { ++ printk(KERN_INFO "cciss: resetting MSI-X\n"); ++ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE); ++ } ++ } ++ ++ return 0; ++} ++ ++/* This does a hard reset of the controller using PCI power management ++ * states. */ ++static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev) ++{ ++ u16 pmcsr, saved_config_space[32]; ++ int i, pos; ++ ++ printk(KERN_INFO "cciss: using PCI PM to reset controller\n"); ++ ++ /* This is very nearly the same thing as ++ ++ pci_save_state(pci_dev); ++ pci_set_power_state(pci_dev, PCI_D3hot); ++ pci_set_power_state(pci_dev, PCI_D0); ++ pci_restore_state(pci_dev); ++ ++ but we can't use these nice canned kernel routines on ++ kexec, because they also check the MSI/MSI-X state in PCI ++ configuration space and do the wrong thing when it is ++ set/cleared. Also, the pci_save/restore_state functions ++ violate the ordering requirements for restoring the ++ configuration space from the CCISS document (see the ++ comment below). So we roll our own .... */ ++ ++ for (i = 0; i < 32; i++) ++ pci_read_config_word(pdev, 2*i, &saved_config_space[i]); ++ ++ pos = pci_find_capability(pdev, PCI_CAP_ID_PM); ++ if (pos == 0) { ++ printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n"); ++ return -ENODEV; ++ } ++ ++ /* Quoting from the Open CISS Specification: "The Power ++ * Management Control/Status Register (CSR) controls the power ++ * state of the device. The normal operating state is D0, ++ * CSR=00h. The software off state is D3, CSR=03h. To reset ++ * the controller, place the interface device in D3 then to ++ * D0, this causes a secondary PCI reset which will reset the ++ * controller." */ ++ ++ /* enter the D3hot power management state */ ++ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); ++ pmcsr &= ~PCI_PM_CTRL_STATE_MASK; ++ pmcsr |= PCI_D3hot; ++ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(HZ >> 1); ++ ++ /* enter the D0 power management state */ ++ pmcsr &= ~PCI_PM_CTRL_STATE_MASK; ++ pmcsr |= PCI_D0; ++ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule_timeout(HZ >> 1); ++ ++ /* Restore the PCI configuration space. The Open CISS ++ * Specification says, "Restore the PCI Configuration ++ * Registers, offsets 00h through 60h. It is important to ++ * restore the command register, 16-bits at offset 04h, ++ * last. Do not restore the configuration status register, ++ * 16-bits at offset 06h." Note that the offset is 2*i. */ ++ for (i = 0; i < 32; i++) { ++ if (i == 2 || i == 3) ++ continue; ++ pci_write_config_word(pdev, 2*i, saved_config_space[i]); ++ } ++ wmb(); ++ pci_write_config_word(pdev, 4, saved_config_space[2]); ++ ++ return 0; ++} ++ + /* + * This is it. Find all the controllers and register them. I really hate + * stealing all these major device numbers. +@@ -3270,6 +3469,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, + int rc; + int dac; + ++ if (reset_devices) { ++ /* Reset the controller with a PCI power-cycle */ ++ if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev)) ++ return -ENODEV; ++ ++ /* Some devices (notably the HP Smart Array 5i Controller) ++ need a little pause here */ ++ schedule_timeout_uninterruptible(30*HZ); ++ ++ /* Now try to get the controller to respond to a no-op */ ++ for (i=0; i<12; i++) { ++ if (cciss_noop(pdev) == 0) ++ break; ++ else ++ printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); ++ } ++ } ++ + i = alloc_cciss_hba(); + if (i < 0) + return -1; -- 2.43.0