From d8356c39af29ce153459801d0b94d3746597629d Mon Sep 17 00:00:00 2001 From: Aaron Klingaman Date: Mon, 4 Apr 2005 18:13:30 +0000 Subject: [PATCH] kexec applied from http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.10/2.6.10-mm3/ --- Documentation/devices.txt | 1 + Documentation/kdump.txt | 105 ++++ MAINTAINERS | 11 + arch/h8300/kernel/ints.c | 2 +- arch/h8300/platform/h8s/ints.c | 2 +- arch/i386/Kconfig | 47 ++ arch/i386/boot/compressed/head.S | 6 +- arch/i386/boot/compressed/misc.c | 7 +- arch/i386/kernel/Makefile | 2 + arch/i386/kernel/apic.c | 30 + arch/i386/kernel/crash_dump.c | 105 ++++ arch/i386/kernel/entry.S | 2 +- arch/i386/kernel/i386_ksyms.c | 6 +- arch/i386/kernel/i8259.c | 12 + arch/i386/kernel/machine_kexec.c | 233 +++++++ arch/i386/kernel/reboot.c | 82 +-- arch/i386/kernel/relocate_kernel.S | 118 ++++ arch/i386/kernel/setup.c | 13 + arch/i386/kernel/smp.c | 14 +- arch/i386/kernel/vmlinux.lds.S | 59 +- arch/i386/mm/discontig.c | 4 + arch/i386/mm/highmem.c | 19 +- arch/ppc/Kconfig | 20 + arch/ppc/kernel/Makefile | 1 + arch/ppc/kernel/machine_kexec.c | 114 ++++ arch/ppc/kernel/relocate_kernel.S | 135 +++++ arch/x86_64/Kconfig | 17 + arch/x86_64/kernel/Makefile | 1 + arch/x86_64/kernel/apic.c | 30 + arch/x86_64/kernel/e820.c | 2 - arch/x86_64/kernel/i8259.c | 14 + arch/x86_64/kernel/io_apic.c | 34 +- arch/x86_64/kernel/machine_kexec.c | 246 ++++++++ arch/x86_64/kernel/reboot.c | 65 +- arch/x86_64/kernel/relocate_kernel.S | 141 +++++ configs/kernel-2.6.10-i686-planetlab.config | 3 + drivers/char/mem.c | 74 +++ drivers/dump/dump_fmt.c | 4 +- drivers/dump/dump_i386.c | 4 +- drivers/dump/dump_memdev.c | 18 +- drivers/dump/dump_overlay.c | 6 +- drivers/dump/dump_setup.c | 9 +- fs/aio.c | 2 +- fs/proc/Makefile | 1 + fs/proc/kcore.c | 10 +- fs/proc/proc_misc.c | 3 + fs/proc/vmcore.c | 239 ++++++++ include/asm-generic/vmlinux.lds.h | 2 +- include/asm-i386/apicdef.h | 1 + include/asm-i386/crash_dump.h | 82 +++ include/asm-i386/highmem.h | 1 + 
include/asm-i386/kexec.h | 25 + include/asm-i386/mach-default/irq_vectors.h | 2 +- include/asm-i386/smp.h | 1 + include/asm-ppc/kexec.h | 36 ++ include/asm-ppc/machdep.h | 25 + include/asm-x86_64/kexec.h | 25 + include/asm-x86_64/unistd.h | 2 +- include/linux/bootmem.h | 1 + include/linux/crash_dump.h | 34 ++ include/linux/dump.h | 2 +- include/linux/highmem.h | 1 + include/linux/kexec.h | 57 ++ include/linux/reboot.h | 2 + kernel/Makefile | 2 + kernel/crash.c | 117 ++++ kernel/kexec.c | 637 ++++++++++++++++++++ kernel/panic.c | 8 +- kernel/sys.c | 2 + kernel/sys_ni.c | 1 + mm/bootmem.c | 5 + mm/page_alloc.c | 5 +- 72 files changed, 2984 insertions(+), 165 deletions(-) create mode 100644 Documentation/kdump.txt create mode 100644 arch/i386/kernel/crash_dump.c create mode 100644 arch/i386/kernel/machine_kexec.c create mode 100644 arch/i386/kernel/relocate_kernel.S create mode 100644 arch/ppc/kernel/machine_kexec.c create mode 100644 arch/ppc/kernel/relocate_kernel.S create mode 100644 arch/x86_64/kernel/machine_kexec.c create mode 100644 arch/x86_64/kernel/relocate_kernel.S create mode 100644 fs/proc/vmcore.c create mode 100644 include/asm-i386/crash_dump.h create mode 100644 include/asm-i386/kexec.h create mode 100644 include/asm-ppc/kexec.h create mode 100644 include/asm-x86_64/kexec.h create mode 100644 include/linux/crash_dump.h create mode 100644 include/linux/kexec.h create mode 100644 kernel/crash.c create mode 100644 kernel/kexec.c diff --git a/Documentation/devices.txt b/Documentation/devices.txt index f115145e5..60ce4ae9d 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -100,6 +100,7 @@ Your cooperation is appreciated. 9 = /dev/urandom Faster, less secure random number gen. 
10 = /dev/aio Asyncronous I/O notification interface 11 = /dev/kmsg Writes to this come out as printk's + 12 = /dev/oldmem Access to kexec-ed crash dump 1 block RAM disk 0 = /dev/ram0 First RAM disk 1 = /dev/ram1 Second RAM disk diff --git a/Documentation/kdump.txt b/Documentation/kdump.txt new file mode 100644 index 000000000..8fc3d68ae --- /dev/null +++ b/Documentation/kdump.txt @@ -0,0 +1,105 @@ +Documentation for kdump - the kexec based crash dumping solution +================================================================ + +DESIGN +====== + +We use kexec to reboot to a second kernel whenever a dump needs to be taken. +This second kernel is booted with very little memory (configurable +at compile time). The first kernel reserves the section of memory that the +second kernel uses. This ensures that on-going DMA from the first kernel +does not corrupt the second kernel. The first 640k of physical memory is +needed irrespective of where the kernel loads at. Hence, this region is +backed up before reboot. + +In the second kernel, "old memory" can be accessed in two ways. The +first one is through a device interface. We can create a /dev/oldmem or +whatever and write out the memory in raw format. The second interface is +through /proc/vmcore. This exports the dump as an ELF format file which +can be written out using any file copy command (cp, scp, etc). Further, gdb +can be used to perform some minimal debugging on the dump file. Both these +methods ensure that there is correct ordering of the dump pages (corresponding +to the first 640k that has been relocated). + +SETUP +===== + +1) Obtain the appropriate -mm tree patch and apply it on to the vanilla + kernel tree. + +2) Two kernels need to be built in order to get this feature working. + + For the first kernel, choose the default values for the following options. 
+ + a) Physical address where the kernel is loaded + b) kexec system call + c) kernel crash dumps + + All the options are under "Processor type and features" + + For the second kernel, change (a) to 16MB. If you want to choose another + value here, ensure "location from where the crash dumping kernel will boot + (MB)" under (c) reflects the same value. + + Also ensure you have CONFIG_HIGHMEM on. + +3) Boot into the first kernel. You are now ready to try out kexec based crash + dumps. + +4) Load the second kernel to be booted using + + kexec -p <second-kernel> --args-linux --append="root=<root-dev> dump + init 1 memmap=exactmap memmap=640k@0 memmap=32M@16M" + + Note that <second-kernel> has to be a vmlinux image. bzImage will not + work, as of now. + +5) Enable kexec based dumping by + + echo 1 > /proc/kexec-dump + + If this is not set, the system will not do a kexec reboot in the event + of a panic. + +6) System reboots into the second kernel when a panic occurs. + You could write a module to call panic, for testing purposes. + +7) Write out the dump file using + + cp /proc/vmcore <dump-file> + +You can also access the dump as a device for a linear/raw view. To do this, +you will need the kd-oldmem-<version>.patch built into the kernel. To create +the device, type + + mknod /dev/oldmem c 1 12 + +Use "dd" with suitable options for count, bs and skip to access specific +portions of the dump. + +ANALYSIS +======== + +You can run gdb on the dump file copied out of /proc/vmcore. Use vmlinux built +with -g and run + + gdb vmlinux <dump-file> + +Stack trace for the task on processor 0, register display, memory display +work fine. + +TODO +==== + +1) Provide a kernel-pages only view for the dump. This could possibly turn up + as /proc/vmcore-kern. +2) Provide register contents of all processors (similar to what multi-threaded + core dumps do). +3) Modify "crash" to make it recognize this dump. +4) Make the i386 kernel boot from any location so we can run the second kernel + from the reserved location instead of the current approach. 
+ +CONTACT +======= + +Hariprasad Nellitheertha - hari at in dot ibm dot com diff --git a/MAINTAINERS b/MAINTAINERS index 66275d498..5f81698b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1259,6 +1259,17 @@ M: rml@novell.com L: linux-kernel@vger.kernel.org S: Maintained +KEXEC +P: Eric Biederman +P: Randy Dunlap +M: ebiederm@xmission.com +M: rddunlap@osdl.org +W: http://www.xmission.com/~ebiederm/files/kexec/ +W: http://developer.osdl.org/rddunlap/kexec/ +L: linux-kernel@vger.kernel.org +L: fastboot@osdl.org +S: Maintained + LANMEDIA WAN CARD DRIVER P: Andrew Stanley-Jones M: asj@lanmedia.com diff --git a/arch/h8300/kernel/ints.c b/arch/h8300/kernel/ints.c index edb3c4170..0b9ddba3d 100644 --- a/arch/h8300/kernel/ints.c +++ b/arch/h8300/kernel/ints.c @@ -114,7 +114,7 @@ void __init init_IRQ(void) } } interrupt_redirect_table = ramvec; -#ifdef DUMP_VECTOR +#ifdef CRASH_DUMP_VECTOR ramvec_p = ramvec; for (i = 0; i < NR_IRQS; i++) { if ((i % 8) == 0) diff --git a/arch/h8300/platform/h8s/ints.c b/arch/h8300/platform/h8s/ints.c index 5441cdd12..6b27e5ac5 100644 --- a/arch/h8300/platform/h8s/ints.c +++ b/arch/h8300/platform/h8s/ints.c @@ -134,7 +134,7 @@ void __init init_IRQ(void) ramvec[TRAP0_VEC] = VECTOR(system_call); ramvec[TRAP3_VEC] = break_vec; interrupt_redirect_table = ramvec; -#ifdef DUMP_VECTOR +#ifdef CRASH_DUMP_VECTOR ramvec_p = ramvec; for (i = 0; i < NR_IRQS; i++) { if ((i % 8) == 0) diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1e4f78c0a..1ed5b3831 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -922,6 +922,53 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +config KERN_PHYS_OFFSET + int "Physical address where the kernel is loaded (1-112)MB" + range 1 112 + default "1" + help + This gives the physical address where the kernel is loaded. + Primarily used in the case of kexec on panic where the + recovery kernel needs to run at a different address than + the panic-ed kernel. 
+ +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shut down your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shut down, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on KEXEC + help + Generate crash dump using kexec. + +config BACKUP_BASE + int "location from where the crash dumping kernel will boot (MB)" + depends on CRASH_DUMP + default 16 + help + This is the location where the second kernel will boot from. + +config BACKUP_SIZE + int "Size of memory used by the crash dumping kernel (MB)" + depends on CRASH_DUMP + range 16 64 + default 32 + help + The size of the second kernel's memory. endmenu diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index c5e80b69e..4f41af3a5 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -74,7 +74,7 @@ startup_32: popl %esi # discard address popl %esi # real mode pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $0x100000 + ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET /* * We come here, if we were loaded high. 
@@ -99,7 +99,7 @@ startup_32: popl %ecx # lcount popl %edx # high_buffer_start popl %eax # hcount - movl $0x100000,%edi + movl $KERN_PHYS_OFFSET,%edi cli # make sure we don't get interrupted ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine @@ -124,5 +124,5 @@ move_routine_start: movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $0x100000 + ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET move_routine_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 874568330..9805b3730 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -14,6 +14,7 @@ #include #include