From: Aaron Klingaman Date: Mon, 4 Apr 2005 18:13:30 +0000 (+0000) Subject: kexec applied from X-Git-Tag: before-fedora-2_6_18-1_2239_FC5-vs2_0_2_2-rc6-merge~225 X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=d8356c39af29ce153459801d0b94d3746597629d;p=linux-2.6.git kexec applied from http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.10/2.6.10-mm3/ --- diff --git a/Documentation/devices.txt b/Documentation/devices.txt index f115145e5..60ce4ae9d 100644 --- a/Documentation/devices.txt +++ b/Documentation/devices.txt @@ -100,6 +100,7 @@ Your cooperation is appreciated. 9 = /dev/urandom Faster, less secure random number gen. 10 = /dev/aio Asyncronous I/O notification interface 11 = /dev/kmsg Writes to this come out as printk's + 12 = /dev/oldmem Access to kexec-ed crash dump 1 block RAM disk 0 = /dev/ram0 First RAM disk 1 = /dev/ram1 Second RAM disk diff --git a/Documentation/kdump.txt b/Documentation/kdump.txt new file mode 100644 index 000000000..8fc3d68ae --- /dev/null +++ b/Documentation/kdump.txt @@ -0,0 +1,105 @@ +Documentation for kdump - the kexec based crash dumping solution +================================================================ + +DESIGN +====== + +We use kexec to reboot to a second kernel whenever a dump needs to be taken. +This second kernel is booted with very little memory (configurable +at compile time). The first kernel reserves the section of memory that the +second kernel uses. This ensures that on-going DMA from the first kernel +does not corrupt the second kernel. The first 640k of physical memory is +needed irrespective of where the kernel loads at. Hence, this region is +backed up before reboot. + +In the second kernel, "old memory" can be accessed in two ways. The +first one is through a device interface. We can create a /dev/oldmem or +whatever and write out the memory in raw format. The second interface is +through /proc/vmcore. 
This exports the dump as an ELF format file which +can be written out using any file copy command (cp, scp, etc). Further, gdb +can be used to perform some minimal debugging on the dump file. Both these +methods ensure that there is correct ordering of the dump pages (corresponding +to the first 640k that has been relocated). + +SETUP +===== + +1) Obtain the appropriate -mm tree patch and apply it on to the vanilla + kernel tree. + +2) Two kernels need to be built in order to get this feature working. + + For the first kernel, choose the default values for the following options. + + a) Physical address where the kernel is loaded + b) kexec system call + c) kernel crash dumps + + All the options are under "Processor type and features" + + For the second kernel, change (a) to 16MB. If you want to choose another + value here, ensure "location from where the crash dumping kernel will boot + (MB)" under (c) reflects the same value. + + Also ensure you have CONFIG_HIGHMEM on. + +3) Boot into the first kernel. You are now ready to try out kexec based crash + dumps. + +4) Load the second kernel to be booted using + + kexec -p <second-kernel> --args-linux --append="root=<root-dev> dump + init 1 memmap=exactmap memmap=640k@0 memmap=32M@16M" + + Note that <second-kernel> has to be a vmlinux image. bzImage will not + work, as of now. + +5) Enable kexec based dumping by + + echo 1 > /proc/kexec-dump + + If this is not set, the system will not do a kexec reboot in the event + of a panic. + +6) System reboots into the second kernel when a panic occurs. + You could write a module to call panic, for testing purposes. + +7) Write out the dump file using + + cp /proc/vmcore <dump-file> + +You can also access the dump as a device for a linear/raw view. To do this, +you will need the kd-oldmem-<version>.patch built into the kernel. To create +the device, type + + mknod /dev/oldmem c 1 12 + +Use "dd" with suitable options for count, bs and skip to access specific +portions of the dump. 
+ +ANALYSIS +======== + +You can run gdb on the dump file copied out of /proc/vmcore. Use vmlinux built +with -g and run + + gdb vmlinux <dump-file> + +Stack trace for the task on processor 0, register display, memory display +work fine. + +TODO +==== + +1) Provide a kernel-pages only view for the dump. This could possibly turn up + as /proc/vmcore-kern. +2) Provide register contents of all processors (similar to what multi-threaded + core dumps do). +3) Modify "crash" to make it recognize this dump. +4) Make the i386 kernel boot from any location so we can run the second kernel + from the reserved location instead of the current approach. + +CONTACT +======= + +Hariprasad Nellitheertha - hari at in dot ibm dot com diff --git a/MAINTAINERS b/MAINTAINERS index 66275d498..5f81698b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1259,6 +1259,17 @@ M: rml@novell.com L: linux-kernel@vger.kernel.org S: Maintained +KEXEC +P: Eric Biederman +P: Randy Dunlap +M: ebiederm@xmission.com +M: rddunlap@osdl.org +W: http://www.xmission.com/~ebiederm/files/kexec/ +W: http://developer.osdl.org/rddunlap/kexec/ +L: linux-kernel@vger.kernel.org +L: fastboot@osdl.org +S: Maintained + LANMEDIA WAN CARD DRIVER P: Andrew Stanley-Jones M: asj@lanmedia.com diff --git a/arch/h8300/kernel/ints.c b/arch/h8300/kernel/ints.c index edb3c4170..0b9ddba3d 100644 --- a/arch/h8300/kernel/ints.c +++ b/arch/h8300/kernel/ints.c @@ -114,7 +114,7 @@ void __init init_IRQ(void) } } interrupt_redirect_table = ramvec; -#ifdef DUMP_VECTOR +#ifdef CRASH_DUMP_VECTOR ramvec_p = ramvec; for (i = 0; i < NR_IRQS; i++) { if ((i % 8) == 0) diff --git a/arch/h8300/platform/h8s/ints.c b/arch/h8300/platform/h8s/ints.c index 5441cdd12..6b27e5ac5 100644 --- a/arch/h8300/platform/h8s/ints.c +++ b/arch/h8300/platform/h8s/ints.c @@ -134,7 +134,7 @@ void __init init_IRQ(void) ramvec[TRAP0_VEC] = VECTOR(system_call); ramvec[TRAP3_VEC] = break_vec; interrupt_redirect_table = ramvec; -#ifdef DUMP_VECTOR +#ifdef CRASH_DUMP_VECTOR ramvec_p = 
ramvec; for (i = 0; i < NR_IRQS; i++) { if ((i % 8) == 0) diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1e4f78c0a..1ed5b3831 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -922,6 +922,53 @@ config REGPARM generate incorrect output with certain kernel constructs when -mregparm=3 is used. +config KERN_PHYS_OFFSET + int "Physical address where the kernel is loaded (1-112)MB" + range 1 112 + default "1" + help + This gives the physical address where the kernel is loaded. + Primarily used in the case of kexec on panic where the + recovery kernel needs to run at a different address than + the panic-ed kernel. + +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on KEXEC + help + Generate crash dump using kexec. + +config BACKUP_BASE + int "location from where the crash dumping kernel will boot (MB)" + depends on CRASH_DUMP + default 16 + help + This is the location where the second kernel will boot from. + +config BACKUP_SIZE + int "Size of memory used by the crash dumping kernel (MB)" + depends on CRASH_DUMP + range 16 64 + default 32 + help + The size of the second kernel's memory. 
endmenu diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index c5e80b69e..4f41af3a5 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -74,7 +74,7 @@ startup_32: popl %esi # discard address popl %esi # real mode pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $0x100000 + ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET /* * We come here, if we were loaded high. @@ -99,7 +99,7 @@ startup_32: popl %ecx # lcount popl %edx # high_buffer_start popl %eax # hcount - movl $0x100000,%edi + movl $KERN_PHYS_OFFSET,%edi cli # make sure we don't get interrupted ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine @@ -124,5 +124,5 @@ move_routine_start: movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $0x100000 + ljmp $(__BOOT_CS), $KERN_PHYS_OFFSET move_routine_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 874568330..9805b3730 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -14,6 +14,7 @@ #include #include