Initial revision
author     Mark Huang <mlhuang@cs.princeton.edu>
           Fri, 10 Sep 2004 19:18:37 +0000 (19:18 +0000)
committer  Mark Huang <mlhuang@cs.princeton.edu>
           Fri, 10 Sep 2004 19:18:37 +0000 (19:18 +0000)
21 files changed:
drivers/dump/Makefile [new file with mode: 0644]
drivers/dump/dump_arm.c [new file with mode: 0644]
drivers/dump/dump_blockdev.c [new file with mode: 0644]
drivers/dump/dump_execute.c [new file with mode: 0644]
drivers/dump/dump_filters.c [new file with mode: 0644]
drivers/dump/dump_fmt.c [new file with mode: 0644]
drivers/dump/dump_gzip.c [new file with mode: 0644]
drivers/dump/dump_i386.c [new file with mode: 0644]
drivers/dump/dump_memdev.c [new file with mode: 0644]
drivers/dump/dump_methods.h [new file with mode: 0644]
drivers/dump/dump_netdev.c [new file with mode: 0644]
drivers/dump/dump_overlay.c [new file with mode: 0644]
drivers/dump/dump_ppc64.c [new file with mode: 0644]
drivers/dump/dump_rle.c [new file with mode: 0644]
drivers/dump/dump_scheme.c [new file with mode: 0644]
drivers/dump/dump_setup.c [new file with mode: 0644]
include/asm-i386/dump.h [new file with mode: 0644]
include/linux/dump.h [new file with mode: 0644]
include/linux/dump_netdev.h [new file with mode: 0644]
include/linux/dumpdev.h [new file with mode: 0644]
init/kerntypes.c [new file with mode: 0644]

diff --git a/drivers/dump/Makefile b/drivers/dump/Makefile
new file mode 100644 (file)
index 0000000..deb671f
--- /dev/null
@@ -0,0 +1,16 @@
+#
+# Makefile for the dump device drivers.
+#
+
+dump-y                                 := dump_setup.o dump_fmt.o dump_filters.o dump_scheme.o dump_execute.o
+dump-$(CONFIG_X86)                     += dump_i386.o
+dump-$(CONFIG_ARM)                     += dump_arm.o
+dump-$(CONFIG_PPC64)                    += dump_ppc64.o
+dump-$(CONFIG_CRASH_DUMP_MEMDEV)       += dump_memdev.o dump_overlay.o
+dump-objs                              += $(dump-y)
+
+obj-$(CONFIG_CRASH_DUMP)               += dump.o
+obj-$(CONFIG_CRASH_DUMP_BLOCKDEV)      += dump_blockdev.o
+obj-$(CONFIG_CRASH_DUMP_NETDEV)        += dump_netdev.o
+obj-$(CONFIG_CRASH_DUMP_COMPRESS_RLE)  += dump_rle.o
+obj-$(CONFIG_CRASH_DUMP_COMPRESS_GZIP) += dump_gzip.o
diff --git a/drivers/dump/dump_arm.c b/drivers/dump/dump_arm.c
new file mode 100644 (file)
index 0000000..b1849fa
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Architecture specific (ARM/XScale) functions for Linux crash dumps.
+ *
+ * Created by: Fleming Feng (fleming.feng@intel.com)
+ *
+ * Copyright(C) 2003 Intel Corp. All rights reserved.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * The hooks for dumping the kernel virtual memory to disk are in this
+ * file.  Any time a modification is made to the virtual memory mechanism,
+ * these routines must be changed to use the new mechanisms.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/dump.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <asm/hardirq.h>
+#include <asm/kdebug.h>
+
+static __s32   saved_irq_count;        /* saved preempt_count() flags */
+
+static int alloc_dha_stack(void)
+{
+       int i;
+       void *ptr;
+       
+       if (dump_header_asm.dha_stack[0])
+               return 0;
+
+       ptr = vmalloc(THREAD_SIZE * num_online_cpus());
+       if (!ptr) {
+               printk("vmalloc for dha_stacks failed\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < num_online_cpus(); i++) {
+               dump_header_asm.dha_stack[i] = (u32)((unsigned long)ptr +
+                                                    (i * THREAD_SIZE));
+       }
+
+       return 0;
+}
+
+static int free_dha_stack(void) 
+{
+       if (dump_header_asm.dha_stack[0]){
+               vfree((void*)dump_header_asm.dha_stack[0]);
+               dump_header_asm.dha_stack[0] = 0;
+       }
+       return 0;
+}
+
+void __dump_save_regs(struct pt_regs* dest_regs, const struct pt_regs* regs)
+{
+
+       /* The ARM port stores _dump_regs_t, rather than struct pt_regs,
+        * in dump_header_asm, but this function is declared in the
+        * architecture-independent header include/linux/dump.h. The
+        * block of memory copied here is therefore sizeof(_dump_regs_t)
+        * bytes, which is not equal to sizeof(struct pt_regs).
+        */
+
+       memcpy(dest_regs, regs, sizeof(_dump_regs_t));
+
+}
+
+#ifdef CONFIG_SMP
+/* FIXME: Reserved for possible future use on SMP systems based on
+ * ARM/XScale. No such system exists at present, so these routines
+ * are currently unused.
+ */
+/* save registers on other processor */
+void
+__dump_save_other_cpus(void)
+{
+
+       /* Dummy now! */
+
+       return;
+       
+}
+#else  /* !CONFIG_SMP */
+#define save_other_cpu_state() do { } while (0)
+#endif /* !CONFIG_SMP */
+
+/* 
+ * Kludge - dump from interrupt context is unreliable (Fixme)
+ *
+ * We do this so that softirqs initiated for dump i/o 
+ * get processed and we don't hang while waiting for i/o
+ * to complete or in any irq synchronization attempt.
+ *
+ * This is not quite legal of course, as it has the side 
+ * effect of making all interrupts & softirqs triggered 
+ * while dump is in progress complete before currently 
+ * pending softirqs and the currently executing interrupt 
+ * code. 
+ */
+static inline void
+irq_bh_save(void)
+{
+       saved_irq_count = irq_count();
+       preempt_count() &= ~(HARDIRQ_MASK|SOFTIRQ_MASK);
+}
+
+static inline void
+irq_bh_restore(void)
+{
+       preempt_count() |= saved_irq_count;
+}
+
+/*
+ * Name: __dump_irq_enable
+ * Func: Reset the system so that interrupts are enabled.
+ *      This is used by dump methods that require interrupts.
+ *      Eventually, all methods will have interrupts disabled
+ *      and this code can be removed.
+ *
+ *      Re-enables interrupts.
+ */
+int
+__dump_irq_enable(void)
+{
+       irq_bh_save();
+       local_irq_enable();
+       return 0;
+}
+
+/* Name: __dump_irq_restore
+ * Func: Resume the system state in an architecture-specific way.
+ */
+void 
+__dump_irq_restore(void)
+{
+       local_irq_disable();
+       irq_bh_restore();
+}
+       
+
+/*
+ * Name: __dump_configure_header()
+ * Func: Meant to fill in arch specific header fields except per-cpu state
+ *      already captured in dump_lcrash_configure_header.
+ */
+int
+__dump_configure_header(const struct pt_regs *regs)
+{
+       return (0);
+}
+
+/*
+ * Name: dump_die_event
+ * Func: Called from notify_die
+ */
+static int dump_die_event(struct notifier_block* this,
+                         unsigned long event,
+                         void* arg)
+{
+       const struct die_args* args = (const struct die_args*)arg;
+
+       switch(event){
+               case DIE_PANIC: 
+               case DIE_OOPS:
+               case DIE_WATCHDOG:
+                       dump_execute(args->str, args->regs);
+                       break;          
+       }
+       return NOTIFY_DONE;
+
+}
+
+static struct notifier_block dump_die_block = {
+       .notifier_call = dump_die_event,
+};
+
+/* Name: __dump_init()
+ * Func: Initialize the dumping routine process.
+ */
+void
+__dump_init(uint64_t local_memory_start)
+{
+       /* hook into PANIC and OOPS */
+       register_die_notifier(&dump_die_block);
+}
+
+/*
+ * Name: __dump_open()
+ * Func: Open the dump device (architecture specific).  This is in
+ *       case it's necessary in the future.
+ */
+void
+__dump_open(void)
+{
+
+       alloc_dha_stack();
+
+       return;
+}
+
+/*
+ * Name: __dump_cleanup()
+ * Func: Free any architecture specific data structures. This is called
+ *       when the dump module is being removed.
+ */
+void
+__dump_cleanup(void)
+{
+       free_dha_stack();
+       unregister_die_notifier(&dump_die_block);
+
+       return;
+}
+
+/* 
+ * Name: __dump_page_valid()
+ * Func: Check if page is valid to dump.
+ */
+int
+__dump_page_valid(unsigned long index)
+{
+       if(!pfn_valid(index))
+               return 0;
+       else
+               return 1;
+}
+
+/* 
+ * Name: manual_handle_crashdump 
+ * Func: Interface for the lkcd dump command. Calls dump_execute()
+ */
+int
+manual_handle_crashdump(void)
+{
+       _dump_regs_t regs;
+
+       get_current_general_regs(&regs);
+       get_current_cp14_regs(&regs);
+       get_current_cp15_regs(&regs);
+       dump_execute("manual", &regs);
+       return 0;
+}
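
The __dump_init()/dump_die_event() pair above is the generic hook-up pattern:
the dump driver attaches itself to the kernel's die notifier chain so that a
panic, oops, or watchdog event triggers dump_execute(). A minimal sketch of
the same registration pattern, assuming the 2.6-era <asm/kdebug.h> notifier
API; the handler below is hypothetical, not part of this commit:

    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int example_die_event(struct notifier_block *nb,
                                 unsigned long event, void *arg)
    {
            struct die_args *args = arg;

            /* react only to fatal events, as dump_die_event() does */
            if (event == DIE_PANIC || event == DIE_OOPS)
                    printk("die event: %s\n", args->str);
            return NOTIFY_DONE;
    }

    static struct notifier_block example_die_block = {
            .notifier_call = example_die_event,
    };

    /* register_die_notifier(&example_die_block) at init time,
     * unregister_die_notifier(&example_die_block) at cleanup */
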
diff --git a/drivers/dump/dump_blockdev.c b/drivers/dump/dump_blockdev.c
new file mode 100644 (file)
index 0000000..cee31a4
--- /dev/null
@@ -0,0 +1,468 @@
+/*
+ * Implements the dump driver interface for saving a dump to 
+ * a block device through the kernel's generic low level block i/o
+ * routines.
+ *
+ * Started: June 2002 - Mohamed Abbas <mohamed.abbas@intel.com>
+ *     Moved original lkcd kiobuf dump i/o code from dump_base.c
+ *     to use generic dump device interfaces
+ *
+ * Sept 2002 - Bharata B. Rao <bharata@in.ibm.com>
+ *     Convert dump i/o to directly use bio instead of kiobuf for 2.5
+ *
+ * Oct 2002  - Suparna Bhattacharya <suparna@in.ibm.com>
+ *     Rework to new dumpdev.h structures, implement open/close/
+ *     silence, misc fixes (blocknr removal, bio_add_page usage)  
+ *
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <asm/hardirq.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+
+extern void *dump_page_buf;
+
+/* The end_io callback for dump i/o completion */
+static int
+dump_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
+{
+       struct dump_blockdev *dump_bdev;
+
+       if (bio->bi_size) {
+               /* some bytes still left to transfer */
+               return 1; /* not complete */
+       }
+
+       dump_bdev = (struct dump_blockdev *)bio->bi_private;
+       if (error) {
+               printk("IO error while writing the dump, aborting\n");
+       }
+
+       dump_bdev->err = error;
+
+       /* no wakeup needed, since caller polls for completion */
+       return 0;
+}
+
+/* Check if the dump bio is already mapped to the specified buffer */
+static int
+dump_block_map_valid(struct dump_blockdev *dev, struct page *page, 
+       int len) 
+{
+       struct bio *bio = dev->bio;
+       unsigned long bsize = 0;
+
+       if (!bio->bi_vcnt)
+               return 0; /* first time, not mapped */
+
+
+       if ((bio_page(bio) != page) || (len > bio->bi_vcnt << PAGE_SHIFT))
+               return 0; /* buffer not mapped */
+
+       bsize = bdev_hardsect_size(bio->bi_bdev);
+       if ((len & (PAGE_SIZE - 1)) || (len & (bsize - 1)))
+               return 0; /* alignment checks needed */
+
+       /* quick check to decide if we need to redo bio_add_page */
+       if (bdev_get_queue(bio->bi_bdev)->merge_bvec_fn)
+               return 0; /* device may have other restrictions */
+
+       return 1; /* already mapped */
+}
+
+/* 
+ * Set up the dump bio for i/o from the specified buffer 
+ * Return value indicates whether the full buffer could be mapped or not
+ */
+static int
+dump_block_map(struct dump_blockdev *dev, void *buf, int len)
+{
+       struct page *page = virt_to_page(buf);
+       struct bio *bio = dev->bio;
+       unsigned long bsize = 0;
+
+       bio->bi_bdev = dev->bdev;
+       bio->bi_sector = (dev->start_offset + dev->ddev.curr_offset) >> 9;
+       bio->bi_idx = 0; /* reset index to the beginning */
+
+       if (dump_block_map_valid(dev, page, len)) {
+               /* already mapped and usable rightaway */
+               bio->bi_size = len; /* reset size to the whole bio */
+       } else {
+               /* need to map the bio */
+               bio->bi_size = 0;
+               bio->bi_vcnt = 0;
+               bsize = bdev_hardsect_size(bio->bi_bdev);
+
+               /* first a few sanity checks */
+               if (len < bsize) {
+                       printk("map: len less than hardsect size\n");
+                       return -EINVAL;
+               }
+
+               if ((unsigned long)buf & (bsize - 1)) {
+                       printk("map: buffer not sector aligned\n");
+                       return -EINVAL;
+               }
+
+               /* assume contiguous, page-aligned low-mem buffer (no vmalloc) */
+               if ((page_address(page) != buf) || (len & (PAGE_SIZE - 1))) {
+                       printk("map: invalid buffer alignment!\n");
+                       return -EINVAL; 
+               }
+               /* finally we can go ahead and map it */
+               while (bio->bi_size < len) {
+                       if (bio_add_page(bio, page++, PAGE_SIZE, 0) == 0)
+                               break;
+               }
+
+               bio->bi_end_io = dump_bio_end_io;
+               bio->bi_private = dev;
+       }
+
+       if (bio->bi_size != len) {
+               printk("map: bio size = %d not enough for len = %d!\n",
+                       bio->bi_size, len);
+               return -E2BIG;
+       }
+       return 0;
+}
+
+static void
+dump_free_bio(struct bio *bio)
+{
+       if (bio)
+               kfree(bio->bi_io_vec);
+       kfree(bio);
+}
+
+/*
+ * Prepares the dump device so we can take a dump later. 
+ * The caller is expected to have filled up the dev_id field in the 
+ * block dump dev structure.
+ *
+ * At dump time when dump_block_write() is invoked it will be too 
+ * late to recover, so as far as possible make sure obvious errors 
+ * get caught right here and reported back to the caller.
+ */
+static int
+dump_block_open(struct dump_dev *dev, unsigned long arg)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+       struct block_device *bdev;
+       int retval = 0;
+       struct bio_vec *bvec;
+
+       /* make sure this is a valid block device */
+       if (!arg) {
+               retval = -EINVAL;
+               goto err;
+       }
+
+       /* Convert it to the new dev_t format */
+       arg = MKDEV((arg >> OLDMINORBITS), (arg & OLDMINORMASK));
+       
+       /* get a corresponding block_dev struct for this */
+       bdev = bdget((dev_t)arg);
+       if (!bdev) {
+               retval = -ENODEV;
+               goto err;
+       }
+
+       /* get the block device opened */
+       if ((retval = blkdev_get(bdev, O_RDWR | O_LARGEFILE, 0))) {
+               goto err1;
+       }
+
+       if ((dump_bdev->bio = kmalloc(sizeof(struct bio), GFP_KERNEL)) 
+               == NULL) {
+               printk("Cannot allocate bio\n");
+               retval = -ENOMEM;
+               goto err2;
+       }
+
+       bio_init(dump_bdev->bio);
+
+       if ((bvec = kmalloc(sizeof(struct bio_vec) * 
+               (DUMP_BUFFER_SIZE >> PAGE_SHIFT), GFP_KERNEL)) == NULL) {
+               retval = -ENOMEM;
+               goto err3;
+       }
+
+       /* assign the new dump dev structure */
+       dump_bdev->dev_id = (dev_t)arg;
+       dump_bdev->bdev = bdev;
+
+       /* make a note of the limit */
+       dump_bdev->limit = bdev->bd_inode->i_size;
+       
+       /* now make sure we can map the dump buffer */
+       dump_bdev->bio->bi_io_vec = bvec;
+       dump_bdev->bio->bi_max_vecs = DUMP_BUFFER_SIZE >> PAGE_SHIFT;
+
+       retval = dump_block_map(dump_bdev, dump_config.dumper->dump_buf, 
+               DUMP_BUFFER_SIZE);
+               
+       if (retval) {
+               printk("open: dump_block_map failed, ret %d\n", retval);
+               goto err3;
+       }
+
+       printk("Block device (%d,%d) successfully configured for dumping\n",
+              MAJOR(dump_bdev->dev_id),
+              MINOR(dump_bdev->dev_id));
+
+
+       /* after opening the block device, return */
+       return retval;
+
+err3:  dump_free_bio(dump_bdev->bio);
+       dump_bdev->bio = NULL;
+err2:  if (bdev) blkdev_put(bdev);
+       goto err;
+err1:  if (bdev) bdput(bdev);
+       dump_bdev->bdev = NULL;
+err:   return retval;
+}
+
+/*
+ * Close the dump device and release associated resources
+ * Invoked when unconfiguring the dump device.
+ */
+static int
+dump_block_release(struct dump_dev *dev)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+
+       /* release earlier bdev if present */
+       if (dump_bdev->bdev) {
+               blkdev_put(dump_bdev->bdev);
+               dump_bdev->bdev = NULL;
+       }
+
+       dump_free_bio(dump_bdev->bio);
+       dump_bdev->bio = NULL;
+
+       return 0;
+}
+
+
+/*
+ * Prepare the dump device for use (silence any ongoing activity
+ * and quiesce state) when the system crashes.
+ */
+static int
+dump_block_silence(struct dump_dev *dev)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+       struct request_queue *q = bdev_get_queue(dump_bdev->bdev);
+       int ret;
+
+       /* If we can't get request queue lock, refuse to take the dump */
+       if (!spin_trylock(q->queue_lock))
+               return -EBUSY;
+
+       ret = elv_queue_empty(q);
+       spin_unlock(q->queue_lock);
+
+       /* For now we assume we have the device to ourselves */
+       /* Just a quick sanity check */
+       if (!ret) {
+               /* Warn the user and move on */
+               printk(KERN_ALERT "Warning: Non-empty request queue\n");
+               printk(KERN_ALERT "I/O requests in flight at dump time\n");
+       }
+
+       /* 
+        * Move to a softer level of silencing where no spin_lock_irqs 
+        * are held on other cpus
+        */
+       dump_silence_level = DUMP_SOFT_SPIN_CPUS;       
+
+       ret = __dump_irq_enable();
+       if (ret) {
+               return ret;
+       }
+
+       printk("Dumping to block device (%d,%d) on CPU %d ...\n",
+              MAJOR(dump_bdev->dev_id), MINOR(dump_bdev->dev_id),
+              smp_processor_id());
+       
+       return 0;
+}
+
+/*
+ * Invoked when dumping is done. This is the time to put things back 
+ * (i.e. undo the effects of dump_block_silence) so the device is 
+ * available for normal use.
+ */
+static int
+dump_block_resume(struct dump_dev *dev)
+{
+       __dump_irq_restore();
+       return 0;
+}
+
+
+/*
+ * Seek to the specified offset in the dump device.
+ * Makes sure this is a valid offset, otherwise returns an error.
+ */
+static int
+dump_block_seek(struct dump_dev *dev, loff_t off)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+       loff_t offset = off + dump_bdev->start_offset;
+       
+       if (offset & ( PAGE_SIZE - 1)) {
+               printk("seek: non-page aligned\n");
+               return -EINVAL;
+       }
+
+       if (offset & (bdev_hardsect_size(dump_bdev->bdev) - 1)) {
+               printk("seek: not sector aligned \n");
+               return -EINVAL;
+       }
+
+       if (offset > dump_bdev->limit) {
+               printk("seek: not enough space left on device!\n");
+               return -ENOSPC; 
+       }
+       dev->curr_offset = off;
+       return 0;
+}
+
+/*
+ * Write out a buffer after checking the device limitations, 
+ * sector sizes, etc. Assumes the buffer is in directly mapped 
+ * kernel address space (not vmalloc'ed).
+ *
+ * Returns: number of bytes written or -ERRNO. 
+ */
+static int
+dump_block_write(struct dump_dev *dev, void *buf, 
+       unsigned long len)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+       loff_t offset = dev->curr_offset + dump_bdev->start_offset;
+       int retval = -ENOSPC;
+
+       if (offset >= dump_bdev->limit) {
+               printk("write: not enough space left on device!\n");
+               goto out;
+       }
+
+       /* don't write more blocks than our max limit */
+       if (offset + len > dump_bdev->limit) 
+               len = dump_bdev->limit - offset;
+
+
+       retval = dump_block_map(dump_bdev, buf, len);
+       if (retval){
+               printk("write: dump_block_map failed! err %d\n", retval);
+               goto out;
+       }
+
+       /*
+        * Write out the data to disk.
+        * Assumes the entire buffer is mapped to a single bio, which we
+        * can submit and wait for io completion. In the future, we may
+        * consider increasing the dump buffer size and submitting
+        * multiple bios for better throughput.
+        */
+       dump_bdev->err = -EAGAIN;
+       submit_bio(WRITE, dump_bdev->bio);
+
+       dump_bdev->ddev.curr_offset += len;
+       retval = len;
+ out:
+       return retval;
+}
+
+/*
+ * Name: dump_block_ready()
+ * Func: check if the last dump i/o is over and ready for next request
+ */
+static int
+dump_block_ready(struct dump_dev *dev, void *buf)
+{
+       struct dump_blockdev *dump_bdev = DUMP_BDEV(dev);
+       request_queue_t *q = bdev_get_queue(dump_bdev->bio->bi_bdev);
+
+       /* check for io completion */
+       if (dump_bdev->err == -EAGAIN) {
+               q->unplug_fn(q);
+               return -EAGAIN;
+       }
+
+       if (dump_bdev->err) {
+               printk("dump i/o err\n");
+               return dump_bdev->err;
+       }
+
+       return 0;
+}
+
+
+struct dump_dev_ops dump_blockdev_ops = {
+       .open           = dump_block_open,
+       .release        = dump_block_release,
+       .silence        = dump_block_silence,
+       .resume         = dump_block_resume,
+       .seek           = dump_block_seek,
+       .write          = dump_block_write,
+       /* .read not implemented */
+       .ready          = dump_block_ready
+};
+
+static struct dump_blockdev default_dump_blockdev = {
+       .ddev = {.type_name = "blockdev", .ops = &dump_blockdev_ops, 
+                       .curr_offset = 0},
+       /*
+        * leave enough room for the longest swap header possibly
+        * written by mkswap (likely the largest page size supported
+        * by the arch)
+        */
+       .start_offset   = DUMP_HEADER_OFFSET,
+       .err            = 0
+       /* assume the rest of the fields are zeroed by default */
+};     
+       
+struct dump_blockdev *dump_blockdev = &default_dump_blockdev;
+
+static int __init
+dump_blockdev_init(void)
+{
+       if (dump_register_device(&dump_blockdev->ddev) < 0) {
+               printk("block device driver registration failed\n");
+               return -1;
+       }
+               
+       printk("block device driver for LKCD registered\n");
+       return 0;
+}
+
+static void __exit
+dump_blockdev_cleanup(void)
+{
+       dump_unregister_device(&dump_blockdev->ddev);
+       printk("block device driver for LKCD unregistered\n");
+}
+
+MODULE_AUTHOR("LKCD Development Team <lkcd-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Block Dump Driver for Linux Kernel Crash Dump (LKCD)");
+MODULE_LICENSE("GPL");
+
+module_init(dump_blockdev_init);
+module_exit(dump_blockdev_cleanup);
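
The dump_dev_ops table above defines the contract the generic dump layer
drives: open/silence at configuration and crash time, then seek/write/ready
for each buffer. Since dump_block_write() only submits the bio, the caller
has to poll ready() until the i/o completes. A minimal sketch of that
sequence, assuming a configured struct dump_dev *dev; the helper name is
hypothetical, not part of this commit:

    static int example_write_and_wait(struct dump_dev *dev, void *buf,
                                      unsigned long len, loff_t off)
    {
            int err;

            if ((err = dev->ops->seek(dev, off)))
                    return err;
            if ((err = dev->ops->write(dev, buf, len)) < 0)
                    return err;
            /* dump_block_write() is asynchronous; poll for completion */
            while ((err = dev->ops->ready(dev, buf)) == -EAGAIN)
                    cpu_relax();
            return err;
    }
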
diff --git a/drivers/dump/dump_execute.c b/drivers/dump/dump_execute.c
new file mode 100644 (file)
index 0000000..d0dc251
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * The file has the common/generic dump execution code 
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *     Split and rewrote high level dump execute code to make use 
+ *     of dump method interfaces.
+ *
+ * Derived from original code in dump_base.c created by 
+ *     Matt Robinson <yakker@sourceforge.net>)
+ *     
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * Assumes dumper and dump config settings are in place
+ * (invokes corresponding dumper specific routines as applicable)
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/dump.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include "dump_methods.h"
+
+struct notifier_block *dump_notifier_list; /* dump started/ended callback */
+
+extern int panic_timeout;
+
+/* Dump progress indicator */
+void 
+dump_speedo(int i)
+{
+       static const char twiddle[4] =  { '|', '\\', '-', '/' };
+       printk("%c\b", twiddle[i&3]);
+}
+
+/* Make the device ready and write out the header */
+int dump_begin(void)
+{
+       int err = 0;
+
+       /* dump_dev = dump_config.dumper->dev; */
+       dumper_reset();
+       if ((err = dump_dev_silence())) {
+               /* quiesce failed, can't risk continuing */
+               /* Todo/Future: switch to alternate dump scheme if possible */
+               printk("dump dev silence failed! error %d\n", err);
+               return err;
+       }
+
+       pr_debug("Writing dump header\n");
+       if ((err = dump_update_header())) {
+               printk("dump update header failed! error %d\n", err);
+               dump_dev_resume();
+               return err;
+       }
+
+       dump_config.dumper->curr_offset = DUMP_BUFFER_SIZE;
+
+       return 0;
+}
+
+/* 
+ * Write the dump terminator, a final header update and let go of 
+ * exclusive use of the device for dump.
+ */
+int dump_complete(void)
+{
+       int ret = 0;
+
+       if (dump_config.level != DUMP_LEVEL_HEADER) {
+               if ((ret = dump_update_end_marker())) {
+                       printk("dump update end marker error %d\n", ret);
+               }
+               if ((ret = dump_update_header())) {
+                       printk("dump update header error %d\n", ret);
+               }
+       }
+       ret = dump_dev_resume();
+
+       if ((panic_timeout > 0) && (!(dump_config.flags & (DUMP_FLAGS_SOFTBOOT | DUMP_FLAGS_NONDISRUPT)))) {
+               printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
+#ifdef CONFIG_SMP
+               smp_send_stop();
+#endif
+               mdelay(panic_timeout * 1000);
+               machine_restart(NULL);
+       }
+
+       return ret;
+}
+
+/* Saves all dump data */
+int dump_execute_savedump(void)
+{
+       int ret = 0, err = 0;
+
+       if ((ret = dump_begin()))  {
+               return ret;
+       }
+
+       if (dump_config.level != DUMP_LEVEL_HEADER) { 
+               ret = dump_sequencer();
+       }
+       if ((err = dump_complete())) {
+               printk("Dump complete failed. Error %d\n", err);
+       }
+
+       return ret;
+}
+
+extern void dump_calc_bootmap_pages(void);
+
+/* Does all the real work:  Capture and save state */
+int dump_generic_execute(const char *panic_str, const struct pt_regs *regs)
+{
+       int ret = 0;
+
+       if ((ret = dump_configure_header(panic_str, regs))) {
+               printk("dump config header failed ! error %d\n", ret);
+               return ret;     
+       }
+
+       dump_calc_bootmap_pages();
+       /* tell interested parties that a dump is about to start */
+       notifier_call_chain(&dump_notifier_list, DUMP_BEGIN, 
+               &dump_config.dump_device);
+
+       if (dump_config.level != DUMP_LEVEL_NONE)
+               ret = dump_execute_savedump();
+
+       pr_debug("dumped %ld blocks of %d bytes each\n", 
+               dump_config.dumper->count, DUMP_BUFFER_SIZE);
+       
+       /* tell interested parties that a dump has completed */
+       notifier_call_chain(&dump_notifier_list, DUMP_END, 
+               &dump_config.dump_device);
+
+       return ret;
+}
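
dump_generic_execute() above brackets the dump with DUMP_BEGIN and DUMP_END
events on dump_notifier_list, so other subsystems can react when a crash dump
starts or finishes. A minimal sketch of a listener, assuming the 2.6-era
notifier_chain_register() API; the handler below is hypothetical:

    #include <linux/notifier.h>
    #include <linux/dump.h>

    extern struct notifier_block *dump_notifier_list;

    static int example_dump_event(struct notifier_block *nb,
                                  unsigned long event, void *arg)
    {
            /* arg is &dump_config.dump_device, per dump_generic_execute() */
            if (event == DUMP_BEGIN)
                    printk("crash dump starting\n");
            else if (event == DUMP_END)
                    printk("crash dump complete\n");
            return NOTIFY_DONE;
    }

    static struct notifier_block example_dump_nb = {
            .notifier_call = example_dump_event,
    };

    /* notifier_chain_register(&dump_notifier_list, &example_dump_nb); */
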
diff --git a/drivers/dump/dump_filters.c b/drivers/dump/dump_filters.c
new file mode 100644 (file)
index 0000000..735cd0b
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * Default filters to select data to dump for various passes.
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *     Split and rewrote default dump selection logic to generic dump 
+ *     method interfaces 
+ * Derived from a portion of dump_base.c created by 
+ *     Matt Robinson <yakker@sourceforge.net>)
+ *
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * Used during single-stage dumping and during stage 1 of the 2-stage scheme
+ * (Stage 2 of the 2-stage scheme uses the fully transparent filters
+ * i.e. passthru filters in dump_overlay.c)
+ *
+ * Future: Custom selective dump may involve a different set of filters.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+
+#define DUMP_PFN_SAFETY_MARGIN  1024  /* 4 MB */
+static unsigned long bootmap_pages;
+
+/* Copied from mm/bootmem.c - FIXME */
+/* return the number of _pages_ that will be allocated for the boot bitmap */
+void dump_calc_bootmap_pages(void)
+{
+       unsigned long mapsize;
+       unsigned long pages = num_physpages;
+
+       mapsize = (pages+7)/8;
+       mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK;
+       mapsize >>= PAGE_SHIFT;
+       bootmap_pages = mapsize + DUMP_PFN_SAFETY_MARGIN + 1;
+}
+
+
+/* temporary */
+extern unsigned long min_low_pfn;
+
+
+int dump_low_page(struct page *p)
+{
+       return ((page_to_pfn(p) >= min_low_pfn) &&
+               (page_to_pfn(p) < (min_low_pfn + bootmap_pages)));
+}
+
+static inline int kernel_page(struct page *p)
+{
+       /* FIXME: Need to exclude hugetlb pages. Clue: reserved but inuse */
+       return (PageReserved(p) && !PageInuse(p)) || (!PageLRU(p) && PageInuse(p));
+}
+
+static inline int user_page(struct page *p)
+{
+       return PageInuse(p) && (!PageReserved(p) && PageLRU(p));
+}
+
+static inline int unreferenced_page(struct page *p)
+{
+       return !PageInuse(p) && !PageReserved(p);
+}
+
+
+/* loc marks the beginning of a range of pages */
+int dump_filter_kernpages(int pass, unsigned long loc, unsigned long sz)
+{
+       struct page *page = (struct page *)loc;
+       /* if any of the pages is a kernel page, select this set */     
+       while (sz) {
+               if (dump_low_page(page) || kernel_page(page))
+                       return 1;
+               sz -= PAGE_SIZE;
+               page++;
+       }       
+       return 0;
+}
+
+
+/* loc marks the beginning of a range of pages */
+int dump_filter_userpages(int pass, unsigned long loc, unsigned long sz)
+{
+       struct page *page = (struct page *)loc;
+       int ret = 0;
+       /* select if the set has any user page, and no kernel pages  */ 
+       while (sz) {
+               if (user_page(page) && !dump_low_page(page)) {
+                       ret = 1;
+               } else if (kernel_page(page) || dump_low_page(page)) {
+                       return 0;
+               }
+               page++;
+               sz -= PAGE_SIZE;
+       }       
+       return ret;
+}
+
+
+
+/* loc marks the beginning of a range of pages */
+int dump_filter_unusedpages(int pass, unsigned long loc, unsigned long sz)
+{
+       struct page *page = (struct page *)loc;
+
+       /* select if the set does not have any used pages  */   
+       while (sz) {
+               if (!unreferenced_page(page) || dump_low_page(page)) {
+                       return 0;
+               }
+               page++;
+               sz -= PAGE_SIZE;
+       }       
+       return 1;
+}
+
+/* dummy: last (non-existent) pass */
+int dump_filter_none(int pass, unsigned long loc, unsigned long sz)
+{
+       return 0;
+}
+
+/* TBD: resolve level bitmask ? */
+struct dump_data_filter dump_filter_table[] = {
+       { .name = "kern", .selector = dump_filter_kernpages, 
+               .level_mask = DUMP_MASK_KERN},
+       { .name = "user", .selector = dump_filter_userpages, 
+               .level_mask = DUMP_MASK_USED},
+       { .name = "unused", .selector = dump_filter_unusedpages, 
+               .level_mask = DUMP_MASK_UNUSED},
+       { .name = "none", .selector = dump_filter_none, 
+               .level_mask = DUMP_MASK_REST},
+       { .name = "", .selector = NULL, .level_mask = 0}
+};
+
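
Each entry in dump_filter_table above pairs a selector with a level mask; the
dump scheme runs one pass per entry and asks the selector whether a range of
struct page entries belongs in that pass. A sketch of how a scheme might walk
a pfn range through the table (illustrative only; the real iteration lives in
dump_scheme.c and may chunk ranges differently):

    static void example_run_filters(unsigned long start_pfn,
                                    unsigned long nr_pages)
    {
            struct dump_data_filter *filter;
            unsigned long pfn;
            int pass = 0;

            for (filter = dump_filter_table; filter->selector;
                 filter++, pass++) {
                    for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                            struct page *page = pfn_to_page(pfn);

                            if (filter->selector(pass, (unsigned long)page,
                                                 PAGE_SIZE)) {
                                    /* page selected for this pass */
                            }
                    }
            }
    }
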
diff --git a/drivers/dump/dump_fmt.c b/drivers/dump/dump_fmt.c
new file mode 100644 (file)
index 0000000..afa0aed
--- /dev/null
@@ -0,0 +1,406 @@
+/*
+ * Implements the routines which handle the format specific
+ * aspects of dump for the default dump format.
+ *
+ * Used in single stage dumping and stage 1 of soft-boot based dumping 
+ * Saves data in LKCD (lcrash) format 
+ *
+ * Previously a part of dump_base.c
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *     Split off and reshuffled LKCD dump format code around generic
+ *     dump method interfaces.
+ *
+ * Derived from original code created by 
+ *     Matt Robinson <yakker@sourceforge.net>)
+ *
+ * Contributions from SGI, IBM, HP, MCL, and others.
+ *
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000 - 2002 TurboLinux, Inc.  All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/utsname.h>
+#include <asm/dump.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+
+/*
+ * SYSTEM DUMP LAYOUT
+ * 
+ * System dumps are currently the combination of a dump header and a set
+ * of data pages which contain the system memory.  The layout of the dump
+ * (for full dumps) is as follows:
+ *
+ *             +-----------------------------+
+ *             |     generic dump header     |
+ *             +-----------------------------+
+ *             |   architecture dump header  |
+ *             +-----------------------------+
+ *             |         page header         |
+ *             +-----------------------------+
+ *             |          page data          |
+ *             +-----------------------------+
+ *             |         page header         |
+ *             +-----------------------------+
+ *             |          page data          |
+ *             +-----------------------------+
+ *             |              |              |
+ *             |              |              |
+ *             |              |              |
+ *             |              |              |
+ *             |              V              |
+ *             +-----------------------------+
+ *             |        PAGE_END header      |
+ *             +-----------------------------+
+ *
+ * There are two dump headers, the first which is architecture
+ * independent, and the other which is architecture dependent.  This
+ * allows different architectures to dump different data structures
+ * which are specific to their chipset, CPU, etc.
+ *
+ * After the dump headers come a succession of dump page headers along
+ * with dump pages.  The page header contains information about the page
+ * size, any flags associated with the page (whether it's compressed or
+ * not), and the address of the page.  After the page header is the page
+ * data, which is either compressed (or not).  Each page of data is
+ * dumped in succession, until the final dump header (PAGE_END) is
+ * placed at the end of the dump, assuming the dump device isn't out
+ * of space.
+ *
+ * This mechanism allows for multiple compression types, different
+ * types of data structures, different page ordering, etc., etc., etc.
+ * It's a very straightforward mechanism for dumping system memory.
+ */
+
+struct __dump_header dump_header;  /* the primary dump header              */
+struct __dump_header_asm dump_header_asm; /* the arch-specific dump header */
+
+/*
+ *  Set up common header fields (mainly the arch indep section) 
+ *  Per-cpu state is handled by lcrash_save_context
+ *  Returns the size of the header in bytes.
+ */
+static int lcrash_init_dump_header(const char *panic_str)
+{
+       struct timeval dh_time;
+       unsigned long temp_dha_stack[DUMP_MAX_NUM_CPUS];
+       u64 temp_memsz = dump_header.dh_memory_size;
+
+       /* make sure the dump header isn't TOO big */
+       if ((sizeof(struct __dump_header) +
+               sizeof(struct __dump_header_asm)) > DUMP_BUFFER_SIZE) {
+                       printk("lcrash_init_dump_header(): combined "
+                               "headers larger than DUMP_BUFFER_SIZE!\n");
+                       return -E2BIG;
+       }
+
+       /* initialize the dump headers to zero */
+       /* save the dha_stack pointers first; they may point to stack snapshot buffers */
+       memcpy(&(temp_dha_stack[0]), &(dump_header_asm.dha_stack[0]),
+               DUMP_MAX_NUM_CPUS * sizeof(unsigned long));
+       memset(&dump_header, 0, sizeof(dump_header));
+       memset(&dump_header_asm, 0, sizeof(dump_header_asm));
+       dump_header.dh_memory_size = temp_memsz;
+       memcpy(&(dump_header_asm.dha_stack[0]), &(temp_dha_stack[0]),
+               DUMP_MAX_NUM_CPUS * sizeof(unsigned long));
+
+       /* configure dump header values */
+       dump_header.dh_magic_number = DUMP_MAGIC_NUMBER;
+       dump_header.dh_version = DUMP_VERSION_NUMBER;
+       dump_header.dh_memory_start = PAGE_OFFSET;
+       dump_header.dh_memory_end = DUMP_MAGIC_NUMBER;
+       dump_header.dh_header_size = sizeof(struct __dump_header);
+       dump_header.dh_page_size = PAGE_SIZE;
+       dump_header.dh_dump_level = dump_config.level;
+       dump_header.dh_current_task = (unsigned long) current;
+       dump_header.dh_dump_compress = dump_config.dumper->compress->
+               compress_type;
+       dump_header.dh_dump_flags = dump_config.flags;
+       dump_header.dh_dump_device = dump_config.dumper->dev->device_id; 
+
+#if DUMP_DEBUG >= 6
+       dump_header.dh_num_bytes = 0;
+#endif
+       dump_header.dh_num_dump_pages = 0;
+       do_gettimeofday(&dh_time);
+       dump_header.dh_time.tv_sec = dh_time.tv_sec;
+       dump_header.dh_time.tv_usec = dh_time.tv_usec;
+
+       memcpy((void *)&(dump_header.dh_utsname_sysname), 
+               (const void *)&(system_utsname.sysname), __NEW_UTS_LEN + 1);
+       memcpy((void *)&(dump_header.dh_utsname_nodename), 
+               (const void *)&(system_utsname.nodename), __NEW_UTS_LEN + 1);
+       memcpy((void *)&(dump_header.dh_utsname_release), 
+               (const void *)&(system_utsname.release), __NEW_UTS_LEN + 1);
+       memcpy((void *)&(dump_header.dh_utsname_version), 
+               (const void *)&(system_utsname.version), __NEW_UTS_LEN + 1);
+       memcpy((void *)&(dump_header.dh_utsname_machine), 
+               (const void *)&(system_utsname.machine), __NEW_UTS_LEN + 1);
+       memcpy((void *)&(dump_header.dh_utsname_domainname), 
+               (const void *)&(system_utsname.domainname), __NEW_UTS_LEN + 1);
+
+       if (panic_str) {
+               memcpy((void *)&(dump_header.dh_panic_string),
+                       (const void *)panic_str, DUMP_PANIC_LEN);
+       }
+
+        dump_header_asm.dha_magic_number = DUMP_ASM_MAGIC_NUMBER;
+        dump_header_asm.dha_version = DUMP_ASM_VERSION_NUMBER;
+        dump_header_asm.dha_header_size = sizeof(dump_header_asm);
+#ifdef CONFIG_ARM
+       dump_header_asm.dha_physaddr_start = PHYS_OFFSET;
+#endif
+
+       dump_header_asm.dha_smp_num_cpus = num_online_cpus();
+       pr_debug("smp_num_cpus in header %d\n", 
+               dump_header_asm.dha_smp_num_cpus);
+
+       dump_header_asm.dha_dumping_cpu = smp_processor_id();
+       
+       return sizeof(dump_header) + sizeof(dump_header_asm);
+}
+
+
+int dump_lcrash_configure_header(const char *panic_str, 
+       const struct pt_regs *regs)
+{
+       int retval = 0;
+
+       dump_config.dumper->header_len = lcrash_init_dump_header(panic_str);
+
+       /* capture register states for all processors */
+       dump_save_this_cpu(regs);
+       __dump_save_other_cpus(); /* side effect:silence cpus */
+
+       /* configure architecture-specific dump header values */
+       if ((retval = __dump_configure_header(regs))) 
+               return retval;
+
+       dump_config.dumper->header_dirty++;
+       return 0;
+}
+
+/* save register and task context */
+void dump_lcrash_save_context(int cpu, const struct pt_regs *regs, 
+       struct task_struct *tsk)
+{
+       dump_header_asm.dha_smp_current_task[cpu] = (unsigned long)tsk;
+
+       __dump_save_regs(&dump_header_asm.dha_smp_regs[cpu], regs);
+
+       /* take a snapshot of the stack */
+       /* doing this enables us to tolerate slight drifts on this cpu */
+       if (dump_header_asm.dha_stack[cpu]) {
+               memcpy((void *)dump_header_asm.dha_stack[cpu],
+                               tsk->thread_info, THREAD_SIZE);
+       }
+       dump_header_asm.dha_stack_ptr[cpu] = (unsigned long)(tsk->thread_info);
+}
+
+/* write out the header */
+int dump_write_header(void)
+{
+       int retval = 0, size;
+       void *buf = dump_config.dumper->dump_buf;
+
+       /* accounts for DUMP_HEADER_OFFSET if applicable */
+       if ((retval = dump_dev_seek(0))) {
+               printk("Unable to seek to dump header offset: %d\n", 
+                       retval);
+               return retval;
+       }
+
+       memcpy(buf, (void *)&dump_header, sizeof(dump_header));
+       size = sizeof(dump_header);
+       memcpy(buf + size, (void *)&dump_header_asm, sizeof(dump_header_asm));
+       size += sizeof(dump_header_asm);
+       size = PAGE_ALIGN(size);
+       retval = dump_ll_write(buf, size);
+
+       if (retval < size)
+               return (retval >= 0) ? -ENOSPC : retval;
+       return 0;
+}
+
+int dump_generic_update_header(void)
+{
+       int err = 0;
+
+       if (dump_config.dumper->header_dirty) {
+               if ((err = dump_write_header())) {
+                       printk("dump write header failed !err %d\n", err);
+               } else {
+                       dump_config.dumper->header_dirty = 0;
+               }
+       }
+
+       return err;
+}
+
+static inline int is_curr_stack_page(struct page *page, unsigned long size)
+{
+       unsigned long thread_addr = (unsigned long)current_thread_info();
+       unsigned long addr = (unsigned long)page_address(page);
+
+       return !PageHighMem(page) && (addr < thread_addr + THREAD_SIZE)
+               && (addr + size > thread_addr);
+}
+
+static inline int is_dump_page(struct page *page, unsigned long size)
+{
+       unsigned long addr = (unsigned long)page_address(page);
+       unsigned long dump_buf = (unsigned long)dump_config.dumper->dump_buf;
+
+       return !PageHighMem(page) && (addr < dump_buf + DUMP_BUFFER_SIZE)
+               && (addr + size > dump_buf);
+}
+
+int dump_allow_compress(struct page *page, unsigned long size)
+{
+       /*
+        * Don't compress the page if any part of it overlaps
+        * with the current stack or dump buffer (since the contents
+        * in these could be changing while compression is going on)
+        */
+       return !is_curr_stack_page(page, size) && !is_dump_page(page, size);
+}
+
+void lcrash_init_pageheader(struct __dump_page *dp, struct page *page, 
+       unsigned long sz)
+{
+       memset(dp, 0, sizeof(struct __dump_page));
+       dp->dp_flags = 0; 
+       dp->dp_size = 0;
+       if (sz > 0)
+               dp->dp_address = (loff_t)page_to_pfn(page) << PAGE_SHIFT;
+
+#if DUMP_DEBUG > 6
+       dp->dp_page_index = dump_header.dh_num_dump_pages;
+       dp->dp_byte_offset = dump_header.dh_num_bytes + DUMP_BUFFER_SIZE
+               + DUMP_HEADER_OFFSET; /* ?? */
+#endif /* DUMP_DEBUG */
+}
+
+int dump_lcrash_add_data(unsigned long loc, unsigned long len)
+{
+       struct page *page = (struct page *)loc;
+       void *addr, *buf = dump_config.dumper->curr_buf;
+       struct __dump_page *dp = (struct __dump_page *)buf; 
+       int bytes, size;
+
+       if (buf > dump_config.dumper->dump_buf + DUMP_BUFFER_SIZE)
+               return -ENOMEM;
+
+       lcrash_init_pageheader(dp, page, len);
+       buf += sizeof(struct __dump_page);
+
+       while (len) {
+               addr = kmap_atomic(page, KM_DUMP);
+               size = bytes = (len > PAGE_SIZE) ? PAGE_SIZE : len;     
+               /* check for compression */
+               if (dump_allow_compress(page, bytes)) {
+                       size = dump_compress_data((char *)addr, bytes, (char *)buf);
+               }
+               /* set the compressed flag if the page did compress */
+               if (size && (size < bytes)) {
+                       dp->dp_flags |= DUMP_DH_COMPRESSED;
+               } else {
+                       /* compression failed -- default to raw mode */
+                       dp->dp_flags |= DUMP_DH_RAW;
+                       memcpy(buf, addr, bytes);
+                       size = bytes;
+               }
+               /* memset(buf, 'A', size); temporary: testing only !! */
+               kunmap_atomic(addr, KM_DUMP);
+               dp->dp_size += size;
+               buf += size;
+               len -= bytes;
+               page++;
+       }
+
+       /* now update the header */
+#if DUMP_DEBUG > 6
+       dump_header.dh_num_bytes += dp->dp_size + sizeof(*dp);
+#endif
+       dump_header.dh_num_dump_pages++;
+       dump_config.dumper->header_dirty++;
+
+       dump_config.dumper->curr_buf = buf;     
+
+       return len;
+}
+
+int dump_lcrash_update_end_marker(void)
+{
+       struct __dump_page *dp = 
+               (struct __dump_page *)dump_config.dumper->curr_buf;
+       unsigned long left;
+       int ret = 0;
+               
+       lcrash_init_pageheader(dp, NULL, 0);
+       dp->dp_flags |= DUMP_DH_END; /* tbd: truncation test ? */
+       
+       /* now update the header */
+#if DUMP_DEBUG > 6
+       dump_header.dh_num_bytes += sizeof(*dp);
+#endif
+       dump_config.dumper->curr_buf += sizeof(*dp);
+       left = dump_config.dumper->curr_buf - dump_config.dumper->dump_buf;
+
+       printk("\n");
+
+       while (left) {
+               if ((ret = dump_dev_seek(dump_config.dumper->curr_offset))) {
+                       printk("Seek failed at offset 0x%llx\n",
+                               dump_config.dumper->curr_offset);
+                       return ret;
+               }
+
+               if (DUMP_BUFFER_SIZE > left) 
+                       memset(dump_config.dumper->curr_buf, 'm', 
+                               DUMP_BUFFER_SIZE - left);
+
+               if ((ret = dump_ll_write(dump_config.dumper->dump_buf, 
+                       DUMP_BUFFER_SIZE)) < DUMP_BUFFER_SIZE) {
+                       return (ret < 0) ? ret : -ENOSPC;
+               }
+
+               dump_config.dumper->curr_offset += DUMP_BUFFER_SIZE;
+       
+               if (left > DUMP_BUFFER_SIZE) {
+                       left -= DUMP_BUFFER_SIZE;
+                       memcpy(dump_config.dumper->dump_buf, 
+                       dump_config.dumper->dump_buf + DUMP_BUFFER_SIZE, left);
+                       dump_config.dumper->curr_buf -= DUMP_BUFFER_SIZE;
+               } else {
+                       left = 0;
+               }
+       }
+       return 0;
+}
+
+
+/* Default Formatter (lcrash) */
+struct dump_fmt_ops dump_fmt_lcrash_ops = {
+       .configure_header       = dump_lcrash_configure_header,
+       .update_header          = dump_generic_update_header,
+       .save_context           = dump_lcrash_save_context,
+       .add_data               = dump_lcrash_add_data,
+       .update_end_marker      = dump_lcrash_update_end_marker
+};
+
+struct dump_fmt dump_fmt_lcrash = {
+       .name   = "lcrash",
+       .ops    = &dump_fmt_lcrash_ops
+};
+
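
The SYSTEM DUMP LAYOUT comment above fully determines how a reader walks the
image: both headers first, then __dump_page records back to back until one
carries DUMP_DH_END. A sketch of that walk over an in-memory image, assuming
page data starts at offset DUMP_BUFFER_SIZE as set up by dump_begin(); a
userspace tool such as lcrash does the real parsing, and error handling is
omitted here:

    static void example_walk_dump(char *img)
    {
            /* dump_begin() places the first page record at this offset */
            char *p = img + DUMP_BUFFER_SIZE;

            for (;;) {
                    struct __dump_page *dp = (struct __dump_page *)p;

                    if (dp->dp_flags & DUMP_DH_END)
                            break;
                    /* dp_size is the (possibly compressed) data length */
                    p += sizeof(*dp) + dp->dp_size;
            }
    }
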
diff --git a/drivers/dump/dump_gzip.c b/drivers/dump/dump_gzip.c
new file mode 100644 (file)
index 0000000..8809f52
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * GZIP Compression functions for kernel crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sourceforge.net)
+ * Copyright 2001 Matt D. Robinson.  All rights reserved.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/* header files */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/dump.h>
+#include <linux/zlib.h>
+#include <linux/vmalloc.h>
+
+static void *deflate_workspace;
+
+/*
+ * Name: dump_compress_gzip()
+ * Func: Compress a DUMP_PAGE_SIZE page using gzip-style algorithms (the
+ *       deflate functions similar to what's used in PPP).
+ */
+static u16
+dump_compress_gzip(const u8 *old, u16 oldsize, u8 *new, u16 newsize)
+{
+       /* error code and dump stream */
+       int err;
+       z_stream dump_stream;
+       
+       dump_stream.workspace = deflate_workspace;
+       
+       if ((err = zlib_deflateInit(&dump_stream, Z_BEST_COMPRESSION)) != Z_OK) {
+               /* fall back to uncompressed (raw) mode */
+               printk("dump_compress_gzip(): zlib_deflateInit() "
+                       "failed (%d)!\n", err);
+               return 0;
+       }
+
+       /* use old (page of memory) and size (DUMP_PAGE_SIZE) as in-streams */
+       dump_stream.next_in = (u8 *) old;
+       dump_stream.avail_in = oldsize;
+
+       /* out streams are new (dpcpage) and new size (DUMP_DPC_PAGE_SIZE) */
+       dump_stream.next_out = new;
+       dump_stream.avail_out = newsize;
+
+       /* deflate the page -- check for error */
+       err = zlib_deflate(&dump_stream, Z_FINISH);
+       if (err != Z_STREAM_END) {
+               /* returning 0 makes the caller store the page raw */
+               (void)zlib_deflateEnd(&dump_stream);
+               printk("dump_compress_gzip(): zlib_deflate() failed (%d)!\n",
+                       err);
+               return 0;
+       }
+
+       /* let's end the deflated compression stream */
+       if ((err = zlib_deflateEnd(&dump_stream)) != Z_OK) {
+               printk("dump_compress_gzip(): zlib_deflateEnd() "
+                       "failed (%d)!\n", err);
+       }
+
+       /* return the compressed byte total (if it's smaller) */
+       if (dump_stream.total_out >= oldsize) {
+               return oldsize;
+       }
+       return dump_stream.total_out;
+}
+
+/* setup the gzip compression functionality */
+static struct __dump_compress dump_gzip_compression = {
+       .compress_type = DUMP_COMPRESS_GZIP,
+       .compress_func = dump_compress_gzip,
+       .compress_name = "GZIP",
+};
+
+/*
+ * Name: dump_compress_gzip_init()
+ * Func: Initialize gzip as a compression mechanism.
+ */
+static int __init
+dump_compress_gzip_init(void)
+{
+       deflate_workspace = vmalloc(zlib_deflate_workspacesize());
+       if (!deflate_workspace) {
+               printk("dump_compress_gzip_init(): Failed to "
+                       "alloc %d bytes for deflate workspace\n",
+                       zlib_deflate_workspacesize());
+               return -ENOMEM;
+       }
+       dump_register_compression(&dump_gzip_compression);
+       return 0;
+}
+
+/*
+ * Name: dump_compress_gzip_cleanup()
+ * Func: Remove gzip as a compression mechanism.
+ */
+static void __exit
+dump_compress_gzip_cleanup(void)
+{
+       vfree(deflate_workspace);
+       dump_unregister_compression(DUMP_COMPRESS_GZIP);
+}
+
+/* module initialization */
+module_init(dump_compress_gzip_init);
+module_exit(dump_compress_gzip_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("LKCD Development Team <lkcd-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Gzip compression module for crash dump driver");
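
dump_compress_gzip() above follows the compress_func contract: compress
oldsize bytes from old into new (at most newsize bytes) and return the
compressed length, or bail out by returning 0 (or oldsize), in which case the
format layer stores the page raw. A minimal conforming sketch, assuming a
DUMP_COMPRESS_NONE type constant exists; the module fragment below is
hypothetical, not part of this commit:

    static u16
    example_compress_none(const u8 *old, u16 oldsize, u8 *new, u16 newsize)
    {
            /* no compression: make the caller store the page raw */
            return 0;
    }

    static struct __dump_compress example_compression = {
            .compress_type  = DUMP_COMPRESS_NONE,   /* assumed constant */
            .compress_func  = example_compress_none,
            .compress_name  = "NONE",
    };

    /* dump_register_compression(&example_compression); */
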
diff --git a/drivers/dump/dump_i386.c b/drivers/dump/dump_i386.c
new file mode 100644 (file)
index 0000000..5a01e0f
--- /dev/null
@@ -0,0 +1,344 @@
+/*
+ * Architecture specific (i386) functions for Linux crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sgi.com)
+ *
+ * Copyright 1999 Silicon Graphics, Inc. All rights reserved.
+ *
+ * 2.3 kernel modifications by: Matt D. Robinson (yakker@turbolinux.com)
+ * Copyright 2000 TurboLinux, Inc.  All rights reserved.
+ * 
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * The hooks for dumping the kernel virtual memory to disk are in this
+ * file.  Any time a modification is made to the virtual memory mechanism,
+ * these routines must be changed to use the new mechanisms.
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+#include <linux/irq.h>
+
+#include <asm/processor.h>
+#include <asm/e820.h>
+#include <asm/hardirq.h>
+#include <asm/nmi.h>
+
+static __s32        saved_irq_count;   /* saved preempt_count() flags */
+
+static int
+alloc_dha_stack(void)
+{
+       int i;
+       void *ptr;
+       
+       if (dump_header_asm.dha_stack[0])
+               return 0;
+
+       ptr = vmalloc(THREAD_SIZE * num_online_cpus());
+       if (!ptr) {
+               printk("vmalloc for dha_stacks failed\n");
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < num_online_cpus(); i++) {
+               dump_header_asm.dha_stack[i] = (u32)((unsigned long)ptr +
+                               (i * THREAD_SIZE));
+       }
+       return 0;
+}
+
+static int
+free_dha_stack(void) 
+{
+       if (dump_header_asm.dha_stack[0]) {
+               vfree((void *)dump_header_asm.dha_stack[0]);    
+               dump_header_asm.dha_stack[0] = 0;
+       }
+       return 0;
+}
+
+
+void 
+__dump_save_regs(struct pt_regs *dest_regs, const struct pt_regs *regs)
+{
+       *dest_regs = *regs;
+
+       /* In the case of panic dumps, we collect regs on entry to panic,
+        * so we shouldn't 'fix' ss/esp here again. But it is hard to
+        * tell, just by looking at regs, whether ss/esp need fixing. We
+        * make this decision by looking at xss in regs. If we had a
+        * better means to determine that ss/esp are valid (some flag
+        * saying we are here due to a panic dump), we could use that
+        * instead of this kludge.
+        */
+       if (!user_mode(regs)) {
+               if ((0xffff & regs->xss) == __KERNEL_DS) 
+                       /* already fixed up */
+                       return;
+               dest_regs->esp = (unsigned long)&(regs->esp);
+               __asm__ __volatile__ ("movw %%ss, %%ax;"
+                       :"=a"(dest_regs->xss));
+       }
+}
+
+
+#ifdef CONFIG_SMP
+extern cpumask_t irq_affinity[];
+extern irq_desc_t irq_desc[];
+extern void dump_send_ipi(void);
+
+static int dump_expect_ipi[NR_CPUS];
+static atomic_t waiting_for_dump_ipi;
+static cpumask_t saved_affinity[NR_IRQS];
+
+extern void stop_this_cpu(void *); /* exported by i386 kernel */
+
+static int
+dump_nmi_callback(struct pt_regs *regs, int cpu) 
+{
+       if (!dump_expect_ipi[cpu])
+               return 0;
+
+       dump_expect_ipi[cpu] = 0;
+       
+       dump_save_this_cpu(regs);
+       atomic_dec(&waiting_for_dump_ipi);
+
+ level_changed:
+       switch (dump_silence_level) {
+       case DUMP_HARD_SPIN_CPUS:       /* Spin until dump is complete */
+               while (dump_oncpu) {
+                       barrier();      /* paranoia */
+                       if (dump_silence_level != DUMP_HARD_SPIN_CPUS)
+                               goto level_changed;
+
+                       cpu_relax();    /* kill time nicely */
+               }
+               break;
+
+       case DUMP_HALT_CPUS:            /* Execute halt */
+               stop_this_cpu(NULL);
+               break;
+               
+       case DUMP_SOFT_SPIN_CPUS:
+               /* Mark the task so it spins in schedule */
+               set_tsk_thread_flag(current, TIF_NEED_RESCHED);
+               break;
+       }
+
+       return 1;
+}
+
+/* save registers on other processors */
+void 
+__dump_save_other_cpus(void)
+{
+       int i, cpu = smp_processor_id();
+       int other_cpus = num_online_cpus()-1;
+       
+       if (other_cpus > 0) {
+               atomic_set(&waiting_for_dump_ipi, other_cpus);
+
+               for (i = 0; i < NR_CPUS; i++) {
+                       dump_expect_ipi[i] = (i != cpu && cpu_online(i));
+               }
+
+               /* short circuit normal NMI handling temporarily */
+               set_nmi_callback(dump_nmi_callback);
+               wmb();
+
+               dump_send_ipi();
+               /* Maybe we don't need to wait for the NMI to be processed:
+                  just write out the header at the end of dumping; if
+                  this IPI has not been processed by then, there probably
+                  is a problem and we just fail to capture the state of
+                  the other cpus. */
+               while(atomic_read(&waiting_for_dump_ipi) > 0) {
+                       cpu_relax();
+               }
+
+               unset_nmi_callback();
+       }
+}
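+
+/*
+ * Editor's summary of the rendezvous above: (1) mark every other
+ * online cpu as expecting a dump IPI, (2) install dump_nmi_callback as
+ * the temporary NMI handler, (3) send the NMI IPI, (4) spin until each
+ * target cpu has saved its context and decremented
+ * waiting_for_dump_ipi, then (5) restore normal NMI handling. The
+ * callback parks each cpu according to dump_silence_level.
+ */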
+
+/*
+ * Routine to save the old irq affinities and change affinities of all irqs to
+ * the dumping cpu.
+ */
+static void 
+set_irq_affinity(void)
+{
+       int i;
+       cpumask_t cpu = CPU_MASK_NONE;
+
+       cpu_set(smp_processor_id(), cpu);
+       memcpy(saved_affinity, irq_affinity, NR_IRQS * sizeof(cpumask_t));
+       for (i = 0; i < NR_IRQS; i++) {
+               if (irq_desc[i].handler == NULL)
+                       continue;
+               irq_affinity[i] = cpu;
+               if (irq_desc[i].handler->set_affinity != NULL)
+                       irq_desc[i].handler->set_affinity(i, irq_affinity[i]);
+       }
+}
+
+/*
+ * Restore old irq affinities.
+ */
+static void 
+reset_irq_affinity(void)
+{
+       int i;
+
+       memcpy(irq_affinity, saved_affinity, NR_IRQS * sizeof(cpumask_t));
+       for (i = 0; i < NR_IRQS; i++) {
+               if (irq_desc[i].handler == NULL)
+                       continue;
+               if (irq_desc[i].handler->set_affinity != NULL)
+                       irq_desc[i].handler->set_affinity(i, saved_affinity[i]);
+       }
+}
+
+#else /* !CONFIG_SMP */
+#define set_irq_affinity()     do { } while (0)
+#define reset_irq_affinity()   do { } while (0)
+#define save_other_cpu_states() do { } while (0)
+#endif /* !CONFIG_SMP */
+
+/* 
+ * Kludge - dump from interrupt context is unreliable (Fixme)
+ *
+ * We do this so that softirqs initiated for dump i/o 
+ * get processed and we don't hang while waiting for i/o
+ * to complete or in any irq synchronization attempt.
+ *
+ * This is not quite legal of course, as it has the side 
+ * effect of making all interrupts & softirqs triggered 
+ * while dump is in progress complete before currently 
+ * pending softirqs and the currently executing interrupt 
+ * code. 
+ */
+static inline void
+irq_bh_save(void)
+{
+       saved_irq_count = irq_count();
+       preempt_count() &= ~(HARDIRQ_MASK|SOFTIRQ_MASK);
+}
+
+static inline void
+irq_bh_restore(void)
+{
+       preempt_count() |= saved_irq_count;
+}
+
+/*
+ * Name: __dump_irq_enable
+ * Func: Reset system so interrupts are enabled.
+ *      This is used for dump methods that require interrupts.
+ *      Eventually, all methods will have interrupts disabled
+ *      and this code can be removed.
+ *
+ *     Change irq affinities
+ *     Re-enable interrupts
+ */
+int
+__dump_irq_enable(void)
+{
+       set_irq_affinity();
+       irq_bh_save();
+       local_irq_enable();
+       return 0;
+}
+
+/*
+ * Name: __dump_irq_restore
+ * Func: Resume the system state in an architecture-specific way.
+ */
+void 
+__dump_irq_restore(void)
+{
+       local_irq_disable();
+       reset_irq_affinity();
+       irq_bh_restore();
+}
+
+/*
+ * Name: __dump_configure_header()
+ * Func: Meant to fill in arch specific header fields except per-cpu state
+ * already captured via __dump_save_context for all CPUs.
+ */
+int
+__dump_configure_header(const struct pt_regs *regs)
+{
+       return (0);
+}
+
+/*
+ * Name: __dump_init()
+ * Func: Initialize the dumping routine process.
+ */
+void
+__dump_init(uint64_t local_memory_start)
+{
+       return;
+}
+
+/*
+ * Name: __dump_open()
+ * Func: Open the dump device (architecture specific).
+ */
+void
+__dump_open(void)
+{
+       alloc_dha_stack();
+}
+
+/*
+ * Name: __dump_cleanup()
+ * Func: Free any architecture specific data structures. This is called
+ *       when the dump module is being removed.
+ */
+void
+__dump_cleanup(void)
+{
+       free_dha_stack();
+}
+
+extern int pfn_is_ram(unsigned long);
+
+/*
+ * Name: __dump_page_valid()
+ * Func: Check if page is valid to dump.
+ */ 
+int 
+__dump_page_valid(unsigned long index)
+{
+       if (!pfn_valid(index))
+               return 0;
+
+       return pfn_is_ram(index);
+}
+
+/* 
+ * Name: manual_handle_crashdump()
+ * Func: Interface for the lkcd dump command. Calls dump_execute()
+ */
+int
+manual_handle_crashdump(void)
+{
+       struct pt_regs regs;
+       
+       get_current_regs(&regs);
+       dump_execute("manual", &regs);
+       return 0;
+}
diff --git a/drivers/dump/dump_memdev.c b/drivers/dump/dump_memdev.c
new file mode 100644 (file)
index 0000000..1cd700d
--- /dev/null
@@ -0,0 +1,640 @@
+/*
+ * Implements the dump driver interface for saving a dump in available
+ * memory areas. The saved pages may be written out to persistent storage  
+ * after a soft reboot.
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ *
+ * The approach of tracking pages containing saved dump using map pages 
+ * allocated as needed has been derived from the Mission Critical Linux 
+ * mcore dump implementation. 
+ *
+ * Credits and a big thanks for letting the lkcd project make use of 
+ * the excellent piece of work and also helping with clarifications 
+ * and tips along the way are due to:
+ *     Dave Winchell <winchell@mclx.com> (primary author of mcore)
+ *     Jeff Moyer <moyer@mclx.com>
+ *     Josh Huber <huber@mclx.com>
+ *
+ * For those familiar with the mcore code, the main differences worth
+ * noting here (besides the dump device abstraction) result from enabling 
+ * "high" memory pages (pages not permanently mapped in the kernel 
+ * address space) to be used for saving dump data (because of which a 
+ * simple virtual address based linked list cannot be used anymore for 
+ * managing free pages), an added level of indirection for faster 
+ * lookups during the post-boot stage, and the idea of pages being 
+ * made available as they get freed up while dump to memory progresses 
+ * rather than one time before starting the dump. The last point enables 
+ * a full memory snapshot to be saved starting with an initial set of 
+ * bootstrap pages given a good compression ratio. (See dump_overlay.c)
+ *
+ */
+
+/*
+ * -----------------MEMORY LAYOUT ------------------
+ * The memory space consists of a set of discontiguous pages, and
+ * discontiguous map pages as well, rooted in a chain of indirect
+ * map pages (also discontiguous). Except for the indirect maps 
+ * (which must be preallocated in advance), the rest of the pages 
+ * could be in high memory.
+ *
+ * root
+ *  |    ---------    --------        --------
+ *  -->  | .  . +|--->|  .  +|------->| . .  |       indirect 
+ *       --|--|---    ---|----        --|-|---      maps
+ *         |  |          |                     | |     
+ *    ------  ------   -------     ------ -------
+ *    | .  |  | .  |   | .  . |    | .  | |  . . |   maps 
+ *    --|---  --|---   --|--|--    --|--- ---|-|--
+ *     page    page    page page   page   page page  data
+ *                                                   pages
+ *
+ * Writes to the dump device happen sequentially in append mode.
+ * The main reason for the existence of the indirect map is
+ * to enable a quick way to lookup a specific logical offset in
+ * the saved data post-soft-boot, e.g. to writeout pages
+ * with more critical data first, even though such pages
+ * would have been compressed and copied last, being the lowest
+ * ranked candidates for reuse due to their criticality.
+ * (See dump_overlay.c)
+ */
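+
+/*
+ * Editor's worked example (assuming 4K pages and 32-bit longs, as on
+ * i386, with the defaults defined below): DUMP_MAP_SZ is 4096/4 = 1024
+ * entries, so one direct map page tracks 1024 data pages (4MB of dump
+ * data), and one indirect map page holds DUMP_IND_MAP_SZ = 1023 direct
+ * map pointers, i.e. roughly 4GB of dump data per indirect map page
+ * before the chain link in its last slot is followed.
+ */
+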
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+
+#define DUMP_MAP_SZ (PAGE_SIZE / sizeof(unsigned long)) /* direct map size */
+#define DUMP_IND_MAP_SZ        (DUMP_MAP_SZ - 1)  /* indirect map size */
+#define DUMP_NR_BOOTSTRAP      64  /* no of bootstrap pages */
+
+extern int dump_low_page(struct page *);
+
+/* check if the next entry crosses a page boundary */
+static inline int is_last_map_entry(unsigned long *map)
+{
+       unsigned long addr = (unsigned long)(map + 1);
+
+       return (!(addr & (PAGE_SIZE - 1)));
+}
+
+/* Todo: should have some validation checks */
+/* The last entry in the indirect map points to the next indirect map */
+/* Indirect maps are referred to directly by virtual address */
+static inline unsigned long *next_indirect_map(unsigned long *map)
+{
+       return (unsigned long *)map[DUMP_IND_MAP_SZ];
+}
+
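+#if 0
+/*
+ * Editor's illustrative sketch (not part of the patch): walking the
+ * chain of indirect maps with the helper above, e.g. to count how many
+ * indirect map pages are currently allocated. It relies only on
+ * indirect_map_root and the chain link in the last slot of each page.
+ */
+static unsigned long count_indirect_maps(struct dump_memdev *dev)
+{
+       unsigned long *map = (unsigned long *)dev->indirect_map_root;
+       unsigned long n = 0;
+
+       while (map) {
+               n++;
+               map = next_indirect_map(map);
+       }
+       return n;
+}
+#endif
+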
+#ifdef CONFIG_CRASH_DUMP_SOFTBOOT
+/* Called during early bootup - fixme: make this __init */
+void dump_early_reserve_map(struct dump_memdev *dev)
+{
+       unsigned long *map1, *map2;
+       loff_t off = 0, last = dev->last_used_offset >> PAGE_SHIFT;
+       int i, j;
+       
+       printk("Reserve bootmap space holding previous dump of %lld pages\n",
+                       last);
+       map1 = (unsigned long *)dev->indirect_map_root;
+
+       while (map1 && (off < last)) {
+               reserve_bootmem(virt_to_phys((void *)map1), PAGE_SIZE);
+               for (i=0;  (i < DUMP_MAP_SZ - 1) && map1[i] && (off < last); 
+                       i++, off += DUMP_MAP_SZ) {
+                       pr_debug("indirect map[%d] = 0x%lx\n", i, map1[i]);
+                       if (map1[i] >= max_low_pfn)
+                               continue;
+                       reserve_bootmem(map1[i] << PAGE_SHIFT, PAGE_SIZE);
+                       map2 = pfn_to_kaddr(map1[i]);
+                       for (j = 0 ; (j < DUMP_MAP_SZ) && map2[j] && 
+                               (off + j < last); j++) {
+                               pr_debug("\t map[%d][%d] = 0x%lx\n", i, j, 
+                                       map2[j]);
+                               if (map2[j] < max_low_pfn) {
+                                       reserve_bootmem(map2[j] << PAGE_SHIFT,
+                                               PAGE_SIZE);
+                               }
+                       }
+               }
+               map1 = next_indirect_map(map1);
+       }
+       dev->nr_free = 0; /* these pages don't belong to this boot */
+}
+#endif
+
+/* mark dump pages so that they aren't used by this kernel */
+void dump_mark_map(struct dump_memdev *dev)
+{
+       unsigned long *map1, *map2;
+       loff_t off = 0, last = dev->last_used_offset >> PAGE_SHIFT;
+       struct page *page;
+       int i, j;
+       
+       printk("Dump: marking pages in use by previous dump\n");
+       map1 = (unsigned long *)dev->indirect_map_root;
+
+       while (map1 && (off < last)) {
+               page = virt_to_page(map1);      
+               set_page_count(page, 1);
+               for (i=0;  (i < DUMP_MAP_SZ - 1) && map1[i] && (off < last); 
+                       i++, off += DUMP_MAP_SZ) {
+                       pr_debug("indirect map[%d] = 0x%lx\n", i, map1[i]);
+                       page = pfn_to_page(map1[i]);
+                       set_page_count(page, 1);
+                       map2 = kmap_atomic(page, KM_DUMP);
+                       for (j = 0 ; (j < DUMP_MAP_SZ) && map2[j] && 
+                               (off + j < last); j++) {
+                               pr_debug("\t map[%d][%d] = 0x%lx\n", i, j, 
+                                       map2[j]);
+                               page = pfn_to_page(map2[j]);
+                               set_page_count(page, 1);
+                       }
+               }
+               map1 = next_indirect_map(map1);
+       }
+}
+       
+
+/* 
+ * Given a logical offset into the mem device, look up the 
+ * corresponding page. 
+ *     loc is specified in units of pages. 
+ * Note: curr_map and curr_map_offset are updated only when the 
+ * lookup succeeds. 
+ */
+struct page *dump_mem_lookup(struct dump_memdev *dump_mdev, unsigned long loc)
+{
+       unsigned long *map;
+       unsigned long i, index = loc / DUMP_MAP_SZ;
+       struct page *page = NULL;
+       unsigned long curr_pfn, curr_map, *curr_map_ptr = NULL;
+
+       map = (unsigned long *)dump_mdev->indirect_map_root;
+       if (!map)
+               return NULL;
+
+       if (loc > dump_mdev->last_offset >> PAGE_SHIFT)
+               return NULL;
+
+       /* 
+        * first locate the right indirect map 
+        * in the chain of indirect maps 
+        */
+       for (i = 0; i + DUMP_IND_MAP_SZ < index ; i += DUMP_IND_MAP_SZ) {
+               if (!(map = next_indirect_map(map)))
+                       return NULL;
+       }
+       /* then the right direct map */
+       /* map entries are referred to by page index */
+       if ((curr_map = map[index - i])) {
+               page = pfn_to_page(curr_map);
+               /* update the current traversal index */
+               /* dump_mdev->curr_map = &map[index - i];*/
+               curr_map_ptr = &map[index - i];
+       }
+
+       if (page)
+               map = kmap_atomic(page, KM_DUMP);
+       else 
+               return NULL;
+
+       /* and finally the right entry therein */
+       /* data pages are referred to by page index */
+       i = index * DUMP_MAP_SZ;
+       if ((curr_pfn = map[loc - i])) {
+               page = pfn_to_page(curr_pfn);
+               dump_mdev->curr_map = curr_map_ptr;
+               dump_mdev->curr_map_offset = loc - i;
+               dump_mdev->ddev.curr_offset = loc << PAGE_SHIFT;
+       } else {
+               page = NULL;
+       }
+       kunmap_atomic(map, KM_DUMP);
+
+       return page;
+}
+                       
+/* 
+ * Retrieves a pointer to the next page in the dump device 
+ * Used during the lookup pass post-soft-reboot 
+ */
+struct page *dump_mem_next_page(struct dump_memdev *dev)
+{
+       unsigned long i; 
+       unsigned long *map;     
+       struct page *page = NULL;
+
+       if (dev->ddev.curr_offset + PAGE_SIZE >= dev->last_offset) {
+               return NULL;
+       }
+
+       if ((i = (unsigned long)(++dev->curr_map_offset)) >= DUMP_MAP_SZ) {
+               /* move to next map */  
+               if (is_last_map_entry(++dev->curr_map)) {
+                       /* move to the next indirect map page */
+                       printk("dump_mem_next_page: go to next indirect map\n");
+                       dev->curr_map = (unsigned long *)*dev->curr_map;
+                       if (!dev->curr_map)
+                               return NULL;
+               }
+               i = dev->curr_map_offset = 0;
+               pr_debug("dump_mem_next_page: next map 0x%lx, entry 0x%lx\n",
+                               dev->curr_map, *dev->curr_map);
+
+       }
+       
+       if (*dev->curr_map) {
+               map = kmap_atomic(pfn_to_page(*dev->curr_map), KM_DUMP);
+               if (map[i])
+                       page = pfn_to_page(map[i]);
+               kunmap_atomic(map, KM_DUMP);
+               dev->ddev.curr_offset += PAGE_SIZE;
+       }
+
+       return page;
+}
+
+/* Copied from dump_filters.c */
+static inline int kernel_page(struct page *p)
+{
+       /* FIXME: Need to exclude hugetlb pages. Clue: reserved but inuse */
+       return (PageReserved(p) && !PageInuse(p)) || (!PageLRU(p) && PageInuse(p));
+}
+
+static inline int user_page(struct page *p)
+{
+       return PageInuse(p) && (!PageReserved(p) && PageLRU(p));
+}
+
+int dump_reused_by_boot(struct page *page)
+{
+       /* Todo
+        * Checks:
+        * if PageReserved 
+        * if < __end + bootmem_bootmap_pages for this boot + allowance 
+        * if overwritten by initrd (how to check ?)
+        * Also, add more checks in early boot code
+        * e.g. bootmem bootmap alloc verify not overwriting dump, and if
+        * so then realloc or move the dump pages out accordingly.
+        */
+
+       /* Temporary proof of concept hack, avoid overwriting kern pages */
+
+       return (kernel_page(page) || dump_low_page(page) || user_page(page));
+}
+
+
+/* Uses the free page passed in to expand available space */
+int dump_mem_add_space(struct dump_memdev *dev, struct page *page)
+{
+       struct page *map_page;
+       unsigned long *map;     
+       unsigned long i; 
+
+       if (!dev->curr_map)
+               return -ENOMEM; /* must've exhausted indirect map */
+
+       if (!*dev->curr_map || dev->curr_map_offset >= DUMP_MAP_SZ) {
+               /* add map space */
+               *dev->curr_map = page_to_pfn(page);
+               dev->curr_map_offset = 0;
+               return 0;
+       }
+
+       /* add data space */
+       i = dev->curr_map_offset;
+       map_page = pfn_to_page(*dev->curr_map);
+       map = (unsigned long *)kmap_atomic(map_page, KM_DUMP);
+       map[i] = page_to_pfn(page);
+       kunmap_atomic(map, KM_DUMP);
+       dev->curr_map_offset = ++i;
+       dev->last_offset += PAGE_SIZE;
+       if (i >= DUMP_MAP_SZ) {
+               /* move to next map */
+               if (is_last_map_entry(++dev->curr_map)) {
+                       /* move to the next indirect map page */
+                       pr_debug("dump_mem_add_space: using next"
+                       "indirect map\n");
+                       dev->curr_map = (unsigned long *)*dev->curr_map;
+               }
+       }               
+       return 0;
+}
+
+
+/* Caution: making a dest page invalidates existing contents of the page */
+int dump_check_and_free_page(struct dump_memdev *dev, struct page *page)
+{
+       int err = 0;
+
+       /* 
+        * the page can be used as a destination only if we are sure
+        * it won't get overwritten by the soft-boot, and is not
+        * critical for us right now.
+        */
+       if (dump_reused_by_boot(page))
+               return 0;
+
+       if ((err = dump_mem_add_space(dev, page))) {
+               printk("Warning: Unable to extend memdev space. Err %d\n",
+                       err);
+               return 0;
+       }
+
+       dev->nr_free++;
+       return 1;
+}
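+
+#if 0
+/*
+ * Editor's sketch of a possible call site (hypothetical, not from this
+ * patch): while a dump to memory is in progress, the page-free path
+ * could offer each freed page to the mem dump device instead of
+ * returning it to the free lists. 'dump_to_mem_in_progress' is an
+ * invented flag used purely for illustration.
+ */
+       if (dump_to_mem_in_progress &&
+           dump_check_and_free_page(dump_memdev, page))
+               return;         /* page absorbed as dump space */
+#endif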
+
+
+/* Set up the initial maps and bootstrap space  */
+/* Must be called only after any previous dump is written out */
+int dump_mem_open(struct dump_dev *dev, unsigned long devid)
+{
+       struct dump_memdev *dump_mdev = DUMP_MDEV(dev);
+       unsigned long nr_maps, *map, *prev_map = &dump_mdev->indirect_map_root;
+       void *addr;
+       struct page *page;
+       unsigned long i = 0;
+       int err = 0;
+
+       /* Todo: sanity check for unwritten previous dump */
+
+       /* allocate pages for indirect map (non highmem area) */
+       nr_maps = num_physpages / DUMP_MAP_SZ; /* maps to cover entire mem */
+       for (i = 0; i < nr_maps; i += DUMP_IND_MAP_SZ) {
+               if (!(map = (unsigned long *)dump_alloc_mem(PAGE_SIZE))) {
+                       printk("Unable to alloc indirect map %ld\n", 
+                               i / DUMP_IND_MAP_SZ);
+                       return -ENOMEM;
+               }
+               clear_page(map);
+               *prev_map = (unsigned long)map;
+               prev_map = &map[DUMP_IND_MAP_SZ];
+       }
+               
+       dump_mdev->curr_map = (unsigned long *)dump_mdev->indirect_map_root;
+       dump_mdev->curr_map_offset = 0; 
+
+       /* 
+        * allocate a few bootstrap pages: at least 1 map and 1 data page
+        * plus enough to save the dump header
+        */
+       i = 0;
+       do {
+               if (!(addr = dump_alloc_mem(PAGE_SIZE))) {
+                       printk("Unable to alloc bootstrap page %ld\n", i);
+                       return -ENOMEM;
+               }
+
+               page = virt_to_page(addr);
+               if (dump_low_page(page)) {
+                       dump_free_mem(addr);
+                       continue;
+               }
+
+               if ((err = dump_mem_add_space(dump_mdev, page))) {
+                       printk("Warning: Unable to extend memdev "
+                                       "space. Err %d\n", err);
+                       dump_free_mem(addr);
+                       continue;
+               }
+               i++;
+       } while (i < DUMP_NR_BOOTSTRAP);
+
+       printk("dump memdev init: %ld maps, %ld bootstrap pgs, %ld free pgs\n",
+               nr_maps, i, dump_mdev->last_offset >> PAGE_SHIFT);
+       
+       dump_mdev->last_bs_offset = dump_mdev->last_offset;
+
+       return 0;
+}
+
+/* Releases all pre-alloc'd pages */
+int dump_mem_release(struct dump_dev *dev)
+{
+       struct dump_memdev *dump_mdev = DUMP_MDEV(dev);
+       struct page *page, *map_page;
+       unsigned long *map, *prev_map;
+       void *addr;
+       int i;
+
+       if (!dump_mdev->nr_free)
+               return 0;
+
+       pr_debug("dump_mem_release\n");
+       page = dump_mem_lookup(dump_mdev, 0);
+       for (i = 0; page && (i < DUMP_NR_BOOTSTRAP - 1); i++) {
+               if (PageHighMem(page))
+                       break;
+               addr = page_address(page);
+               if (!addr) {
+                       printk("page_address(%p) = NULL\n", page);
+                       break;
+               }
+               pr_debug("Freeing page at 0x%lx\n", addr); 
+               dump_free_mem(addr);
+               if (dump_mdev->curr_map_offset >= DUMP_MAP_SZ - 1) {
+                       map_page = pfn_to_page(*dump_mdev->curr_map);
+                       if (PageHighMem(map_page))
+                               break;
+                       page = dump_mem_next_page(dump_mdev);
+                       addr = page_address(map_page);
+                       if (!addr) {
+                               printk("page_address(%p) = NULL\n", 
+                                       map_page);
+                               break;
+                       }
+                       pr_debug("Freeing map page at 0x%lx\n", addr);
+                       dump_free_mem(addr);
+                       i++;
+               } else {
+                       page = dump_mem_next_page(dump_mdev);
+               }
+       }
+
+       /* now for the last used bootstrap page used as a map page */
+       if ((i < DUMP_NR_BOOTSTRAP) && (*dump_mdev->curr_map)) {
+               map_page = pfn_to_page(*dump_mdev->curr_map);
+               if ((map_page) && !PageHighMem(map_page)) {
+                       addr = page_address(map_page);
+                       if (!addr) {
+                               printk("page_address(%p) = NULL\n", map_page);
+                       } else {
+                               pr_debug("Freeing map page at 0x%lx\n", addr);
+                               dump_free_mem(addr);
+                               i++;
+                       }
+               }
+       }
+
+       printk("Freed %d bootstrap pages\n", i);
+
+       /* free the indirect maps */
+       map = (unsigned long *)dump_mdev->indirect_map_root;
+
+       i = 0;
+       while (map) {
+               prev_map = map;
+               map = next_indirect_map(map);
+               dump_free_mem(prev_map);
+               i++;
+       }
+
+       printk("Freed %d indirect map(s)\n", i);
+
+       /* Reset the indirect map */
+       dump_mdev->indirect_map_root = 0;
+       dump_mdev->curr_map = NULL;
+       dump_mdev->curr_map_offset = 0;
+
+       /* Reset the free list */
+       dump_mdev->nr_free = 0;
+
+       dump_mdev->last_offset = dump_mdev->ddev.curr_offset = 0;
+       dump_mdev->last_used_offset = 0;
+       return 0;
+}
+
+/*
+ * Long term:
+ * It is critical for this to be very strict. Cannot afford
+ * to have anything running and accessing memory while we overwrite 
+ * memory (potential risk of data corruption).
+ * If in doubt (e.g if a cpu is hung and not responding) just give
+ * up and refuse to proceed with this scheme.
+ *
+ * Note: I/O will only happen after soft-boot/switchover, so we can 
+ * safely disable interrupts and force stop other CPUs if this is
+ * going to be a disruptive dump, no matter what they
+ * are in the middle of.
+ */
+/* 
+ * ATM most of this is already taken care of in the nmi handler. 
+ * We may halt the cpus right away if we know this is going to be disruptive. 
+ * For now, since we've limited ourselves to overwriting free pages, we
+ * aren't doing much here. Eventually, we'd have to wait to make sure other
+ * cpus aren't using memory we could be overwriting.
+ */
+int dump_mem_silence(struct dump_dev *dev)
+{
+       struct dump_memdev *dump_mdev = DUMP_MDEV(dev);
+
+       if (dump_mdev->last_offset > dump_mdev->last_bs_offset) {
+               /* prefer to run lkcd config & start with a clean slate */
+               return -EEXIST;
+       }
+       return 0;
+}
+
+extern int dump_overlay_resume(void);
+
+/* Trigger the next stage of dumping */
+int dump_mem_resume(struct dump_dev *dev)
+{
+       dump_overlay_resume(); 
+       return 0;
+}
+
+/* 
+ * Allocate mem dev pages as required and copy buffer contents into them.
+ * Fails if no free pages are available.
+ * Keeping it simple and limited for starters (can modify this over time).
+ *  Does not handle holes or a sparse layout.
+ *  Data must be in multiples of PAGE_SIZE.
+ */
+int dump_mem_write(struct dump_dev *dev, void *buf, unsigned long len)
+{
+       struct dump_memdev *dump_mdev = DUMP_MDEV(dev);
+       struct page *page;
+       unsigned long n = 0;
+       void *addr;
+       unsigned long *saved_curr_map, saved_map_offset;
+       int ret = 0;
+
+       pr_debug("dump_mem_write: offset 0x%llx, size %ld\n", 
+               dev->curr_offset, len);
+
+       if (dev->curr_offset + len > dump_mdev->last_offset)  {
+               printk("Out of space to write\n");
+               return -ENOSPC;
+       }
+       
+       if ((len & (PAGE_SIZE - 1)) || (dev->curr_offset & (PAGE_SIZE - 1)))
+               return -EINVAL; /* not aligned in units of page size */
+
+       saved_curr_map = dump_mdev->curr_map;
+       saved_map_offset = dump_mdev->curr_map_offset;
+       page = dump_mem_lookup(dump_mdev, dev->curr_offset >> PAGE_SHIFT);
+
+       for (n = len; (n > 0) && page; n -= PAGE_SIZE, buf += PAGE_SIZE) {
+               addr = kmap_atomic(page, KM_DUMP);
+               /* memset(addr, 'x', PAGE_SIZE); */
+               memcpy(addr, buf, PAGE_SIZE);
+               kunmap_atomic(addr, KM_DUMP);
+               /* dev->curr_offset += PAGE_SIZE; */
+               page = dump_mem_next_page(dump_mdev);
+       }
+
+       dump_mdev->curr_map = saved_curr_map;
+       dump_mdev->curr_map_offset = saved_map_offset;
+
+       if (dump_mdev->last_used_offset < dev->curr_offset)
+               dump_mdev->last_used_offset = dev->curr_offset;
+
+       return (len - n) ? (len - n) : ret;
+}
+
+/* dummy - always ready */
+int dump_mem_ready(struct dump_dev *dev, void *buf)
+{
+       return 0;
+}
+
+/* 
+ * Should check for availability of space to write up to the offset; 
+ * affects only the curr_offset, last_offset untouched. 
+ * Keep it simple: only allow multiples of PAGE_SIZE for now. 
+ */
+int dump_mem_seek(struct dump_dev *dev, loff_t offset)
+{
+       struct dump_memdev *dump_mdev = DUMP_MDEV(dev);
+
+       if (offset & (PAGE_SIZE - 1))
+               return -EINVAL; /* allow page size units only for now */
+       
+       /* Are we exceeding available space ? */
+       if (offset > dump_mdev->last_offset) {
+               printk("dump_mem_seek failed for offset 0x%llx\n",
+                       offset);
+               return -ENOSPC; 
+       }
+
+       dump_mdev->ddev.curr_offset = offset;
+       return 0;
+}
+
+struct dump_dev_ops dump_memdev_ops = {
+       .open           = dump_mem_open,
+       .release        = dump_mem_release,
+       .silence        = dump_mem_silence,
+       .resume         = dump_mem_resume,
+       .seek           = dump_mem_seek,
+       .write          = dump_mem_write,
+       .read           = NULL, /* not implemented at the moment */
+       .ready          = dump_mem_ready
+};
+
+static struct dump_memdev default_dump_memdev = {
+       .ddev = {.type_name = "memdev", .ops = &dump_memdev_ops,
+                .device_id = 0x14}
+       /* assume the rest of the fields are zeroed by default */
+};     
+       
+/* may be overwritten if a previous dump exists */
+struct dump_memdev *dump_memdev = &default_dump_memdev;
+
diff --git a/drivers/dump/dump_methods.h b/drivers/dump/dump_methods.h
new file mode 100644 (file)
index 0000000..d2e1f7c
--- /dev/null
@@ -0,0 +1,349 @@
+/*
+ * Generic interfaces for flexible system dump 
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya (suparna@in.ibm.com)
+ *
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#ifndef _LINUX_DUMP_METHODS_H
+#define _LINUX_DUMP_METHODS_H
+
+/*
+ * Inspired by Matt Robinson's suggestion of introducing dump 
+ * methods as a way to enable different crash dump facilities to 
+ * coexist where each employs its own scheme or dumping policy.
+ *
+ * The code here creates a framework for flexible dump by defining 
+ * a set of methods and providing associated helpers that differentiate
+ * between the underlying mechanism (how to dump), overall scheme 
+ * (sequencing of stages and data dumped and associated quiescing), 
+ * output format (what the dump output looks like), target type 
+ * (where to save the dump; see dumpdev.h), and selection policy 
+ * (state/data to dump).
+ * 
+ * These sets of interfaces can be mixed and matched to build a 
+ * dumper suitable for a given situation, allowing for 
+ * flexibility as well as an appropriate degree of code reuse.
+ * For example, all features and options of lkcd (including
+ * granular selective dumping in the near future) should be
+ * available even when, say, the 2-stage soft-boot based mechanism 
+ * is used for taking disruptive dumps.
+ *
+ * Todo: Additionally, modules or drivers may supply their own
+ * custom dumpers which extend the dump with module-specific
+ * information or hardware state, and can even tweak the
+ * mechanism when it comes to saving state relevant to
+ * them.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/dumpdev.h>
+
+#define MAX_PASSES     6
+#define MAX_DEVS       4
+
+
+/* To customise selection of pages to be dumped in a given pass/group */
+struct dump_data_filter {
+       char name[32];
+       int (*selector)(int, unsigned long, unsigned long);
+       ulong level_mask; /* dump level(s) for which this filter applies */
+       loff_t start[MAX_NUMNODES], end[MAX_NUMNODES]; /* location range applicable */
+       ulong num_mbanks;  /* Number of memory banks. Greater than one for discontig memory (NUMA) */
+};
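+
+#if 0
+/*
+ * Editor's illustrative sketch (the real table lives in
+ * dump_filters.c): a filter entry selecting kernel pages for dump
+ * levels that include DUMP_MASK_KERN. 'dump_kernel_page' is a
+ * hypothetical selector matching the function-pointer signature above.
+ */
+static struct dump_data_filter example_filter = {
+       .name           = "kern",
+       .selector       = dump_kernel_page,
+       .level_mask     = DUMP_MASK_KERN,
+};
+#endif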
+
+
+/* 
+ * Determined by the kind of dump mechanism and appropriate 
+ * overall scheme 
+ */ 
+struct dump_scheme_ops {
+       /* sets aside memory, inits data structures etc */
+       int (*configure)(unsigned long devid); 
+       /* releases  resources */
+       int (*unconfigure)(void); 
+
+       /* ordering of passes, invoking iterator */
+       int (*sequencer)(void); 
+        /* iterates over system data, selects and acts on data to dump */
+       int (*iterator)(int, int (*)(unsigned long, unsigned long), 
+               struct dump_data_filter *); 
+        /* action when data is selected for dump */
+       int (*save_data)(unsigned long, unsigned long); 
+        /* action when data is to be excluded from dump */
+       int (*skip_data)(unsigned long, unsigned long); 
+       /* policies for space, multiple dump devices etc */
+       int (*write_buffer)(void *, unsigned long); 
+};
+
+struct dump_scheme {
+       /* the name serves as an anchor to locate the scheme after reboot */
+       char name[32]; 
+       struct dump_scheme_ops *ops;
+       struct list_head list;
+};
+
+/* Quiescing/Silence levels (controls IPI callback behaviour) */
+extern enum dump_silence_levels {
+       DUMP_SOFT_SPIN_CPUS     = 1,
+       DUMP_HARD_SPIN_CPUS     = 2,
+       DUMP_HALT_CPUS          = 3,
+} dump_silence_level;
+
+/* determined by the dump (file) format */
+struct dump_fmt_ops {
+       /* build header */
+       int (*configure_header)(const char *, const struct pt_regs *); 
+       int (*update_header)(void); /* update header and write it out */
+       /* save curr context  */
+       void (*save_context)(int, const struct pt_regs *, 
+               struct task_struct *); 
+       /* typically called by the save_data action */
+       /* add formatted data to the dump buffer */
+       int (*add_data)(unsigned long, unsigned long); 
+       int (*update_end_marker)(void);
+};
+
+struct dump_fmt {
+       unsigned long magic; 
+       char name[32];  /* lcrash, crash, elf-core etc */
+       struct dump_fmt_ops *ops;
+       struct list_head list;
+};
+
+/* 
+ * Modules will be able to add their own data capture schemes by 
+ * registering their own dumpers. Typically they would use the 
+ * primary dumper as a template and tune it with their routines.
+ * Still Todo.
+ */
+
+/* The combined dumper profile (mechanism, scheme, dev, fmt) */
+struct dumper {
+       char name[32]; /* singlestage, overlay (stg1), passthru(stg2), pull */
+       struct dump_scheme *scheme;
+       struct dump_fmt *fmt;
+       struct __dump_compress *compress;
+       struct dump_data_filter *filter;
+       struct dump_dev *dev; 
+       /* state valid only for active dumper(s) - per instance */
+       /* run time state/context */
+       int curr_pass;
+       unsigned long count;
+       loff_t curr_offset; /* current logical offset into dump device */
+       loff_t curr_loc; /* current memory location */
+       void *curr_buf; /* current position in the dump buffer */
+       void *dump_buf; /* starting addr of dump buffer */
+       int header_dirty; /* whether the header needs to be written out */
+       int header_len; 
+       struct list_head dumper_list; /* links to other dumpers */
+};     
+
+/* Starting point to get to the current configured state */
+struct dump_config {
+       ulong level;
+       ulong flags;
+       struct dumper *dumper;
+       unsigned long dump_device;
+       unsigned long dump_addr; /* relevant only for in-memory dumps */
+       struct list_head dump_dev_list;
+};     
+
+extern struct dump_config dump_config;
+
+/* Used to save the dump config across a reboot for 2-stage dumps: 
+ * 
+ * Note: The scheme, format, compression and device type should be 
+ * registered at bootup, for this config to be sharable across soft-boot. 
+ * The function addresses could have changed and become invalid, and
+ * need to be set up again.
+ */
+struct dump_config_block {
+       u64 magic; /* for a quick sanity check after reboot */
+       struct dump_memdev memdev; /* handle to dump stored in memory */
+       struct dump_config config;
+       struct dumper dumper;
+       struct dump_scheme scheme;
+       struct dump_fmt fmt;
+       struct __dump_compress compress;
+       struct dump_data_filter filter_table[MAX_PASSES];
+       struct dump_anydev dev[MAX_DEVS]; /* target dump device */
+};
+
+
+/* Wrappers that invoke the methods for the current (active) dumper */
+
+/* Scheme operations */
+
+static inline int dump_sequencer(void)
+{
+       return dump_config.dumper->scheme->ops->sequencer();
+}
+
+static inline int dump_iterator(int pass, int (*action)(unsigned long, 
+       unsigned long), struct dump_data_filter *filter)
+{
+       return dump_config.dumper->scheme->ops->iterator(pass, action, filter);
+}
+
+#define dump_save_data dump_config.dumper->scheme->ops->save_data
+#define dump_skip_data dump_config.dumper->scheme->ops->skip_data
+
+static inline int dump_write_buffer(void *buf, unsigned long len)
+{
+       return dump_config.dumper->scheme->ops->write_buffer(buf, len);
+}
+
+static inline int dump_configure(unsigned long devid)
+{
+       return dump_config.dumper->scheme->ops->configure(devid);
+}
+
+static inline int dump_unconfigure(void)
+{
+       return dump_config.dumper->scheme->ops->unconfigure();
+}
+
+/* Format operations */
+
+static inline int dump_configure_header(const char *panic_str, 
+       const struct pt_regs *regs)
+{
+       return dump_config.dumper->fmt->ops->configure_header(panic_str, regs);
+}
+
+static inline void dump_save_context(int cpu, const struct pt_regs *regs, 
+               struct task_struct *tsk)
+{
+       dump_config.dumper->fmt->ops->save_context(cpu, regs, tsk);
+}
+
+static inline int dump_save_this_cpu(const struct pt_regs *regs)
+{
+       int cpu = smp_processor_id();
+
+       dump_save_context(cpu, regs, current);
+       return 1;
+}
+
+static inline int dump_update_header(void)
+{
+       return dump_config.dumper->fmt->ops->update_header();
+}
+
+static inline int dump_update_end_marker(void)
+{
+       return dump_config.dumper->fmt->ops->update_end_marker();
+}
+
+static inline int dump_add_data(unsigned long loc, unsigned long sz)
+{
+       return dump_config.dumper->fmt->ops->add_data(loc, sz);
+}
+
+/* Compression operation */
+static inline int dump_compress_data(char *src, int slen, char *dst)
+{
+       return dump_config.dumper->compress->compress_func(src, slen, 
+               dst, DUMP_DPC_PAGE_SIZE);
+}
+
+
+/* Prototypes of some default implementations of dump methods */
+
+extern struct __dump_compress dump_none_compression;
+
+/* Default scheme methods (dump_scheme.c) */
+
+extern int dump_generic_sequencer(void);
+extern int dump_page_iterator(int pass, int (*action)(unsigned long, unsigned
+       long), struct dump_data_filter *filter);
+extern int dump_generic_save_data(unsigned long loc, unsigned long sz);
+extern int dump_generic_skip_data(unsigned long loc, unsigned long sz);
+extern int dump_generic_write_buffer(void *buf, unsigned long len);
+extern int dump_generic_configure(unsigned long);
+extern int dump_generic_unconfigure(void);
+
+/* Default scheme template */
+extern struct dump_scheme dump_scheme_singlestage;
+
+/* Default dump format methods */
+
+extern int dump_lcrash_configure_header(const char *panic_str, 
+       const struct pt_regs *regs);
+extern void dump_lcrash_save_context(int  cpu, const struct pt_regs *regs, 
+       struct task_struct *tsk);
+extern int dump_generic_update_header(void);
+extern int dump_lcrash_add_data(unsigned long loc, unsigned long sz);
+extern int dump_lcrash_update_end_marker(void);
+
+/* Default format (lcrash) template */
+extern struct dump_fmt dump_fmt_lcrash;
+
+/* Default dump selection filter table */
+
+/* 
+ * Entries are listed in order of importance and correspond to passes.
+ * The last entry (with a level_mask of zero) typically reflects data that 
+ * won't be dumped -- this may, for example, be used to identify data 
+ * that will certainly be skipped, so the corresponding memory areas can be 
+ * utilized as scratch space.
+ */   
+extern struct dump_data_filter dump_filter_table[];
+
+/* Some pre-defined dumpers */
+extern struct dumper dumper_singlestage;
+extern struct dumper dumper_stage1;
+extern struct dumper dumper_stage2;
+
+/* These are temporary */
+#define DUMP_MASK_HEADER       DUMP_LEVEL_HEADER
+#define DUMP_MASK_KERN         DUMP_LEVEL_KERN
+#define DUMP_MASK_USED         DUMP_LEVEL_USED
+#define DUMP_MASK_UNUSED       DUMP_LEVEL_ALL_RAM
+#define DUMP_MASK_REST         0 /* dummy for now */
+
+/* Helpers - move these to dump.h later ? */
+
+int dump_generic_execute(const char *panic_str, const struct pt_regs *regs);
+extern int dump_ll_write(void *buf, unsigned long len); 
+int dump_check_and_free_page(struct dump_memdev *dev, struct page *page);
+
+static inline void dumper_reset(void)
+{
+       dump_config.dumper->curr_buf = dump_config.dumper->dump_buf;
+       dump_config.dumper->curr_loc = 0;
+       dump_config.dumper->curr_offset = 0;
+       dump_config.dumper->count = 0;
+       dump_config.dumper->curr_pass = 0;
+}
+
+/* 
+ * May later be moulded to perform boot-time allocations so we can dump 
+ * earlier during bootup 
+ */
+static inline void *dump_alloc_mem(unsigned long size)
+{
+       return kmalloc(size, GFP_KERNEL);
+}
+
+static inline void dump_free_mem(void *buf)
+{
+       struct page *page;
+
+       /* ignore reserved pages (e.g. post soft boot stage) */
+       if (buf && (page = virt_to_page(buf))) {
+               if (PageReserved(page))
+                       return;
+       }
+
+       kfree(buf);
+}
+
+
+#endif /*  _LINUX_DUMP_METHODS_H */
diff --git a/drivers/dump/dump_netdev.c b/drivers/dump/dump_netdev.c
new file mode 100644 (file)
index 0000000..1feb6da
--- /dev/null
@@ -0,0 +1,867 @@
+/*
+ * Implements the dump driver interface for saving a dump via network
+ * interface. 
+ *
+ * Some of this code has been taken/adapted from Ingo Molnar's netconsole
+ * code. The LKCD team expresses its thanks to Ingo.
+ *
+ * Started: June 2002 - Mohamed Abbas <mohamed.abbas@intel.com>
+ *     Adapted netconsole code to implement LKCD dump over the network.
+ *
+ * Nov 2002 - Bharata B. Rao <bharata@in.ibm.com>
+ *     Innumerable code cleanups, simplification and some fixes.
+ *     Netdump configuration done by ioctl instead of using module parameters.
+ *
+ * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ *  This code is released under version 2 of the GNU GPL.
+ */
+
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/reboot.h>
+#include <linux/module.h>
+#include <linux/dump.h>
+#include <linux/dump_netdev.h>
+#include <linux/percpu.h>
+
+#include <asm/unaligned.h>
+
+static int startup_handshake;
+static int page_counter;
+static struct net_device *dump_ndev;
+static struct in_device *dump_in_dev;
+static u16 source_port, target_port;
+static u32 source_ip, target_ip;
+static unsigned char daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ;
+static spinlock_t dump_skb_lock = SPIN_LOCK_UNLOCKED;
+static int dump_nr_skbs;
+static struct sk_buff *dump_skb;
+static unsigned long flags_global;
+static int netdump_in_progress;
+static char device_name[IFNAMSIZ];
+
+/*
+ * security depends on the trusted path between the netconsole
+ * server and netconsole client, since none of the packets are
+ * encrypted. The random magic number protects the protocol
+ * against spoofing.
+ */
+static u64 dump_magic;
+
+#define MAX_UDP_CHUNK 1460
+#define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN)
+
+/*
+ * We maintain a small pool of fully-sized skbs,
+ * to make sure the message gets out even in
+ * extreme OOM situations.
+ */
+#define DUMP_MAX_SKBS 32
+
+#define MAX_SKB_SIZE \
+               (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
+                               sizeof(struct iphdr) + sizeof(struct ethhdr))
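+
+/*
+ * Editor's worked sizes: with the values above, MAX_SKB_SIZE comes to
+ * 1460 + 8 (udphdr) + 20 (iphdr) + 14 (ethhdr) = 1502 bytes of
+ * allocation, while MAX_PRINT_CHUNK caps the payload so the resulting
+ * IP datagram stays within a standard 1500-byte MTU.
+ */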
+
+static void
+dump_refill_skbs(void)
+{
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dump_skb_lock, flags);
+       while (dump_nr_skbs < DUMP_MAX_SKBS) {
+               skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
+               if (!skb)
+                       break;
+               if (dump_skb)
+                       skb->next = dump_skb;
+               else
+                       skb->next = NULL;
+               dump_skb = skb;
+               dump_nr_skbs++;
+       }
+       spin_unlock_irqrestore(&dump_skb_lock, flags);
+}
+
+static struct sk_buff *
+dump_get_skb(void)
+{
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dump_skb_lock, flags);
+       skb = dump_skb;
+       if (skb) {
+               dump_skb = skb->next;
+               skb->next = NULL;
+               dump_nr_skbs--;
+       }
+       spin_unlock_irqrestore(&dump_skb_lock, flags);
+        
+       return skb;
+}
+
+/*
+ * Zap completed output skbs.
+ */
+static void
+zap_completion_queue(void)
+{
+       int count;
+       unsigned long flags;
+       struct softnet_data *sd;
+
+       count = 0;
+       sd = &__get_cpu_var(softnet_data);
+       if (sd->completion_queue) {
+               struct sk_buff *clist;
+       
+               local_irq_save(flags);
+               clist = sd->completion_queue;
+               sd->completion_queue = NULL;
+               local_irq_restore(flags);
+
+               while (clist != NULL) {
+                       struct sk_buff *skb = clist;
+                       clist = clist->next;
+                       __kfree_skb(skb);
+                       count++;
+                       if (count > 10000)
+                               printk("Error in sk list\n");
+               }
+       }
+}
+
+static void
+dump_send_skb(struct net_device *dev, const char *msg, unsigned int msg_len,
+               reply_t *reply)
+{
+       int once = 1;
+       int total_len, eth_len, ip_len, udp_len, count = 0;
+       struct sk_buff *skb;
+       struct udphdr *udph;
+       struct iphdr *iph;
+       struct ethhdr *eth; 
+
+       udp_len = msg_len + HEADER_LEN + sizeof(*udph);
+       ip_len = eth_len = udp_len + sizeof(*iph);
+       total_len = eth_len + ETH_HLEN;
+
+repeat_loop:
+       zap_completion_queue();
+       if (dump_nr_skbs < DUMP_MAX_SKBS)
+               dump_refill_skbs();
+
+       skb = alloc_skb(total_len, GFP_ATOMIC);
+       if (!skb) {
+               skb = dump_get_skb();
+               if (!skb) {
+                       count++;
+                       if (once && (count == 1000000)) {
+                               printk("possibly FATAL: out of netconsole "
+                                       "skbs!!! will keep retrying.\n");
+                               once = 0;
+                       }
+                       dev->poll_controller(dev);
+                       goto repeat_loop;
+               }
+       }
+
+       atomic_set(&skb->users, 1);
+       skb_reserve(skb, total_len - msg_len - HEADER_LEN);
+       skb->data[0] = NETCONSOLE_VERSION;
+
+       put_unaligned(htonl(reply->nr), (u32 *) (skb->data + 1));
+       put_unaligned(htonl(reply->code), (u32 *) (skb->data + 5));
+       put_unaligned(htonl(reply->info), (u32 *) (skb->data + 9));
+
+       memcpy(skb->data + HEADER_LEN, msg, msg_len);
+       skb->len += msg_len + HEADER_LEN;
+
+       udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+       udph->source = source_port;
+       udph->dest = target_port;
+       udph->len = htons(udp_len);
+       udph->check = 0;
+
+       iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+
+       iph->version  = 4;
+       iph->ihl      = 5;
+       iph->tos      = 0;
+       iph->tot_len  = htons(ip_len);
+       iph->id       = 0;
+       iph->frag_off = 0;
+       iph->ttl      = 64;
+       iph->protocol = IPPROTO_UDP;
+       iph->check    = 0;
+       iph->saddr    = source_ip;
+       iph->daddr    = target_ip;
+       iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+       eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
+
+       eth->h_proto = htons(ETH_P_IP);
+       memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
+       memcpy(eth->h_dest, daddr, dev->addr_len);
+
+       count = 0;
+repeat_poll:
+       spin_lock(&dev->xmit_lock);
+       dev->xmit_lock_owner = smp_processor_id();
+
+       count++;
+
+       if (netif_queue_stopped(dev)) {
+               dev->xmit_lock_owner = -1;
+               spin_unlock(&dev->xmit_lock);
+
+               dev->poll_controller(dev);
+               zap_completion_queue();
+
+               goto repeat_poll;
+       }
+
+       dev->hard_start_xmit(skb, dev);
+
+       dev->xmit_lock_owner = -1;
+       spin_unlock(&dev->xmit_lock);
+}
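+
+/*
+ * Editor's note on the frame assembled above (offsets reconstructed
+ * from the code; HEADER_LEN is evidently 13 = 1 version byte plus
+ * three 32-bit reply words):
+ *
+ *   [ethhdr 14][iphdr 20][udphdr 8][ver 1][nr 4][code 4][info 4][msg]
+ */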
+
+static unsigned short
+udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr,
+               unsigned long base)
+{
+       return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+static int
+udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
+                            unsigned short ulen, u32 saddr, u32 daddr)
+{
+       if (uh->check == 0) {
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       } else if (skb->ip_summed == CHECKSUM_HW) {
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
+                       return 0;
+               skb->ip_summed = CHECKSUM_NONE;
+       }
+       if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+               skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen,
+                               IPPROTO_UDP, 0);
+       /* Probably, we should checksum udp header (it should be in cache
+        * in any case) and data in tiny packets (< rx copybreak).
+        */
+       return 0;
+}
+
+static __inline__ int
+__udp_checksum_complete(struct sk_buff *skb)
+{
+       return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len,
+                               skb->csum));
+}
+
+static __inline__
+int udp_checksum_complete(struct sk_buff *skb)
+{
+       return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+               __udp_checksum_complete(skb);
+}
+
+int new_req = 0;
+static req_t req;
+
+static int
+dump_rx_hook(struct sk_buff *skb)
+{
+       int proto;
+       struct iphdr *iph;
+       struct udphdr *uh;
+       __u32 len, saddr, daddr, ulen;
+       req_t *__req;
+
+       /* 
+        * First check whether we are dumping or doing the startup
+        * handshake; if not, return quickly.
+        */
+       if (!netdump_in_progress)
+               return NET_RX_SUCCESS;
+
+       if (skb->dev->type != ARPHRD_ETHER)
+               goto out;
+
+       proto = ntohs(skb->mac.ethernet->h_proto);
+       if (proto != ETH_P_IP)
+               goto out;
+
+       if (skb->pkt_type == PACKET_OTHERHOST)
+               goto out;
+
+       if (skb_shared(skb))
+               goto out;
+
+       /* IP header correctness testing: */
+       iph = (struct iphdr *)skb->data;
+       if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+               goto out;
+
+       if (iph->ihl < 5 || iph->version != 4)
+               goto out;
+
+       if (!pskb_may_pull(skb, iph->ihl*4))
+               goto out;
+
+       if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+               goto out;
+
+       len = ntohs(iph->tot_len);
+       if (skb->len < len || len < iph->ihl*4)
+               goto out;
+
+       saddr = iph->saddr;
+       daddr = iph->daddr;
+       if (iph->protocol != IPPROTO_UDP)
+               goto out;
+
+       if (source_ip != daddr)
+               goto out;
+
+       if (target_ip != saddr)
+               goto out;
+
+       len -= iph->ihl*4;
+       uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+       ulen = ntohs(uh->len);
+
+       if (ulen != len || ulen < (sizeof(*uh) + sizeof(*__req)))
+               goto out;
+
+       if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
+               goto out;
+
+       if (udp_checksum_complete(skb))
+               goto out;
+
+       if (source_port != uh->dest)
+               goto out;
+
+       if (target_port != uh->source)
+               goto out;
+
+       __req = (req_t *)(uh + 1);
+       if ((ntohl(__req->command) != COMM_GET_MAGIC) &&
+           (ntohl(__req->command) != COMM_HELLO) &&
+           (ntohl(__req->command) != COMM_START_WRITE_NETDUMP_ACK) &&
+           (ntohl(__req->command) != COMM_START_NETDUMP_ACK) &&
+           (memcmp(&__req->magic, &dump_magic, sizeof(dump_magic)) != 0))
+               goto out;
+
+       req.magic = ntohl(__req->magic);
+       req.command = ntohl(__req->command);
+       req.from = ntohl(__req->from);
+       req.to = ntohl(__req->to);
+       req.nr = ntohl(__req->nr);
+       new_req = 1;
+out:
+       return NET_RX_DROP;
+}
+
+static void
+dump_send_mem(struct net_device *dev, req_t *req, const char* buff, size_t len)
+{
+       int i;
+       int nr_chunks = len / 1024;
+       reply_t reply;
+       
+       reply.nr = req->nr;
+       reply.info = 0;
+
+        if ( nr_chunks <= 0)
+                nr_chunks = 1;
+       for (i = 0; i < nr_chunks; i++) {
+               unsigned int offset = i*1024;
+               reply.code = REPLY_MEM;
+               reply.info = offset;
+                dump_send_skb(dev, buff + offset, 1024, &reply);
+       }
+}
+
+/*
+ * This function waits for the client to acknowledge the receipt
+ * of the netdump startup reply, with the possibility of packets
+ * getting lost. We resend the startup packet if no ACK is received,
+ * after a 1 second delay.
+ *
+ * (The client can test the success of the handshake via the HELLO
+ * command, and send ACKs until we enter netdump mode.)
+ */
+static int
+dump_handshake(struct dump_dev *net_dev)
+{
+       char tmp[200];
+       reply_t reply;
+       int i, j;
+
+       if (startup_handshake) {
+               sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n");
+               reply.code = REPLY_START_NETDUMP;
+               reply.nr = 0;
+               reply.info = 0;
+       } else {
+               sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n");
+               reply.code = REPLY_START_WRITE_NETDUMP;
+               reply.nr = net_dev->curr_offset;
+               reply.info = net_dev->curr_offset;
+       }
+       
+       /* send 300 handshake packets before declaring failure */
+       for (i = 0; i < 300; i++) {
+               dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+
+               /* wait 1 sec */
+               for (j = 0; j < 10000; j++) {
+                       udelay(100);
+                       dump_ndev->poll_controller(dump_ndev);
+                       zap_completion_queue();
+                       if (new_req)
+                               break;
+               }
+
+               /* 
+                * if there is no new request, try sending the handshaking
+                * packet again
+                */
+               if (!new_req)
+                       continue;
+
+               /* 
+                * check if the new request is of the expected type,
+                * if so, return, else try sending the handshaking
+                * packet again
+                */
+               if (startup_handshake) {
+                       if (req.command == COMM_HELLO || req.command ==
+                               COMM_START_NETDUMP_ACK) {
+                               return 0;
+                       } else {
+                               new_req = 0;
+                               continue;
+                       }
+               } else {
+                       if (req.command == COMM_SEND_MEM) {
+                               return 0;
+                       } else {
+                               new_req = 0;
+                               continue;
+                       }
+               }
+       }
+       return -1;
+}
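+
+/*
+ * Editor's summary of the handshake above: the dumper sends up to 300
+ * start packets, polling the NIC for roughly a second after each one
+ * (10000 x 100us). During startup it accepts COMM_HELLO or
+ * COMM_START_NETDUMP_ACK as the ACK; on subsequent writes it waits for
+ * COMM_SEND_MEM. Any other request resets new_req and triggers a
+ * resend.
+ */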
+
+static ssize_t
+do_netdump(struct dump_dev *net_dev, const char* buff, size_t len)
+{
+       reply_t reply;
+       char tmp[200];
+       ssize_t  ret = 0;
+       int repeatCounter, counter, total_loop;
+       
+       netdump_in_progress = 1;
+
+       if (dump_handshake(net_dev) < 0) {
+               printk("network dump failed due to handshake failure\n");
+               goto out;
+       }
+
+       /*
+        * Ideally the startup handshake should be done during dump
+        * configuration, i.e., in dump_net_open(). This will be done when
+        * I figure out the dependency between the startup handshake,
+        * subsequent writes and various commands with respect to the
+        * net-server.
+        */
+       if (startup_handshake)
+               startup_handshake = 0;
+
+       counter = 0;
+       repeatCounter = 0;
+       total_loop = 0;
+       while (1) {
+               if (!new_req) {
+                       dump_ndev->poll_controller(dump_ndev);
+                       zap_completion_queue();
+               }
+               if (!new_req) {
+                       repeatCounter++;
+
+                       if (repeatCounter > 5) {
+                               counter++;
+                               if (counter > 10000) {
+                                       if (total_loop >= 100000) {
+                                               printk("Time OUT LEAVE NOW\n");
+                                               goto out;
+                                       } else {
+                                               total_loop++;
+                                               printk("Try number %d out of "
+                                                       "10 before Time Out\n",
+                                                       total_loop);
+                                       }
+                               }
+                               mdelay(1);
+                               repeatCounter = 0;
+                       }       
+                       continue;
+               }
+               repeatCounter = 0;
+               counter = 0;
+               total_loop = 0;
+               new_req = 0;
+               switch (req.command) {
+               case COMM_NONE:
+                       break;
+
+               case COMM_SEND_MEM:
+                       dump_send_mem(dump_ndev, &req, buff, len);
+                       break;
+
+               case COMM_EXIT:
+               case COMM_START_WRITE_NETDUMP_ACK:
+                       ret = len;
+                       goto out;
+
+               case COMM_HELLO:
+                       sprintf(tmp, "Hello, this is netdump version "
+                                       "0.%02d\n", NETCONSOLE_VERSION);
+                       reply.code = REPLY_HELLO;
+                       reply.nr = req.nr;
+                       reply.info = net_dev->curr_offset;
+                       dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+                       break;
+
+               case COMM_GET_PAGE_SIZE:
+                       sprintf(tmp, "PAGE_SIZE: %ld\n", PAGE_SIZE);
+                       reply.code = REPLY_PAGE_SIZE;
+                       reply.nr = req.nr;
+                       reply.info = PAGE_SIZE;
+                       dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+                       break;
+
+               case COMM_GET_NR_PAGES:
+                       reply.code = REPLY_NR_PAGES;
+                       reply.nr = req.nr;
+                       reply.info = page_counter;
+                       sprintf(tmp, "Number of pages: %ld\n", num_physpages);
+                       dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+                       break;
+
+               case COMM_GET_MAGIC:
+                       reply.code = REPLY_MAGIC;
+                       reply.nr = req.nr;
+                       reply.info = NETCONSOLE_VERSION;
+                       dump_send_skb(dump_ndev, (char *)&dump_magic,
+                                       sizeof(dump_magic), &reply);
+                       break;
+
+               default:
+                       reply.code = REPLY_ERROR;
+                       reply.nr = req.nr;
+                       reply.info = req.command;
+                       sprintf(tmp, "Got unknown command code %d!\n",
+                                       req.command);
+                       dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+                       break;
+               }
+       }
+out:
+       netdump_in_progress = 0;
+       return ret;
+}
+
+static int
+dump_validate_config(void)
+{
+       source_ip = dump_in_dev->ifa_list->ifa_local;
+       if (!source_ip) {
+               printk("network device %s has no local address, "
+                               "aborting.\n", device_name);
+               return -1;
+       }
+
+#define IP(x) ((unsigned char *)&source_ip)[x]
+       printk("Source %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3));
+#undef IP
+
+       if (!source_port) {
+               printk("source_port parameter not specified, aborting.\n");
+               return -1;
+       }
+       printk(":%i\n", source_port);
+       source_port = htons(source_port);
+
+       if (!target_ip) {
+               printk("target_ip parameter not specified, aborting.\n");
+               return -1;
+       }
+
+#define IP(x) ((unsigned char *)&target_ip)[x]
+       printk("Target %d.%d.%d.%d", IP(0), IP(1), IP(2), IP(3));
+#undef IP
+
+       if (!target_port) {
+               printk("target_port parameter not specified, aborting.\n");
+               return -1;
+       }
+       printk(":%i\n", target_port);
+       target_port = htons(target_port);
+
+       printk("Target Ethernet Address %02x:%02x:%02x:%02x:%02x:%02x",
+               daddr[0], daddr[1], daddr[2], daddr[3], daddr[4], daddr[5]);
+
+       if ((daddr[0] & daddr[1] & daddr[2] & daddr[3] & daddr[4] & 
+                               daddr[5]) == 255)
+               printk("(Broadcast)");
+       printk("\n");
+       return 0;
+}
+
+/*
+ * Prepares the dump device so we can take a dump later. 
+ * Validates the netdump configuration parameters.
+ *
+ * TODO: Network connectivity check should be done here.
+ */
+static int
+dump_net_open(struct dump_dev *net_dev, unsigned long arg)
+{
+       int retval = 0;
+
+       /* get the interface name */
+       if (copy_from_user(device_name, (void *)arg, IFNAMSIZ))
+               return -EFAULT;
+
+       if (!(dump_ndev = dev_get_by_name(device_name))) {
+               printk("network device %s does not exist, aborting.\n",
+                               device_name);
+               return -ENODEV;
+       }
+
+       if (!dump_ndev->poll_controller) {
+               printk("network device %s does not implement polling yet, "
+                               "aborting.\n", device_name);
+               retval = -EINVAL; /* polling not supported */
+               goto err1;
+       }
+
+       if (!(dump_in_dev = in_dev_get(dump_ndev))) {
+               printk("network device %s is not an IP protocol device, "
+                               "aborting.\n", device_name);
+               retval = -EINVAL;
+               goto err1;
+       }
+
+       if ((retval = dump_validate_config()) < 0)
+               goto err2;
+
+       net_dev->curr_offset = 0;
+       printk("Network device %s successfully configured for dumping\n",
+                       device_name);
+       return retval;
+err2:
+       in_dev_put(dump_in_dev);
+err1:
+       dev_put(dump_ndev);     
+       return retval;
+}
+
+/*
+ * Close the dump device and release associated resources.
+ * Invoked when unconfiguring the dump device.
+ */
+static int
+dump_net_release(struct dump_dev *net_dev)
+{
+       if (dump_in_dev)
+               in_dev_put(dump_in_dev);
+       if (dump_ndev)
+               dev_put(dump_ndev);
+       return 0;
+}
+
+/*
+ * Prepare the dump device for use (silence any ongoing activity
+ * and quiesce state) when the system crashes.
+ */
+static int
+dump_net_silence(struct dump_dev *net_dev)
+{
+       netpoll_set_trap(1);
+       local_irq_save(flags_global);
+       dump_ndev->rx_hook = dump_rx_hook;
+       startup_handshake = 1;
+       net_dev->curr_offset = 0;
+       printk("Dumping to network device %s on CPU %d ...\n", device_name,
+                       smp_processor_id());
+       return 0;
+}
+
+/*
+ * Invoked when dumping is done. This is the time to put things back 
+ * (i.e. undo the effects of dump_net_silence) so the device is 
+ * available for normal use.
+ */
+static int
+dump_net_resume(struct dump_dev *net_dev)
+{
+       int indx;
+       reply_t reply;
+       char tmp[200];
+
+       if (!dump_ndev)
+               return 0;
+
+       sprintf(tmp, "NETDUMP end.\n");
+       for (indx = 0; indx < 6; indx++) {
+               reply.code = REPLY_END_NETDUMP;
+               reply.nr = 0;
+               reply.info = 0;
+               dump_send_skb(dump_ndev, tmp, strlen(tmp), &reply);
+       }
+       printk("NETDUMP END!\n");
+       local_irq_restore(flags_global);
+       netpoll_set_trap(0);
+       dump_ndev->rx_hook = NULL;
+       startup_handshake = 0;
+       return 0;
+}
+
+/*
+ * Seek to the specified offset in the dump device.
+ * Makes sure this is a valid offset, otherwise returns an error.
+ */
+static  int
+dump_net_seek(struct dump_dev *net_dev, loff_t off)
+{
+       /*
+        * For now DUMP_HEADER_OFFSET is used as a hard-coded value.
+        * See dump_block_seek() in dump_blockdev.c for how to do
+        * this properly.
+        */
+       net_dev->curr_offset = off;
+       return 0;
+}
+
+/*
+ * Write out a buffer, splitting it into page-size chunks and sending
+ * each chunk over the network via do_netdump().
+ */
+static int
+dump_net_write(struct dump_dev *net_dev, void *buf, unsigned long len)
+{
+       int cnt, i, off;
+       ssize_t ret;
+
+       cnt = len / PAGE_SIZE;
+
+       for (i = 0; i < cnt; i++) {
+               off = i * PAGE_SIZE;
+               ret = do_netdump(net_dev, buf+off, PAGE_SIZE);
+               if (ret <= 0)
+                       return -1;
+               net_dev->curr_offset = net_dev->curr_offset + PAGE_SIZE;
+       }
+       return len;
+}
+
+/*
+ * check if the last dump i/o is over and ready for next request
+ */
+static int
+dump_net_ready(struct dump_dev *net_dev, void *buf)
+{
+       return 0;
+}
+
+/*
+ * ioctl function used for configuring network dump
+ */
+static int
+dump_net_ioctl(struct dump_dev *net_dev, unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case DIOSTARGETIP:
+               target_ip = arg;
+               break;
+       case DIOSTARGETPORT:
+               target_port = (u16)arg;
+               break;
+       case DIOSSOURCEPORT:
+               source_port = (u16)arg;
+               break;
+       case DIOSETHADDR:
+               if (copy_from_user(daddr, (void *)arg, 6))
+                       return -EFAULT;
+               break;
+       case DIOGTARGETIP:
+       case DIOGTARGETPORT:
+       case DIOGSOURCEPORT:
+       case DIOGETHADDR:
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
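+
+/*
+ * For illustration, a hypothetical configuration sequence from userspace
+ * (the device node name is an assumption, not defined in this file):
+ *
+ *     fd = open("/dev/dump", O_RDWR);
+ *     ioctl(fd, DIOSTARGETIP, ip);       target IP as unsigned long
+ *     ioctl(fd, DIOSTARGETPORT, port);   u16 ports in host byte order;
+ *     ioctl(fd, DIOSSOURCEPORT, port);   htons() is applied later in
+ *                                        dump_validate_config()
+ *     ioctl(fd, DIOSETHADDR, mac);       mac points to a 6-byte buffer
+ */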
+
+struct dump_dev_ops dump_netdev_ops = {
+       .open           = dump_net_open,
+       .release        = dump_net_release,
+       .silence        = dump_net_silence,
+       .resume         = dump_net_resume,
+       .seek           = dump_net_seek,
+       .write          = dump_net_write,
+       /* .read not implemented */
+       .ready          = dump_net_ready,
+       .ioctl          = dump_net_ioctl
+};
+
+static struct dump_dev default_dump_netdev = {
+       .type_name = "networkdev", 
+       .ops = &dump_netdev_ops, 
+       .curr_offset = 0
+};
+
+static int __init
+dump_netdev_init(void)
+{
+       default_dump_netdev.curr_offset = 0;
+
+       if (dump_register_device(&default_dump_netdev) < 0) {
+               printk("network dump device driver registration failed\n");
+               return -1;
+       }
+       printk("network device driver for LKCD registered\n");
+       get_random_bytes(&dump_magic, sizeof(dump_magic));
+       return 0;
+}
+
+static void __exit
+dump_netdev_cleanup(void)
+{
+       dump_unregister_device(&default_dump_netdev);
+}
+
+MODULE_AUTHOR("LKCD Development Team <lkcd-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Network Dump Driver for Linux Kernel Crash Dump (LKCD)");
+MODULE_LICENSE("GPL");
+
+module_init(dump_netdev_init);
+module_exit(dump_netdev_cleanup);
diff --git a/drivers/dump/dump_overlay.c b/drivers/dump/dump_overlay.c
new file mode 100644 (file)
index 0000000..8e10b78
--- /dev/null
@@ -0,0 +1,884 @@
+/*
+ * Two-stage soft-boot based dump scheme methods (memory overlay
+ * with post soft-boot writeout)
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *
+ * This approach of saving the dump in memory and writing it 
+ * out after a softboot without clearing memory is derived from the 
+ * Mission Critical Linux dump implementation. Credits and a big
+ * thanks for letting the lkcd project make use of the excellent 
+ * piece of work and also for helping with clarifications and 
+ * tips along the way are due to:
+ *     Dave Winchell <winchell@mclx.com> (primary author of mcore)
+ *     and also to
+ *     Jeff Moyer <moyer@mclx.com>
+ *     Josh Huber <huber@mclx.com>
+ * 
+ * For those familiar with the mcore implementation, the key 
+ * differences/extensions here are in allowing entire memory to be 
+ * saved (in compressed form) through a careful ordering scheme 
+ * on both the way down as well on the way up after boot, the latter
+ * for supporting the LKCD notion of passes in which most critical 
+ * data is the first to be saved to the dump device. Also the post 
+ * boot writeout happens from within the kernel rather than driven 
+ * from userspace.
+ *
+ * The sequence is orchestrated through the abstraction of "dumpers",
+ * one for the first stage which then sets up the dumper for the next 
+ * stage, providing for a smooth and flexible reuse of the singlestage 
+ * dump scheme methods and a handle to pass dump device configuration 
+ * information across the soft boot. 
+ *
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * Disruptive dumping using the second kernel soft-boot option
+ * for issuing dump i/o operates in 2 stages:
+ * 
+ * (1) - Saves the (compressed & formatted) dump in memory using a 
+ *       carefully ordered overlay scheme designed to capture the 
+ *       entire physical memory or selective portions depending on 
+ *       dump config settings, 
+ *     - Registers the stage 2 dumper and 
+ *     - Issues a soft reboot w/o clearing memory. 
+ *
+ *     The overlay scheme starts with a small bootstrap free area
+ *     and follows a reverse ordering of passes wherein it 
+ *     compresses and saves data starting with the least critical 
+ *     areas first, thus freeing up the corresponding pages to 
+ *     serve as destination for subsequent data to be saved, and
+ *     so on. With a good compression ratio, this makes it feasible
+ *     to capture an entire physical memory dump without significantly
+ *     reducing memory available during regular operation.
+ *
+ * (2) Post soft-reboot, runs through the saved memory dump and
+ *     writes it out to disk, this time around, taking care to
+ *     save the more critical data first (i.e. pages which figure 
+ *     in early passes for a regular dump). Finally issues a 
+ *     clean reboot.
+ *     
+ *     Since the data was saved in memory after selection/filtering
+ *     and formatted as per the chosen output dump format, at this 
+ *     stage the filter and format actions are just dummy (or
+ *     passthrough) actions, except for influence on ordering of
+ *     passes.
+ */
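+
+/*
+ * In terms of the routines defined below, stage 1 corresponds roughly to
+ *
+ *     dump_overlay_sequencer()   save the compressed dump to memory,
+ *                                walking the passes in reverse order
+ *     dump_overlay_resume()      switch to dumper_stage2, save the
+ *                                config block and trigger the soft-boot
+ *
+ * and stage 2, after the soft-boot, to
+ *
+ *     dump_init_stage2()         reload the preserved config block and
+ *                                write the saved dump out to the real
+ *                                target device
+ */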
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/dump.h>
+#ifdef CONFIG_KEXEC
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#endif
+#include "dump_methods.h"
+
+extern struct list_head dumper_list_head;
+extern struct dump_memdev *dump_memdev;
+extern struct dumper dumper_stage2;
+struct dump_config_block *dump_saved_config = NULL;
+extern struct dump_blockdev *dump_blockdev;
+static struct dump_memdev *saved_dump_memdev = NULL;
+static struct dumper *saved_dumper = NULL;
+
+#ifdef CONFIG_KEXEC
+extern int panic_timeout;
+#endif
+
+/* For testing 
+extern void dump_display_map(struct dump_memdev *);
+*/
+
+struct dumper *dumper_by_name(char *name)
+{
+#ifdef LATER
+       struct dumper *dumper;
+       list_for_each_entry(dumper, &dumper_list_head, dumper_list)
+               if (!strncmp(dumper->name, name, 32))
+                       return dumper;
+
+       /* not found */
+       return NULL; 
+#endif
+       /* Temporary proof of concept */
+       if (!strncmp(dumper_stage2.name, name, 32))
+               return &dumper_stage2;
+       else
+               return NULL;
+}
+
+#ifdef CONFIG_CRASH_DUMP_SOFTBOOT
+extern void dump_early_reserve_map(struct dump_memdev *);
+
+void crashdump_reserve(void)
+{
+       extern unsigned long crashdump_addr;
+
+       if (crashdump_addr == 0xdeadbeef) 
+               return;
+
+       /* reserve dump config and saved dump pages */
+       dump_saved_config = (struct dump_config_block *)crashdump_addr;
+       /* magic verification */
+       if (dump_saved_config->magic != DUMP_MAGIC_LIVE) {
+               printk("Invalid dump magic. Ignoring dump\n");
+               dump_saved_config = NULL;
+               return;
+       }
+                       
+       printk("Dump may be available from previous boot\n");
+
+       reserve_bootmem(virt_to_phys((void *)crashdump_addr), 
+               PAGE_ALIGN(sizeof(struct dump_config_block)));
+       dump_early_reserve_map(&dump_saved_config->memdev);
+
+}
+#endif
+
+/* 
+ * Loads the dump configuration from a memory block saved across soft-boot.
+ * The ops vectors need fixing up, as the corresponding routines may have 
+ * been relocated in the new soft-booted kernel.
+ */
+int dump_load_config(struct dump_config_block *config)
+{
+       struct dumper *dumper;
+       struct dump_data_filter *filter_table, *filter;
+       struct dump_dev *dev;
+       int i;
+
+       if (config->magic != DUMP_MAGIC_LIVE)
+               return -ENOENT; /* not a valid config */
+
+       /* initialize generic config data */
+       memcpy(&dump_config, &config->config, sizeof(dump_config));
+
+       /* initialize dumper state */
+       if (!(dumper = dumper_by_name(config->dumper.name)))  {
+               printk("dumper name mismatch\n");
+               return -ENOENT; /* dumper mismatch */
+       }
+       
+       /* verify and fix up scheme */
+       if (strncmp(dumper->scheme->name, config->scheme.name, 32)) {
+               printk("dumper scheme mismatch\n");
+               return -ENOENT; /* mismatch */
+       }
+       config->scheme.ops = dumper->scheme->ops;
+       config->dumper.scheme = &config->scheme;
+       
+       /* verify and fixup filter operations */
+       filter_table = dumper->filter;
+       for (i = 0, filter = config->filter_table; 
+               ((i < MAX_PASSES) && filter_table[i].selector); 
+               i++, filter++) {
+               if (strncmp(filter_table[i].name, filter->name, 32)) {
+                       printk("dump filter mismatch\n");
+                       return -ENOENT; /* filter name mismatch */
+               }
+               filter->selector = filter_table[i].selector;
+       }
+       config->dumper.filter = config->filter_table;
+
+       /* fixup format */
+       if (strncmp(dumper->fmt->name, config->fmt.name, 32)) {
+               printk("dump format mismatch\n");
+               return -ENOENT; /* mismatch */
+       }
+       config->fmt.ops = dumper->fmt->ops;
+       config->dumper.fmt = &config->fmt;
+
+       /* fixup target device */
+       dev = (struct dump_dev *)(&config->dev[0]);
+       if (dumper->dev == NULL) {
+               pr_debug("Vanilla dumper - assume default\n");
+               if (dump_dev == NULL)
+                       return -ENODEV;
+               dumper->dev = dump_dev;
+       }
+
+       if (strncmp(dumper->dev->type_name, dev->type_name, 32)) { 
+               printk("dump dev type mismatch %s instead of %s\n",
+                               dev->type_name, dumper->dev->type_name);
+               return -ENOENT; /* mismatch */
+       }
+       dev->ops = dumper->dev->ops; 
+       config->dumper.dev = dev;
+       
+       /* fixup memory device containing saved dump pages */
+       /* assume statically init'ed dump_memdev */
+       config->memdev.ddev.ops = dump_memdev->ddev.ops; 
+       /* switch to memdev from prev boot */
+       saved_dump_memdev = dump_memdev; /* remember current */
+       dump_memdev = &config->memdev;
+
+       /* Make this the current primary dumper */
+       dump_config.dumper = &config->dumper;
+
+       return 0;
+}
+
+/* Saves the dump configuration in a memory block for use across a soft-boot */
+int dump_save_config(struct dump_config_block *config)
+{
+       printk("saving dump config settings\n");
+
+       /* dump config settings */
+       memcpy(&config->config, &dump_config, sizeof(dump_config));
+
+       /* dumper state */
+       memcpy(&config->dumper, dump_config.dumper, sizeof(struct dumper));
+       memcpy(&config->scheme, dump_config.dumper->scheme, 
+               sizeof(struct dump_scheme));
+       memcpy(&config->fmt, dump_config.dumper->fmt, sizeof(struct dump_fmt));
+       memcpy(&config->dev[0], dump_config.dumper->dev, 
+               sizeof(struct dump_anydev));
+       memcpy(&config->filter_table, dump_config.dumper->filter, 
+               sizeof(struct dump_data_filter)*MAX_PASSES);
+
+       /* handle to saved mem pages */
+       memcpy(&config->memdev, dump_memdev, sizeof(struct dump_memdev));
+
+       config->magic = DUMP_MAGIC_LIVE;
+       
+       return 0;
+}
+
+int dump_init_stage2(struct dump_config_block *saved_config)
+{
+       int err = 0;
+
+       pr_debug("dump_init_stage2\n");
+       /* Check if dump from previous boot exists */
+       if (saved_config) {
+               printk("loading dumper from previous boot\n");
+               /* load and configure dumper from previous boot */
+               if ((err = dump_load_config(saved_config)))
+                       return err;
+
+               if (!dump_oncpu) {
+                       if ((err = dump_configure(dump_config.dump_device))) {
+                               printk("Stage 2 dump configure failed\n");
+                               return err;
+                       }
+               }
+
+               dumper_reset();
+               dump_dev = dump_config.dumper->dev;
+               /* write out the dump */
+               err = dump_generic_execute(NULL, NULL);
+               
+               dump_saved_config = NULL;
+
+               if (!dump_oncpu) {
+                       dump_unconfigure(); 
+               }
+               
+               return err;
+
+       } else {
+               /* no dump to write out */
+               printk("no dumper from previous boot\n");
+               return 0;
+       }
+}
+
+extern void dump_mem_markpages(struct dump_memdev *);
+
+int dump_switchover_stage(void)
+{
+       int ret = 0;
+
+       /* trigger stage 2 right away - in real life this happens after soft-boot */
+       /* dump_saved_config would be a boot param */
+       saved_dump_memdev = dump_memdev;
+       saved_dumper = dump_config.dumper;
+       ret = dump_init_stage2(dump_saved_config);
+       dump_memdev = saved_dump_memdev;
+       dump_config.dumper = saved_dumper;
+       return ret;
+}
+
+int dump_activate_softboot(void) 
+{
+       int err = 0;
+#ifdef CONFIG_KEXEC
+       int num_cpus_online = 0;
+       struct kimage *image;
+#endif
+
+       /* temporary - switchover to writeout previously saved dump */
+#ifndef CONFIG_KEXEC
+       err = dump_switchover_stage(); /* non-disruptive case */
+       if (dump_oncpu)
+               dump_config.dumper = &dumper_stage1; /* set things back */
+
+       return err;
+#else
+       dump_silence_level = DUMP_HALT_CPUS;
+
+       /* wait till we become the only cpu */
+       /* maybe by checking for online cpus ? */
+       while ((num_cpus_online = num_online_cpus()) > 1)
+               ;
+
+       /* now call into kexec */
+       image = xchg(&kexec_image, 0);
+       if (image) {
+               mdelay(panic_timeout * 1000);
+               machine_kexec(image);
+       }
+
+       /* TBD/Fixme:
+        * - should we call reboot notifiers ? inappropriate for panic ?
+        * - what about device_shutdown() ?
+        * - is explicit bus master disabling needed or can we do that
+        *   through driverfs ?
+        */
+       return 0;
+#endif
+}
+
+/* --- DUMP SCHEME ROUTINES  --- */
+
+static inline int dump_buf_pending(struct dumper *dumper)
+{
+       return (dumper->curr_buf - dumper->dump_buf);
+}
+
+/* Invoked during stage 1 of soft-reboot based dumping */
+int dump_overlay_sequencer(void)
+{
+       struct dump_data_filter *filter = dump_config.dumper->filter;
+       struct dump_data_filter *filter2 = dumper_stage2.filter;
+       int pass = 0, err = 0, save = 0;
+       int (*action)(unsigned long, unsigned long);
+
+       /* Make sure gzip compression is being used */
+       if (dump_config.dumper->compress->compress_type != DUMP_COMPRESS_GZIP) {
+               printk("Please set GZIP compression\n");
+               return -EINVAL;
+       }
+
+       /* start filling in dump data right after the header */
+       dump_config.dumper->curr_offset = 
+               PAGE_ALIGN(dump_config.dumper->header_len);
+
+       /* Locate the last pass */
+       for (;filter->selector; filter++, pass++);
+       
+       /* 
+        * Start from the end backwards: overlay involves a reverse 
+        * ordering of passes, since less critical pages are more
+        * likely to be reusable as scratch space once we are through
+        * with them. 
+        */
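+       /*
+        * e.g. with a filter table ordered (kern, used, unused), the
+        * reverse walk handles unused pages first, then used, then kern,
+        * so that page frames from the less critical passes are freed up
+        * early to hold the compressed output of the more critical data
+        * that follows.
+        */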
+       for (--pass, --filter; pass >= 0; pass--, filter--)
+       {
+               /* Assumes passes are exclusive (even across dumpers) */
+               /* Requires care when coding the selection functions */
+               if ((save = filter->level_mask & dump_config.level))
+                       action = dump_save_data;
+               else
+                       action = dump_skip_data;
+
+               /* Remember the offset where this pass started */
+               /* The second stage dumper would use this */
+               if (dump_buf_pending(dump_config.dumper) & (PAGE_SIZE - 1)) {
+                       pr_debug("Starting pass %d with pending data\n", pass);
+                       pr_debug("filling dummy data to page-align it\n");
+                       dump_config.dumper->curr_buf = (void *)PAGE_ALIGN(
+                               (unsigned long)dump_config.dumper->curr_buf);
+               }
+               
+               filter2[pass].start[0] = dump_config.dumper->curr_offset
+                       + dump_buf_pending(dump_config.dumper);
+
+               err = dump_iterator(pass, action, filter);
+
+               filter2[pass].end[0] = dump_config.dumper->curr_offset
+                       + dump_buf_pending(dump_config.dumper);
+               filter2[pass].num_mbanks = 1;
+
+               if (err < 0) {
+                       printk("dump_overlay_seq: failure %d in pass %d\n", 
+                               err, pass);
+                       break;
+               }       
+               printk("\n %d overlay pages %s of %d each in pass %d\n", 
+                       err, save ? "saved" : "skipped", DUMP_PAGE_SIZE, pass);
+       }
+
+       return err;
+}
+
+/* from dump_memdev.c */
+extern struct page *dump_mem_lookup(struct dump_memdev *dev, unsigned long loc);
+extern struct page *dump_mem_next_page(struct dump_memdev *dev);
+
+static inline struct page *dump_get_saved_page(loff_t loc)
+{
+       return (dump_mem_lookup(dump_memdev, loc >> PAGE_SHIFT));
+}
+
+static inline struct page *dump_next_saved_page(void)
+{
+       return (dump_mem_next_page(dump_memdev));
+}
+
+/* 
+ * Iterates over list of saved dump pages. Invoked during second stage of 
+ * soft boot dumping
+ *
+ * Observation: If additional selection is desired at this stage then
+ * a different iterator could be written which would advance 
+ * to the next page header every time instead of blindly picking up
+ * the data. In such a case loc would be interpreted differently. 
+ * At this moment however a blind pass seems sufficient, cleaner and
+ * faster.
+ */
+int dump_saved_data_iterator(int pass, int (*action)(unsigned long, 
+       unsigned long), struct dump_data_filter *filter)
+{
+       loff_t loc, end;
+       struct page *page;
+       unsigned long count = 0;
+       int i, err = 0;
+       unsigned long sz;
+
+       for (i = 0; i < filter->num_mbanks; i++) {
+               loc  = filter->start[i];
+               end = filter->end[i];
+               printk("pass %d, start off 0x%llx end offset 0x%llx\n", pass,
+                       loc, end);
+
+               /* loc will get treated as logical offset into stage 1 */
+               page = dump_get_saved_page(loc);
+                       
+               for (; loc < end; loc += PAGE_SIZE) {
+                       dump_config.dumper->curr_loc = loc;
+                       if (!page) {
+                               printk("no more saved data for pass %d\n", 
+                                       pass);
+                               break;
+                       }
+                       sz = (loc + PAGE_SIZE > end) ? end - loc : PAGE_SIZE;
+
+                       if (page && filter->selector(pass, (unsigned long)page, 
+                               PAGE_SIZE))  {
+                               pr_debug("mem offset 0x%llx\n", loc);
+                               if ((err = action((unsigned long)page, sz))) 
+                                       break;
+                               else
+                                       count++;
+                               /* clear the contents of page */
+                               /* fixme: consider using KM_DUMP instead */
+                               clear_highpage(page);
+                       
+                       }
+                       page = dump_next_saved_page();
+               }
+       }
+
+       return err ? err : count;
+}
+
+static inline int dump_overlay_pages_done(struct page *page, int nr)
+{
+       int ret=0;
+
+       for (; nr ; page++, nr--) {
+               if (dump_check_and_free_page(dump_memdev, page))
+                       ret++;
+       }
+       return ret;
+}
+
+int dump_overlay_save_data(unsigned long loc, unsigned long len)
+{
+       int err = 0;
+       struct page *page = (struct page *)loc;
+       static unsigned long cnt = 0;
+
+       if ((err = dump_generic_save_data(loc, len)))
+               return err;
+
+       if (dump_overlay_pages_done(page, len >> PAGE_SHIFT)) {
+               cnt++;
+               if (!(cnt & 0x7f))
+                       pr_debug("released page 0x%lx\n", page_to_pfn(page));
+       }
+       
+       return err;
+}
+
+
+int dump_overlay_skip_data(unsigned long loc, unsigned long len)
+{
+       struct page *page = (struct page *)loc;
+
+       dump_overlay_pages_done(page, len >> PAGE_SHIFT);
+       return 0;
+}
+
+int dump_overlay_resume(void)
+{
+       int err = 0;
+
+       /* 
+        * switch to stage 2 dumper, save dump_config_block
+        * and then trigger a soft-boot
+        */
+       dumper_stage2.header_len = dump_config.dumper->header_len;
+       dump_config.dumper = &dumper_stage2;
+       if ((err = dump_save_config(dump_saved_config)))
+               return err;
+
+       dump_dev = dump_config.dumper->dev;
+
+#ifdef CONFIG_KEXEC
+       /* If we are doing a disruptive dump, activate softboot now */
+       if ((panic_timeout > 0) &&
+           !(dump_config.flags & DUMP_FLAGS_NONDISRUPT))
+               err = dump_activate_softboot();
+       return err;
+#else
+       err = dump_switchover_stage();  /* plugs into soft boot mechanism */
+       dump_config.dumper = &dumper_stage1; /* set things back */
+       return err;
+#endif
+}
+
+int dump_overlay_configure(unsigned long devid)
+{
+       struct dump_dev *dev;
+       struct dump_config_block *saved_config = dump_saved_config;
+       int err = 0;
+
+       /* If there is a previously saved dump, write it out first */
+       if (saved_config) {
+               printk("Processing old dump pending writeout\n");
+               err = dump_switchover_stage();
+               if (err) {
+                       printk("failed to writeout saved dump\n");
+                       return err;
+               }
+               dump_free_mem(saved_config); /* testing only: not after boot */
+       }
+
+       dev = dumper_stage2.dev = dump_config.dumper->dev;
+       /* From here on the intermediate dump target is memory-only */
+       dump_dev = dump_config.dumper->dev = &dump_memdev->ddev;
+       if ((err = dump_generic_configure(0))) {
+               printk("dump generic configure failed: err %d\n", err);
+               return err;
+       }
+       /* temporary */
+       dumper_stage2.dump_buf = dump_config.dumper->dump_buf;
+
+       /* Sanity check on the actual target dump device */
+       if (!dev || (err = dev->ops->open(dev, devid))) {
+               return err;
+       }
+       /* TBD: should we release the target if this is soft-boot only ? */
+
+       /* alloc a dump config block area to save across reboot */
+       if (!(dump_saved_config =
+               dump_alloc_mem(sizeof(struct dump_config_block)))) {
+               printk("dump config block alloc failed\n");
+               /* undo configure */
+               dump_generic_unconfigure();
+               return -ENOMEM;
+       }
+       dump_config.dump_addr = (unsigned long)dump_saved_config;
+       printk("Dump config block of size %zd set up at 0x%lx\n", 
+               sizeof(*dump_saved_config), (unsigned long)dump_saved_config);
+       return 0;
+}
+
+int dump_overlay_unconfigure(void)
+{
+       struct dump_dev *dev = dumper_stage2.dev;
+       int err = 0;
+
+       pr_debug("dump_overlay_unconfigure\n");
+       /* Close the secondary device */
+       dev->ops->release(dev); 
+       pr_debug("released secondary device\n");
+
+       err = dump_generic_unconfigure();
+       pr_debug("Unconfigured generic portions\n");
+       dump_free_mem(dump_saved_config);
+       dump_saved_config = NULL;
+       pr_debug("Freed saved config block\n");
+       dump_dev = dump_config.dumper->dev = dumper_stage2.dev;
+
+       printk("Unconfigured overlay dumper\n");
+       return err;
+}
+
+int dump_staged_unconfigure(void)
+{
+       int err = 0;
+       struct dump_config_block *saved_config = dump_saved_config;
+       struct dump_dev *dev;
+
+       pr_debug("dump_staged_unconfigure\n");
+       err = dump_generic_unconfigure();
+
+       /* now check if there is a saved dump waiting to be written out */
+       if (saved_config) {
+               printk("Processing saved dump pending writeout\n");
+               if ((err = dump_switchover_stage())) {
+                       printk("Error in committing saved dump at 0x%lx\n", 
+                               (unsigned long)saved_config);
+                       printk("Old dump may hog memory\n");
+               } else {
+                       dump_free_mem(saved_config);
+                       pr_debug("Freed saved config block\n");
+               }
+               dump_saved_config = NULL;
+       } else {
+               dev = &dump_memdev->ddev;
+               dev->ops->release(dev);
+       }
+       printk("Unconfigured second stage dumper\n");
+
+       return 0;
+}
+
+/* ----- PASSTHRU FILTER ROUTINE --------- */
+
+/* transparent - passes everything through */
+int dump_passthru_filter(int pass, unsigned long loc, unsigned long sz)
+{
+       return 1;
+}
+
+/* ----- PASSTHRU FORMAT ROUTINES ---- */
+
+
+int dump_passthru_configure_header(const char *panic_str, const struct pt_regs *regs)
+{
+       dump_config.dumper->header_dirty++;
+       return 0;
+}
+
+/* Copies bytes of data from page(s) to the specified buffer */
+int dump_copy_pages(void *buf, struct page *page, unsigned long sz)
+{
+       unsigned long len = 0, bytes;
+       void *addr;
+
+       while (len < sz) {
+               addr = kmap_atomic(page, KM_DUMP);
+               bytes = (sz > len + PAGE_SIZE) ? PAGE_SIZE : sz - len;  
+               memcpy(buf, addr, bytes); 
+               kunmap_atomic(addr, KM_DUMP);
+               buf += bytes;
+               len += bytes;
+               page++;
+       }
+       /* memset(dump_config.dumper->curr_buf, 0x57, len); temporary */
+
+       return sz - len;
+}
+
+int dump_passthru_update_header(void)
+{
+       long len = dump_config.dumper->header_len;
+       struct page *page;
+       void *buf = dump_config.dumper->dump_buf;
+       int err = 0;
+
+       if (!dump_config.dumper->header_dirty)
+               return 0;
+
+       pr_debug("Copying header of size %ld bytes from memory\n", len);
+       if (len > DUMP_BUFFER_SIZE) 
+               return -E2BIG;
+
+       page = dump_mem_lookup(dump_memdev, 0);
+       for (; (len > 0) && page; buf += PAGE_SIZE, len -= PAGE_SIZE) {
+               if ((err = dump_copy_pages(buf, page, PAGE_SIZE)))
+                       return err;
+               page = dump_mem_next_page(dump_memdev);
+       }
+       if (len > 0) {
+               printk("Incomplete header saved in mem\n");
+               return -ENOENT;
+       }
+
+       if ((err = dump_dev_seek(0))) {
+               printk("Unable to seek to dump header offset\n");
+               return err;
+       }
+       err = dump_ll_write(dump_config.dumper->dump_buf, 
+               buf - dump_config.dumper->dump_buf);
+       if (err < dump_config.dumper->header_len)
+               return (err < 0) ? err : -ENOSPC;
+
+       dump_config.dumper->header_dirty = 0;
+       return 0;
+}
+
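+/*
+ * Sanity checking of the lcrash stream: each dumped page is preceded by
+ * a struct __dump_page header carrying the page address (dp_address,
+ * page aligned), the size of the data that follows (dp_size, at most
+ * PAGE_SIZE) and a raw/compressed indication (dp_flags).
+ * dump_verify_lcrash_data() walks these headers across buffer
+ * boundaries, carrying the leftover offset in next_dph_offset.
+ */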
+static loff_t next_dph_offset = 0;
+
+static int dph_valid(struct __dump_page *dph)
+{
+       if ((dph->dp_address & (PAGE_SIZE - 1)) ||
+           (dph->dp_flags > DUMP_DH_COMPRESSED) ||
+           (!dph->dp_flags) ||
+           (dph->dp_size > PAGE_SIZE)) {
+               printk("dp->address = 0x%llx, dp->size = 0x%x, "
+                       "dp->flag = 0x%x\n",
+                       dph->dp_address, dph->dp_size, dph->dp_flags);
+               return 0;
+       }
+       return 1;
+}
+
+int dump_verify_lcrash_data(void *buf, unsigned long sz)
+{
+       struct __dump_page *dph;
+
+       /* sanity check for page headers */
+       while (next_dph_offset + sizeof(*dph) < sz) {
+               dph = (struct __dump_page *)(buf + next_dph_offset);
+               if (!dph_valid(dph)) {
+                       printk("Invalid page hdr at offset 0x%llx\n",
+                               next_dph_offset);
+                       return -EINVAL;
+               }
+               next_dph_offset += dph->dp_size + sizeof(*dph);
+       }
+
+       next_dph_offset -= sz;  
+       return 0;
+}
+
+/* 
+ * TBD/Later: Consider avoiding the copy by using a scatter/gather 
+ * vector representation for the dump buffer
+ */
+int dump_passthru_add_data(unsigned long loc, unsigned long sz)
+{
+       struct page *page = (struct page *)loc;
+       void *buf = dump_config.dumper->curr_buf;
+       int err = 0;
+
+       if ((err = dump_copy_pages(buf, page, sz))) {
+               printk("dump_copy_pages failed\n");
+               return err;
+       }
+
+       if ((err = dump_verify_lcrash_data(buf, sz))) {
+               printk("dump_verify_lcrash_data failed\n");
+               printk("Invalid data for pfn 0x%lx\n", page_to_pfn(page));
+               printk("Page flags 0x%lx\n", page->flags);
+               printk("Page count 0x%x\n", atomic_read(&page->count));
+               return err;
+       }
+
+       dump_config.dumper->curr_buf = buf + sz;
+
+       return 0;
+}
+
+
+/* Stage 1 dumper: Saves compressed dump in memory and soft-boots system */
+
+/* Scheme to overlay saved data in memory for writeout after a soft-boot */
+struct dump_scheme_ops dump_scheme_overlay_ops = {
+       .configure      = dump_overlay_configure,
+       .unconfigure    = dump_overlay_unconfigure,
+       .sequencer      = dump_overlay_sequencer,
+       .iterator       = dump_page_iterator,
+       .save_data      = dump_overlay_save_data,
+       .skip_data      = dump_overlay_skip_data,
+       .write_buffer   = dump_generic_write_buffer
+};
+
+struct dump_scheme dump_scheme_overlay = {
+       .name           = "overlay",
+       .ops            = &dump_scheme_overlay_ops
+};
+
+
+/* Stage 1 must use a good compression scheme - default to gzip */
+extern struct __dump_compress dump_gzip_compression;
+
+struct dumper dumper_stage1 = {
+       .name           = "stage1",
+       .scheme         = &dump_scheme_overlay,
+       .fmt            = &dump_fmt_lcrash,
+       .compress       = &dump_none_compression, /* needs to be gzip */
+       .filter         = dump_filter_table,
+       .dev            = NULL,
+};             
+
+/* Stage 2 dumper: Activated after softboot to write out saved dump to device */
+
+/* Formatter that transfers data as is (transparent) w/o further conversion */
+struct dump_fmt_ops dump_fmt_passthru_ops = {
+       .configure_header       = dump_passthru_configure_header,
+       .update_header          = dump_passthru_update_header,
+       .save_context           = NULL, /* unused */
+       .add_data               = dump_passthru_add_data,
+       .update_end_marker      = dump_lcrash_update_end_marker
+};
+
+struct dump_fmt dump_fmt_passthru = {
+       .name   = "passthru",
+       .ops    = &dump_fmt_passthru_ops
+};
+
+/* Filter that simply passes along any data within the range (transparent)*/
+/* Note: The start and end ranges in the table are filled in at run-time */
+
+extern int dump_filter_none(int pass, unsigned long loc, unsigned long sz);
+
+struct dump_data_filter dump_passthru_filtertable[MAX_PASSES] = {
+{.name = "passkern", .selector = dump_passthru_filter, 
+       .level_mask = DUMP_MASK_KERN },
+{.name = "passuser", .selector = dump_passthru_filter, 
+       .level_mask = DUMP_MASK_USED },
+{.name = "passunused", .selector = dump_passthru_filter, 
+       .level_mask = DUMP_MASK_UNUSED },
+{.name = "none", .selector = dump_filter_none, 
+       .level_mask = DUMP_MASK_REST }
+};
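+
+/*
+ * Note that dump_overlay_sequencer() records, for each pass, the start
+ * and end offsets of the stage 1 output in this table (filter2 there
+ * points here); after the soft-boot, dump_saved_data_iterator() replays
+ * exactly those ranges.
+ */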
+
+
+/* Scheme to handle data staged / preserved across a soft-boot */
+struct dump_scheme_ops dump_scheme_staged_ops = {
+       .configure      = dump_generic_configure,
+       .unconfigure    = dump_staged_unconfigure,
+       .sequencer      = dump_generic_sequencer,
+       .iterator       = dump_saved_data_iterator,
+       .save_data      = dump_generic_save_data,
+       .skip_data      = dump_generic_skip_data,
+       .write_buffer   = dump_generic_write_buffer
+};
+
+struct dump_scheme dump_scheme_staged = {
+       .name           = "staged",
+       .ops            = &dump_scheme_staged_ops
+};
+
+/* The stage 2 dumper comprising all these */
+struct dumper dumper_stage2 = {
+       .name           = "stage2",
+       .scheme         = &dump_scheme_staged,
+       .fmt            = &dump_fmt_passthru,
+       .compress       = &dump_none_compression,
+       .filter         = dump_passthru_filtertable,
+       .dev            = NULL,
+};             
+
diff --git a/drivers/dump/dump_ppc64.c b/drivers/dump/dump_ppc64.c
new file mode 100644 (file)
index 0000000..7fa6d85
--- /dev/null
@@ -0,0 +1,436 @@
+/*
+ * Architecture specific (ppc64) functions for Linux crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sgi.com)
+ *
+ * Copyright 1999 Silicon Graphics, Inc. All rights reserved.
+ * 
+ * 2.3 kernel modifications by: Matt D. Robinson (yakker@turbolinux.com)
+ * Copyright 2000 TurboLinux, Inc.  All rights reserved.
+ * Copyright 2003, 2004 IBM Corporation
+ * 
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * The hooks for dumping the kernel virtual memory to disk are in this
+ * file.  Any time a modification is made to the virtual memory mechanism,
+ * these routines must be changed to use the new mechanisms.
+ */
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/dump.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <linux/syscalls.h> 
+#include <linux/ioctl32.h>
+#include <asm/hardirq.h>
+#include "dump_methods.h"
+#include <linux/irq.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#if defined(CONFIG_KDB) && !defined(CONFIG_DUMP_MODULE)
+#include <linux/kdb.h>
+#endif
+
+extern cpumask_t irq_affinity[];
+
+static cpumask_t saved_affinity[NR_IRQS];
+
+static __s32         saved_irq_count;   /* saved preempt_count() flags */
+
+static int alloc_dha_stack(void)
+{
+       int i;
+       void *ptr;
+
+       if (dump_header_asm.dha_stack[0])
+               return 0;
+
+       ptr = (void *)vmalloc(THREAD_SIZE * num_online_cpus());
+       if (!ptr) {
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < num_online_cpus(); i++) {
+               dump_header_asm.dha_stack[i] =
+                       (uint64_t)((unsigned long)ptr + (i * THREAD_SIZE));
+       }
+       return 0;
+}
+
+static int free_dha_stack(void)
+{
+       if (dump_header_asm.dha_stack[0]) {
+               vfree((void *)dump_header_asm.dha_stack[0]);
+               dump_header_asm.dha_stack[0] = 0;
+       }
+       return 0;
+}
+
+#ifdef CONFIG_SMP
+static int dump_expect_ipi[NR_CPUS];
+static atomic_t waiting_for_dump_ipi;
+
+extern void stop_this_cpu(void *);
+static int
+dump_ipi_handler(struct pt_regs *regs) 
+{
+       int cpu = smp_processor_id();
+
+       if (!dump_expect_ipi[cpu])
+               return 0;
+       dump_save_this_cpu(regs);
+       atomic_dec(&waiting_for_dump_ipi);
+
+ level_changed:
+       switch (dump_silence_level) {
+       case DUMP_HARD_SPIN_CPUS:       /* Spin until dump is complete */
+               while (dump_oncpu) {
+                       barrier();      /* paranoia */
+                       if (dump_silence_level != DUMP_HARD_SPIN_CPUS)
+                               goto level_changed;
+                       cpu_relax();    /* kill time nicely */
+               }
+               break;
+
+       case DUMP_HALT_CPUS:            /* Execute halt */
+               stop_this_cpu(NULL);
+               break;
+       
+       case DUMP_SOFT_SPIN_CPUS:
+               /* Mark the task so it spins in schedule */
+               set_tsk_thread_flag(current, TIF_NEED_RESCHED);
+               break;
+       }
+
+       return 1;
+}
+
+/* save registers on other processors
+ * If the other cpus don't respond we simply do not get their states.
+ */
+void 
+__dump_save_other_cpus(void)
+{
+       int i, cpu = smp_processor_id();
+       int other_cpus = num_online_cpus()-1;
+       
+       if (other_cpus > 0) {
+               atomic_set(&waiting_for_dump_ipi, other_cpus);
+               for (i = 0; i < NR_CPUS; i++)
+                       dump_expect_ipi[i] = (i != cpu && cpu_online(i));
+
+               dump_send_ipi(dump_ipi_handler);
+               /*
+                * Maybe we don't need to wait for the IPI to be processed;
+                * just write out the header at the end of dumping. If
+                * this IPI is not processed until then, there probably
+                * is a problem and we just fail to capture the state of
+                * the other cpus.
+                */
+               while (atomic_read(&waiting_for_dump_ipi) > 0) {
+                       cpu_relax();
+               }
+               dump_send_ipi(NULL);    /* clear handler */
+       }
+}
+
+/*
+ * Restore old irq affinities.
+ */
+static void
+__dump_reset_irq_affinity(void)
+{
+       int i;
+       irq_desc_t *irq_d;
+
+       memcpy(irq_affinity, saved_affinity, NR_IRQS * sizeof(cpumask_t));
+
+       for_each_irq(i) {
+               irq_d = get_irq_desc(i);
+               if (irq_d->handler == NULL) {
+                       continue;
+               }
+               if (irq_d->handler->set_affinity != NULL) {
+                       irq_d->handler->set_affinity(i, saved_affinity[i]);
+               }
+       }
+}
+
+/*
+ * Routine to save the old irq affinities and change affinities of all irqs to
+ * the dumping cpu.
+ *
+ * NB: Need to be expanded to multiple nodes.
+ */
+static void
+__dump_set_irq_affinity(void)
+{
+       int i;
+       cpumask_t cpu = CPU_MASK_NONE;
+       irq_desc_t *irq_d;
+
+       cpu_set(smp_processor_id(), cpu);
+
+       memcpy(saved_affinity, irq_affinity, NR_IRQS * sizeof(cpumask_t));
+
+       for_each_irq(i) {
+               irq_d = get_irq_desc(i);
+               if (irq_d->handler == NULL) {
+                       continue;
+               }
+               irq_affinity[i] = cpu;
+               if (irq_d->handler->set_affinity != NULL) {
+                       irq_d->handler->set_affinity(i, irq_affinity[i]);
+               }
+       }
+}
+#else /* !CONFIG_SMP */
+#define __dump_save_other_cpus() do { } while (0)
+#define __dump_set_irq_affinity()      do { } while (0)
+#define __dump_reset_irq_affinity()    do { } while (0)
+#endif /* !CONFIG_SMP */
+
+void
+__dump_save_regs(struct pt_regs *dest_regs, const struct pt_regs *regs)
+{
+       if (regs) {
+               memcpy(dest_regs, regs, sizeof(struct pt_regs));
+       } 
+}
+
+/*
+ * Name: __dump_configure_header()
+ * Func: Configure the dump header with all proper values.
+ */
+int
+__dump_configure_header(const struct pt_regs *regs)
+{
+       return (0);
+}
+
+#if defined(CONFIG_KDB) && !defined(CONFIG_DUMP_MODULE)
+int
+kdb_sysdump(int argc, const char **argv, const char **envp, struct pt_regs *regs)
+{
+       kdb_printf("Dumping to disk...\n");
+       dump("dump from kdb", regs);
+       kdb_printf("Dump Complete\n");
+       return 0;
+}
+#endif
+
+static int dw_long(unsigned int fd, unsigned int cmd, unsigned long arg,
+                  struct file *f)
+{
+       mm_segment_t old_fs = get_fs();
+       int err;
+       unsigned long val;
+
+       set_fs(KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&val);
+       set_fs(old_fs);
+       if (!err && put_user((unsigned int) val, (u32 *)arg))
+               return -EFAULT;
+       return err;
+}
+
+/*
+ * Name: __dump_init()
+ * Func: Initialize the dumping routine process.  This is in case
+ *       it's necessary in the future.
+ */
+void
+__dump_init(uint64_t local_memory_start)
+{
+       int ret;
+
+       ret = register_ioctl32_conversion(DIOSDUMPDEV, NULL);
+       ret |= register_ioctl32_conversion(DIOGDUMPDEV, NULL);
+       ret |= register_ioctl32_conversion(DIOSDUMPLEVEL, NULL);
+       ret |= register_ioctl32_conversion(DIOGDUMPLEVEL, dw_long);
+       ret |= register_ioctl32_conversion(DIOSDUMPFLAGS, NULL);
+       ret |= register_ioctl32_conversion(DIOGDUMPFLAGS, dw_long);
+       ret |= register_ioctl32_conversion(DIOSDUMPCOMPRESS, NULL);
+       ret |= register_ioctl32_conversion(DIOGDUMPCOMPRESS, dw_long);
+       ret |= register_ioctl32_conversion(DIOSTARGETIP, NULL);
+       ret |= register_ioctl32_conversion(DIOGTARGETIP, NULL);
+       ret |= register_ioctl32_conversion(DIOSTARGETPORT, NULL);
+       ret |= register_ioctl32_conversion(DIOGTARGETPORT, NULL);
+       ret |= register_ioctl32_conversion(DIOSSOURCEPORT, NULL);
+       ret |= register_ioctl32_conversion(DIOGSOURCEPORT, NULL);
+       ret |= register_ioctl32_conversion(DIOSETHADDR, NULL);
+       ret |= register_ioctl32_conversion(DIOGETHADDR, NULL);
+       ret |= register_ioctl32_conversion(DIOGDUMPOKAY, dw_long);
+       ret |= register_ioctl32_conversion(DIOSDUMPTAKE, NULL);
+       if (ret) {
+               printk(KERN_ERR "LKCD: registering ioctl32 translations failed\n");
+       }
+
+#if defined(FIXME) && defined(CONFIG_KDB) && !defined(CONFIG_DUMP_MODULE)
+       /* This won't currently work because interrupts are off in kdb
+        * and the dump process doesn't understand how to recover.
+        */
+       /* ToDo: add a command to query/set dump configuration */
+       kdb_register_repeat("sysdump", kdb_sysdump, "", "use lkcd to dump the system to disk (if configured)", 0, KDB_REPEAT_NONE);
+#endif
+}
+
+/*
+ * Name: __dump_open()
+ * Func: Open the dump device (architecture specific).  This is in
+ *       case it's necessary in the future.
+ */
+void
+__dump_open(void)
+{
+       alloc_dha_stack();
+}
+
+
+/*
+ * Name: __dump_cleanup()
+ * Func: Free any architecture specific data structures. This is called
+ *       when the dump module is being removed.
+ */
+void
+__dump_cleanup(void)
+{
+       int ret;
+
+       ret = unregister_ioctl32_conversion(DIOSDUMPDEV);
+       ret |= unregister_ioctl32_conversion(DIOGDUMPDEV);
+       ret |= unregister_ioctl32_conversion(DIOSDUMPLEVEL);
+       ret |= unregister_ioctl32_conversion(DIOGDUMPLEVEL);
+       ret |= unregister_ioctl32_conversion(DIOSDUMPFLAGS);
+       ret |= unregister_ioctl32_conversion(DIOGDUMPFLAGS);
+       ret |= unregister_ioctl32_conversion(DIOSDUMPCOMPRESS);
+       ret |= unregister_ioctl32_conversion(DIOGDUMPCOMPRESS);
+       ret |= unregister_ioctl32_conversion(DIOSTARGETIP);
+       ret |= unregister_ioctl32_conversion(DIOGTARGETIP);
+       ret |= unregister_ioctl32_conversion(DIOSTARGETPORT);
+       ret |= unregister_ioctl32_conversion(DIOGTARGETPORT);
+       ret |= unregister_ioctl32_conversion(DIOSSOURCEPORT);
+       ret |= unregister_ioctl32_conversion(DIOGSOURCEPORT);
+       ret |= unregister_ioctl32_conversion(DIOSETHADDR);
+       ret |= unregister_ioctl32_conversion(DIOGETHADDR);
+       ret |= unregister_ioctl32_conversion(DIOGDUMPOKAY);
+       ret |= unregister_ioctl32_conversion(DIOSDUMPTAKE);
+       if (ret) {
+               printk(KERN_ERR "LKCD: Unregistering ioctl32 translations failed\n");
+       }
+       free_dha_stack();
+}
+
+/*
+ * Kludge - dump from interrupt context is unreliable (Fixme)
+ *
+ * We do this so that softirqs initiated for dump i/o
+ * get processed and we don't hang while waiting for i/o
+ * to complete or in any irq synchronization attempt.
+ *
+ * This is not quite legal of course, as it has the side
+ * effect of making all interrupts & softirqs triggered
+ * while dump is in progress complete before currently
+ * pending softirqs and the currently executing interrupt
+ * code.
+ */
+static inline void
+irq_bh_save(void)
+{
+       saved_irq_count = irq_count();
+       preempt_count() &= ~(HARDIRQ_MASK|SOFTIRQ_MASK);
+}
+
+static inline void
+irq_bh_restore(void)
+{
+       preempt_count() |= saved_irq_count;
+}
+
+/*
+ * Name: __dump_irq_enable
+ * Func: Reset system so interrupts are enabled.
+ * This is used for dump methods that require interrupts.
+ * Eventually, all methods will have interrupts disabled
+ * and this code can be removed.
+ *
+ * Change irq affinities
+ * Re-enable interrupts
+ */
+int
+__dump_irq_enable(void)
+{
+       __dump_set_irq_affinity();
+       irq_bh_save();
+       local_irq_enable();
+       return 0;
+}
+
+/*
+ * Name: __dump_irq_restore
+ * Func: Resume the system state in an architecture-specific way.
+ */
+void
+__dump_irq_restore(void)
+{
+       local_irq_disable();
+       __dump_reset_irq_affinity();
+       irq_bh_restore(); 
+}
+
+#if 0
+/* Cheap progress hack.  It estimates pages to write and
+ * assumes all pages will go -- so it may get way off.
+ * Since progress is not displayed for other architectures, this is not
+ * used at the moment.
+ */
+void
+__dump_progress_add_page(void)
+{
+       unsigned long total_pages = nr_free_pages() + nr_inactive_pages + nr_active_pages;
+       unsigned int percent = (dump_header.dh_num_dump_pages * 100) / total_pages;
+       char buf[30];
+
+       if (percent > last_percent && percent <= 100) {
+               sprintf(buf, "Dump %3d%%     ", percent);
+               ppc64_dump_msg(0x2, buf);
+               last_percent = percent;
+       }
+
+}
+#endif
+
+extern int dump_page_is_ram(unsigned long);
+/*
+ * Name: __dump_page_valid()
+ * Func: Check if page is valid to dump.
+ */
+int
+__dump_page_valid(unsigned long index)
+{
+       if (!pfn_valid(index))
+               return 0;
+
+       return dump_page_is_ram(index);
+}
+
+/*
+ * Name: manual_handle_crashdump()
+ * Func: Interface for the lkcd dump command. Calls dump_execute()
+ */
+int
+manual_handle_crashdump(void)
+{
+       struct pt_regs regs;
+
+       get_current_regs(&regs);
+       dump_execute("manual", &regs);
+       return 0;
+}
diff --git a/drivers/dump/dump_rle.c b/drivers/dump/dump_rle.c
new file mode 100644 (file)
index 0000000..9d8c1bd
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * RLE Compression functions for kernel crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sourceforge.net)
+ * Copyright 2001 Matt D. Robinson.  All rights reserved.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/* header files */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init.h>
+#include <linux/dump.h>
+
+/*
+ * Name: dump_compress_rle()
+ * Func: Compress a DUMP_PAGE_SIZE (hardware) page down to something more
+ *       reasonable, if possible.  This is the same routine we use in IRIX.
+ */
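+/*
+ * The encoding, as inferred from the code below: a run of n >= 3 equal
+ * bytes v is emitted as the escape triple {0x00, n - 1, v}. The escape
+ * byte 0x00 is never emitted bare: a single zero becomes {0x00, 0x00}
+ * and a pair of zeros {0x00, 0x01, 0x00}, while a pair of equal
+ * non-zero bytes is written literally as {v, v} and a single non-zero
+ * byte as itself. Whenever the output would grow to the size of the
+ * input, oldsize is returned so the caller keeps the page uncompressed.
+ */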
+static u16
+dump_compress_rle(const u8 *old, u16 oldsize, u8 *new, u16 newsize)
+{
+       u16 ri, wi, count = 0;
+       u_char value = 0, cur_byte;
+
+       /*
+        * If the block should happen to "compress" to as large as or
+        * larger than the original, bail out and return oldsize so that
+        * the caller stores the page uncompressed.
+        */
+
+       wi = ri = 0;
+
+       while (ri < oldsize) {
+               if (!ri) {
+                       cur_byte = value = old[ri];
+                       count = 0;
+               } else {
+                       if (count == 255) {
+                               if (wi + 3 > oldsize) {
+                                       return oldsize;
+                               }
+                               new[wi++] = 0;
+                               new[wi++] = count;
+                               new[wi++] = value;
+                               value = cur_byte = old[ri];
+                               count = 0;
+                       } else { 
+                               if ((cur_byte = old[ri]) == value) {
+                                       count++;
+                               } else {
+                                       if (count > 1) {
+                                               if (wi + 3 > oldsize) {
+                                                       return oldsize;
+                                               }
+                                               new[wi++] = 0;
+                                               new[wi++] = count;
+                                               new[wi++] = value;
+                                       } else if (count == 1) {
+                                               if (value == 0) {
+                                                       if (wi + 3 > oldsize) {
+                                                               return oldsize;
+                                                       }
+                                                       new[wi++] = 0;
+                                                       new[wi++] = 1;
+                                                       new[wi++] = 0;
+                                               } else {
+                                                       if (wi + 2 > oldsize) {
+                                                               return oldsize;
+                                                       }
+                                                       new[wi++] = value;
+                                                       new[wi++] = value;
+                                               }
+                                       } else { /* count == 0 */
+                                               if (value == 0) {
+                                                       if (wi + 2 > oldsize) {
+                                                               return oldsize;
+                                                       }
+                                                       new[wi++] = value;
+                                                       new[wi++] = value;
+                                               } else {
+                                                       if (wi + 1 > oldsize) {
+                                                               return oldsize;
+                                                       }
+                                                       new[wi++] = value;
+                                               }
+                                       } /* if count > 1 */
+
+                                       value = cur_byte;
+                                       count = 0;
+
+                               } /* if byte == value */
+
+                       } /* if count == 255 */
+
+               } /* if ri == 0 */
+               ri++;
+
+       }
+       if (count > 1) {
+               if (wi + 3 > oldsize) {
+                       return oldsize;
+               }
+               new[wi++] = 0;
+               new[wi++] = count;
+               new[wi++] = value;
+       } else if (count == 1) {
+               if (value == 0) {
+                       if (wi + 3 > oldsize)
+                               return oldsize;
+                       new[wi++] = 0;
+                       new[wi++] = 1;
+                       new[wi++] = 0;
+               } else {
+                       if (wi + 2 > oldsize)
+                               return oldsize;
+                       new[wi++] = value;
+                       new[wi++] = value;
+               }
+       } else { /* count == 0 */
+               if (value == 0) {
+                       if (wi + 2 > oldsize)
+                               return oldsize;
+                       new[wi++] = value;
+                       new[wi++] = value;
+               } else {
+                       if (wi + 1 > oldsize)
+                               return oldsize;
+                       new[wi++] = value;
+               }
+       } /* if count > 1 */
+
+       return wi;
+}
+
+/* setup the rle compression functionality */
+static struct __dump_compress dump_rle_compression = {
+       .compress_type = DUMP_COMPRESS_RLE,
+       .compress_func = dump_compress_rle,
+       .compress_name = "RLE",
+};
+
+/*
+ * Name: dump_compress_rle_init()
+ * Func: Initialize RLE compression for dumping.
+ */
+static int __init
+dump_compress_rle_init(void)
+{
+       dump_register_compression(&dump_rle_compression);
+       return 0;
+}
+
+/*
+ * Name: dump_compress_rle_cleanup()
+ * Func: Remove RLE compression for dumping.
+ */
+static void __exit
+dump_compress_rle_cleanup(void)
+{
+       dump_unregister_compression(DUMP_COMPRESS_RLE);
+}
+
+/* module initialization */
+module_init(dump_compress_rle_init);
+module_exit(dump_compress_rle_cleanup);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("LKCD Development Team <lkcd-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("RLE compression module for crash dump driver");
diff --git a/drivers/dump/dump_scheme.c b/drivers/dump/dump_scheme.c
new file mode 100644 (file)
index 0000000..de0ce78
--- /dev/null
@@ -0,0 +1,383 @@
+/* 
+ * Default single stage dump scheme methods
+ *
+ * Previously a part of dump_base.c
+ *
+ * Started: Oct 2002 -  Suparna Bhattacharya <suparna@in.ibm.com>
+ *     Split and rewrote the LKCD dump scheme into generic dump method
+ *     interfaces
+ * Derived from original code created by
+ *     Matt Robinson <yakker@sourceforge.net>
+ *
+ * Contributions from SGI, IBM, HP, MCL, and others.
+ *
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * Implements the default dump scheme: single-stage gathering and
+ * saving of dump data directly to the target device.  It operates in
+ * push mode, where the dumping system decides what data to save,
+ * taking pre-specified dump config options into account.
+ *
+ * Aside: the 2-stage dump scheme, where there is a soft-reset between
+ * the gathering and saving phases, also reuses some of these
+ * default routines (see dump_overlay.c).
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/nmi.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+
+extern int panic_timeout;  /* time before reboot */
+
+extern void dump_speedo(int);
+
+/* Default sequencer used during single stage dumping */
+/* Also invoked during stage 2 of soft-boot based dumping */
+int dump_generic_sequencer(void)
+{
+       struct dump_data_filter *filter = dump_config.dumper->filter;
+       int pass = 0, err = 0, save = 0;
+       int (*action)(unsigned long, unsigned long);
+
+       /*
+        * We want to save the more critical data areas first, in
+        * case we run out of space, encounter i/o failures, or get
+        * interrupted otherwise and have to give up midway.
+        * So, run through the passes in increasing order.
+        */
+       for (;filter->selector; filter++, pass++)
+       {
+               /* Assumes passes are exclusive (even across dumpers) */
+               /* Requires care when coding the selection functions */
+               if ((save = filter->level_mask & dump_config.level))
+                       action = dump_save_data;
+               else
+                       action = dump_skip_data;
+
+               if ((err = dump_iterator(pass, action, filter)) < 0)
+                       break;
+
+               printk("\n %d dump pages %s of %d each in pass %d\n", 
+               err, save ? "saved" : "skipped", DUMP_PAGE_SIZE, pass);
+
+       }
+
+       return (err < 0) ? err : 0;
+}
+
+static inline struct page *dump_get_page(loff_t loc)
+{
+
+       unsigned long page_index = loc >> PAGE_SHIFT;
+
+       /* todo: complete this  to account for ia64/discontig mem */
+       /* todo: and to check for validity, ram page, no i/o mem etc */
+       /* need to use pfn/physaddr equiv of kern_addr_valid */
+
+       /* Important:
+        *   On ARM/XScale systems, physical addresses start at
+        *   PHYS_OFFSET, which may well be non-zero; on Intel's PXA250,
+        *   for example, PHYS_OFFSET = 0xa0000000, and page indices
+        *   start at PHYS_PFN_OFFSET.  Since filter->start is set to 0
+        *   in dump_generic_configure, the page index is adjusted here
+        *   by adding PHYS_PFN_OFFSET.
+        */
+#ifdef CONFIG_ARM
+       page_index += PHYS_PFN_OFFSET;
+#endif
+       if (__dump_page_valid(page_index))
+               return pfn_to_page(page_index);
+       else
+               return NULL;
+
+}
+
+/* Default iterator: for singlestage and stage 1 of soft-boot dumping */
+/* Iterates over range of physical memory pages in DUMP_PAGE_SIZE increments */
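+/* Returns the number of pages acted upon, or a negative error code */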
+int dump_page_iterator(int pass, int (*action)(unsigned long, unsigned long), 
+       struct dump_data_filter *filter)
+{
+       /* Todo : fix unit, type */
+       loff_t loc, start, end;
+       int i, count = 0, err = 0;
+       struct page *page;
+
+       /* Todo: Add membanks code */
+       /* TBD: Check if we need to address DUMP_PAGE_SIZE < PAGE_SIZE */       
+
+       for (i = 0; i < filter->num_mbanks; i++) {
+               start = filter->start[i];
+               end = filter->end[i];
+               for (loc = start; loc < end; loc += DUMP_PAGE_SIZE) {
+                       dump_config.dumper->curr_loc = loc;
+                       page = dump_get_page(loc);
+                       if (page && filter->selector(pass, 
+                               (unsigned long) page, DUMP_PAGE_SIZE)) { 
+                               if ((err = action((unsigned long)page, 
+                                       DUMP_PAGE_SIZE))) {
+                                       printk("dump_page_iterator: err %d for "
+                                               "loc 0x%llx, in pass %d\n", 
+                                               err, loc, pass);
+                                       return err ? err : count;
+                               } else
+                                       count++;
+                       }
+               }
+       }
+
+       return err ? err : count;
+}
+
+/* 
+ * Base function that saves the selected block of data in the dump 
+ * Action taken when iterator decides that data needs to be saved 
+ */
+int dump_generic_save_data(unsigned long loc, unsigned long sz)
+{
+       void *buf;
+       void *dump_buf = dump_config.dumper->dump_buf;
+       int left, bytes, ret;
+
+       if ((ret = dump_add_data(loc, sz))) {
+               return ret;
+       }
+       buf = dump_config.dumper->curr_buf;
+
+       /* If we've filled up the buffer write it out */
+       if ((left = buf - dump_buf) >= DUMP_BUFFER_SIZE) {
+               bytes = dump_write_buffer(dump_buf, DUMP_BUFFER_SIZE);
+               if (bytes < DUMP_BUFFER_SIZE) {
+                       printk("dump_write_buffer failed %d\n", bytes);
+                       return (bytes < 0) ? bytes : -ENOSPC;
+               }
+
+               left -= bytes;
+               
+               /* -- A few chores to do from time to time -- */
+               dump_config.dumper->count++;
+
+               if (!(dump_config.dumper->count & 0x3f)) {
+                       /* Update the header every once in a while */
+                       memset((void *)dump_buf, 'b', DUMP_BUFFER_SIZE);
+                       if ((ret = dump_update_header()) < 0) {
+                               /* issue warning */
+                               return ret;
+                       }
+                       printk(".");
+
+                       touch_nmi_watchdog();
+               } else if (!(dump_config.dumper->count & 0x7)) {
+                       /* Show progress so the user knows we aren't hung */
+                       dump_speedo(dump_config.dumper->count >> 3); 
+               }
+               /* Todo: Touch/Refresh watchdog */
+
+               /* --- Done with periodic chores -- */
+
+               /* 
+                * extra bit of copying to simplify verification  
+                * in the second kernel boot based scheme
+                */
+               memcpy(dump_buf - DUMP_PAGE_SIZE, dump_buf + 
+                       DUMP_BUFFER_SIZE - DUMP_PAGE_SIZE, DUMP_PAGE_SIZE);
+
+               /* now adjust the leftover bits back to the top of the page */
+               /* this case would not arise during stage 2 (passthru) */
+               memset(dump_buf, 'z', DUMP_BUFFER_SIZE);
+               if (left) {
+                       memcpy(dump_buf, dump_buf + DUMP_BUFFER_SIZE, left);
+               }
+               buf -= DUMP_BUFFER_SIZE;
+               dump_config.dumper->curr_buf = buf;
+       }
+                               
+       return 0;
+}
+
+int dump_generic_skip_data(unsigned long loc, unsigned long sz)
+{
+       /* dummy by default */
+       return 0;
+}
+
+/* 
+ * Common low level routine to write a buffer to current dump device 
+ * Expects checks for space etc to have been taken care of by the caller 
+ * Operates serially at the moment for simplicity. 
+ * TBD/Todo: Consider batching for improved throughput
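+ * Returns the number of bytes transferred, or a negative error code.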
+ */
+int dump_ll_write(void *buf, unsigned long len)
+{
+       long transferred = 0, last_transfer = 0;
+       int ret = 0;
+
+       /* make sure device is ready */
+       while ((ret = dump_dev_ready(NULL)) == -EAGAIN);
+       if  (ret < 0) {
+               printk("dump_dev_ready failed !err %d\n", ret);
+               return ret;
+       }
+
+       while (len) {
+               if ((last_transfer = dump_dev_write(buf, len)) <= 0)  {
+                       ret = last_transfer;
+                       printk("dump_dev_write failed !err %d\n", 
+                       ret);
+                       break;
+               }
+               /* wait till complete */
+               while ((ret = dump_dev_ready(buf)) == -EAGAIN)
+                       cpu_relax();
+
+               if  (ret < 0) {
+                       printk("i/o failed !err %d\n", ret);
+                       break;
+               }
+
+               len -= last_transfer;
+               buf += last_transfer;
+               transferred += last_transfer;
+       }
+       return (ret < 0) ? ret : transferred;
+}
+
+/* default writeout routine for single dump device */
+/* writes out the dump data ensuring enough space is left for the end marker */
+int dump_generic_write_buffer(void *buf, unsigned long len)
+{
+       long written = 0;
+       int err = 0;
+
+       /* check for space */
+       if ((err = dump_dev_seek(dump_config.dumper->curr_offset + len + 
+                       2*DUMP_BUFFER_SIZE)) < 0) {
+               printk("dump_write_buffer: insuff space after offset 0x%llx\n",
+                       dump_config.dumper->curr_offset);
+               return err;
+       }
+       /* alignment check would happen as a side effect of this */
+       if ((err = dump_dev_seek(dump_config.dumper->curr_offset)) < 0)
+               return err; 
+
+       written = dump_ll_write(buf, len);
+
+       /* all or none */
+
+       if (written < (long)len)
+               written = (written < 0) ? written : -ENOSPC;
+       else
+               dump_config.dumper->curr_offset += len;
+
+       return written;
+}
+
+int dump_generic_configure(unsigned long devid)
+{
+       struct dump_dev *dev = dump_config.dumper->dev;
+       struct dump_data_filter *filter;
+       void *buf;
+       int ret = 0;
+
+       /* Allocate the dump buffer and initialize dumper state */
+       /* Assume that we get aligned addresses */
+       if (!(buf = dump_alloc_mem(DUMP_BUFFER_SIZE + 3 * DUMP_PAGE_SIZE)))
+               return -ENOMEM;
+
+       if ((unsigned long)buf & (PAGE_SIZE - 1)) {
+               /* sanity check for page aligned address */
+               dump_free_mem(buf);
+               return -ENOMEM; /* fixme: better error code */
+       }
+
+       /* Initialize the rest of the fields */
+       dump_config.dumper->dump_buf = buf + DUMP_PAGE_SIZE;
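+       /*
+        * Resulting layout (sketch):
+        *
+        *   buf        dump_buf
+        *    |<-1 page->|<--- DUMP_BUFFER_SIZE --->|<-- 2 pages slack -->|
+        *
+        * The leading page receives the verification copy made by
+        * dump_generic_save_data(); the trailing slack holds data added
+        * past the flush threshold until it is copied back down.
+        */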
+       dumper_reset();
+
+       /* Open the dump device */
+       if (!dev)
+               return -ENODEV;
+
+       if ((ret = dev->ops->open(dev, devid))) {
+              return ret;
+       }
+
+       /* Initialize the memory ranges in the dump filter */
+       for (filter = dump_config.dumper->filter; filter->selector; filter++) {
+               if (!filter->start[0] && !filter->end[0]) {
+                       pg_data_t *pgdat;
+                       int i = 0;
+                       for_each_pgdat(pgdat) {
+                               filter->start[i] = 
+                                       (loff_t)pgdat->node_start_pfn << PAGE_SHIFT;
+                               filter->end[i] =
+                                       (loff_t)(pgdat->node_start_pfn + pgdat->node_spanned_pages) << PAGE_SHIFT;
+                               i++;
+                       }
+                       filter->num_mbanks = i;
+               }
+       }
+
+       return 0;
+}
+
+int dump_generic_unconfigure(void)
+{
+       struct dump_dev *dev = dump_config.dumper->dev;
+       void *buf = dump_config.dumper->dump_buf;
+       int ret = 0;
+
+       pr_debug("Generic unconfigure\n");
+       /* Close the dump device */
+       if (dev && (ret = dev->ops->release(dev)))
+               return ret;
+
+       printk("Closed dump device\n");
+       
+       if (buf)
+               dump_free_mem((buf - DUMP_PAGE_SIZE));
+
+       dump_config.dumper->curr_buf = dump_config.dumper->dump_buf = NULL;
+       pr_debug("Released dump buffer\n");
+
+       return 0;
+}
+
+
+/* Set up the default dump scheme */
+
+struct dump_scheme_ops dump_scheme_singlestage_ops = {
+       .configure      = dump_generic_configure,
+       .unconfigure    = dump_generic_unconfigure,
+       .sequencer      = dump_generic_sequencer,
+       .iterator       = dump_page_iterator,
+       .save_data      = dump_generic_save_data,
+       .skip_data      = dump_generic_skip_data,
+       .write_buffer   = dump_generic_write_buffer,
+};
+
+struct dump_scheme dump_scheme_singlestage = {
+       .name           = "single-stage",
+       .ops            = &dump_scheme_singlestage_ops
+};
+
+/* The single stage dumper comprising all these */
+struct dumper dumper_singlestage = {
+       .name           = "single-stage",
+       .scheme         = &dump_scheme_singlestage,
+       .fmt            = &dump_fmt_lcrash,
+       .compress       = &dump_none_compression,
+       .filter         = dump_filter_table,
+       .dev            = NULL,
+};             
+
diff --git a/drivers/dump/dump_setup.c b/drivers/dump/dump_setup.c
new file mode 100644 (file)
index 0000000..668b2d0
--- /dev/null
@@ -0,0 +1,835 @@
+/*
+ * Standard kernel function entry points for Linux crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sourceforge.net)
+ * Contributions from SGI, IBM, HP, MCL, and others.
+ *
+ * Copyright (C) 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000 - 2002 TurboLinux, Inc.  All rights reserved.
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 Free Software Foundation, Inc. All rights reserved.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/*
+ * -----------------------------------------------------------------------
+ *
+ * DUMP HISTORY
+ *
+ * This dump code goes back to SGI's first attempts at dumping system
+ * memory on SGI systems running IRIX.  A few developers at SGI needed
+ * a way to take this system dump and analyze it, and created 'icrash',
+ * or IRIX Crash.  The mechanism (the dumps and 'icrash') were used
+ * by support people to generate crash reports when a system failure
+ * occurred.  This was vital for large system configurations that
+ * couldn't apply patch after patch after fix just to hope that the
+ * problems would go away.  So the system memory, along with the crash
+ * dump analyzer, allowed support people to quickly figure out what the
+ * problem was on the system with the crash dump.
+ *
+ * In comes Linux.  SGI started moving towards the open source community,
+ * and upon doing so, SGI wanted to take its support utilities into Linux
+ * with the hopes that they would end up in the kernel and user space to
+ * be used by SGI's customers buying SGI Linux systems.  One of the first
+ * few products to be open sourced by SGI was LKCD, or Linux Kernel Crash
+ * Dumps.  LKCD consists of a patch to the kernel to enable system
+ * dumping, along with 'lcrash', or Linux Crash, to analyze the system
+ * memory dump.  A few additional system scripts and kernel modifications
+ * are also included to make the dump mechanism and dump data easier to
+ * process and use.
+ *
+ * As soon as LKCD was released into the open source community, a number
+ * of larger companies started to take advantage of it.  Today, there are
+ * many community members that contribute to LKCD, and it continues to
+ * flourish and grow as an open source project.
+ */
+
+/*
+ * DUMP TUNABLES
+ *
+ * This is the list of system tunables (via /proc) that are available
+ * for Linux systems.  All the read, write, etc., functions are listed
+ * here.  Currently, there are a few different tunables for dumps:
+ *
+ * dump_device (used to be dumpdev):
+ *     The device for dumping the memory pages out to.  This 
+ *     may be set to the primary swap partition for disruptive dumps,
+ *     and must be an unused partition for non-disruptive dumps.
+ *     Todo: In the case of network dumps, this may be interpreted 
+ *     as the IP address of the netdump server to connect to.
+ *
+ * dump_compress (used to be dump_compress_pages):
+ *     This is the flag which indicates which compression mechanism
+ *     to use.  This is a BITMASK, not an index (0,1,2,4,8,16,etc.).
+ *     This is the current set of values:
+ *
+ *     0: DUMP_COMPRESS_NONE -- Don't compress any pages.
+ *     1: DUMP_COMPRESS_RLE  -- This uses RLE compression.
+ *     2: DUMP_COMPRESS_GZIP -- This uses GZIP compression.
+ *
+ * dump_level:
+ *     The amount of effort the dump module should make to save
+ *     information for post crash analysis.  This value is now
+ *     a BITMASK value, not an index:
+ *
+ *     0:   Do nothing, no dumping. (DUMP_LEVEL_NONE)
+ *
+ *     1:   Print out the dump information to the dump header, and
+ *          write it out to the dump_device. (DUMP_LEVEL_HEADER)
+ *
+ *     2:   Write out the dump header and all kernel memory pages.
+ *          (DUMP_LEVEL_KERN)
+ *
+ *     4:   Write out the dump header and all kernel and user
+ *          memory pages.  (DUMP_LEVEL_USED)
+ *
+ *     8:   Write out the dump header and all conventional/cached
+ *          memory (RAM) pages in the system (kernel, user, free).
+ *          (DUMP_LEVEL_ALL_RAM)
+ *
+ *    16:   Write out everything, including non-conventional memory
+ *          like firmware, proms, I/O registers, uncached memory.
+ *          (DUMP_LEVEL_ALL)
+ *
+ *     The dump_level will default to 1.
+ *
+ * dump_flags:
+ *     These are the flags to use when talking about dumps.  There
+ *     are lots of possibilities.  This is a BITMASK value, not an index.
+ * 
+ * -----------------------------------------------------------------------
+ */
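+
+/*
+ * The tunables above surface under /proc/sys/kernel/dump through the
+ * sysctl tables registered below; values are printed in hex by
+ * proc_doulonghex().  An illustrative session (values made up):
+ *
+ *	# cat /proc/sys/kernel/dump/flags
+ *	0x80000000
+ *
+ * i.e. DUMP_FLAGS_DISKDUMP -- a local disk dump target.
+ */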
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/fs.h>
+#include <linux/dump.h>
+#include "dump_methods.h"
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/utsname.h>
+#include <linux/highmem.h>
+#include <linux/miscdevice.h>
+#include <linux/sysrq.h>
+#include <linux/sysctl.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+
+#include <asm/hardirq.h>
+#include <asm/uaccess.h>
+
+/*
+ * -----------------------------------------------------------------------
+ *                         V A R I A B L E S
+ * -----------------------------------------------------------------------
+ */
+
+/* Dump tunables */
+struct dump_config dump_config = {
+       .level          = 0,
+       .flags          = 0,
+       .dump_device    = 0,
+       .dump_addr      = 0,
+       .dumper         = NULL
+};
+#ifdef CONFIG_ARM 
+static _dump_regs_t all_regs;
+#endif
+
+/* Global variables used in dump.h */
+/* degree of system freeze when dumping */
+enum dump_silence_levels dump_silence_level = DUMP_HARD_SPIN_CPUS;      
+
+/* Other global fields */
+extern struct __dump_header dump_header; 
+struct dump_dev *dump_dev = NULL;  /* Active dump device                   */
+static int dump_compress = 0;
+
+static u16 dump_compress_none(const u8 *old, u16 oldsize, u8 *new, u16 newsize);
+struct __dump_compress dump_none_compression = {
+       .compress_type  = DUMP_COMPRESS_NONE,
+       .compress_func  = dump_compress_none,
+       .compress_name  = "none",
+};
+
+/* our device operations and functions */
+static int dump_ioctl(struct inode *i, struct file *f,
+       unsigned int cmd, unsigned long arg);
+
+static struct file_operations dump_fops = {
+       .owner  = THIS_MODULE,
+       .ioctl  = dump_ioctl,
+};
+
+static struct miscdevice dump_miscdev = {
+       .minor  = CRASH_DUMP_MINOR,
+       .name   = "dump",
+       .fops   = &dump_fops,
+};
+MODULE_ALIAS_MISCDEV(CRASH_DUMP_MINOR);
+
+/* static variables                                                    */
+static int dump_okay = 0;              /* can we dump out to disk?     */
+static spinlock_t dump_lock = SPIN_LOCK_UNLOCKED;
+
+/* used for dump compressors */
+static struct list_head dump_compress_list = LIST_HEAD_INIT(dump_compress_list);
+
+/* list of registered dump targets */
+static struct list_head dump_target_list = LIST_HEAD_INIT(dump_target_list);
+
+/* lkcd info structure -- this is used by lcrash for basic system data     */
+struct __lkcdinfo lkcdinfo = {
+       .ptrsz          = (sizeof(void *) * 8),
+#if defined(__LITTLE_ENDIAN) 
+       .byte_order     = __LITTLE_ENDIAN,
+#else
+       .byte_order     = __BIG_ENDIAN,
+#endif
+       .page_shift     = PAGE_SHIFT,
+       .page_size      = PAGE_SIZE,
+       .page_mask      = PAGE_MASK,
+       .page_offset    = PAGE_OFFSET,
+};
+
+/*
+ * -----------------------------------------------------------------------
+ *            / P R O C   T U N A B L E   F U N C T I O N S
+ * -----------------------------------------------------------------------
+ */
+
+static int proc_dump_device(ctl_table *ctl, int write, struct file *f,
+                           void *buffer, size_t *lenp);
+
+static int proc_doulonghex(ctl_table *ctl, int write, struct file *f,
+                           void *buffer, size_t *lenp);
+/*
+ * sysctl-tuning infrastructure.
+ */
+static ctl_table dump_table[] = {
+       { .ctl_name = CTL_DUMP_LEVEL,
+         .procname = DUMP_LEVEL_NAME, 
+         .data = &dump_config.level,    
+         .maxlen = sizeof(int),
+         .mode = 0644,
+         .proc_handler = proc_doulonghex, },
+
+       { .ctl_name = CTL_DUMP_FLAGS,
+         .procname = DUMP_FLAGS_NAME,
+         .data = &dump_config.flags,   
+         .maxlen = sizeof(int),
+         .mode = 0644,
+         .proc_handler = proc_doulonghex, },
+
+       { .ctl_name = CTL_DUMP_COMPRESS,
+         .procname = DUMP_COMPRESS_NAME,
+         .data = &dump_compress, /* FIXME */
+         .maxlen = sizeof(int),
+         .mode = 0644,
+         .proc_handler = proc_dointvec, },
+         
+       { .ctl_name = CTL_DUMP_DEVICE,
+         .procname = DUMP_DEVICE_NAME,
+         .mode = 0644,
+         .data = &dump_config.dump_device, /* FIXME */
+         .maxlen = sizeof(int),
+         .proc_handler = proc_dump_device },
+
+#ifdef CONFIG_CRASH_DUMP_MEMDEV
+       { .ctl_name = CTL_DUMP_ADDR,
+         .procname = DUMP_ADDR_NAME,
+         .mode = 0444,
+         .data = &dump_config.dump_addr,
+         .maxlen = sizeof(unsigned long),
+         .proc_handler = proc_doulonghex },
+#endif
+
+       { 0, }
+};
+
+static ctl_table dump_root[] = {
+       { .ctl_name = KERN_DUMP,
+         .procname = "dump",
+         .mode = 0555, 
+         .child = dump_table },
+       { 0, }
+};
+
+static ctl_table kernel_root[] = {
+       { .ctl_name = CTL_KERN,
+         .procname = "kernel",
+         .mode = 0555,
+         .child = dump_root, },
+       { 0, }
+};
+
+static struct ctl_table_header *sysctl_header;
+
+/*
+ * -----------------------------------------------------------------------
+ *              C O M P R E S S I O N   F U N C T I O N S
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * Name: dump_compress_none()
+ * Func: Don't do any compression, period.
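+ *       Returning the original size signals that no space was saved;
+ *       the format layer can then store the page raw (cf. DUMP_DH_RAW).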
+ */
+static u16
+dump_compress_none(const u8 *old, u16 oldsize, u8 *new, u16 newsize)
+{
+       /* just return the old size */
+       return oldsize;
+}
+
+
+/*
+ * Name: dump_execute()
+ * Func: Execute the dumping process.  This makes sure all the appropriate
+ *       fields are updated correctly, and calls dump_generic_execute(),
+ *       which does the real work.
+ */
+void
+dump_execute(const char *panic_str, const struct pt_regs *regs)
+{
+       int state = -1;
+       unsigned long flags;
+
+       /* make sure we can dump */
+       if (!dump_okay) {
+               pr_info("LKCD not yet configured, can't take dump now\n");
+               return;
+       }
+
+       /* Exclude multiple dumps at the same time and disable
+        * interrupts; some drivers may re-enable interrupts in their
+        * silence() operation.
+        *
+        * Try to acquire the spin lock. If successful, leave preempt
+        * and interrupts disabled.  See spin_lock_irqsave in spinlock.h
+        */
+       local_irq_save(flags);
+       if (!spin_trylock(&dump_lock)) {
+               local_irq_restore(flags);
+               pr_info("LKCD dump already in progress\n");
+               return;
+       }
+
+       /* Bring the system into the strictest level of quiescing, for
+        * minimal drift; dump drivers can soften this as required in
+        * dev->ops->silence().
+        */
+       dump_oncpu = smp_processor_id() + 1;
+       dump_silence_level = DUMP_HARD_SPIN_CPUS; 
+
+       state = dump_generic_execute(panic_str, regs);
+       
+       dump_oncpu = 0;
+       spin_unlock_irqrestore(&dump_lock, flags);
+
+       if (state < 0) {
+               printk("Dump Incomplete or failed!\n");
+       } else {
+               printk("Dump Complete; %d dump pages saved.\n", 
+                      dump_header.dh_num_dump_pages);
+       }
+}
+
+/*
+ * Name: dump_register_compression()
+ * Func: Register a dump compression mechanism.
+ */
+void
+dump_register_compression(struct __dump_compress *item)
+{
+       if (item)
+               list_add(&(item->list), &dump_compress_list);
+}
+
+/*
+ * Name: dump_unregister_compression()
+ * Func: Remove a dump compression mechanism, and re-assign the dump
+ *       compression pointer if necessary.
+ */
+void
+dump_unregister_compression(int compression_type)
+{
+       struct list_head *tmp;
+       struct __dump_compress *dc;
+
+       /* let's make sure our list is valid */
+       if (compression_type != DUMP_COMPRESS_NONE) {
+               list_for_each(tmp, &dump_compress_list) {
+                       dc = list_entry(tmp, struct __dump_compress, list);
+                       if (dc->compress_type == compression_type) {
+                               list_del(&(dc->list));
+                               break;
+                       }
+               }
+       }
+}
+
+/*
+ * Name: dump_compress_init()
+ * Func: Initialize (or re-initialize) compression scheme.
+ */
+static int
+dump_compress_init(int compression_type)
+{
+       struct list_head *tmp;
+       struct __dump_compress *dc;
+
+       /* find the requested compression item and make it current */
+       list_for_each(tmp, &dump_compress_list) {
+               dc = list_entry(tmp, struct __dump_compress, list);
+               if (dc->compress_type == compression_type) {
+                       dump_config.dumper->compress = dc;
+                       dump_compress = compression_type;
+                       pr_debug("Dump Compress %s\n", dc->compress_name);
+                       return 0;
+               }
+       }
+
+       /* 
+        * nothing on the list -- return ENODATA to indicate an error 
+        *
+        * NB: 
+        *      EAGAIN: reports "Resource temporarily unavailable" which
+        *              isn't very enlightening.
+        */
+       printk("compression_type:%d not found\n", compression_type);
+
+       return -ENODATA;
+}
+
+static int
+dumper_setup(unsigned long flags, unsigned long devid)
+{
+       int ret = 0;
+
+       /* unconfigure old dumper if it exists */
+       dump_okay = 0;
+       if (dump_config.dumper) {
+               pr_debug("Unconfiguring current dumper\n");
+               dump_unconfigure();
+       }
+       /* set up new dumper */
+       if (dump_config.flags & DUMP_FLAGS_SOFTBOOT) {
+               printk("Configuring softboot based dump \n");
+#ifdef CONFIG_CRASH_DUMP_MEMDEV
+               dump_config.dumper = &dumper_stage1; 
+#else
+               printk("Requires CONFIG_CRASHDUMP_MEMDEV. Can't proceed.\n");
+               return -1;
+#endif
+       } else {
+               dump_config.dumper = &dumper_singlestage;
+       }       
+       dump_config.dumper->dev = dump_dev;
+
+       ret = dump_configure(devid);
+       if (!ret) {
+               dump_okay = 1;
+               pr_debug("%s dumper set up for dev 0x%lx\n", 
+                       dump_config.dumper->name, devid);
+               dump_config.dump_device = devid;
+       } else {
+               printk("%s dumper set up failed for dev 0x%lx\n", 
+                      dump_config.dumper->name, devid);
+               dump_config.dumper = NULL;
+       }
+       return ret;
+}
+
+static int
+dump_target_init(int target)
+{
+       char type[20];
+       struct list_head *tmp;
+       struct dump_dev *dev;
+       
+       switch (target) {
+               case DUMP_FLAGS_DISKDUMP:
+                       strcpy(type, "blockdev"); break;
+               case DUMP_FLAGS_NETDUMP:
+                       strcpy(type, "networkdev"); break;
+               default:
+                       return -1;
+       }
+
+       /*
+        * This is a bit stupid, generating strings from flags
+        * and doing strcmp. This is done because 'struct dump_dev'
+        * has a string 'type_name' and not an integer 'type'.
+        */
+       list_for_each(tmp, &dump_target_list) {
+               dev = list_entry(tmp, struct dump_dev, list);
+               if (strcmp(type, dev->type_name) == 0) {
+                       dump_dev = dev;
+                       return 0;
+               }
+       }
+       return -1;
+}
+
+/*
+ * Name: dump_ioctl()
+ * Func: Allow all dump tunables through a standard ioctl() mechanism.
+ *       This is far better than before, where we'd go through /proc,
+ *       because now this will work for multiple OSes and architectures.
+ */
+static int
+dump_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
+{
+       /* check capabilities */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!dump_config.dumper && cmd == DIOSDUMPCOMPRESS)
+               /* dump device must be configured first */
+               return -ENODEV;
+
+       /*
+        * This is the main mechanism for controlling get/set data
+        * for various dump device parameters.  The real trick here
+        * is setting the dump device (DIOSDUMPDEV).  That's what
+        * triggers everything else.
+        */
+       switch (cmd) {
+       case DIOSDUMPDEV:       /* set dump_device */
+               pr_debug("Configuring dump device\n"); 
+               if (!(f->f_flags & O_RDWR))
+                       return -EPERM;
+
+               __dump_open();
+               return dumper_setup(dump_config.flags, arg);
+
+               
+       case DIOGDUMPDEV:       /* get dump_device */
+               return put_user((long)dump_config.dump_device, (long *)arg);
+
+       case DIOSDUMPLEVEL:     /* set dump_level */
+               if (!(f->f_flags & O_RDWR))
+                       return -EPERM;
+
+               /* reject negative values (arg is unsigned, so cast) */
+               if ((long)arg < 0)
+                       return -EINVAL;
+
+               /* Fixme: clean this up */
+               dump_config.level = 0;
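+               /* deliberate fall-through: each level implies the lower ones */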
+               switch ((int)arg) {
+                       case DUMP_LEVEL_ALL:
+                       case DUMP_LEVEL_ALL_RAM:
+                               dump_config.level |= DUMP_MASK_UNUSED;
+                       case DUMP_LEVEL_USED:
+                               dump_config.level |= DUMP_MASK_USED;
+                       case DUMP_LEVEL_KERN:
+                               dump_config.level |= DUMP_MASK_KERN;
+                       case DUMP_LEVEL_HEADER:
+                               dump_config.level |= DUMP_MASK_HEADER;
+                       case DUMP_LEVEL_NONE:
+                               break;
+                       default:
+                               return (-EINVAL);
+                       }
+               pr_debug("Dump Level 0x%lx\n", dump_config.level);
+               break;
+
+       case DIOGDUMPLEVEL:     /* get dump_level */
+               /* fixme: handle conversion */
+               return put_user((long)dump_config.level, (long *)arg);
+
+               
+       case DIOSDUMPFLAGS:     /* set dump_flags */
+               /* check flags */
+               if (!(f->f_flags & O_RDWR))
+                       return -EPERM;
+
+               /* reject negative values (arg is unsigned, so cast) */
+               if ((long)arg < 0)
+                       return -EINVAL;
+                       
+               if (dump_target_init(arg & DUMP_FLAGS_TARGETMASK) < 0)
+                       return -EINVAL; /* return proper error */
+
+               dump_config.flags = arg;
+               
+               pr_debug("Dump Flags 0x%lx\n", dump_config.flags);
+               break;
+               
+       case DIOGDUMPFLAGS:     /* get dump_flags */
+               return put_user((long)dump_config.flags, (long *)arg);
+
+       case DIOSDUMPCOMPRESS:  /* set the dump_compress status */
+               if (!(f->f_flags & O_RDWR))
+                       return -EPERM;
+
+               return dump_compress_init((int)arg);
+
+       case DIOGDUMPCOMPRESS:  /* get the dump_compress status */
+               return put_user((long)(dump_config.dumper ? 
+                       dump_config.dumper->compress->compress_type : 0), 
+                       (long *)arg);
+       case DIOGDUMPOKAY: /* check if dump is configured */
+               return put_user((long)dump_okay, (long *)arg);
+       
+       case DIOSDUMPTAKE: /* Trigger a manual dump */
+               /* Do not proceed if lkcd not yet configured */
+               if(!dump_okay) {
+                       printk("LKCD not yet configured. Cannot take manual dump\n");
+                       return -ENODEV;
+               }
+
+               /* Take the dump */
+               return  manual_handle_crashdump();
+                       
+       default:
+               /* 
+                * these are network dump specific ioctls, let the
+                * module handle them.
+                */
+               return dump_dev_ioctl(cmd, arg);
+       }
+       return 0;
+}
+
+/*
+ * Handle special cases for dump_device:
+ * changing the dump device requires opening the device
+ */
+static int 
+proc_dump_device(ctl_table *ctl, int write, struct file *f,
+                void *buffer, size_t *lenp)
+{
+       int *valp = ctl->data;
+       int oval = *valp;
+       int ret = -EPERM;
+
+       /* same permission checks as ioctl */
+       if (capable(CAP_SYS_ADMIN)) {
+               ret = proc_doulonghex(ctl, write, f, buffer, lenp);
+               if (ret == 0 && write && *valp != oval) {
+                       /* need to restore old value to close properly */
+                       dump_config.dump_device = (dev_t) oval;
+                       __dump_open();
+                       ret = dumper_setup(dump_config.flags, (dev_t) *valp);
+               }
+       }
+
+       return ret;
+}
+
+/* All for the want of a proc_do_xxx routine which prints values in hex */
+static int 
+proc_doulonghex(ctl_table *ctl, int write, struct file *f,
+                void *buffer, size_t *lenp)
+{
+#define TMPBUFLEN 20
+       unsigned long *i;
+       size_t len, left;
+       char buf[TMPBUFLEN];
+
+       if (!ctl->data || !ctl->maxlen || !*lenp || (f->f_pos)) {
+               *lenp = 0;
+               return 0;
+       }
+       
+       i = (unsigned long *) ctl->data;
+       left = *lenp;
+
+       if (write) {
+               /* parse a hex (0x...), octal (0...) or decimal value */
+               len = left;
+               if (len > TMPBUFLEN - 1)
+                       len = TMPBUFLEN - 1;
+               if (copy_from_user(buf, buffer, len))
+                       return -EFAULT;
+               buf[len] = '\0';
+               *i = simple_strtoul(buf, NULL, 0);
+               f->f_pos += *lenp;
+               return 0;
+       }
+
+       sprintf(buf, "0x%lx\n", (*i));
+       len = strlen(buf);
+       if (len > left)
+               len = left;
+       if(copy_to_user(buffer, buf, len))
+               return -EFAULT;
+       
+       left -= len;
+       *lenp -= left;
+       f->f_pos += *lenp;
+       return 0;
+}
+
+/*
+ * -----------------------------------------------------------------------
+ *                     I N I T   F U N C T I O N S
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * These register and unregister routines are exported for modules
+ * to register their dump drivers (like block, net etc)
+ */
+int
+dump_register_device(struct dump_dev *ddev)
+{
+       struct list_head *tmp;
+       struct dump_dev *dev;
+
+       list_for_each(tmp, &dump_target_list) {
+               dev = list_entry(tmp, struct dump_dev, list);
+               if (strcmp(ddev->type_name, dev->type_name) == 0) {
+                       printk("Target type %s already registered\n",
+                                       dev->type_name);
+                       return -1; /* return proper error */
+               }
+       }
+       list_add(&(ddev->list), &dump_target_list);
+       
+       return 0;
+}
+
+void
+dump_unregister_device(struct dump_dev *ddev)
+{
+       list_del(&(ddev->list));
+       if (ddev != dump_dev)
+               return;
+
+       dump_okay = 0;
+
+       if (dump_config.dumper)
+               dump_unconfigure();
+
+       dump_config.flags &= ~DUMP_FLAGS_TARGETMASK;
+       dump_okay = 0;
+       dump_dev = NULL;
+       dump_config.dumper = NULL;
+}
+
+static int panic_event(struct notifier_block *this, unsigned long event,
+                      void *ptr)
+{
+#ifdef CONFIG_ARM
+       get_current_general_regs(&all_regs);
+       get_current_cp14_regs(&all_regs);
+       get_current_cp15_regs(&all_regs);
+       dump_execute((const char *)ptr, &all_regs);
+#else
+       struct pt_regs regs;
+       
+       get_current_regs(&regs);
+       dump_execute((const char *)ptr, &regs);
+#endif
+       return 0;
+}
+
+extern struct notifier_block *panic_notifier_list;
+static struct notifier_block panic_block = {
+       .notifier_call = panic_event,
+};
+
+#ifdef CONFIG_MAGIC_SYSRQ
+/* Sysrq handler */
+static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs,
+               struct tty_struct *tty)
+{
+       dump_execute("sysrq", pt_regs);
+}
+
+static struct sysrq_key_op sysrq_crashdump_op = {
+       .handler        =       sysrq_handle_crashdump,
+       .help_msg       =       "Dump",
+       .action_msg     =       "Starting crash dump",
+};
+#endif
+
+static inline void
+dump_sysrq_register(void) 
+{
+#ifdef CONFIG_MAGIC_SYSRQ
+       __sysrq_lock_table();
+       __sysrq_put_key_op(DUMP_SYSRQ_KEY, &sysrq_crashdump_op);
+       __sysrq_unlock_table();
+#endif
+}
+
+static inline void
+dump_sysrq_unregister(void)
+{
+#ifdef CONFIG_MAGIC_SYSRQ
+       __sysrq_lock_table();
+       if (__sysrq_get_key_op(DUMP_SYSRQ_KEY) == &sysrq_crashdump_op)
+               __sysrq_put_key_op(DUMP_SYSRQ_KEY, NULL);
+       __sysrq_unlock_table();
+#endif
+}
+
+/*
+ * Name: dump_init()
+ * Func: Initialize the dump process.  This will set up any architecture
+ *       dependent code.  The key point is that we need the memory offsets
+ *       the page table is initialized, because the base memory offset
+ *       is changed after paging_init() is called.
+ */
+static int __init
+dump_init(void)
+{
+       struct sysinfo info;
+       int err;
+
+       /* try to create our dump device */
+       err = misc_register(&dump_miscdev);
+       if (err) {
+               printk("cannot register dump character device!\n");
+               return err;
+       }
+
+       __dump_init((u64)PAGE_OFFSET);
+
+       /* set the dump_compression_list structure up */
+       dump_register_compression(&dump_none_compression);
+
+       /* grab the total memory size now (not if/when we crash) */
+       si_meminfo(&info);
+
+       /* set the memory size */
+       dump_header.dh_memory_size = (u64)info.totalram;
+
+       sysctl_header = register_sysctl_table(kernel_root, 0);
+       dump_sysrq_register();
+
+       notifier_chain_register(&panic_notifier_list, &panic_block);
+       dump_function_ptr = dump_execute;
+
+       pr_info("Crash dump driver initialized.\n");
+       return 0;
+}
+
+static void __exit
+dump_cleanup(void)
+{
+       dump_okay = 0;
+
+       if (dump_config.dumper)
+               dump_unconfigure();
+
+       /* arch-specific cleanup routine */
+       __dump_cleanup();
+
+       /* ignore errors while unregistering -- since can't do anything */
+       unregister_sysctl_table(sysctl_header);
+       misc_deregister(&dump_miscdev);
+       dump_sysrq_unregister();
+       notifier_chain_unregister(&panic_notifier_list, &panic_block);
+       dump_function_ptr = NULL;
+}
+
+EXPORT_SYMBOL(dump_register_compression);
+EXPORT_SYMBOL(dump_unregister_compression);
+EXPORT_SYMBOL(dump_register_device);
+EXPORT_SYMBOL(dump_unregister_device);
+EXPORT_SYMBOL(dump_config);
+EXPORT_SYMBOL(dump_silence_level);
+
+EXPORT_SYMBOL(__dump_irq_enable);
+EXPORT_SYMBOL(__dump_irq_restore);
+
+MODULE_AUTHOR("Matt D. Robinson <yakker@sourceforge.net>");
+MODULE_DESCRIPTION("Linux Kernel Crash Dump (LKCD) driver");
+MODULE_LICENSE("GPL");
+
+module_init(dump_init);
+module_exit(dump_cleanup);
diff --git a/include/asm-i386/dump.h b/include/asm-i386/dump.h
new file mode 100644 (file)
index 0000000..a0921e9
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Kernel header file for Linux crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sgi.com)
+ *
+ * Copyright 1999 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/* This header file holds the architecture specific crash dump header */
+#ifndef _ASM_DUMP_H
+#define _ASM_DUMP_H
+
+/* necessary header files */
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+/* definitions */
+#define DUMP_ASM_MAGIC_NUMBER  0xdeaddeadULL   /* magic number            */
+#define DUMP_ASM_VERSION_NUMBER        0x3     /* version number          */
+
+/* max number of cpus */
+#define DUMP_MAX_NUM_CPUS 32
+
+/*
+ * Structure: __dump_header_asm
+ *  Function: This is the header for architecture-specific stuff.  It
+ *            follows right after the dump header.
+ */
+struct __dump_header_asm {
+       /* the dump magic number -- unique to verify dump is valid */
+       u64             dha_magic_number;
+
+       /* the version number of this dump */
+       u32             dha_version;
+
+       /* the size of this header (in case we can't read it) */
+       u32             dha_header_size;
+
+       /* the esp for i386 systems */
+       u32             dha_esp;
+
+       /* the eip for i386 systems */
+       u32             dha_eip;
+
+       /* the dump registers */
+       struct pt_regs  dha_regs;
+
+       /* smp specific */
+       u32             dha_smp_num_cpus;
+       u32             dha_dumping_cpu;
+       struct pt_regs  dha_smp_regs[DUMP_MAX_NUM_CPUS];
+       u32             dha_smp_current_task[DUMP_MAX_NUM_CPUS];
+       u32             dha_stack[DUMP_MAX_NUM_CPUS];
+       u32             dha_stack_ptr[DUMP_MAX_NUM_CPUS];
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+
+extern struct __dump_header_asm dump_header_asm;
+
+#ifdef CONFIG_SMP
+extern cpumask_t irq_affinity[];
+extern int (*dump_ipi_function_ptr)(struct pt_regs *);
+extern void dump_send_ipi(void);
+#else
+#define dump_send_ipi() do { } while(0)
+#endif
+
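+/*
+ * Note: this captures an approximation of the current register state,
+ * not a saved trap frame -- in particular regs->eip is simply taken
+ * from current_text_addr().
+ */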
+static inline void get_current_regs(struct pt_regs *regs)
+{
+       __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx));
+       __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx));
+       __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx));
+       __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi));
+       __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi));
+       __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp));
+       __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax));
+       __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp));
+       __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss));
+       __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs));
+       __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds));
+       __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes));
+       __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags));
+       regs->eip = (unsigned long)current_text_addr();
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_DUMP_H */
diff --git a/include/linux/dump.h b/include/linux/dump.h
new file mode 100644 (file)
index 0000000..00c690f
--- /dev/null
@@ -0,0 +1,385 @@
+/*
+ * Kernel header file for Linux crash dumps.
+ *
+ * Created by: Matt Robinson (yakker@sgi.com)
+ * Copyright 1999 - 2002 Silicon Graphics, Inc. All rights reserved.
+ *
+ * vmdump.h to dump.h by: Matt D. Robinson (yakker@sourceforge.net)
+ * Copyright 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 Free Software Foundation, Inc. All rights reserved.
+ *
+ * Most of this is the same old stuff from vmdump.h, except now we're
+ * actually a stand-alone driver plugged into the block layer interface,
+ * with the exception that we now allow for compression modes externally
+ * loaded (e.g., someone can come up with their own).
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+/* This header file includes all structure definitions for crash dumps. */
+#ifndef _DUMP_H
+#define _DUMP_H
+
+#if defined(CONFIG_CRASH_DUMP) || defined (CONFIG_CRASH_DUMP_MODULE)
+
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/dumpdev.h>
+#include <asm/ioctl.h>
+
+/* 
+ * Predefine default DUMP_PAGE constants, asm header may override.
+ *
+ * On ia64 discontinuous memory systems it's possible for the memory
+ * banks to stop at 2**12 page alignments, the smallest possible page
+ * size. But the system page size, PAGE_SIZE, is in fact larger.
+ */
+#define DUMP_PAGE_SHIFT        PAGE_SHIFT
+#define DUMP_PAGE_MASK         PAGE_MASK
+#define DUMP_PAGE_ALIGN(addr)  PAGE_ALIGN(addr)
+#define DUMP_HEADER_OFFSET     PAGE_SIZE
+
+#define OLDMINORBITS   8
+#define OLDMINORMASK   ((1U << OLDMINORBITS) -1)
+
+/* Keep DUMP_PAGE_SIZE constant at 4K (1 << 12);
+ * it may then differ from PAGE_SIZE.
+ */
+#define DUMP_PAGE_SIZE         4096
+
+/* 
+ * Predefined default memcpy() to use when copying memory to the dump buffer.
+ *
+ * On ia64 there is a heads up function that can be called to let the prom
+ * machine check monitor know that the current activity is risky and it should
+ * ignore the fault (nofault). In this case the ia64 header will redefine this
+ * macro to __dump_memcpy() and use its arch-specific version.
+ */
+#define DUMP_memcpy            memcpy
+
+/* necessary header files */
+#include <asm/dump.h>                  /* for architecture-specific header */
+
+/* 
+ * Size of the buffer that's used to hold:
+ *
+ *     1. the dump header (padded to fill the complete buffer)
+ *     2. the possibly compressed page headers and data
+ */
+#define DUMP_BUFFER_SIZE       (64 * 1024)  /* size of dump buffer         */
+#define DUMP_HEADER_SIZE       DUMP_BUFFER_SIZE
+
+/* standard header definitions */
+#define DUMP_MAGIC_NUMBER      0xa8190173618f23edULL  /* dump magic number */
+#define DUMP_MAGIC_LIVE                0xa8190173618f23cdULL  /* live magic number */
+#define DUMP_VERSION_NUMBER    0x8     /* dump version number              */
+#define DUMP_PANIC_LEN         0x100   /* dump panic string length         */
+
+/* dump levels - type specific stuff added later -- add as necessary */
+#define DUMP_LEVEL_NONE                0x0     /* no dumping at all -- just bail   */
+#define DUMP_LEVEL_HEADER      0x1     /* kernel dump header only          */
+#define DUMP_LEVEL_KERN                0x2     /* dump header and kernel pages     */
+#define DUMP_LEVEL_USED                0x4     /* dump header, kernel/user pages   */
+#define DUMP_LEVEL_ALL_RAM     0x8     /* dump header, all RAM pages       */
+#define DUMP_LEVEL_ALL         0x10    /* dump all memory RAM and firmware */
+
+
+/* dump compression options -- add as necessary */
+#define DUMP_COMPRESS_NONE     0x0     /* don't compress this dump         */
+#define DUMP_COMPRESS_RLE      0x1     /* use RLE compression              */
+#define DUMP_COMPRESS_GZIP     0x2     /* use GZIP compression             */
+
+/* dump flags - any dump-type specific flags -- add as necessary */
+#define DUMP_FLAGS_NONE                0x0     /* no flags are set for this dump   */
+#define DUMP_FLAGS_NONDISRUPT  0x1    /* non-disruptive dumping           */
+#define DUMP_FLAGS_SOFTBOOT    0x2    /* 2 stage soft-boot based dump     */
+
+#define DUMP_FLAGS_TARGETMASK  0xf0000000 /* handle special case targets   */
+#define DUMP_FLAGS_DISKDUMP    0x80000000 /* dump to local disk            */
+#define DUMP_FLAGS_NETDUMP     0x40000000 /* dump over the network         */
+
+/* dump header flags -- add as necessary */
+#define DUMP_DH_FLAGS_NONE     0x0     /* no flags set (error condition!)  */
+#define DUMP_DH_RAW            0x1     /* raw page (no compression)        */
+#define DUMP_DH_COMPRESSED     0x2     /* page is compressed               */
+#define DUMP_DH_END            0x4     /* end marker on a full dump        */
+#define DUMP_DH_TRUNCATED      0x8     /* dump is incomplete               */
+#define DUMP_DH_TEST_PATTERN   0x10    /* dump page is a test pattern      */
+#define DUMP_DH_NOT_USED       0x20    /* 1st bit not used in flags        */
+
+/* names for various dump parameters in /proc/kernel */
+#define DUMP_ROOT_NAME         "sys/dump"
+#define DUMP_DEVICE_NAME       "device"
+#define DUMP_COMPRESS_NAME     "compress"
+#define DUMP_LEVEL_NAME                "level"
+#define DUMP_FLAGS_NAME                "flags"
+#define DUMP_ADDR_NAME         "addr"
+
+#define DUMP_SYSRQ_KEY         'd'     /* key to use for MAGIC_SYSRQ key   */
+
+/* CTL_DUMP names: */
+enum
+{
+       CTL_DUMP_DEVICE=1,
+       CTL_DUMP_COMPRESS=2,
+       CTL_DUMP_LEVEL=3,
+       CTL_DUMP_FLAGS=4,
+       CTL_DUMP_ADDR=5,
+       CTL_DUMP_TEST=6,
+};
+
+
+/* page size for gzip compression -- buffered slightly beyond the hardware page size (DUMP_PAGE_SIZE) */
+#define DUMP_DPC_PAGE_SIZE     (DUMP_PAGE_SIZE + 512)
+
+/* dump ioctl() control options */
+#define DIOSDUMPDEV     _IOW('p', 0xA0, unsigned int)  /* set the dump device              */
+#define DIOGDUMPDEV     _IOR('p', 0xA1, unsigned int)  /* get the dump device              */
+#define DIOSDUMPLEVEL   _IOW('p', 0xA2, unsigned int)  /* set the dump level               */
+#define DIOGDUMPLEVEL   _IOR('p', 0xA3, unsigned int)  /* get the dump level               */
+#define DIOSDUMPFLAGS   _IOW('p', 0xA4, unsigned int)  /* set the dump flag parameters     */
+#define DIOGDUMPFLAGS   _IOR('p', 0xA5, unsigned int)  /* get the dump flag parameters     */
+#define DIOSDUMPCOMPRESS _IOW('p', 0xA6, unsigned int) /* set the dump compress level      */
+#define DIOGDUMPCOMPRESS _IOR('p', 0xA7, unsigned int) /* get the dump compress level      */
+
+/* these ioctls are used only by the netdump module */
+#define DIOSTARGETIP    _IOW('p', 0xA8, unsigned int)  /* set the target machine's IP      */
+#define DIOGTARGETIP    _IOR('p', 0xA9, unsigned int)  /* get the target machine's IP      */
+#define DIOSTARGETPORT  _IOW('p', 0xAA, unsigned int)  /* set the target machine's port    */
+#define DIOGTARGETPORT  _IOR('p', 0xAB, unsigned int)  /* get the target machine's port    */
+#define DIOSSOURCEPORT  _IOW('p', 0xAC, unsigned int)  /* set the source machine's port    */
+#define DIOGSOURCEPORT  _IOR('p', 0xAD, unsigned int)  /* get the source machine's port    */
+#define DIOSETHADDR     _IOW('p', 0xAE, unsigned int)  /* set the ethernet address         */
+#define DIOGETHADDR     _IOR('p', 0xAF, unsigned int)  /* get the ethernet address         */
+#define DIOGDUMPOKAY    _IOR('p', 0xB0, unsigned int)  /* check if dump is configured      */
+#define DIOSDUMPTAKE    _IOW('p', 0xB1, unsigned int)  /* take a manual dump               */
+
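+/*
+ * Example (user-space sketch; assumptions flagged): driving the ioctl
+ * interface above.  The device node name (/dev/dump) is an assumption,
+ * and whether the driver takes the argument by value or by pointer is
+ * determined by dump_setup.c (not shown); the pointer form below simply
+ * follows the _IOW() encoding.
+ */
+#if 0
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/dump.h>
+
+int set_dump_level(unsigned int level)
+{
+       int fd = open("/dev/dump", O_RDONLY);
+
+       if (fd < 0)
+               return -1;
+       if (ioctl(fd, DIOSDUMPLEVEL, &level) < 0) {
+               close(fd);
+               return -1;
+       }
+       return close(fd);
+}
+#endif
+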
+/*
+ * Structure: __dump_header
+ *  Function: This is the header dumped at the top of every valid crash
+ *            dump.  
+ */
+struct __dump_header {
+       /* the dump magic number -- unique to verify dump is valid */
+       u64     dh_magic_number;
+
+       /* the version number of this dump */
+       u32     dh_version;
+
+       /* the size of this header (in case we can't read it) */
+       u32     dh_header_size;
+
+       /* the level of this dump (just a header?) */
+       u32     dh_dump_level;
+
+       /* 
+        * We assume dump_page_size to be 4K in every case.
+        * Store here the configurable system page size (4K, 8K, 16K, etc.) 
+        */
+       u32     dh_page_size;
+
+       /* the size of all physical memory */
+       u64     dh_memory_size;
+
+       /* the start of physical memory */
+       u64     dh_memory_start;
+
+       /* the end of physical memory */
+       u64     dh_memory_end;
+
+       /* the number of hardware/physical pages in this dump specifically */
+       u32     dh_num_dump_pages;
+
+       /* the panic string, if available */
+       char    dh_panic_string[DUMP_PANIC_LEN];
+
+       /* timeval depends on architecture, two long values */
+       struct {
+               u64 tv_sec;
+               u64 tv_usec;
+       } dh_time; /* the time of the system crash */
+
+       /* the NEW utsname (uname) information -- in character form */
+       /* we do this so we don't have to include utsname.h         */
+       /* plus it helps us be more architecture independent        */
+       /* now maybe one day soon they'll make the [65] a #define!  */
+       char    dh_utsname_sysname[65];
+       char    dh_utsname_nodename[65];
+       char    dh_utsname_release[65];
+       char    dh_utsname_version[65];
+       char    dh_utsname_machine[65];
+       char    dh_utsname_domainname[65];
+
+       /* the address of current task (OLD = void *, NEW = u64) */
+       u64     dh_current_task;
+
+       /* what type of compression we're using in this dump (if any) */
+       u32     dh_dump_compress;
+
+       /* any additional flags */
+       u32     dh_dump_flags;
+
+       /* the dump device used */
+       u32     dh_dump_device;
+} __attribute__((packed));
+
+/*
+ * Structure: __dump_page
+ *  Function: To act as the header associated to each physical page of
+ *            memory saved in the system crash dump.  This allows for
+ *            easy reassembly of each crash dump page.  The address bits
+ *            are split to make things easier for 64-bit/32-bit system
+ *            conversions.
+ *
+ * The dp_address and dp_size fields are useful landmarks when looking
+ * at a hex dump of /dev/vmdump.
+ */
+struct __dump_page {
+       /* the address of this dump page */
+       u64     dp_address;
+
+       /* the size of this dump page */
+       u32     dp_size;
+
+       /* flags (currently DUMP_DH_RAW, DUMP_DH_COMPRESSED or DUMP_DH_END) */
+       u32     dp_flags;
+} __attribute__((packed));
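+
+/*
+ * Example (user-space sketch): walking the page records of a dump image
+ * such as /dev/vmdump.  That the records start dh_header_size bytes into
+ * the image is an assumption (the on-media layout may pad the header
+ * further), and byte order is ignored for brevity.
+ */
+#if 0
+int walk_dump_pages(int fd, const struct __dump_header *dh)
+{
+       struct __dump_page dp;
+
+       if (lseek(fd, (off_t)dh->dh_header_size, SEEK_SET) < 0)
+               return -1;
+       for (;;) {
+               if (read(fd, &dp, sizeof(dp)) != sizeof(dp))
+                       return -1;
+               if (dp.dp_flags & DUMP_DH_END)
+                       return 0;       /* end marker: all pages seen */
+               /* skip the raw or compressed page payload */
+               if (lseek(fd, (off_t)dp.dp_size, SEEK_CUR) < 0)
+                       return -1;
+       }
+}
+#endif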
+
+/*
+ * Structure: __lkcdinfo
+ * Function:  This structure contains information needed for the lkcdutils
+ *            package (particularly lcrash) to determine what information is
+ *            associated to this kernel, specifically.
+ */
+struct __lkcdinfo {
+       int     arch;
+       int     ptrsz;
+       int     byte_order;
+       int     linux_release;
+       int     page_shift;
+       int     page_size;
+       u64     page_mask;
+       u64     page_offset;
+       int     stack_offset;
+};
+
+#ifdef __KERNEL__
+
+/*
+ * Structure: __dump_compress
+ *  Function: This is what an individual compression mechanism can use
+ *            to plug in their own compression techniques.  It's always
+ *            best to build these as individual modules so that people
+ *            can put in whatever they want.
+ */
+struct __dump_compress {
+       /* the list_head structure for list storage */
+       struct list_head list;
+
+       /* the type of compression to use (DUMP_COMPRESS_XXX) */
+       int compress_type;
+       const char *compress_name;
+
+       /* the compression function to call */
+       u16 (*compress_func)(const u8 *, u16, u8 *, u16);
+};
+
+/* functions for dump compression registration */
+extern void dump_register_compression(struct __dump_compress *);
+extern void dump_unregister_compression(int);
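+
+/*
+ * Example (sketch): how a compression module plugs in.  The convention
+ * assumed here -- return the compressed length, or a value no smaller
+ * than the input to mean "store the page raw" -- is modelled on the
+ * in-tree RLE/GZIP modules and should be checked against dump_fmt.c.
+ */
+#if 0
+static u16 example_compress(const u8 *in, u16 inlen, u8 *out, u16 outlen)
+{
+       /* trivial "compressor": report no gain, so the page is stored raw */
+       return inlen;
+}
+
+static struct __dump_compress example_compress_ops = {
+       .compress_type  = DUMP_COMPRESS_NONE,
+       .compress_name  = "example",
+       .compress_func  = example_compress,
+};
+
+/* module init: dump_register_compression(&example_compress_ops);
+ * module exit: dump_unregister_compression(DUMP_COMPRESS_NONE); */
+#endif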
+
+/*
+ * Structure dump_mbank[]:
+ *
+ * For CONFIG_DISCONTIGMEM systems this array specifies the
+ * memory banks/chunks that need to be dumped after a panic.
+ *
+ * For classic systems it specifies a single set of pages from
+ * 0 to max_mapnr.
+ */
+struct __dump_mbank {
+       u64     start;
+       u64     end;
+       int     type;
+       int     pad1;
+       long    pad2;
+};
+
+#define DUMP_MBANK_TYPE_CONVENTIONAL_MEMORY            1
+#define DUMP_MBANK_TYPE_OTHER                          2
+
+#define MAXCHUNKS 256
+extern int dump_mbanks;
+extern struct __dump_mbank dump_mbank[MAXCHUNKS];
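+
+/*
+ * Example (sketch): a dump scheme sizing its job from the bank table.
+ * Whether 'end' is inclusive is not spelled out here; this assumes the
+ * half-open extent [start, end).
+ */
+#if 0
+static u64 dumpable_bytes(void)
+{
+       u64 total = 0;
+       int i;
+
+       for (i = 0; i < dump_mbanks; i++)
+               if (dump_mbank[i].type == DUMP_MBANK_TYPE_CONVENTIONAL_MEMORY)
+                       total += dump_mbank[i].end - dump_mbank[i].start;
+       return total;
+}
+#endif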
+
+/* notification event codes */
+#define DUMP_BEGIN             0x0001  /* dump beginning */
+#define DUMP_END               0x0002  /* dump ending */
+
+/* Scheduler soft spin control.
+ *
+ * 0     - no dump in progress
+ * n > 0 - CPU (n - 1) is dumping
+ */
+extern unsigned long dump_oncpu;
+extern void dump_execute(const char *, const struct pt_regs *);
+
+/*
+ *     Notifier list for kernel code which wants to be called
+ *     at kernel dump. 
+ */
+extern struct notifier_block *dump_notifier_list;
+static inline int register_dump_notifier(struct notifier_block *nb)
+{
+       return notifier_chain_register(&dump_notifier_list, nb);
+}
+static inline int unregister_dump_notifier(struct notifier_block * nb)
+{
+       return notifier_chain_unregister(&dump_notifier_list, nb);
+}
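+
+/*
+ * Example (sketch): a driver that must quiesce hardware around a dump
+ * can hook this chain using the DUMP_BEGIN/DUMP_END event codes defined
+ * above; the callback body is hypothetical.
+ */
+#if 0
+static int example_dump_event(struct notifier_block *nb,
+                             unsigned long event, void *ptr)
+{
+       if (event == DUMP_BEGIN) {
+               /* e.g. stop DMA engines before memory is saved */
+       } else if (event == DUMP_END) {
+               /* e.g. restart whatever was stopped */
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block example_dump_nb = {
+       .notifier_call  = example_dump_event,
+};
+
+/* registration: register_dump_notifier(&example_dump_nb); */
+#endif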
+
+extern void (*dump_function_ptr)(const char *, const struct pt_regs *);
+static inline void dump(const char *str, const struct pt_regs *regs)
+{
+       if (dump_function_ptr)
+               dump_function_ptr(str, regs);
+}
+
+/*
+ * Common Arch Specific Functions should be declared here.
+ * This allows the C compiler to detect discrepancies.
+ */
+extern void    __dump_open(void);
+extern void    __dump_cleanup(void);
+extern void    __dump_init(u64);
+extern void    __dump_save_regs(struct pt_regs *, const struct pt_regs *);
+extern int     __dump_configure_header(const struct pt_regs *);
+extern int     __dump_irq_enable(void);
+extern void    __dump_irq_restore(void);
+extern int     __dump_page_valid(unsigned long index);
+#ifdef CONFIG_SMP
+extern void    __dump_save_other_cpus(void);
+#else
+#define __dump_save_other_cpus()       do { } while (0)
+#endif
+
+extern int manual_handle_crashdump(void);
+
+/* to track all used (compound + zero order) pages */
+#define PageInuse(p)   (PageCompound(p) || page_count(p))
+
+#endif /* __KERNEL__ */
+
+#else  /* !CONFIG_CRASH_DUMP */
+
+/* If not configured then make code disappear! */
+#define register_dump_watchdog(x)      do { } while(0)
+#define unregister_dump_watchdog(x)    do { } while(0)
+#define register_dump_notifier(x)      (0)
+#define unregister_dump_notifier(x)    (0)
+#define dump_in_progress()             0
+#define dump(x, y)                     do { } while(0)
+
+#endif /* !CONFIG_CRASH_DUMP */
+
+#endif /* _DUMP_H */
diff --git a/include/linux/dump_netdev.h b/include/linux/dump_netdev.h
new file mode 100644 (file)
index 0000000..b2f811f
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ *  linux/include/linux/dump_netdev.h (derived from drivers/net/netconsole.h)
+ *
+ *  Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
+ *
+ *  This file contains the protocol definitions for an IRQ-safe,
+ *  crash-safe kernel console facility that outputs kernel messages and
+ *  crash dumps to the network.
+ *
+ * Modification history:
+ *
+ * 2001-09-17    started by Ingo Molnar.
+ */
+
+/****************************************************************
+ *      This program is free software; you can redistribute it and/or modify
+ *      it under the terms of the GNU General Public License as published by
+ *      the Free Software Foundation; either version 2, or (at your option)
+ *      any later version.
+ *
+ *      This program is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *      GNU General Public License for more details.
+ *
+ *      You should have received a copy of the GNU General Public License
+ *      along with this program; if not, write to the Free Software
+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ ****************************************************************/
+
+#ifndef _LINUX_DUMP_NETDEV_H
+#define _LINUX_DUMP_NETDEV_H
+
+#define NETCONSOLE_VERSION 0x03
+
+enum netdump_commands {
+       COMM_NONE = 0,
+       COMM_SEND_MEM = 1,
+       COMM_EXIT = 2,
+       COMM_REBOOT = 3,
+       COMM_HELLO = 4,
+       COMM_GET_NR_PAGES = 5,
+       COMM_GET_PAGE_SIZE = 6,
+       COMM_START_NETDUMP_ACK = 7,
+       COMM_GET_REGS = 8,
+       COMM_GET_MAGIC = 9,
+       COMM_START_WRITE_NETDUMP_ACK = 10,
+};
+
+typedef struct netdump_req_s {
+       u64 magic;
+       u32 nr;
+       u32 command;
+       u32 from;
+       u32 to;
+} req_t;
+
+enum netdump_replies {
+       REPLY_NONE = 0,
+       REPLY_ERROR = 1,
+       REPLY_LOG = 2,
+       REPLY_MEM = 3,
+       REPLY_RESERVED = 4,
+       REPLY_HELLO = 5,
+       REPLY_NR_PAGES = 6,
+       REPLY_PAGE_SIZE = 7,
+       REPLY_START_NETDUMP = 8,
+       REPLY_END_NETDUMP = 9,
+       REPLY_REGS = 10,
+       REPLY_MAGIC = 11,
+       REPLY_START_WRITE_NETDUMP = 12,
+};
+
+typedef struct netdump_reply_s {
+       u32 nr;
+       u32 code;
+       u32 info;
+} reply_t;
+
+#define HEADER_LEN (1 + sizeof(reply_t))
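+
+/*
+ * Example (sketch): packing a reply for the wire.  HEADER_LEN above
+ * implies a format of one leading byte followed by the reply_t fields;
+ * that the byte carries NETCONSOLE_VERSION, and the use of network byte
+ * order, are assumptions to be checked against dump_netdev.c.
+ */
+#if 0
+static int netdump_pack_reply(u8 *buf, const reply_t *reply)
+{
+       buf[0] = NETCONSOLE_VERSION;    /* assumed: leading version byte */
+       put_unaligned(htonl(reply->nr),   (u32 *)(buf + 1));
+       put_unaligned(htonl(reply->code), (u32 *)(buf + 5));
+       put_unaligned(htonl(reply->info), (u32 *)(buf + 9));
+       return HEADER_LEN;              /* bytes written */
+}
+#endif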
+
+#endif /* _LINUX_DUMP_NETDEV_H */
diff --git a/include/linux/dumpdev.h b/include/linux/dumpdev.h
new file mode 100644 (file)
index 0000000..51ef84d
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * Generic dump device interfaces for flexible system dump
+ * (enables variation of dump target types, e.g. disk, network, memory)
+ *
+ * These interfaces have evolved based on discussions on lkcd-devel. 
+ * Eventually the intent is to support primary and secondary or 
+ * alternate targets registered at the same time, with scope for 
+ * situation based failover or multiple dump devices used for parallel 
+ * dump i/o.
+ *
+ * Started: Oct 2002 - Suparna Bhattacharya (suparna@in.ibm.com)
+ *
+ * Copyright (C) 2001 - 2002 Matt D. Robinson.  All rights reserved.
+ * Copyright (C) 2002 International Business Machines Corp. 
+ *
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#ifndef _LINUX_DUMPDEV_H
+#define _LINUX_DUMPDEV_H
+
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/bio.h>
+
+/* Determined by the dump target (device) type */
+
+struct dump_dev;
+
+struct dump_dev_ops {
+       int (*open)(struct dump_dev *, unsigned long); /* configure */
+       int (*release)(struct dump_dev *); /* unconfigure */
+       int (*silence)(struct dump_dev *); /* when dump starts */
+       int (*resume)(struct dump_dev *); /* when dump is over */
+       int (*seek)(struct dump_dev *, loff_t);
+       /* trigger a write (async in nature typically) */
+       int (*write)(struct dump_dev *, void *, unsigned long);
+       /* not usually used during dump, but option available */
+       int (*read)(struct dump_dev *, void *, unsigned long);
+       /* use to poll for completion */
+       int (*ready)(struct dump_dev *, void *); 
+       int (*ioctl)(struct dump_dev *, unsigned int, unsigned long);
+};
+
+struct dump_dev {
+       char type_name[32]; /* "block", "net-poll", etc. */
+       unsigned long device_id; /* interpreted differently for various types */
+       struct dump_dev_ops *ops;
+       struct list_head list;
+       loff_t curr_offset;
+};
+
+/*
+ * dump_dev type variations: 
+ */
+
+/* block */
+struct dump_blockdev {
+       struct dump_dev ddev;
+       dev_t dev_id;
+       struct block_device *bdev;
+       struct bio *bio;
+       loff_t start_offset;
+       loff_t limit;
+       int err;
+};
+
+static inline struct dump_blockdev *DUMP_BDEV(struct dump_dev *dev)
+{
+       return container_of(dev, struct dump_blockdev, ddev);
+}
+
+
+/* mem  - for internal use by soft-boot based dumper */
+struct dump_memdev {
+       struct dump_dev ddev;
+       unsigned long indirect_map_root;
+       unsigned long nr_free;
+       struct page *curr_page;
+       unsigned long *curr_map;
+       unsigned long curr_map_offset;
+       unsigned long last_offset;
+       unsigned long last_used_offset;
+       unsigned long last_bs_offset;
+};     
+
+static inline struct dump_memdev *DUMP_MDEV(struct dump_dev *dev)
+{
+       return container_of(dev, struct dump_memdev, ddev);
+}
+
+/* Todo/future - meant for raw dedicated interfaces e.g. mini-ide driver */
+struct dump_rdev {
+       struct dump_dev ddev;
+       char name[32];
+       int (*reset)(struct dump_rdev *, unsigned int, 
+               unsigned long);
+       /* ... to do ... */
+};
+
+/* just to get the size right when saving config across a soft-reboot */
+struct dump_anydev {
+       union {
+               struct dump_blockdev bddev;
+               /* .. add other types here .. */
+       };
+};
+
+
+
+/* Dump device / target operation wrappers */
+/* These assume that dump_dev is initialized to dump_config.dumper->dev */
+
+extern struct dump_dev *dump_dev;
+
+static inline int dump_dev_open(unsigned long arg)
+{
+       return dump_dev->ops->open(dump_dev, arg);
+}
+
+static inline int dump_dev_release(void)
+{
+       return dump_dev->ops->release(dump_dev);
+}
+
+static inline int dump_dev_silence(void)
+{
+       return dump_dev->ops->silence(dump_dev);
+}
+
+static inline int dump_dev_resume(void)
+{
+       return dump_dev->ops->resume(dump_dev);
+}
+
+static inline int dump_dev_seek(loff_t offset)
+{
+       return dump_dev->ops->seek(dump_dev, offset);
+}
+
+static inline int dump_dev_write(void *buf, unsigned long len)
+{
+       return dump_dev->ops->write(dump_dev, buf, len);
+}
+
+static inline int dump_dev_ready(void *buf)
+{
+       return dump_dev->ops->ready(dump_dev, buf);
+}
+
+static inline int dump_dev_ioctl(unsigned int cmd, unsigned long arg)
+{
+       if (!dump_dev || !dump_dev->ops->ioctl)
+               return -EINVAL;
+       return dump_dev->ops->ioctl(dump_dev, cmd, arg);
+}
+
+extern int dump_register_device(struct dump_dev *);
+extern void dump_unregister_device(struct dump_dev *);
+
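+/*
+ * Example (sketch): the skeleton of a dump target implementation.  All
+ * names below are hypothetical; a real target fills in every op that
+ * the dump flow exercises (see the wrappers above).
+ */
+#if 0
+static int example_dump_open(struct dump_dev *dev, unsigned long devid)
+{
+       dev->device_id = devid; /* remember which device to dump to */
+       return 0;
+}
+
+static int example_dump_write(struct dump_dev *dev, void *buf,
+                             unsigned long len)
+{
+       /* a real target queues len bytes at dev->curr_offset here */
+       dev->curr_offset += len;
+       return (int)len;
+}
+
+static struct dump_dev_ops example_dump_ops = {
+       .open   = example_dump_open,
+       .write  = example_dump_write,
+       /* .release, .silence, .resume, .seek, .read, .ready, .ioctl */
+};
+
+static struct dump_dev example_dump_dev = {
+       .type_name      = "example",
+       .ops            = &example_dump_ops,
+};
+
+/* a module would then call dump_register_device(&example_dump_dev) */
+#endif
+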
+#endif /*  _LINUX_DUMPDEV_H */
diff --git a/init/kerntypes.c b/init/kerntypes.c
new file mode 100644 (file)
index 0000000..1c24c0b
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * kerntypes.c
+ *
+ * Copyright (C) 2000 Tom Morano (tjm@sgi.com) and
+ *                    Matt D. Robinson (yakker@alacritech.com)
+ *
+ * Dummy module that includes headers for all kernel types of interest. 
+ * The kernel type information is used by the lcrash utility when 
+ * analyzing system crash dumps or the live system. Using the type 
+ * information for the running system, rather than kernel header files,
+ * makes for a more flexible and robust analysis tool.
+ *
+ * This source code is released under version 2 of the GNU GPL.
+ */
+
+#include <linux/compile.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/config.h>
+#include <linux/utsname.h>
+#include <linux/dump.h>
+
+#ifdef LINUX_COMPILE_VERSION_ID_TYPE
+/* Define version type for version validation of dump and kerntypes */
+LINUX_COMPILE_VERSION_ID_TYPE;
+#endif
+
+void
+kerntypes_dummy(void)
+{
+}