This commit was manufactured by cvs2svn to create branch
[linux-2.6.git] / drivers / block / diskdump.c
1 /*
2  *  linux/drivers/block/diskdump.c
3  *
4  *  Copyright (C) 2004  FUJITSU LIMITED
5  *  Copyright (C) 2002  Red Hat, Inc.
6  *  Written by Nobuhiro Tachino (ntachino@jp.fujitsu.com)
7  *
8  *  Some codes were derived from netdump and copyright belongs to
9  *  Red Hat, Inc.
10  */
11 /*
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2, or (at your option)
15  * any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  *
26  */
27
28 #include <linux/mm.h>
29 #include <linux/init.h>
30 #include <linux/delay.h>
31 #include <linux/reboot.h>
32 #include <linux/module.h>
33 #include <linux/kernel.h>
34 #include <linux/highmem.h>
35 #include <linux/smp_lock.h>
36 #include <linux/nmi.h>
37 #include <linux/crc32.h>
38 #include <linux/slab.h>
39 #include <linux/interrupt.h>
40 #include <linux/seq_file.h>
41 #include <linux/proc_fs.h>
42 #include <linux/diskdump.h>
43 #include <asm/diskdump.h>
44
/*
 * Logging helpers. Each one prefixes the message with "disk_dump: " and
 * appends a newline, so callers pass the format string without a trailing
 * "\n".
 */
#define Dbg(x, ...)     pr_debug("disk_dump: " x "\n", ## __VA_ARGS__)
#define Err(x, ...)     pr_err  ("disk_dump: " x "\n", ## __VA_ARGS__)
#define Warn(x, ...)    pr_warn ("disk_dump: " x "\n", ## __VA_ARGS__)
#define Info(x, ...)    pr_info ("disk_dump: " x "\n", ## __VA_ARGS__)

/*
 * Number of y-sized units needed to hold x, i.e. x / y rounded up.
 * Note that the result is a count of units, not x rounded to a multiple
 * of y.
 */
#define ROUNDUP(x, y)   (((x) + ((y)-1))/(y))

/* Convert a count of 512-byte sectors into a count of dump blocks */
#define SECTOR_BLOCK(s) ((s) >> (DUMP_BLOCK_SHIFT - 9))

/*
 * The number of the block which is used for saving format information.
 * check_block_signature() never verifies this block.
 */
#define USER_PARAM_BLOCK        2
57
/*
 * Tunables, exported as module parameters below.
 *
 * fallback_on_err:   when set, start_disk_dump() returns to its caller
 *                    after a failed dump instead of halting/rebooting.
 * allow_risky_dumps: when set, select_dump_partition() retries with
 *                    devices whose sanity check returned a positive value.
 * block_order:       I/O chunk size is (1 << block_order) blocks.
 * sample_rate:       check_dump_partition() verifies one chunk every
 *                    (chunk << sample_rate) blocks; a negative value
 *                    disables the check.
 */
static int fallback_on_err = 1;
static int allow_risky_dumps = 1;
static unsigned int block_order = 2;
static int sample_rate = 8;
module_param_named(fallback_on_err, fallback_on_err, bool, S_IRUGO|S_IWUSR);
module_param_named(allow_risky_dumps, allow_risky_dumps, bool, S_IRUGO|S_IWUSR);
module_param_named(block_order, block_order, uint, S_IRUGO|S_IWUSR);
module_param_named(sample_rate, sample_rate, int, S_IRUGO|S_IWUSR);

/* Timestamp units per second; print_status() refreshes every 1/5 of it */
static unsigned long timestamp_1sec;
static uint32_t module_crc;
/* Bounce buffer through which every block read or written here passes */
static char *scratch;
static struct disk_dump_header dump_header;
static struct disk_dump_sub_header dump_sub_header;

/* Registered dump devices */
static LIST_HEAD(disk_dump_devices);

/* Registered dump types, e.g. SCSI, ... */
static LIST_HEAD(disk_dump_types);

/* Taken by the /proc iterator and try-locked by start_disk_dump() */
static DECLARE_MUTEX(disk_dump_mutex);

static unsigned int header_blocks;              /* The size of all headers */
static unsigned int bitmap_blocks;              /* The size of bitmap header */
static unsigned int total_ram_blocks;           /* The size of memory */
static unsigned int total_blocks;               /* The sum of above */
/*
 * This is not a parameter actually, but used to pass the number of
 * required blocks to userland tools
 */
module_param_named(total_blocks, total_blocks, uint, S_IRUGO);

/* Notifier chain called by start_disk_dump() when a dump begins */
struct notifier_block *disk_dump_notifier_list;
EXPORT_SYMBOL_GPL(disk_dump_notifier_list);

/* Value of jiffies saved by disk_dump() when the dump starts */
unsigned long volatile diskdump_base_jiffies;
/* Stack handed to platform_start_crashdump() for running disk_dump() */
void *diskdump_stack;
enum disk_dump_states disk_dump_state = DISK_DUMP_INITIAL;

extern int panic_timeout;
extern unsigned long max_pfn;

static asmlinkage void disk_dump(struct pt_regs *, void *);
102
103
#if CONFIG_SMP
/*
 * Cross-CPU callback used by freeze_other_cpus(): record the task that
 * was running on this CPU in the dump header, then hand the CPU over to
 * platform_freeze_cpu().
 */
static void freeze_cpu(void *dummy)
{
        dump_header.tasks[smp_processor_id()] = current;

        platform_freeze_cpu();
}
#endif
114
/* Progress ticks elapsed so far; print_status() bumps it every 200msec */
static int lapse = 0;

/*
 * Estimate the remaining time, in lapse ticks, from the progress made so
 * far: total ticks are extrapolated as lapse * maxnr / nr (computed in
 * 1/256 fixed point) and the ticks already spent are subtracted.
 * A zero nr is treated as 1 to avoid dividing by zero.
 */
static inline unsigned long eta(unsigned long nr, unsigned long maxnr)
{
        unsigned long done = nr ? nr : 1;
        unsigned long long total_ticks;

        total_ticks = ((maxnr << 8) / done) * (unsigned long long)lapse;

        return (unsigned long)(total_ticks >> 8) - lapse;
}
128
129 static inline void print_status(unsigned int nr, unsigned int maxnr)
130 {
131         static char *spinner = "/|\\-";
132         static unsigned long long prev_timestamp = 0;
133         unsigned long long timestamp;
134
135         if (nr == 0)
136                 nr++;
137
138         platform_timestamp(timestamp);
139
140         if (timestamp - prev_timestamp > (timestamp_1sec/5)) {
141                 prev_timestamp = timestamp;
142                 lapse++;
143                 printk("%u/%u    %lu ETA %c          \r",
144                         nr, maxnr, eta(nr, maxnr) / 5, spinner[lapse & 3]);
145         }
146 }
147
148 static inline void clear_status(int nr, int maxnr)
149 {
150         printk("                                       \r");
151         lapse = 0;
152 }
153
154 /*
155  * Checking the signature on a block. The format is as follows.
156  *
157  * 1st word = 'disk'
158  * 2nd word = 'dump'
159  * 3rd word = block number
160  * 4th word = ((block number + 7) * 11) & 0xffffffff
161  * 5th word = ((4th word + 7)* 11) & 0xffffffff
162  * ..
163  *
164  * Return 1 if the signature is correct, else return 0
165  */
166 static int check_block_signature(void *buf, unsigned int block_nr)
167 {
168         int word_nr = PAGE_SIZE / sizeof(int);
169         int *words = buf;
170         unsigned int val;
171         int i;
172
173         /*
174          * Block 2 is used for the area which formatter saves options like
175          * the sampling rate or the number of blocks. the Kernel part does not
176          * check this block.
177          */
178         if (block_nr == USER_PARAM_BLOCK)
179                 return 1;
180
181         if (memcmp(buf, DUMP_PARTITION_SIGNATURE, sizeof(*words)))
182                 return 0;
183
184         val = block_nr;
185         for (i = 2; i < word_nr; i++) {
186                 if (words[i] != val)
187                         return 0;
188                 val = (val + 7) * 11;
189         }
190
191         return 1;
192 }
193
194 /*
195  * Read one block into the dump partition
196  */
197 static int read_blocks(struct disk_dump_partition *dump_part, unsigned int nr,
198                        char *buf, int len)
199 {
200         struct disk_dump_device *device = dump_part->device;
201         int ret;
202
203         local_irq_disable();
204         touch_nmi_watchdog();
205         ret = device->ops.rw_block(dump_part, READ, nr, buf, len);
206         if (ret < 0) {
207                 Err("read error on block %u", nr);
208                 return ret;
209         }
210         return 0;
211 }
212
213 static int write_blocks(struct disk_dump_partition *dump_part, unsigned int offs, char *buf, int len)
214 {
215         struct disk_dump_device *device = dump_part->device;
216         int ret;
217
218         local_irq_disable();
219         touch_nmi_watchdog();
220         ret = device->ops.rw_block(dump_part, WRITE, offs, buf, len);
221         if (ret < 0) {
222                 Err("write error on block %u", offs);
223                 return ret;
224         }
225         return 0;
226 }
227
228 /*
229  * Initialize the common header
230  */
231
232 /*
233  * Write the common header
234  */
235 static int write_header(struct disk_dump_partition *dump_part)
236 {
237         memset(scratch, 0, PAGE_SIZE);
238         memcpy(scratch, &dump_header, sizeof(dump_header));
239
240         return write_blocks(dump_part, 1, scratch, 1);
241 }
242
/*
 * Check the signatures in sampled blocks of the dump partition.
 *
 * Chunks of up to (1 << block_order) blocks (capped by the device's
 * max_blocks, when set) are read every (chunk << sample_rate) blocks and
 * each block in a chunk is verified with check_block_signature(). A
 * negative sample_rate skips the check entirely.
 *
 * Return 1 if every sampled signature is correct (or the check is
 * disabled), else return 0. A read error also returns 0.
 */
static int check_dump_partition(struct disk_dump_partition *dump_part,
                                unsigned int partition_size)
{
        unsigned int blk;
        int ret;
        unsigned int chunk_blks, skips;
        int i;

        if (sample_rate < 0)            /* No check */
                return 1;

        /*
         * If the device has limitations of transfer size, use it.
         */
        chunk_blks = 1 << block_order;
        if (dump_part->device->max_blocks)
                 chunk_blks = min(chunk_blks, dump_part->device->max_blocks);
        /* Distance between the starts of two consecutive sampled chunks */
        skips = chunk_blks << sample_rate;

        lapse = 0;
        for (blk = 0; blk < partition_size; blk += skips) {
                unsigned int len;
                /*
                 * Also entered from below via "goto redo" to verify the
                 * final chunk; the loop increment then pushes blk past
                 * partition_size, which ends the loop.
                 */
redo:
                len = min(chunk_blks, partition_size - blk);
                if ((ret = read_blocks(dump_part, blk, scratch, len)) < 0)
                        return 0;
                print_status(blk + 1, partition_size);
                for (i = 0; i < len; i++)
                        if (!check_block_signature(scratch + i * DUMP_BLOCK_SIZE, blk + i)) {
                                Err("bad signature in block %u", blk + i);
                                return 0;
                        }
        }
        /*
         * Check the end of the dump partition: blk - skips is the start
         * of the last chunk read. If that chunk stopped short of the end,
         * go back and read the last chunk_blks blocks once.
         */
        if (blk - skips + chunk_blks < partition_size) {
                blk = partition_size - chunk_blks;
                goto redo;
        }
        clear_status(blk, partition_size);
        return 1;
}
288
289 /*
290  * Write memory bitmap after location of dump headers.
291  */
292 #define PAGE_PER_BLOCK  (PAGE_SIZE * 8)
293 #define idx_to_pfn(nr, byte, bit) (((nr) * PAGE_SIZE + (byte)) * 8 + (bit))
294
295 static int write_bitmap(struct disk_dump_partition *dump_part,
296                         unsigned int bitmap_offset, unsigned int bitmap_blocks)
297 {
298         unsigned int nr;
299         unsigned long pfn, next_ram_pfn;
300         int bit, byte;
301         int ret = 0;
302         unsigned char val;
303
304         for (nr = 0; nr < bitmap_blocks; nr++) {
305                 pfn = idx_to_pfn(nr, 0, 0);
306                 next_ram_pfn = next_ram_page(pfn - 1);
307
308                 if (pfn + PAGE_PER_BLOCK <= next_ram_pfn)
309                         memset(scratch, 0, PAGE_SIZE);
310                 else
311                         for (byte = 0; byte < PAGE_SIZE; byte++) {
312                                 val = 0;
313                                 for (bit = 0; bit < 8; bit++)
314                                         if (page_is_ram(idx_to_pfn(nr, byte,
315                                                                    bit)))
316                                                 val |= (1 << bit);
317                                 scratch[byte] = (char)val;
318                         }
319                 if ((ret = write_blocks(dump_part, bitmap_offset + nr,
320                                         scratch, 1)) < 0) {
321                         Err("I/O error %d on block %u", ret, bitmap_offset + nr);
322                         break;
323                 }
324         }
325         return ret;
326 }
327
328 /*
329  * Write whole memory to dump partition.
330  * Return value is the number of writen blocks.
331  */
332 static int write_memory(struct disk_dump_partition *dump_part, int offset,
333                         unsigned int max_blocks_written,
334                         unsigned int *blocks_written)
335 {
336         char *kaddr;
337         unsigned int blocks = 0;
338         struct page *page;
339         unsigned long nr;
340         int ret = 0;
341         int blk_in_chunk = 0;
342
343         for (nr = next_ram_page(ULONG_MAX); nr < ULONG_MAX; nr = next_ram_page(nr)) {
344                 print_status(blocks, max_blocks_written);
345
346
347                 if (blocks >= max_blocks_written) {
348                         Warn("dump device is too small. %lu pages were not saved", max_pfn - blocks);
349                         goto out;
350                 }
351
352                 page = pfn_to_page(nr);
353                 if (nr != page_to_pfn(page)) {
354                         /* page_to_pfn() is called from kmap_atomic().
355                          * If page->flag is broken, it specified a wrong
356                          * zone and it causes kmap_atomic() fail.
357                          */
358                         Err("Bad page. PFN %lu flags %lx\n",
359                             nr, (unsigned long)page->flags);
360                         memset(scratch + blk_in_chunk * PAGE_SIZE, 0,
361                                PAGE_SIZE);
362                         sprintf(scratch + blk_in_chunk * PAGE_SIZE,
363                                 "Bad page. PFN %lu flags %lx\n",
364                                  nr, (unsigned long)page->flags);
365                         goto write;
366                 }
367
368                 if (!kern_addr_valid((unsigned long)pfn_to_kaddr(nr))) {
369                         memset(scratch + blk_in_chunk * PAGE_SIZE, 0,
370                                PAGE_SIZE);
371                         sprintf(scratch + blk_in_chunk * PAGE_SIZE,
372                                 "Unmapped page. PFN %lu\n", nr);
373                         goto write;
374                 }
375
376                 kaddr = kmap_atomic(page, KM_CRASHDUMP);
377                 /*
378                  * need to copy because adapter drivers use
379                  * virt_to_bus()
380                  */
381                 memcpy(scratch + blk_in_chunk * PAGE_SIZE, kaddr, PAGE_SIZE);
382                 kunmap_atomic(kaddr, KM_CRASHDUMP);
383
384 write:
385                 blk_in_chunk++;
386                 blocks++;
387
388                 if (blk_in_chunk >= (1 << block_order)) {
389                         ret = write_blocks(dump_part, offset, scratch,
390                                            blk_in_chunk);
391                         if (ret < 0) {
392                                 Err("I/O error %d on block %u", ret, offset);
393                                 break;
394                         }
395                         offset += blk_in_chunk;
396                         blk_in_chunk = 0;
397                 }
398         }
399         if (ret >= 0 && blk_in_chunk > 0) {
400                 ret = write_blocks(dump_part, offset, scratch, blk_in_chunk);
401                 if (ret < 0)
402                         Err("I/O error %d on block %u", ret, offset);
403         }
404
405 out:
406         clear_status(nr, max_blocks_written);
407
408         *blocks_written = blocks;
409         return ret;
410 }
411
412 /*
413  * Select most suitable dump device. sanity_check() returns the state
414  * of each dump device. 0 means OK, negative value means NG, and
415  * positive value means it maybe work. select_dump_partition() first
416  * try to select a sane device and if it has no sane device and
417  * allow_risky_dumps is set, it select one from maybe OK devices.
418  *
419  * XXX We cannot handle multiple partitions yet.
420  */
421 static struct disk_dump_partition *select_dump_partition(void)
422 {
423         struct disk_dump_device *dump_device;
424         struct disk_dump_partition *dump_part;
425         int sanity;
426         int strict_check = 1;
427
428 redo:
429         /*
430          * Select a sane polling driver.
431          */
432         list_for_each_entry(dump_device, &disk_dump_devices, list) {
433                 sanity = 0;
434                 if (dump_device->ops.sanity_check)
435                         sanity = dump_device->ops.sanity_check(dump_device);
436                 if (sanity < 0 || (sanity > 0 && strict_check))
437                         continue;
438                 list_for_each_entry(dump_part, &dump_device->partitions, list)
439                                 return dump_part;
440         }
441         if (allow_risky_dumps && strict_check) {
442                 strict_check = 0;
443                 goto redo;
444         }
445         return NULL;
446 }
447
static int dump_err = 0;        /* Indicates the error state which occurred
                                 * in disk_dump(). We need to make it global
                                 * because disk_dump() can't pass
                                 * error state as return value.
                                 */
453
454 static void freeze_other_cpus(void)
455 {
456 #if CONFIG_SMP
457         int     i;
458
459         smp_call_function(freeze_cpu, NULL, 1, -1);
460         diskdump_mdelay(3000);
461         printk("CPU frozen: ");
462         for (i = 0; i < NR_CPUS; i++) {
463                 if (dump_header.tasks[i] != NULL)
464                         printk("#%d", i);
465
466         }
467         printk("\n");
468         printk("CPU#%d is executing diskdump.\n", smp_processor_id());
469 #else
470         diskdump_mdelay(1000);
471 #endif
472         dump_header.tasks[smp_processor_id()] = current;
473 }
474
475 static void start_disk_dump(struct pt_regs *regs)
476 {
477         unsigned long flags;
478
479         /* Inhibit interrupt and stop other CPUs */
480         local_irq_save(flags);
481         preempt_disable();
482
483         /*
484          * Check the checksum of myself
485          */
486         if (down_trylock(&disk_dump_mutex)) {
487                 Err("down_trylock(disk_dump_mutex) failed.");
488                 goto done;
489         }
490
491         if (!check_crc_module()) {
492                 Err("checksum error. diskdump common module may be compromised.");
493                 goto done;
494         }
495
496         disk_dump_state = DISK_DUMP_RUNNING;
497
498         diskdump_mode = 1;
499
500         Dbg("notify dump start.");
501         notifier_call_chain(&disk_dump_notifier_list, 0, NULL);
502
503         touch_nmi_watchdog();
504         freeze_other_cpus();
505
506         /*
507          *  Some platforms may want to execute netdump on its own stack.
508          */
509         platform_start_crashdump(diskdump_stack, disk_dump, regs);
510
511 done:
512         /*
513          * If diskdump failed and fallback_on_err is set,
514          * We just return and leave panic to netdump.
515          */
516         if (dump_err) {
517                 disk_dump_state = DISK_DUMP_FAILURE;
518                 if (fallback_on_err && dump_err)
519                         return;
520         } else {
521                 disk_dump_state = DISK_DUMP_SUCCESS;
522         }
523
524         Dbg("notify panic.");
525         notifier_call_chain(&panic_notifier_list, 0, NULL);
526
527         if (panic_timeout > 0) {
528                 int i;
529                 /*
530                  * Delay timeout seconds before rebooting the machine. 
531                  * We can't use the "normal" timers since we just panicked..
532                  */
533                 printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
534                 for (i = 0; i < panic_timeout; i++) {
535                         touch_nmi_watchdog();
536                         diskdump_mdelay(1000);
537                 }
538
539                 /*
540                  *      Should we run the reboot notifier. For the moment Im
541                  *      choosing not too. It might crash, be corrupt or do
542                  *      more harm than good for other reasons.
543                  */
544                 machine_restart(NULL);
545         }
546         printk(KERN_EMERG "halt\n");
547         for (;;) {
548                 touch_nmi_watchdog();
549                 machine_halt();
550                 diskdump_mdelay(1000);
551         }
552 }
553
/*
 * Perform the dump itself: select a partition, quiesce the device,
 * verify the partition, then write the common header, the
 * architecture-dependent sub header, the memory bitmap and finally
 * memory.
 *
 * The outcome is reported through the global dump_err, which is preset
 * to -EIO and cleared only after the final header has been written.
 * The device's shutdown() method, when present, is called on every exit
 * path once a device has been selected.
 *
 * platform_arg is not used in this function body.
 */
static asmlinkage void disk_dump(struct pt_regs *regs, void *platform_arg)
{
        /*
         * NOTE(review): myregs is not referenced directly below; presumably
         * the platform_fix_regs()/write_sub_header() macros use it -
         * confirm before removing or renaming it.
         */
        struct pt_regs myregs;
        unsigned int max_written_blocks, written_blocks;
        struct disk_dump_device *dump_device = NULL;
        struct disk_dump_partition *dump_part = NULL;
        int ret;

        /* Assume failure until the whole dump has been written */
        dump_err = -EIO;

        /*
         * Setup timer/tasklet
         */
        dump_clear_timers();
        dump_clear_tasklet();
        dump_clear_workqueue();

        /* Save original jiffies value */
        diskdump_base_jiffies = jiffies;

        diskdump_setup_timestamp();

        platform_fix_regs();

        if (list_empty(&disk_dump_devices)) {
                Err("adapter driver is not registered.");
                goto done;
        }

        printk("start dumping\n");

        if (!(dump_part = select_dump_partition())) {
                Err("No sane dump device found");
                goto done;
        }
        dump_device = dump_part->device;

        /*
         * Stop ongoing I/O with polling driver and make the shift to I/O mode
         * for dump
         */
        Dbg("do quiesce");
        if (dump_device->ops.quiesce)
                if ((ret = dump_device->ops.quiesce(dump_device)) < 0) {
                        Err("quiesce failed. error %d", ret);
                        goto done;
                }

        /* The partition must at least hold the headers and the bitmap */
        if (SECTOR_BLOCK(dump_part->nr_sects) < header_blocks + bitmap_blocks) {
                Warn("dump partition is too small. Aborted");
                goto done;
        }

        /* Check dump partition */
        printk("check dump partition...\n");
        if (!check_dump_partition(dump_part, total_blocks)) {
                Err("check partition failed.");
                goto done;
        }

        /*
         * Write the common header
         */
        memcpy(dump_header.signature, DISK_DUMP_SIGNATURE,
               sizeof(dump_header.signature));
        dump_header.utsname          = system_utsname;
        dump_header.timestamp        = xtime;
        dump_header.status           = DUMP_HEADER_INCOMPLETED;
        dump_header.block_size       = PAGE_SIZE;
        dump_header.sub_hdr_size     = size_of_sub_header();
        dump_header.bitmap_blocks    = bitmap_blocks;
        dump_header.max_mapnr        = max_pfn;
        dump_header.total_ram_blocks = total_ram_blocks;
        dump_header.device_blocks    = SECTOR_BLOCK(dump_part->nr_sects);
        dump_header.current_cpu      = smp_processor_id();
        dump_header.nr_cpus          = num_online_cpus();
        dump_header.written_blocks   = 2;

        /* NOTE(review): the result of write_header() is ignored here */
        write_header(dump_part);

        /*
         * Write the architecture dependent header
         */
        Dbg("write sub header");
        if ((ret = write_sub_header()) < 0) {
                Err("writing sub header failed. error %d", ret);
                goto done;
        }

        Dbg("writing memory bitmaps..");
        if ((ret = write_bitmap(dump_part, header_blocks, bitmap_blocks)) < 0)
                goto done;

        /*
         * Save all RAM when the device is large enough, otherwise as many
         * blocks as fit after the headers and the bitmap.
         */
        max_written_blocks = total_ram_blocks;
        if (dump_header.device_blocks < total_blocks) {
                Warn("dump partition is too small. actual blocks %u. expected blocks %u. whole memory will not be saved",
                                dump_header.device_blocks, total_blocks);
                max_written_blocks -= (total_blocks - dump_header.device_blocks);
        }

        /* Record the progress so far; status is still INCOMPLETED */
        dump_header.written_blocks += dump_header.sub_hdr_size;
        dump_header.written_blocks += dump_header.bitmap_blocks;
        write_header(dump_part);

        printk("dumping memory..\n");
        if ((ret = write_memory(dump_part, header_blocks + bitmap_blocks,
                                max_written_blocks, &written_blocks)) < 0)
                goto done;

        /*
         * Set the number of block that is written into and write it
         * into partition again.
         */
        dump_header.written_blocks += written_blocks;
        dump_header.status = DUMP_HEADER_COMPLETED;
        write_header(dump_part);

        dump_err = 0;

done:
        Dbg("do adapter shutdown.");
        if (dump_device && dump_device->ops.shutdown)
                if (dump_device->ops.shutdown(dump_device))
                        Err("adapter shutdown failed.");
}
679
680 static struct disk_dump_partition *find_dump_partition(struct block_device *bdev)
681 {
682         struct disk_dump_device *dump_device;
683         struct disk_dump_partition *dump_part;
684
685         list_for_each_entry(dump_device, &disk_dump_devices, list)
686                 list_for_each_entry(dump_part, &dump_device->partitions, list)
687                         if (dump_part->bdev == bdev)
688                                 return dump_part;
689         return NULL;
690 }
691
692 static struct disk_dump_device *find_dump_device(struct disk_dump_device *device)
693 {
694         struct disk_dump_device *dump_device;
695
696         list_for_each_entry(dump_device, &disk_dump_devices, list)
697                 if (device->device == dump_device->device)
698                         return  dump_device;
699         return NULL;
700 }
701
702 static void *find_real_device(struct device *dev,
703                               struct disk_dump_type **_dump_type)
704 {
705         void *real_device;
706         struct disk_dump_type *dump_type;
707
708         list_for_each_entry(dump_type, &disk_dump_types, list)
709                 if ((real_device = dump_type->probe(dev)) != NULL) {
710                         *_dump_type = dump_type;
711                         return real_device;
712                 }
713         return NULL;
714 }
715
716 /*
717  * Add dump partition structure corresponding to file to the dump device
718  * structure.
719  */
720 static int add_dump_partition(struct disk_dump_device *dump_device,
721                               struct block_device *bdev)
722 {
723         struct disk_dump_partition *dump_part;
724         char buffer[BDEVNAME_SIZE];
725
726         if (!(dump_part = kmalloc(sizeof(*dump_part), GFP_KERNEL)))
727                 return -ENOMEM;
728
729         dump_part->device = dump_device;
730         dump_part->bdev = bdev;
731
732         if (!bdev || !bdev->bd_part)
733                 return -EINVAL;
734         dump_part->nr_sects   = bdev->bd_part->nr_sects;
735         dump_part->start_sect = bdev->bd_part->start_sect;
736
737         if (SECTOR_BLOCK(dump_part->nr_sects) < total_blocks)
738                 Warn("%s is too small to save whole system memory\n",
739                         bdevname(bdev, buffer));
740
741         list_add(&dump_part->list, &dump_device->partitions);
742
743         return 0;
744 }
745
746 /*
747  * Add dump device and partition.
748  * Must be called with disk_dump_mutex held.
749  */
750 static int add_dump(struct device *dev, struct block_device *bdev)
751 {
752         struct disk_dump_type *dump_type = NULL;
753         struct disk_dump_device *dump_device;
754         void *real_device;
755         int ret;
756
757         if ((ret = blkdev_get(bdev, FMODE_READ, 0)) < 0)
758                 return ret;
759
760         /* Check whether this block device is already registered */
761         if (find_dump_partition(bdev)) {
762                 blkdev_put(bdev);
763                 return -EEXIST;
764         }
765
766         /* find dump_type and real device for this inode */
767         if (!(real_device = find_real_device(dev, &dump_type))) {
768                 blkdev_put(bdev);
769                 return -ENXIO;
770         }
771
772         /* Check whether this device is already registered */
773         dump_device = find_dump_device(real_device);
774         if (dump_device == NULL) {
775                 /* real_device is not registered. create new dump_device */
776                 if (!(dump_device = kmalloc(sizeof(*dump_device), GFP_KERNEL))) {
777                         blkdev_put(bdev);
778                         return -ENOMEM;
779                 }
780
781                 memset(dump_device, 0, sizeof(*dump_device));
782                 INIT_LIST_HEAD(&dump_device->partitions);
783
784                 dump_device->dump_type = dump_type;
785                 dump_device->device = real_device;
786                 if ((ret = dump_type->add_device(dump_device)) < 0) {
787                         kfree(dump_device);
788                         blkdev_put(bdev);
789                         return ret;
790                 }
791                 if (!try_module_get(dump_type->owner))
792                         return -EINVAL;
793                 list_add(&dump_device->list, &disk_dump_devices);
794         }
795
796         ret = add_dump_partition(dump_device, bdev);
797         if (ret < 0 && list_empty(&dump_device->list)) {
798                 dump_type->remove_device(dump_device);
799                 module_put(dump_type->owner);
800                 list_del(&dump_device->list);
801                 kfree(dump_device);
802         }
803         if (ret < 0)
804                 blkdev_put(bdev);
805
806         return ret;
807 }
808
809 /*
810  * Remove dump partition corresponding to bdev.
811  * Must be called with disk_dump_mutex held.
812  */
813 static int remove_dump(struct block_device *bdev)
814 {
815         struct disk_dump_device *dump_device;
816         struct disk_dump_partition *dump_part;
817         struct disk_dump_type *dump_type;
818
819         if (!(dump_part = find_dump_partition(bdev))) {
820                 bdput(bdev);
821                 return -ENOENT;
822         }
823
824         blkdev_put(bdev);
825         dump_device = dump_part->device;
826         list_del(&dump_part->list);
827         kfree(dump_part);
828
829         if (list_empty(&dump_device->partitions)) {
830                 dump_type = dump_device->dump_type;
831                 dump_type->remove_device(dump_device);
832                 module_put(dump_type->owner);
833                 list_del(&dump_device->list);
834                 kfree(dump_device);
835         }
836
837         return 0;
838 }
839
840 #ifdef CONFIG_PROC_FS
841 static struct disk_dump_partition *dump_part_by_pos(struct seq_file *seq,
842                                                     loff_t pos)
843 {
844         struct disk_dump_device *dump_device;
845         struct disk_dump_partition *dump_part;
846
847         list_for_each_entry(dump_device, &disk_dump_devices, list) {
848                 seq->private = dump_device;
849                 list_for_each_entry(dump_part, &dump_device->partitions, list)
850                         if (!pos--)
851                                 return dump_part;
852         }
853         return NULL;
854 }
855
856 static void *disk_dump_seq_start(struct seq_file *seq, loff_t *pos)
857 {
858         loff_t n = *pos;
859
860         down(&disk_dump_mutex);
861
862         if (!n--)
863                 return (void *)1;       /* header */
864
865         return dump_part_by_pos(seq, n);
866 }
867
/*
 * seq_file next: advance *pos and return the element following v.
 * After the header token the first partition is returned; otherwise the
 * next partition of the current device, or the first partition of the
 * next device. NULL is returned when the device list is exhausted.
 *
 * NOTE(review): v and seq->private are handled as struct list_head
 * pointers although dump_part_by_pos() stores a struct disk_dump_device *
 * and returns a struct disk_dump_partition *. This presumably relies on
 * "list" being the first member of both structures - confirm against
 * their definitions.
 */
static void *disk_dump_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct list_head *partition = v;
        struct list_head *device = seq->private;
        struct disk_dump_device *dump_device;

        (*pos)++;
        if (v == (void *)1)
                return dump_part_by_pos(seq, 0);

        dump_device = list_entry(device, struct disk_dump_device, list);

        /* More partitions on the current device? */
        partition = partition->next;
        if (partition != &dump_device->partitions)
                return partition;

        /* Move on to the next device, stopping at the list head */
        device = device->next;
        seq->private = device;
        if (device == &disk_dump_devices)
                return NULL;

        dump_device = list_entry(device, struct disk_dump_device, list);

        /*
         * NOTE(review): if this device had no partitions, the value
         * returned would be the list head itself rather than a partition;
         * presumably devices without partitions never stay registered -
         * confirm.
         */
        return dump_device->partitions.next;
}
893
894 static void disk_dump_seq_stop(struct seq_file *seq, void *v)
895 {
896         up(&disk_dump_mutex);
897 }
898
899 static int disk_dump_seq_show(struct seq_file *seq, void *v)
900 {
901         struct disk_dump_partition *dump_part = v;
902         char buf[BDEVNAME_SIZE];
903
904         if (v == (void *)1) {   /* header */
905                 seq_printf(seq, "# sample_rate: %u\n", sample_rate);
906                 seq_printf(seq, "# block_order: %u\n", block_order);
907                 seq_printf(seq, "# fallback_on_err: %u\n", fallback_on_err);
908                 seq_printf(seq, "# allow_risky_dumps: %u\n", allow_risky_dumps);
909                 seq_printf(seq, "# total_blocks: %u\n", total_blocks);
910                 seq_printf(seq, "#\n");
911
912                 return 0;
913         }
914
915         seq_printf(seq, "%s %lu %lu\n", bdevname(dump_part->bdev, buf),
916                         dump_part->start_sect, dump_part->nr_sects);
917         return 0;
918 }
919
920 static struct seq_operations disk_dump_seq_ops = {
921         .start  = disk_dump_seq_start,
922         .next   = disk_dump_seq_next,
923         .stop   = disk_dump_seq_stop,
924         .show   = disk_dump_seq_show,
925 };
926
927 static int disk_dump_open(struct inode *inode, struct file *file)
928 {
929         return seq_open(file, &disk_dump_seq_ops);
930 }
931
932 static struct file_operations disk_dump_fops = {
933         .owner          = THIS_MODULE,
934         .open           = disk_dump_open,
935         .read           = seq_read,
936         .llseek         = seq_lseek,
937         .release        = seq_release,
938 };
939 #endif
940
941 int register_disk_dump_device(struct device *dev, struct block_device *bdev)
942 {
943         int ret;
944
945         down(&disk_dump_mutex);
946         ret = add_dump(dev, bdev);
947         set_crc_modules();
948         up(&disk_dump_mutex);
949
950         return ret;
951 }
952
953 int unregister_disk_dump_device(struct block_device *bdev)
954 {
955         int ret;
956
957         down(&disk_dump_mutex);
958         ret = remove_dump(bdev);
959         set_crc_modules();
960         up(&disk_dump_mutex);
961
962         return ret;
963 }
964
965 int find_disk_dump_device(struct block_device *bdev)
966 {
967         int ret;
968
969         down(&disk_dump_mutex);
970         ret = (find_dump_partition(bdev) != NULL);
971         up(&disk_dump_mutex);
972
973         return ret;
974 }
975
976 int register_disk_dump_type(struct disk_dump_type *dump_type)
977 {
978         down(&disk_dump_mutex);
979         list_add(&dump_type->list, &disk_dump_types);
980         set_crc_modules();
981         up(&disk_dump_mutex);
982
983         return 0;
984 }
985
986 EXPORT_SYMBOL_GPL(register_disk_dump_type);
987
988 int unregister_disk_dump_type(struct disk_dump_type *dump_type)
989 {
990         down(&disk_dump_mutex);
991         list_del(&dump_type->list);
992         set_crc_modules();
993         up(&disk_dump_mutex);
994
995         return 0;
996 }
997
998 EXPORT_SYMBOL_GPL(unregister_disk_dump_type);
999
1000 static void compute_total_blocks(void)
1001 {
1002         unsigned long nr;
1003
1004         /*
1005          * the number of block of the common header and the header
1006          * that is depend on the architecture
1007          *
1008          * block 0:             dump partition header
1009          * block 1:             dump header
1010          * block 2:             dump subheader
1011          * block 3..n:          memory bitmap
1012          * block (n + 1)...:    saved memory
1013          *
1014          * We never overwrite block 0
1015          */
1016         header_blocks = 2 + size_of_sub_header();
1017
1018         total_ram_blocks = 0;
1019         for (nr = next_ram_page(ULONG_MAX); nr < ULONG_MAX; nr = next_ram_page(nr))
1020                 total_ram_blocks++;
1021
1022         bitmap_blocks = ROUNDUP(max_pfn, 8 * PAGE_SIZE);
1023
1024         /*
1025          * The necessary size of area for dump is:
1026          * 1 block for common header
1027          * m blocks for architecture dependent header
1028          * n blocks for memory bitmap
1029          * and whole memory
1030          */
1031         total_blocks = header_blocks + bitmap_blocks + total_ram_blocks;
1032
1033         Info("total blocks required: %u (header %u + bitmap %u + memory %u)",
1034                 total_blocks, header_blocks, bitmap_blocks, total_ram_blocks);
1035 }
1036
1037 struct disk_dump_ops dump_ops = {
1038         .add_dump       = register_disk_dump_device,
1039         .remove_dump    = unregister_disk_dump_device,
1040         .find_dump      = find_disk_dump_device,
1041 };
1042
1043 static int init_diskdump(void)
1044 {
1045         unsigned long long t0;
1046         unsigned long long t1;
1047         struct page *page;
1048
1049         if (!platform_supports_diskdump) {
1050                 Err("platform does not support diskdump.");
1051                 return -1;
1052         }
1053
1054         /* Allocate one block that is used temporally */
1055         do {
1056                 page = alloc_pages(GFP_KERNEL, block_order);
1057                 if (page != NULL)
1058                         break;
1059         } while (--block_order >= 0);
1060         if (!page) {
1061                 Err("alloc_pages failed.");
1062                 return -1;
1063         }
1064         scratch = page_address(page);
1065         Info("Maximum block size: %lu", PAGE_SIZE << block_order);
1066
1067         if (diskdump_register_hook(start_disk_dump)) {
1068                 Err("failed to register hooks.");
1069                 return -1;
1070         }
1071
1072         if (diskdump_register_ops(&dump_ops)) {
1073                 Err("failed to register ops.");
1074                 return -1;
1075         }
1076
1077         compute_total_blocks();
1078
1079         platform_timestamp(t0);
1080         diskdump_mdelay(1);
1081         platform_timestamp(t1);
1082         timestamp_1sec = (unsigned long)(t1 - t0) * 1000;
1083
1084         /*
1085          *  Allocate a separate stack for diskdump.
1086          */
1087         platform_init_stack(&diskdump_stack);
1088
1089         down(&disk_dump_mutex);
1090         set_crc_modules();
1091         up(&disk_dump_mutex);
1092
1093 #ifdef CONFIG_PROC_FS
1094         {
1095                 struct proc_dir_entry *p;
1096
1097                 p = create_proc_entry("diskdump", S_IRUGO|S_IWUSR, NULL);
1098                 if (p)
1099                         p->proc_fops = &disk_dump_fops;
1100         }
1101 #endif
1102
1103         return 0;
1104 }
1105
1106 static void cleanup_diskdump(void)
1107 {
1108         Info("shut down.");
1109         diskdump_unregister_hook();
1110         diskdump_unregister_ops();
1111         platform_cleanup_stack(diskdump_stack);
1112         free_pages((unsigned long)scratch, block_order);
1113 #ifdef CONFIG_PROC_FS
1114         remove_proc_entry("diskdump", NULL);
1115 #endif
1116 }
1117
1118 module_init(init_diskdump);
1119 module_exit(cleanup_diskdump);
1120
1121 MODULE_LICENSE("GPL");