2 * linux/kernel/power/swsusp.c
4 * This file implements an architecture-independent
5 * machine suspend feature using almost exclusively high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
10 * This file is released under the GPLv2.
12 * I'd like to thank the following people for their work:
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
29 * Andreas Mohr <a.mohr@mailto.de>
31 * Alex Badea <vampire@go.ro>:
34 * More state savers are welcome. Especially for the scsi layer...
36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39 #include <linux/module.h>
41 #include <linux/suspend.h>
42 #include <linux/smp_lock.h>
43 #include <linux/file.h>
44 #include <linux/utsname.h>
45 #include <linux/version.h>
46 #include <linux/delay.h>
47 #include <linux/reboot.h>
48 #include <linux/bitops.h>
49 #include <linux/vt_kern.h>
50 #include <linux/kbd_kern.h>
51 #include <linux/keyboard.h>
52 #include <linux/spinlock.h>
53 #include <linux/genhd.h>
54 #include <linux/kernel.h>
55 #include <linux/major.h>
56 #include <linux/swap.h>
58 #include <linux/device.h>
59 #include <linux/buffer_head.h>
60 #include <linux/swapops.h>
61 #include <linux/bootmem.h>
62 #include <linux/syscalls.h>
63 #include <linux/console.h>
64 #include <linux/highmem.h>
66 #include <asm/uaccess.h>
67 #include <asm/mmu_context.h>
68 #include <asm/pgtable.h>
// Module-wide suspend/resume state.
// Gate for software_suspend(): tested and cleared there, set back to 1 later
// in that function and in software_resume().
73 unsigned char software_suspend_enabled = 0;
// Value stored into resume_status by resume_setup().
76 #define RESUME_SPECIFIED 2
78 /* References to section boundaries */
79 extern char __nosave_begin, __nosave_end;
// Defined outside this file; the zone walkers treat a non-zero result as the
// number of page frames to skip.
81 extern int is_head_of_free_region(struct page *);
// Taken with spin_lock_irq() around the image copy and restore steps.
84 spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
86 /* Variables to be preserved over suspend */
// Snapshots written in suspend_prepare_image() and compared with BUG_ON() in
// do_magic_resume_2().
87 static int pagedir_order_check;
88 static int nr_copy_pages_check;
// Compared against NORESUME and RESUME_SPECIFIED by the boot-time code.
90 static int resume_status;
91 static char resume_file[256] = ""; /* For resume= kernel option */
// Filled from name_to_dev_t() in read_suspend_image().
92 static dev_t resume_device;
93 /* Local variables that should not be affected by save */
// Number of pages in the image; also stored as num_pbes in the image header.
94 unsigned int nr_copy_pages __nosavedata = 0;
// Pagedir bookkeeping. Entries are accessed in this file as pagedir+i with
// the fields address, orig_address and swap_address.
96 /* Suspend pagedir is allocated before final copy, therefore it
97 must be freed after resume
99 Warning: this is evil. There are actually two pagedirs at time of
100 resume. One is "pagedir_save", which is empty frame allocated at
101 time of suspend, that must be freed. Second is "pagedir_nosave",
102 allocated at time of resume, that travels through memory not to
103 collide with anything.
105 Warning: this is even more evil than it seems. Pagedirs this file
106 talks about are completely different from page directories used by
// Working pagedir pointer, indexed as pagedir_nosave+i throughout the file.
109 suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
// Pagedir pointer recorded at suspend time; on resume it is reloaded from the
// suspend_pagedir field of the image header.
110 static suspend_pagedir_t *pagedir_save;
// Allocation order of the pagedir (it spans PAGE_SIZE << pagedir_order bytes).
111 static int pagedir_order __nosavedata = 0;
// NOTE(review): the three members below are reached elsewhere as cur->link,
// cur->swh and cur->sh on a union diskpage; their enclosing declarations are
// not visible in this view. The dummy array presumably pads struct link so
// that its next field occupies the last bytes of a page - TODO confirm.
114 char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
119 union swap_header swh;
121 struct suspend_header sh;
// Tunables, message prefixes and debug helpers.
125 * XXX: We try to keep some more pages free so that I/O operations succeed
126 * without paging. Might this be more?
// Added to nr_copy_pages in suspend_prepare_image() to get the number of free
// pages and free swap slots required.
128 #define PAGES_FOR_IO 512
// Prefixes passed to the "%s" at the start of suspend and resume messages.
130 static const char name_suspend[] = "Suspend Machine: ";
131 static const char name_resume[] = "Resume Machine: ";
136 #define DEBUG_DEFAULT
139 #define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
// Two alternative definitions follow for each of PRINTK and MDELAY: one that
// forwards to printk or mdelay and one that expands to an empty statement.
// NOTE(review): the conditionals selecting between them are not visible here.
142 # define PRINTK(f, a...) printk(f, ## a)
144 # define PRINTK(f, a...) do { } while(0)
148 #define MDELAY(a) mdelay(a)
150 #define MDELAY(a) do { } while(0)
// fill_suspend_header - populate the image header that sanity_check() later
// compares against the running kernel.
// sh: header to fill; it is zeroed first.
// The caller (write_suspend_image) calls BUG() when the result is non-zero;
// the return statement itself is not visible in this view.
157 static __inline__ int fill_suspend_header(struct suspend_header *sh)
159 memset((char *)sh, 0, sizeof(*sh));
161 sh->version_code = LINUX_VERSION_CODE;
162 sh->num_physpages = num_physpages;
// strncpy does not terminate the destination when the source is at least as
// long as the bound; sanity_check() compares with strncmp and the same bounds.
// Assumes the machine and version fields hold at least 8 and 20 bytes - TODO confirm.
163 strncpy(sh->machine, system_utsname.machine, 8);
164 strncpy(sh->version, system_utsname.version, 20);
165 /* FIXME: Is this bogus? --RR */
166 sh->num_cpus = num_online_cpus();
167 sh->page_size = PAGE_SIZE;
168 sh->suspend_pagedir = pagedir_nosave;
// At suspend time both pagedir pointers are expected to be the same value.
169 BUG_ON (pagedir_save != pagedir_nosave);
170 sh->num_pbes = nr_copy_pages;
171 /* TODO: needed? mounted fs' last mounted date comparison
172 * [so they haven't been mounted since last suspend.
173 * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
// Per-swap-device role table and the mode values accepted by mark_swapfiles().
178 /* We memorize in swapfile_used what swap devices are used for suspension */
179 #define SWAPFILE_UNUSED 0
180 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
181 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
// One entry per swap_info slot, filled by read_swapfiles().
183 static unsigned short swapfile_used[MAX_SWAPFILES];
// Swap type used to build swp_entry(root_swap, 0) in mark_swapfiles();
// 0xFFFF is tested there and in read_swapfiles() as a special value.
184 static unsigned short root_swap;
// mode arguments for mark_swapfiles(): write the suspend signature, or
// restore the normal swap signature.
185 #define MARK_SWAP_SUSPEND 0
186 #define MARK_SWAP_RESUME 2
// mark_swapfiles - rewrite the signature in the first page of the suspend
// swap device.
// prev: swap entry stored in link.next when marking for suspend.
// mode: MARK_SWAP_RESUME restores "SWAP-SPACE" or "SWAPSPACE2" from an "S1"
//       or "S2" marker; the other visible branch replaces a normal signature
//       with "S1SUSP...." or "S2SUSP...." and records prev.
// NOTE(review): the declarations of entry, cur and page and the statements
// after the final write are not visible in this view.
188 static void mark_swapfiles(swp_entry_t prev, int mode)
194 if (root_swap == 0xFFFF) /* ignored */
197 page = alloc_page(GFP_ATOMIC);
199 panic("Out of memory in mark_swapfiles");
200 cur = page_address(page);
201 /* XXX: this is dirty hack to get first page of swap file */
202 entry = swp_entry(root_swap, 0);
203 rw_swap_page_sync(READ, entry, page);
205 if (mode == MARK_SWAP_RESUME) {
// Only the first two bytes of the marker are compared on this path.
206 if (!memcmp("S1",cur->swh.magic.magic,2))
207 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
208 else if (!memcmp("S2",cur->swh.magic.magic,2))
209 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
// NOTE(review): the message below opens a parenthesis that it never closes.
210 else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
211 name_resume, cur->swh.magic.magic);
// Suspend direction: an unrecognised signature is fatal here, whereas the
// resume direction above only prints a message.
213 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
214 memcpy(cur->swh.magic.magic,"S1SUSP....",10);
215 else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
216 memcpy(cur->swh.magic.magic,"S2SUSP....",10);
217 else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
218 cur->link.next = prev; /* prev is the first/last swap page of the resume area */
219 /* link.next lies *no more* in last 4/8 bytes of magic */
// The modified first page is written back in either mode.
221 rw_swap_page_sync(WRITE, entry, page);
// is_resume_device - non-zero when the swap area is a block device whose
// major and minor numbers equal resume_device.
// swap_info: swap area to test; its swap_file, dentry and inode are
// dereferenced without NULL checks.
227 * Check whether the swap device is the specified resume
228 * device, irrespective of whether they are specified by
231 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
232 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
233 * and they'll be considered the same device. This is *necessary* for
234 * devfs, since the resume code can only recognize the form /dev/hda4,
235 * but the suspend code would see the long name.)
237 static int is_resume_device(const struct swap_info_struct *swap_info)
239 struct file *file = swap_info->swap_file;
240 struct inode *inode = file->f_dentry->d_inode;
// Comparison is by device number, not by path name.
242 return S_ISBLK(inode->i_mode) &&
243 resume_device == MKDEV(imajor(inode), iminor(inode));
// read_swapfiles - classify every swap_info slot into swapfile_used[] as
// SWAPFILE_UNUSED, SWAPFILE_SUSPEND or SWAPFILE_IGNORED.
// NOTE(review): the conditions that choose between the two classification
// paths below, and the assignments to root_swap, are not visible in this
// view; len is computed from resume_file and is presumably what selects the
// path - TODO confirm.
246 static void read_swapfiles(void) /* This is called before saving image */
250 len=strlen(resume_file);
254 for(i=0; i<MAX_SWAPFILES; i++) {
// A slot whose flags are zero is recorded as unused.
255 if (swap_info[i].flags == 0) {
256 swapfile_used[i]=SWAPFILE_UNUSED;
// Path that warns about a missing resume= option: a slot becomes the suspend
// device while root_swap still holds 0xFFFF, otherwise it is ignored.
259 printk(KERN_WARNING "resume= option should be used to set suspend device" );
260 if(root_swap == 0xFFFF) {
261 swapfile_used[i] = SWAPFILE_SUSPEND;
264 swapfile_used[i] = SWAPFILE_IGNORED;
266 /* we ignore all swap devices that are not the resume_file */
267 if (is_resume_device(&swap_info[i])) {
268 swapfile_used[i] = SWAPFILE_SUSPEND;
271 swapfile_used[i] = SWAPFILE_IGNORED;
// lock_swapdevices - toggle the low eight flag bits of every swap device
// marked SWAPFILE_IGNORED. Because the operation is an XOR, a second call
// restores the original flags.
279 static void lock_swapdevices(void) /* This is called after saving image so modification
280 will be lost after resume... and that's what we want. */
285 for(i = 0; i< MAX_SWAPFILES; i++)
286 if(swapfile_used[i] == SWAPFILE_IGNORED) {
287 swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
288 lock_swapdevices can unlock the devices. */
// write_suspend_image - write the copied pages, then the pagedir pages, then
// one header page to swap, and finally mark the swap signature.
// Every swap slot comes from get_swap_page() and must belong to the device
// marked SWAPFILE_SUSPEND; any shortage ends in panic().
// NOTE(review): the declarations of i and page, the failure tests after
// get_swap_page(), the updates of prev and the return statement are not
// visible in this view.
294 * write_suspend_image - Write entire image to disk.
296 * After writing suspend signature to the disk, suspend may no
297 * longer fail: we have ready-to-run image in swap, and rollback
298 * would happen on next reboot -- corrupting data.
300 * Note: The buffer we allocate to use to write the suspend header is
301 * not freed; its not needed since the system is going down anyway
302 * (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
304 static int write_suspend_image(void)
307 swp_entry_t entry, prev = { 0 };
308 int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
// buffer is a zeroed scratch page used for the header further down.
// NOTE(review): no NULL test on this allocation is visible.
309 union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
310 unsigned long address;
// Pass 1: write each copied page and remember its slot in the pagedir entry.
316 printk( "Writing data to swap (%d pages): ", nr_copy_pages );
317 for (i=0; i<nr_copy_pages; i++) {
320 entry = get_swap_page();
322 panic("\nNot enough swapspace when writing data" );
324 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
325 panic("\nPage %d: not enough swapspace on suspend device", i );
327 address = (pagedir_nosave+i)->address;
328 page = virt_to_page(address);
329 rw_swap_page_sync(WRITE, entry, page);
330 (pagedir_nosave+i)->swap_address = entry;
// Pass 2: write the pagedir itself, one page at a time.
333 printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
334 for (i=0; i<nr_pgdir_pages; i++) {
// The cast binds before the addition, so +i advances in units of
// union diskpage; the BUG_ON checks that this equals i pages in bytes.
335 cur = (union diskpage *)((char *) pagedir_nosave)+i;
336 BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
338 entry = get_swap_page();
340 printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
// NOTE(review): the free_page() call follows panic(), so it is presumably
// never reached - confirm panic() does not return here.
341 panic("Don't know how to recover");
342 free_page((unsigned long) buffer);
346 if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
347 panic("\nNot enough swapspace for pagedir on suspend device" );
349 BUG_ON (sizeof(swp_entry_t) != sizeof(long));
350 BUG_ON (PAGE_SIZE % sizeof(struct pbe));
// Each pagedir page stores the previous swap entry in link.next before it
// is written.
352 cur->link.next = prev;
353 page = virt_to_page((unsigned long)cur);
354 rw_swap_page_sync(WRITE, entry, page);
// Pass 3: the header page, built in the scratch buffer.
358 BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
359 BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
360 BUG_ON (sizeof(struct link) != PAGE_SIZE);
361 entry = get_swap_page();
363 panic( "\nNot enough swapspace when writing header" );
364 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
365 panic("\nNot enough swapspace for header on suspend device" );
367 cur = (void *) buffer;
368 if (fill_suspend_header(&cur->sh))
369 BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */
371 cur->link.next = prev;
373 page = virt_to_page((unsigned long)cur);
374 rw_swap_page_sync(WRITE, entry, page);
// Last step: replace the swap signature and store prev in the first page.
378 mark_swapfiles(prev, MARK_SWAP_SUSPEND);
// Singly linked list of saved highmem pages, built only under CONFIG_HIGHMEM.
// NOTE(review): the page and data members used by save_highmem_zone() and
// restore_highmem() are not visible in this view.
385 #ifdef CONFIG_HIGHMEM
386 struct highmem_page {
389 struct highmem_page *next;
// Head of the list; save_highmem_zone() links each new node in front of the
// current head and restore_highmem() walks the list from here.
392 struct highmem_page *highmem_copy = NULL;
// save_highmem_zone - walk every page frame spanned by zone and copy each
// page into a freshly allocated zeroed page tracked by a highmem_page node.
// zone: zone to scan, from zone_start_pfn for spanned_pages frames.
// NOTE(review): the declarations of page, kaddr and chunk_size, the
// allocation failure tests and the return statements are not visible here.
394 static int save_highmem_zone(struct zone *zone)
396 unsigned long zone_pfn;
397 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
399 struct highmem_page *save;
401 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
408 page = pfn_to_page(pfn);
410 * This condition results from rvmalloc() sans vmalloc_32()
411 * and architectural memory reservations. This should be
412 * corrected eventually when the cases giving rise to this
413 * are better understood.
// Reserved pages are reported on the console.
415 if (PageReserved(page)) {
416 printk("highmem reserved page?!\n");
// A non-zero result is the length of a free region starting at this page;
// both counters are advanced so the loop resumes after that region.
419 if ((chunk_size = is_head_of_free_region(page))) {
420 pfn += chunk_size - 1;
421 zone_pfn += chunk_size - 1;
424 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
// The new node points at the current list head.
427 save->next = highmem_copy;
429 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
// Map the page temporarily in order to read its contents.
434 kaddr = kmap_atomic(page, KM_USER0);
435 memcpy(save->data, kaddr, PAGE_SIZE);
436 kunmap_atomic(kaddr, KM_USER0);
// save_highmem - call save_highmem_zone() for every zone that is_highmem()
// accepts. suspend_prepare_image() treats a non-zero result as failure.
// NOTE(review): how res is checked and returned is not visible in this view.
442 static int save_highmem(void)
446 for_each_zone(zone) {
447 if (is_highmem(zone))
448 res = save_highmem_zone(zone);
// restore_highmem - pop every node off the highmem_copy list, copy the saved
// data back into the original page and free the data page.
// NOTE(review): the declaration of kaddr, any release of the node itself and
// the return statement are not visible in this view.
455 static int restore_highmem(void)
457 while (highmem_copy) {
458 struct highmem_page *save = highmem_copy;
// Unlink the node before using it.
460 highmem_copy = save->next;
462 kaddr = kmap_atomic(save->page, KM_USER0);
463 memcpy(kaddr, save->data, PAGE_SIZE);
464 kunmap_atomic(kaddr, KM_USER0);
465 free_page((long) save->data);
472 static int pfn_is_nosave(unsigned long pfn)
474 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
475 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
476 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
// count_and_copy_zone - walk every page frame spanned by zone, count the
// pages that belong in the image and, when a pagedir is supplied, copy them.
// zone:      zone to scan.
// pagedir_p: address of the current pagedir cursor; *pagedir_p is read into
//            pbe on entry.
// Returns the count of pages for this zone.
// NOTE(review): the declaration of page, the statements under the skip
// conditions and the code that advances pbe and the counter are not visible.
479 /* if *pagedir_p != NULL it also copies the counted pages */
480 static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p)
// This local nr_copy_pages shadows the file-scope variable of the same name.
482 unsigned long zone_pfn, chunk_size, nr_copy_pages = 0;
483 struct pbe *pbe = *pagedir_p;
484 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
486 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
492 page = pfn_to_page(pfn);
// A page must never be both reserved and flagged nosave.
493 BUG_ON(PageReserved(page) && PageNosave(page));
494 if (PageNosave(page))
496 if (PageReserved(page) && pfn_is_nosave(pfn)) {
497 PRINTK("[nosave pfn 0x%lx]", pfn);
// A non-zero result is the length of a free region starting here; both
// counters are advanced past it.
500 if ((chunk_size = is_head_of_free_region(page))) {
501 pfn += chunk_size - 1;
502 zone_pfn += chunk_size - 1;
508 pbe->orig_address = (long) page_address(page);
509 /* Copy page is dangerous: it likes to mess with
510 preempt count on specific cpus. Wrong preempt count is then copied,
512 copy_page((void *)pbe->address, (void *)pbe->orig_address);
516 return nr_copy_pages;
// count_and_copy_data_pages - sum count_and_copy_zone() over every zone that
// is not highmem.
// pagedir_p: pagedir to fill, or NULL to count only (the first call in
//            suspend_prepare_image() passes NULL).
// Returns the total number of pages counted.
519 static int count_and_copy_data_pages(struct pbe *pagedir_p)
// Local accumulator; it shadows the file-scope nr_copy_pages.
521 int nr_copy_pages = 0;
523 for_each_zone(zone) {
524 if (!is_highmem(zone))
// The address of the by-value parameter is passed, so each zone sees the
// same cursor variable.
525 nr_copy_pages += count_and_copy_zone(zone, &pagedir_p);
527 return nr_copy_pages;
// free_suspend_pagedir_zone - walk every page frame spanned by zone and clear
// the Nosave flag on each page, treating frames inside the pagedir itself
// separately from the rest.
// zone:    zone to scan.
// pagedir: virtual address of the pagedir, PAGE_SIZE << pagedir_order bytes long.
// NOTE(review): the declaration of page and the statements executed in each
// branch of the test below are not visible in this view.
530 static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
532 unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
// Frame-number range [pagedir_pfn, pagedir_end_pfn) covered by the pagedir.
533 pagedir_end = pagedir + (PAGE_SIZE << pagedir_order);
534 pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT;
535 pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT;
536 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
538 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
541 page = pfn_to_page(pfn);
// TestClearPageNosave both reads and clears the flag.
542 if (!TestClearPageNosave(page))
544 else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn)
// free_suspend_pagedir - run free_suspend_pagedir_zone() over every zone that
// is not highmem, then release the pagedir allocation itself.
// this_pagedir: virtual address of the pagedir to release.
550 static void free_suspend_pagedir(unsigned long this_pagedir)
553 for_each_zone(zone) {
554 if (!is_highmem(zone))
555 free_suspend_pagedir_zone(zone, this_pagedir);
// The pagedir was allocated as one block of order pagedir_order.
557 free_pages(this_pagedir, pagedir_order);
// create_suspend_pagedir - allocate a pagedir large enough for nr_copy_pages
// entries, plus one zeroed page per entry, flagging all of them Nosave.
// nr_copy_pages: number of entries needed (shadows the file-scope variable).
// Sets the file-scope pagedir_order as a side effect. The caller treats a
// NULL result as allocation failure.
// NOTE(review): the declarations of i, p and page, the failure tests, the
// advance of p and the return statements are not visible in this view.
560 static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
563 suspend_pagedir_t *pagedir;
567 pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
569 p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
// Flag every page of the pagedir block itself.
573 page = virt_to_page(pagedir);
574 for(i=0; i < 1<<pagedir_order; i++)
575 SetPageNosave(page++);
// One destination page per entry.
577 while(nr_copy_pages--) {
578 p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
// Cleanup releases whatever has been flagged so far.
580 free_suspend_pagedir((unsigned long) pagedir);
583 SetPageNosave(virt_to_page(p->address));
// prepare_suspend_processes - sync filesystems, then freeze processes.
// software_suspend() proceeds only when the result is zero.
// NOTE(review): the statements following the error message and the return
// values are not visible in this view.
590 static int prepare_suspend_processes(void)
592 sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */
// A non-zero result from freeze_processes() is reported as a failure.
593 if (freeze_processes()) {
594 printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
// free_some_memory - call shrink_all_memory(10000) repeatedly for as long as
// it returns non-zero.
// NOTE(review): the loop body is not visible in this view.
602 * Try to free as much memory as possible, but do not OOM-kill anyone
604 * Notice: all userland should be stopped at this point, or livelock is possible.
606 static void free_some_memory(void)
608 printk("Freeing memory: ");
609 while (shrink_all_memory(10000))
// suspend_prepare_image - build the in-memory image: save highmem (when
// configured), count the pages to copy, check free memory and free swap,
// allocate the pagedir and copy the pages into it.
// do_magic_suspend_2() stores the result in is_problem.
// NOTE(review): the declaration of i (passed to si_swapinfo), the statements
// inside each failure branch and the return statements are not visible here.
614 static int suspend_prepare_image(void)
617 unsigned int nr_needed_pages = 0;
619 pagedir_nosave = NULL;
620 printk( "/critical section: ");
621 #ifdef CONFIG_HIGHMEM
622 printk( "handling highmem" );
623 if (save_highmem()) {
624 printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
630 printk("counting pages to copy" );
// First pass with a NULL pagedir: count only.
632 nr_copy_pages = count_and_copy_data_pages(NULL);
633 nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
635 printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
// There must be as many free pages as pages to copy plus the I/O reserve.
636 if(nr_free_pages() < nr_needed_pages) {
637 printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
638 name_suspend, nr_needed_pages-nr_free_pages());
642 si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information.
643 We should only consider resume_device. */
644 if (i.freeswap < nr_needed_pages) {
645 printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
646 name_suspend, nr_needed_pages-i.freeswap);
650 PRINTK( "Alloc pagedir\n" );
// Both pagedir pointers refer to the same allocation at suspend time.
651 pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
652 if (!pagedir_nosave) {
653 /* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
654 printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
// Snapshots verified with BUG_ON() in do_magic_resume_2().
657 nr_copy_pages_check = nr_copy_pages;
658 pagedir_order_check = pagedir_order;
660 drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
// Second pass copies the pages; its count must match the first pass.
661 if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
665 * End of critical section. From now on, we can write to memory,
666 * but we should not touch disk. This specially means we must _not_
667 * touch swap space! Except we must write out our image of course.
670 printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
// suspend_save_image - write the prepared image to swap, then toggle the
// flags of the ignored swap devices again.
// NOTE(review): the int result of write_suspend_image() is discarded here.
// Statements before the write and after the trailing comment are not visible
// in this view.
674 static void suspend_save_image(void)
679 write_suspend_image();
680 lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
682 /* It is important _NOT_ to umount filesystems at this point. We want
683 * them synced (in case something goes wrong) but we DO not want to mark
684 * filesystem clean: it is not. (And it does not matter, if we resume
685 * correctly, we'll mark system clean, anyway.)
// suspend_power_down - announce the power-down attempt and restart the
// machine when the test below is true; the last visible statement reports
// that the system could not be powered down.
// NOTE(review): the statements between the restart and the final message are
// not visible in this view.
689 static void suspend_power_down(void)
693 printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
695 PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
// Restart when exactly one of TEST_SWSUSP and the KG_CTRL bit of shift_state
// is set (the bit is normalised to 0 or 1 before the XOR).
697 if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
698 machine_restart(NULL);
707 printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
// do_magic_resume_1 - first resume-side step: take suspend_pagedir_lock with
// interrupts disabled, power devices down and wait one second.
// The lock is released in do_magic_resume_2().
// NOTE(review): statements before the lock is taken are not visible here.
717 asmlinkage void do_magic_resume_1(void)
721 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
723 device_power_down(3);
724 PRINTK( "Waiting for DMAs to settle down...\n");
725 mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
726 Do it with disabled interrupts for best effect. That way, if some
727 driver scheduled DMA, we have good chance for DMA to finish ;-). */
// do_magic_resume_2 - second resume-side step: verify the restored counters,
// flush the TLB, free the pagedir recorded at suspend time, release the lock
// taken in do_magic_resume_1() and restore the swap signature.
// NOTE(review): the calls made under the two conditional sections and between
// the messages are only partly visible in this view.
730 asmlinkage void do_magic_resume_2(void)
// The values written by suspend_prepare_image() must match the current ones.
732 BUG_ON (nr_copy_pages_check != nr_copy_pages);
733 BUG_ON (pagedir_order_check != pagedir_order);
735 __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
737 PRINTK( "Freeing prev allocated pagedir\n" );
738 free_suspend_pagedir((unsigned long) pagedir_save);
740 #ifdef CONFIG_HIGHMEM
741 printk( "Restoring highmem\n" );
744 printk("done, devices\n");
747 spin_unlock_irq(&suspend_pagedir_lock);
750 /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */
751 PRINTK( "Fixing swap signatures... " );
// A zero swap entry is passed; in resume mode mark_swapfiles() does not
// store it.
752 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
755 #ifdef SUSPEND_CONSOLE
756 acquire_console_sem();
757 update_screen(fg_console);
758 release_console_sem();
// The outline below describes do_magic(), which is implemented outside this
// file. do_magic_suspend_1 is its first suspend-side step and takes
// suspend_pagedir_lock with interrupts disabled; the lock is released in
// do_magic_suspend_2().
// NOTE(review): parts of the outline and any statements before the lock is
// taken are not visible in this view.
762 /* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
765 do_magic_suspend_1();
766 save_processor_state();
768 do_magic_suspend_2();
771 GO_TO_SWAPPER_PAGE_TABLES
775 restore_processor_state();
780 asmlinkage void do_magic_suspend_1(void)
785 spin_lock_irq(&suspend_pagedir_lock);
// do_magic_suspend_2 - second suspend-side step: power devices down, build
// the image, drop the lock taken in do_magic_suspend_1(), then save the image
// and power down. The statements after suspend_power_down() form the failure
// path.
// NOTE(review): the declaration of is_problem and the condition that guards
// the save and power-down calls are not visible in this view.
788 asmlinkage void do_magic_suspend_2(void)
792 device_power_down(3);
793 is_problem = suspend_prepare_image();
795 spin_unlock_irq(&suspend_pagedir_lock);
797 kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
799 suspend_save_image();
800 suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
803 printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
804 MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
808 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
// NOTE(review): only the pagedir block is released here; the per-entry pages
// allocated by create_suspend_pagedir() are not freed by any visible statement.
810 free_pages((unsigned long) pagedir_nosave, pagedir_order);
811 spin_unlock_irq(&suspend_pagedir_lock);
814 PRINTK( "Fixing swap signatures... " );
815 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
// software_suspend - exported entry point for suspending the machine.
// Visible sequence: test and clear software_suspend_enabled, let the
// architecture prepare, prepare the console, stop processes, disable the
// non-boot CPUs and suspend devices; afterwards re-enable the CPUs, set the
// flag back to 1 and restore the console.
// NOTE(review): the declaration of res, the early returns, the calls that
// free memory and invoke do_magic, and the final return are not visible here.
820 * This is main interface to the outside world. It needs to be
821 * called from process context.
823 int software_suspend(void)
826 if (!software_suspend_enabled)
// Cleared for the duration of the attempt.
829 software_suspend_enabled = 0;
832 if (arch_prepare_suspend()) {
833 printk("%sArchitecture failed to prepare\n", name_suspend);
// A console allocation failure is reported but does not stop the suspend.
836 if (pm_prepare_console())
837 printk( "%sCan't allocate a console... proceeding\n", name_suspend);
838 if (!prepare_suspend_processes()) {
840 /* At this point, all user processes and "dangerous"
841 kernel threads are stopped. Free some memory, as we
842 need half of memory free. */
845 disable_nonboot_cpus();
846 /* Save state of all device drivers, and stop them. */
847 printk("Suspending devices... ");
848 if ((res = device_suspend(3))==0) {
849 /* If stopping device drivers worked, we proceed basically into
850 * suspend_save_image.
852 * do_magic(0) returns after system is resumed.
854 * do_magic() copies all "used" memory to "free" memory, then
855 * unsuspends all device drivers, and writes memory to disk
856 * using normal kernel mechanism.
861 enable_nonboot_cpus();
864 software_suspend_enabled = 1;
866 pm_restore_console();
// does_collide(addr) is does_collide_order() applied to pagedir_nosave with
// order 0, i.e. a single page.
// does_collide_order - scan the first nr_copy_pages entries of pagedir for an
// orig_address inside [addr, addr + (PAGE_SIZE << order)).
// pagedir: entries to scan.
// addr:    start of the candidate range.
// NOTE(review): the remainder of the parameter list (order is used below),
// the declaration of i and the return statements are not visible here.
870 /* More restore stuff */
872 #define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
875 * Returns true if given address/order collides with any orig_address
877 static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
// Exclusive end of the candidate range.
881 unsigned long addre = addr + (PAGE_SIZE<<order);
883 for(i=0; i < nr_copy_pages; i++)
884 if((pagedir+i)->orig_address >= addr &&
885 (pagedir+i)->orig_address < addre)
// check_pagedir - give every pagedir entry a zeroed page whose address does
// not collide with any orig_address, retrying the allocation until a
// non-colliding page is found.
// __read_suspend_image() treats a non-zero result as failure.
// NOTE(review): the declarations of i and addr, the allocation failure test
// and the return statements are not visible in this view; no statement
// releasing a rejected page is visible either.
892 * We check here that pagedir & pages it points to won't collide with pages
893 * where we're going to restore from the loaded pages later
895 static int check_pagedir(void)
899 for(i=0; i < nr_copy_pages; i++) {
903 addr = get_zeroed_page(GFP_ATOMIC);
906 } while (does_collide(addr));
908 (pagedir_nosave+i)->address = addr;
// relocate_pagedir - if the pagedir block itself overlaps an orig_address,
// allocate blocks of the same order until one does not overlap, copy the
// pagedir into it and free blocks of that order afterwards.
// __read_suspend_image() treats a non-zero result as failure.
// NOTE(review): the bookkeeping that chains rejected blocks through
// eaten_memory and c, the assignment of the new pagedir pointer, the loop that
// frees f and the return statements are not visible in this view.
913 static int relocate_pagedir(void)
916 * We have to avoid recursion (not to overflow kernel stack),
917 * and that's why code looks pretty cryptic
919 suspend_pagedir_t *old_pagedir = pagedir_nosave;
920 void **eaten_memory = NULL;
921 void **c = eaten_memory, *m, *f;
924 printk("Relocating pagedir ");
// Nothing to do when the current block already avoids every orig_address.
926 if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
927 printk("not necessary\n");
// Keep allocating until a block that avoids every orig_address turns up, or
// the allocator returns NULL.
931 while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order)) != NULL) {
932 if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
941 printk("out of memory\n");
945 memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
953 free_pages((unsigned long)f, pagedir_order);
// sanity_check_failed - log the reason, prefixed with name_resume, at
// KERN_ERR level. sanity_check() returns this function's result directly.
// reason: text describing the mismatch.
// NOTE(review): the return statement is not visible in this view.
960 * Sanity check if this image makes sense with this kernel/swap context
961 * I really don't think that it's foolproof but more than nothing..
964 static int sanity_check_failed(char *reason)
966 printk(KERN_ERR "%s%s\n", name_resume, reason);
// sanity_check - compare the image header with the running kernel, field by
// field, in the same order fill_suspend_header() wrote them. The first
// mismatch is reported through sanity_check_failed().
// sh: header read from the image.
// NOTE(review): the return statement for the all-match case is not visible.
970 static int sanity_check(struct suspend_header *sh)
972 if (sh->version_code != LINUX_VERSION_CODE)
973 return sanity_check_failed("Incorrect kernel version");
974 if (sh->num_physpages != num_physpages)
975 return sanity_check_failed("Incorrect memory size");
// Same bounds (8 and 20) as the strncpy calls in fill_suspend_header().
976 if (strncmp(sh->machine, system_utsname.machine, 8))
977 return sanity_check_failed("Incorrect machine type");
978 if (strncmp(sh->version, system_utsname.version, 20))
979 return sanity_check_failed("Incorrect version");
980 if (sh->num_cpus != num_online_cpus())
981 return sanity_check_failed("Incorrect number of cpus");
982 if (sh->page_size != PAGE_SIZE)
983 return sanity_check_failed("Incorrect PAGE_SIZE");
// bdev_read_page - read one page from a block device into buf.
// bdev: device to read from.
// pos:  byte offset; must be a multiple of PAGE_SIZE (enforced by BUG_ON).
// buf:  destination, at least PAGE_SIZE bytes.
// Callers treat a non-zero result as an I/O error.
// NOTE(review): the statements in the failure branch, the release of bh and
// the return statements are not visible in this view.
987 static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
989 struct buffer_head *bh;
990 BUG_ON (pos%PAGE_SIZE);
// The block number is pos expressed in PAGE_SIZE units.
991 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
992 if (!bh || (!bh->b_data)) {
// NOTE(review): the data is copied before the up-to-date check below.
995 memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
996 BUG_ON(!buffer_uptodate(bh));
// bdev_write_page - intended to write one page from buf to a block device at
// byte offset pos (a multiple of PAGE_SIZE).
// NOTE(review): two bodies are visible, one that reads the block, overwrites
// it and submits a write, and one that only prints an "unimplemented"
// warning. The preprocessor lines choosing between them are not visible in
// this view, nor are the return statements.
1001 static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
1004 struct buffer_head *bh;
1005 BUG_ON (pos%PAGE_SIZE);
1006 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1007 if (!bh || (!bh->b_data)) {
1010 memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
1011 BUG_ON(!buffer_uptodate(bh));
1012 generic_make_request(WRITE, bh);
1013 if (!buffer_uptodate(bh))
1014 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1019 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
// __read_suspend_image - read a suspend image from bdev.
// bdev:     block device holding the image.
// cur:      one-page scratch buffer supplied by the caller.
// noresume: flag from the caller; the statements that test it are not
//           visible in this view.
// Visible sequence: read page 0 and inspect the signature, restore the normal
// swap signature and write it back, read the header and sanity-check it,
// allocate and read the pagedir pages, relocate and check the pagedir, then
// read every image page to the address stored in its entry.
// Returns -EIO on the read failures shown.
// NOTE(review): the declaration of next, the end of the PREPARENEXT macro and
// the remaining return statements are not visible in this view.
1023 extern dev_t __init name_to_dev_t(const char *line);
1025 static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1028 int i, nr_pgdir_pages;
1030 #define PREPARENEXT \
1031 { next = cur->link.next; \
1032 next.val = swp_offset(next) * PAGE_SIZE; \
1035 if (bdev_read_page(bdev, 0, cur)) return -EIO;
// PREPARENEXT (defined above) loads link.next from the page just read and
// converts its swap offset into a byte position stored in next.val.
// A normal swap signature means there is no image to resume from.
1037 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1038 (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1039 printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1043 PREPARENEXT; /* We have to read next position before we overwrite it */
1045 if (!memcmp("S1",cur->swh.magic.magic,2))
1046 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1047 else if (!memcmp("S2",cur->swh.magic.magic,2))
1048 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
// NOTE(review): the message below opens a parenthesis that it never closes.
1052 panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
1053 name_resume, cur->swh.magic.magic);
1056 /* We don't do a sanity check here: we want to restore the swap
1057 whatever version of kernel made the suspend image;
1058 We need to write swap, but swap is *not* enabled so
1059 we must write the device directly */
1060 printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
// NOTE(review): the result of bdev_write_page() is not checked.
1061 bdev_write_page(bdev, 0, cur);
1064 printk( "%sSignature found, resuming\n", name_resume );
// The header page lives at the position taken from the first page.
1067 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1068 if (sanity_check(&cur->sh)) /* Is this same machine? */
1072 pagedir_save = cur->sh.suspend_pagedir;
1073 nr_copy_pages = cur->sh.num_pbes;
1074 nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1075 pagedir_order = get_bitmask_order(nr_pgdir_pages);
1077 pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1078 if (!pagedir_nosave)
1081 PRINTK( "%sReading pagedir, ", name_resume );
1083 /* We get pages in reverse order of saving! */
1084 for (i=nr_pgdir_pages-1; i>=0; i--) {
// The cast binds before the addition, so +i advances by whole pages.
1086 cur = (union diskpage *)((char *) pagedir_nosave)+i;
// NOTE(review): this early return leaves the pagedir allocated above in place.
1087 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1092 if (relocate_pagedir())
1094 if (check_pagedir())
1097 printk( "Reading image data (%d pages): ", nr_copy_pages );
1098 for(i=0; i < nr_copy_pages; i++) {
1099 swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1102 /* You do not need to check for overlaps...
1103 ... check_pagedir already did this work */
1104 if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
// read_suspend_image - resolve the resume device from its name, open it, read
// the image through __read_suspend_image() and print a message matching the
// resulting error code.
// specialfile: device name handed to name_to_dev_t().
// noresume:    passed through unchanged to __read_suspend_image().
// software_resume() treats a non-zero result as failure.
// NOTE(review): the declaration of error, the tests around the open call, the
// release of bdev, the switch that selects a message and the return are not
// visible in this view.
1111 static int __init read_suspend_image(const char * specialfile, int noresume)
1113 union diskpage *cur;
1114 unsigned long scratch_page = 0;
1116 char b[BDEVNAME_SIZE];
1118 resume_device = name_to_dev_t(specialfile);
// One zeroed page serves as the scratch buffer for the signature and header.
1119 scratch_page = get_zeroed_page(GFP_ATOMIC);
1120 cur = (void *) scratch_page;
1122 struct block_device *bdev;
1123 printk("Resuming from device %s\n",
1124 __bdevname(resume_device, b));
// NOTE(review): the device is opened with FMODE_READ although
// __read_suspend_image() also calls bdev_write_page() on it.
1125 bdev = open_by_devnum(resume_device, FMODE_READ);
1127 error = PTR_ERR(bdev);
1129 set_blocksize(bdev, PAGE_SIZE);
1130 error = __read_suspend_image(bdev, cur, noresume);
// Reached when the scratch page could not be allocated.
1133 } else error = -ENOMEM;
1136 free_page(scratch_page);
1139 PRINTK("Reading resume file was successful\n");
1144 printk( "%sI/O error\n", name_resume);
1147 printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1150 printk( "%sNot enough memory\n", name_resume);
1153 printk( "%sError %d resuming\n", name_resume, error );
// software_resume - boot-time entry point, registered with late_initcall().
// Visible sequence: warn when more than one CPU is online, enable suspending,
// handle the NORESUME case by reading the image with noresume set to 1,
// otherwise prepare the console, require a resume device, read the image and
// disable the non-boot CPUs.
// NOTE(review): the returns and jumps after each test, the call that performs
// the actual restore before the panic, and the final return are not visible.
1160 * software_resume - Resume from a saved image.
1162 * Called as a late_initcall (so all devices are discovered and
1163 * initialized), we call swsusp to see if we have a saved image or not.
1164 * If so, we quiesce devices, then restore the saved image. We will
1165 * return above (in pm_suspend_disk() ) if everything goes well.
1166 * Otherwise, we fail gracefully and return to the normally
1167 * scheduled program.
1170 static int __init software_resume(void)
1172 if (num_online_cpus() > 1) {
1173 printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
1176 /* We enable the possibility of machine suspend */
1177 software_suspend_enabled = 1;
1181 printk( "%s", name_resume );
// With noresume set the image is still read; the result is ignored here.
1182 if (resume_status == NORESUME) {
1184 read_suspend_image(resume_file, 1);
1185 printk( "disabled\n" );
1190 if (pm_prepare_console())
1191 printk("swsusp: Can't allocate a console... proceeding\n");
// An empty resume_file together with RESUME_SPECIFIED is reported as an
// unspecified device.
1193 if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1194 printk( "suspension device unspecified\n" );
1198 printk( "resuming from %s\n", resume_file);
1199 if (read_suspend_image(resume_file, 0))
1201 /* FIXME: Should we stop processes here, just to be safer? */
1202 disable_nonboot_cpus();
1205 panic("This never returns");
1208 pm_restore_console();
1212 late_initcall(software_resume);
// resume_setup - handler for the "resume=" boot parameter: copy the value
// into resume_file and record RESUME_SPECIFIED.
// str: text following "resume=".
// NOTE(review): the statement executed when NORESUME is already set and the
// return statements are not visible in this view.
1214 static int __init resume_setup(char *str)
1216 if (resume_status == NORESUME)
// At most 255 bytes are copied into the 256-byte, zero-initialised buffer, so
// the last byte stays zero.
1219 strncpy( resume_file, str, 255 );
1220 resume_status = RESUME_SPECIFIED;
// noresume_setup - handler for the "noresume" boot parameter: record NORESUME
// in resume_status. The argument is not used by the visible statement.
// NOTE(review): the return statement is not visible in this view.
1225 static int __init noresume_setup(char *str)
1227 resume_status = NORESUME;
// Boot-parameter registrations for the two handlers defined in this file,
// followed by the symbols this file exports.
1231 __setup("noresume", noresume_setup);
1232 __setup("resume=", resume_setup);
1234 EXPORT_SYMBOL(software_suspend);
1235 EXPORT_SYMBOL(software_suspend_enabled);