2 * linux/kernel/power/swsusp.c
4 * This file implements an architecture-independent
5 * machine suspend feature using almost exclusively high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
10 * This file is released under the GPLv2.
12 * I'd like to thank the following people for their work:
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
29 * Andreas Mohr <a.mohr@mailto.de>
31 * Alex Badea <vampire@go.ro>:
34 * More state savers are welcome. Especially for the scsi layer...
36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39 #include <linux/module.h>
41 #include <linux/suspend.h>
42 #include <linux/smp_lock.h>
43 #include <linux/file.h>
44 #include <linux/utsname.h>
45 #include <linux/version.h>
46 #include <linux/delay.h>
47 #include <linux/reboot.h>
48 #include <linux/bitops.h>
49 #include <linux/vt_kern.h>
50 #include <linux/kbd_kern.h>
51 #include <linux/keyboard.h>
52 #include <linux/spinlock.h>
53 #include <linux/genhd.h>
54 #include <linux/kernel.h>
55 #include <linux/major.h>
56 #include <linux/swap.h>
58 #include <linux/device.h>
59 #include <linux/buffer_head.h>
60 #include <linux/swapops.h>
61 #include <linux/bootmem.h>
62 #include <linux/syscalls.h>
63 #include <linux/console.h>
64 #include <linux/highmem.h>
66 #include <asm/uaccess.h>
67 #include <asm/mmu_context.h>
68 #include <asm/pgtable.h>
73 unsigned char software_suspend_enabled = 0;
76 #define RESUME_SPECIFIED 2
78 /* References to section boundaries */
79 extern char __nosave_begin, __nosave_end;
81 extern int is_head_of_free_region(struct page *);
84 spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
86 /* Variables to be preserved over suspend */
87 static int pagedir_order_check;
88 static int nr_copy_pages_check;
90 static int resume_status;
91 static char resume_file[256] = ""; /* For resume= kernel option */
92 static dev_t resume_device;
93 /* Local variables that should not be affected by save */
94 unsigned int nr_copy_pages __nosavedata = 0;
96 /* Suspend pagedir is allocated before final copy, therefore it
97 must be freed after resume
99 Warning: this is evil. There are actually two pagedirs at time of
100 resume. One is "pagedir_save", which is empty frame allocated at
101 time of suspend, that must be freed. Second is "pagedir_nosave",
102 allocated at time of resume, that travels through memory not to
103 collide with anything.
105 Warning: this is even more evil than it seems. Pagedirs this file
106 talks about are completely different from page directories used by
109 suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
110 static suspend_pagedir_t *pagedir_save;
111 static int pagedir_order __nosavedata = 0;
114 char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
119 union swap_header swh;
121 struct suspend_header sh;
125 * XXX: We try to keep some more pages free so that I/O operations succeed
126 * without paging. Might this be more?
128 #define PAGES_FOR_IO 512
130 static const char name_suspend[] = "Suspend Machine: ";
131 static const char name_resume[] = "Resume Machine: ";
136 #define DEBUG_DEFAULT
139 #define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
142 # define PRINTK(f, a...) printk(f, ## a)
144 # define PRINTK(f, a...) do { } while(0)
148 #define MDELAY(a) mdelay(a)
150 #define MDELAY(a) do { } while(0)
/*
 * fill_suspend_header - populate the on-disk suspend header at *sh.
 *
 * Clears the structure, then records the kernel version code, the
 * physical page count, the machine and version strings, the online CPU
 * count, the page size, the pagedir pointer and the number of page
 * backup entries. sanity_check() re-checks most of these at resume.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * write_suspend_image() treats any non-zero result as a BUG().
 */
157 static __inline__ int fill_suspend_header(struct suspend_header *sh)
159 memset((char *)sh, 0, sizeof(*sh));
161 sh->version_code = LINUX_VERSION_CODE;
162 sh->num_physpages = num_physpages;
/*
 * Fixed-width copies (8 and 20 bytes): strncpy() leaves the field
 * unterminated when the source is that long or longer. sanity_check()
 * compares with strncmp() using the same widths.
 */
163 strncpy(sh->machine, system_utsname.machine, 8);
164 strncpy(sh->version, system_utsname.version, 20);
165 /* FIXME: Is this bogus? --RR */
166 sh->num_cpus = num_online_cpus();
167 sh->page_size = PAGE_SIZE;
168 sh->suspend_pagedir = pagedir_nosave;
/* At suspend time the saved and the live pagedir pointer must agree. */
169 BUG_ON (pagedir_save != pagedir_nosave);
170 sh->num_pbes = nr_copy_pages;
171 /* TODO: needed? mounted fs' last mounted date comparison
172 * [so they haven't been mounted since last suspend.
173 * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
178 /* We memorize in swapfile_used what swap devices are used for suspension */
179 #define SWAPFILE_UNUSED 0
180 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
181 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
183 static unsigned short swapfile_used[MAX_SWAPFILES];
184 static unsigned short root_swap;
185 #define MARK_SWAP_SUSPEND 0
186 #define MARK_SWAP_RESUME 2
/*
 * mark_swapfiles - rewrite the signature in the first page of the
 * root_swap swap area.
 *
 * @prev: swap entry stored in link.next when marking for suspend.
 * @mode: MARK_SWAP_RESUME restores the normal swap signature; the other
 *        arm (callers pass MARK_SWAP_SUSPEND) installs the suspend one.
 *
 * The page at offset 0 is read with rw_swap_page_sync(), patched in
 * memory and written back. On resume an "S1"/"S2" prefix is turned back
 * into "SWAP-SPACE"/"SWAPSPACE2"; on suspend those signatures become
 * "S1SUSP...."/"S2SUSP....". An unknown signature is only logged on
 * resume but causes a panic on suspend.
 *
 * NOTE(review): the statement guarded by the root_swap == 0xFFFF test
 * and the "else" line between the two arms are not visible in this
 * listing.
 */
188 static void mark_swapfiles(swp_entry_t prev, int mode)
194 if (root_swap == 0xFFFF) /* ignored */
197 page = alloc_page(GFP_ATOMIC);
199 panic("Out of memory in mark_swapfiles");
200 cur = page_address(page);
201 /* XXX: this is dirty hack to get first page of swap file */
202 entry = swp_entry(root_swap, 0);
203 rw_swap_page_sync(READ, entry, page);
205 if (mode == MARK_SWAP_RESUME) {
206 if (!memcmp("S1",cur->swh.magic.magic,2))
207 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
208 else if (!memcmp("S2",cur->swh.magic.magic,2))
209 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
210 else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
211 name_resume, cur->swh.magic.magic);
213 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
214 memcpy(cur->swh.magic.magic,"S1SUSP....",10);
215 else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
216 memcpy(cur->swh.magic.magic,"S2SUSP....",10);
217 else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
218 cur->link.next = prev; /* prev is the first/last swap page of the resume area */
219 /* link.next lies *no more* in last 4/8 bytes of magic */
/* Write the patched first page back to the same swap entry. */
221 rw_swap_page_sync(WRITE, entry, page);
227 * Check whether the swap device is the specified resume
228 * device, irrespective of whether they are specified by
231 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
232 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
233 * and they'll be considered the same device. This is *necessary* for
234 * devfs, since the resume code can only recognize the form /dev/hda4,
235 * but the suspend code would see the long name.)
/*
 * is_resume_device - test whether a swap area lives on resume_device.
 *
 * Returns non-zero only if the inode behind swap_info->swap_file is a
 * block device whose major/minor pair equals resume_device. Comparing
 * device numbers rather than names is what lets differently named
 * device nodes match.
 */
237 static int is_resume_device(const struct swap_info_struct *swap_info)
239 struct file *file = swap_info->swap_file;
240 struct inode *inode = file->f_dentry->d_inode;
242 return S_ISBLK(inode->i_mode) &&
243 resume_device == MKDEV(imajor(inode), iminor(inode));
/*
 * read_swapfiles - classify every swap_info[] slot in swapfile_used[].
 *
 * Slots whose flags are 0 become SWAPFILE_UNUSED. Other slots become
 * SWAPFILE_SUSPEND or SWAPFILE_IGNORED: one path warns that resume=
 * should be used and picks a device while root_swap is still 0xFFFF,
 * the other path selects the device accepted by is_resume_device().
 *
 * NOTE(review): the conditions choosing between those two paths, and
 * any assignment to root_swap, are not visible in this listing;
 * presumably the choice depends on len (resume_file being empty) --
 * confirm against the full source.
 */
246 static void read_swapfiles(void) /* This is called before saving image */
250 len=strlen(resume_file);
254 for(i=0; i<MAX_SWAPFILES; i++) {
255 if (swap_info[i].flags == 0) {
256 swapfile_used[i]=SWAPFILE_UNUSED;
259 printk(KERN_WARNING "resume= option should be used to set suspend device" );
260 if(root_swap == 0xFFFF) {
261 swapfile_used[i] = SWAPFILE_SUSPEND;
264 swapfile_used[i] = SWAPFILE_IGNORED;
266 /* we ignore all swap devices that are not the resume_file */
267 if (is_resume_device(&swap_info[i])) {
268 swapfile_used[i] = SWAPFILE_SUSPEND;
271 swapfile_used[i] = SWAPFILE_IGNORED;
/*
 * lock_swapdevices - toggle usability of the ignored swap devices.
 *
 * XORs 0xFF into the flags of every swap area marked SWAPFILE_IGNORED.
 * Because it is an XOR, a second call restores the original flags.
 */
279 static void lock_swapdevices(void) /* This is called after saving image so modification
280 will be lost after resume... and that's what we want. */
285 for(i = 0; i< MAX_SWAPFILES; i++)
286 if(swapfile_used[i] == SWAPFILE_IGNORED) {
287 swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
288 lock_swapdevices can unlock the devices. */
294 * write_suspend_image - Write entire image to disk.
296 * After writing suspend signature to the disk, suspend may no
297 * longer fail: we have ready-to-run image in swap, and rollback
298 * would happen on next reboot -- corrupting data.
300 * Note: The buffer we allocate to use to write the suspend header is
301 * not freed; it's not needed since the system is going down anyway
302 * (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
/*
 * write_suspend_image - write data pages, pagedir and header to swap.
 *
 * Three stages, each taking a slot from get_swap_page() and panicking
 * if none is left or the slot is not on the SWAPFILE_SUSPEND device:
 *   1. every page referenced by pagedir_nosave[i].address is written
 *      and its slot stored in pagedir_nosave[i].swap_address;
 *   2. each page of the pagedir itself is written with link.next = prev;
 *   3. a header page filled by fill_suspend_header() is written, again
 *      with link.next = prev.
 * Finally mark_swapfiles() stores prev in the swap signature page.
 *
 * NOTE(review): the statements that advance prev, any check of the
 * buffer allocation, and the return value are not visible in this
 * listing.
 */
304 static int write_suspend_image(void)
307 swp_entry_t entry, prev = { 0 };
308 int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
309 union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
310 unsigned long address;
/* Stage 1: the copied data pages. */
316 printk( "Writing data to swap (%d pages): ", nr_copy_pages );
317 for (i=0; i<nr_copy_pages; i++) {
320 if (!(entry = get_swap_page()).val)
321 panic("\nNot enough swapspace when writing data" );
323 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
324 panic("\nPage %d: not enough swapspace on suspend device", i );
326 address = (pagedir_nosave+i)->address;
327 page = virt_to_page(address);
328 rw_swap_page_sync(WRITE, entry, page);
329 (pagedir_nosave+i)->swap_address = entry;
/* Stage 2: the pagedir pages. */
332 printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
333 for (i=0; i<nr_pgdir_pages; i++) {
/*
 * The cast applies before "+i", so the pointer advances by
 * sizeof(union diskpage) per step; the BUG_ON below checks that this
 * equals exactly one page.
 */
334 cur = (union diskpage *)((char *) pagedir_nosave)+i;
335 BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
337 if (!(entry = get_swap_page()).val) {
338 printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
339 panic("Don't know how to recover");
/* NOTE(review): this follows panic(); presumably never reached. */
340 free_page((unsigned long) buffer);
344 if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
345 panic("\nNot enough swapspace for pagedir on suspend device" );
347 BUG_ON (sizeof(swp_entry_t) != sizeof(long));
348 BUG_ON (PAGE_SIZE % sizeof(struct pbe));
350 cur->link.next = prev;
351 page = virt_to_page((unsigned long)cur);
352 rw_swap_page_sync(WRITE, entry, page);
/* Stage 3: the header page; first verify the on-disk page layouts. */
356 BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
357 BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
358 BUG_ON (sizeof(struct link) != PAGE_SIZE);
359 if (!(entry = get_swap_page()).val)
360 panic( "\nNot enough swapspace when writing header" );
361 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
362 panic("\nNot enough swapspace for header on suspend device" );
364 cur = (void *) buffer;
365 if (fill_suspend_header(&cur->sh))
366 BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */
368 cur->link.next = prev;
370 page = virt_to_page((unsigned long)cur);
371 rw_swap_page_sync(WRITE, entry, page);
/* Record the chain head in the swap signature page. */
375 mark_swapfiles(prev, MARK_SWAP_SUSPEND);
382 #ifdef CONFIG_HIGHMEM
/*
 * One saved highmem page. Nodes are chained through ->next with
 * highmem_copy as the list head. Code in this file also uses ->page and
 * ->data, whose declarations are not visible in this listing.
 */
383 struct highmem_page {
386 struct highmem_page *next;
389 struct highmem_page *highmem_copy = NULL;
/*
 * save_highmem_zone - save a copy of the pages of one highmem zone.
 *
 * Walks every pfn spanned by the zone. Reserved pages are reported with
 * a printk. When is_head_of_free_region() returns a chunk size, both
 * counters are advanced by chunk_size - 1, presumably so that the free
 * chunk is stepped over together with the loop increment. For other
 * pages a struct highmem_page node and a zeroed page are allocated with
 * GFP_ATOMIC, and the page contents are copied through a temporary
 * kmap_atomic() mapping.
 *
 * NOTE(review): allocation-failure handling, the list insertion, the
 * assignment of save->page and the return value are not visible in this
 * listing.
 */
391 static int save_highmem_zone(struct zone *zone)
393 unsigned long zone_pfn;
394 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
396 struct highmem_page *save;
398 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
405 page = pfn_to_page(pfn);
407 * This condition results from rvmalloc() sans vmalloc_32()
408 * and architectural memory reservations. This should be
409 * corrected eventually when the cases giving rise to this
410 * are better understood.
412 if (PageReserved(page)) {
413 printk("highmem reserved page?!\n");
416 if ((chunk_size = is_head_of_free_region(page))) {
417 pfn += chunk_size - 1;
418 zone_pfn += chunk_size - 1;
421 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
424 save->next = highmem_copy;
426 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
431 kaddr = kmap_atomic(page, KM_USER0);
432 memcpy(save->data, kaddr, PAGE_SIZE);
433 kunmap_atomic(kaddr, KM_USER0);
/*
 * save_highmem - run save_highmem_zone() on every highmem zone.
 *
 * NOTE(review): how res is checked and returned is not visible in this
 * listing; suspend_prepare_image() treats a non-zero result as failure.
 */
439 static int save_highmem(void)
443 for_each_zone(zone) {
444 if (is_highmem(zone))
445 res = save_highmem_zone(zone);
/*
 * restore_highmem - copy saved highmem pages back and drain the list.
 *
 * Pops nodes off highmem_copy until it is empty, copying each node's
 * data page back into save->page through a temporary kmap_atomic()
 * mapping and then freeing the data page.
 *
 * NOTE(review): freeing of the node itself and the return value are not
 * visible in this listing.
 */
452 static int restore_highmem(void)
454 while (highmem_copy) {
455 struct highmem_page *save = highmem_copy;
457 highmem_copy = save->next;
459 kaddr = kmap_atomic(save->page, KM_USER0);
460 memcpy(kaddr, save->data, PAGE_SIZE);
461 kunmap_atomic(kaddr, KM_USER0);
462 free_page((long) save->data);
/*
 * pfn_is_nosave - is @pfn inside the __nosave section?
 *
 * The section end is rounded up with PAGE_ALIGN(), so a partially
 * covered last page counts as nosave. The tested range is half-open:
 * [nosave_begin_pfn, nosave_end_pfn).
 */
469 static int pfn_is_nosave(unsigned long pfn)
471 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
472 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
473 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
476 /* if *pagedir_p != NULL it also copies the counted pages */
/*
 * count_and_copy_zone - count (and optionally copy) the pages of one
 * zone that belong in the image.
 *
 * @zone:      zone to scan.
 * @pagedir_p: pointer to the pagedir cursor; the initial *pagedir_p is
 *             loaded into the local pbe.
 *
 * Returns the local nr_copy_pages counter.
 *
 * NOTE(review): the counter increment, the NULL test on pbe, the
 * statements skipped pages take and any write-back to *pagedir_p are
 * not visible in this listing.
 */
477 static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p)
/* This local nr_copy_pages shadows the file-scope variable of that name. */
479 unsigned long zone_pfn, chunk_size, nr_copy_pages = 0;
480 struct pbe *pbe = *pagedir_p;
481 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
483 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
489 page = pfn_to_page(pfn);
/* A page must never be both reserved and marked Nosave. */
490 BUG_ON(PageReserved(page) && PageNosave(page));
491 if (PageNosave(page))
493 if (PageReserved(page) && pfn_is_nosave(pfn)) {
494 PRINTK("[nosave pfn 0x%lx]", pfn);
497 if ((chunk_size = is_head_of_free_region(page))) {
498 pfn += chunk_size - 1;
499 zone_pfn += chunk_size - 1;
505 pbe->orig_address = (long) page_address(page);
506 /* Copy page is dangerous: it likes to mess with
507 preempt count on specific cpus. Wrong preempt count is then copied,
509 copy_page((void *)pbe->address, (void *)pbe->orig_address);
513 return nr_copy_pages;
/*
 * count_and_copy_data_pages - sum count_and_copy_zone() over all
 * non-highmem zones.
 *
 * @pagedir_p: pagedir to fill, or NULL as in the first call from
 *             suspend_prepare_image(); presumably NULL means "count
 *             only".
 *
 * The address of the by-value parameter is handed to the callee, so any
 * cursor update made there stays local to this function.
 */
516 static int count_and_copy_data_pages(struct pbe *pagedir_p)
518 int nr_copy_pages = 0;
520 for_each_zone(zone) {
521 if (!is_highmem(zone))
522 nr_copy_pages += count_and_copy_zone(zone, &pagedir_p);
524 return nr_copy_pages;
/*
 * free_suspend_pagedir_zone - clear Nosave marks in one zone.
 *
 * @pagedir: kernel virtual address of the pagedir; together with
 *           pagedir_order it yields the pfn range
 *           [pagedir_pfn, pagedir_end_pfn).
 *
 * Every page of the zone goes through TestClearPageNosave(). Pages that
 * were marked are further split by whether they lie inside the pagedir
 * range.
 *
 * NOTE(review): the statements taken on each branch are not visible in
 * this listing; presumably marked pages outside the pagedir are freed
 * here while the pagedir block is left to free_suspend_pagedir().
 */
527 static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
529 unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
530 pagedir_end = pagedir + (PAGE_SIZE << pagedir_order);
531 pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT;
532 pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT;
533 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
535 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
538 page = pfn_to_page(pfn);
539 if (!TestClearPageNosave(page))
541 else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn)
/*
 * free_suspend_pagedir - release a pagedir and the pages it marked.
 *
 * Runs free_suspend_pagedir_zone() on each non-highmem zone, then frees
 * the pagedir block itself with free_pages() at pagedir_order.
 */
547 static void free_suspend_pagedir(unsigned long this_pagedir)
550 for_each_zone(zone) {
551 if (!is_highmem(zone))
552 free_suspend_pagedir_zone(zone, this_pagedir);
554 free_pages(this_pagedir, pagedir_order);
/*
 * create_suspend_pagedir - allocate a pagedir for @nr_copy_pages pages.
 *
 * Sets the global pagedir_order, allocates one block of that order and
 * marks each of its pages Nosave. Then, for every entry, a zeroed page
 * is allocated into p->address and marked Nosave as well.
 * free_suspend_pagedir() is called on the error path.
 *
 * NOTE(review): the NULL checks, the advance of p and the return
 * statements are not visible in this listing; suspend_prepare_image()
 * treats a NULL result as allocation failure.
 */
557 static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
560 suspend_pagedir_t *pagedir;
564 pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
566 p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
570 page = virt_to_page(pagedir);
/* "1<<pagedir_order" groups as (1 << pagedir_order): '<<' binds tighter than '<'. */
571 for(i=0; i < 1<<pagedir_order; i++)
572 SetPageNosave(page++);
574 while(nr_copy_pages--) {
575 p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
577 free_suspend_pagedir((unsigned long) pagedir);
580 SetPageNosave(virt_to_page(p->address));
/*
 * prepare_suspend_processes - sync filesystems, then freeze processes.
 *
 * A non-zero freeze_processes() result is logged as a failure.
 * software_suspend() proceeds only when this function returns 0.
 * NOTE(review): the return statements are not visible in this listing.
 */
587 static int prepare_suspend_processes(void)
589 sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */
590 if (freeze_processes()) {
591 printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
599 * Try to free as much memory as possible, but do not OOM-kill anyone
601 * Notice: all userland should be stopped at this point, or livelock is possible.
/*
 * free_some_memory - call shrink_all_memory(10000) repeatedly until it
 * returns 0.
 * NOTE(review): the loop body is not visible in this listing.
 */
603 static void free_some_memory(void)
605 printk("Freeing memory: ");
606 while (shrink_all_memory(10000))
/*
 * suspend_prepare_image - build the in-memory copy of the system.
 *
 * Steps visible here:
 *   - save highmem (CONFIG_HIGHMEM only);
 *   - count the pages to copy (first pass, NULL pagedir);
 *   - require nr_copy_pages + PAGES_FOR_IO free pages and at least as
 *     much free swap;
 *   - allocate the pagedir and remember nr_copy_pages / pagedir_order in
 *     the *_check variables, which do_magic_resume_2() verifies;
 *   - drain local pages and run the second pass, which copies; its
 *     count must equal the first.
 *
 * NOTE(review): the return statements and preprocessor closers are not
 * visible in this listing; do_magic_suspend_2() stores the result in
 * is_problem.
 */
611 static int suspend_prepare_image(void)
614 unsigned int nr_needed_pages = 0;
616 pagedir_nosave = NULL;
617 printk( "/critical section: ");
618 #ifdef CONFIG_HIGHMEM
619 printk( "handling highmem" );
620 if (save_highmem()) {
621 printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
627 printk("counting pages to copy" );
/* First pass: NULL pagedir, so only the count is wanted. */
629 nr_copy_pages = count_and_copy_data_pages(NULL);
630 nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
632 printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
633 if(nr_free_pages() < nr_needed_pages) {
634 printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
635 name_suspend, nr_needed_pages-nr_free_pages());
639 si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information.
640 We should only consider resume_device. */
641 if (i.freeswap < nr_needed_pages) {
642 printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
643 name_suspend, nr_needed_pages-i.freeswap);
647 PRINTK( "Alloc pagedir\n" );
648 pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
649 if (!pagedir_nosave) {
650 /* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
651 printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
/* Remembered so that do_magic_resume_2() can verify them after resume. */
654 nr_copy_pages_check = nr_copy_pages;
655 pagedir_order_check = pagedir_order;
657 drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
658 if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
662 * End of critical section. From now on, we can write to memory,
663 * but we should not touch disk. This specially means we must _not_
664 * touch swap space! Except we must write out our image of course.
667 printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
/*
 * suspend_save_image - write the image out, then toggle the ignored
 * swap devices again.
 *
 * NOTE(review): the int result of write_suspend_image() is not used on
 * the line visible here; statements before it are not visible in this
 * listing.
 */
671 static void suspend_save_image(void)
676 write_suspend_image();
677 lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
679 /* It is important _NOT_ to umount filesystems at this point. We want
680 * them synced (in case something goes wrong) but we DO not want to mark
681 * filesystem clean: it is not. (And it does not matter, if we resume
682 * correctly, we'll mark system clean, anyway.)
/*
 * suspend_power_down - final step after the image has been written.
 *
 * machine_restart() is chosen when exactly one of TEST_SWSUSP and the
 * KG_CTRL bit of shift_state is set -- that is what the XOR tests. The
 * last printk reports that the machine is probably not capable of
 * powering down.
 *
 * NOTE(review): the power-off/halt calls between these lines are not
 * visible in this listing.
 */
686 static void suspend_power_down(void)
690 printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
692 PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
694 if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
695 machine_restart(NULL);
703 printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
/*
 * do_magic_resume_1 - first C stage of resume.
 *
 * Takes suspend_pagedir_lock with interrupts disabled, powers devices
 * down and then delays for 1000 ms so that in-flight DMA can finish.
 * The lock is released in do_magic_resume_2().
 */
713 asmlinkage void do_magic_resume_1(void)
717 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
719 device_power_down(4);
720 PRINTK( "Waiting for DMAs to settle down...\n");
721 mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
722 Do it with disabled interrupts for best effect. That way, if some
723 driver scheduled DMA, we have good chance for DMA to finish ;-). */
/*
 * do_magic_resume_2 - second C stage of resume.
 *
 * Verifies that nr_copy_pages and pagedir_order match the values saved
 * by suspend_prepare_image(), flushes global TLB entries, frees the
 * pagedir allocated at suspend time (pagedir_save), drops
 * suspend_pagedir_lock and restores the normal swap signature. With
 * SUSPEND_CONSOLE defined the screen is redrawn under the console
 * semaphore.
 *
 * NOTE(review): the highmem restore call, the device power-up/resume
 * calls and the preprocessor closers are not visible in this listing.
 */
726 asmlinkage void do_magic_resume_2(void)
728 BUG_ON (nr_copy_pages_check != nr_copy_pages);
729 BUG_ON (pagedir_order_check != pagedir_order);
731 __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
733 PRINTK( "Freeing prev allocated pagedir\n" );
734 free_suspend_pagedir((unsigned long) pagedir_save);
736 #ifdef CONFIG_HIGHMEM
737 printk( "Restoring highmem\n" );
740 printk("done, devices\n");
/* Pairs with the spin_lock_irq() in do_magic_resume_1(). */
743 spin_unlock_irq(&suspend_pagedir_lock);
746 /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */
747 PRINTK( "Fixing swap signatures... " );
748 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
751 #ifdef SUSPEND_CONSOLE
752 acquire_console_sem();
753 update_screen(fg_console);
754 release_console_sem();
758 /* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
761 do_magic_suspend_1();
762 save_processor_state();
764 do_magic_suspend_2();
767 GO_TO_SWAPPER_PAGE_TABLES
771 restore_processor_state();
/*
 * do_magic_suspend_1 - take suspend_pagedir_lock with interrupts
 * disabled; do_magic_suspend_2() releases it.
 * NOTE(review): other statements of the body are not visible in this
 * listing.
 */
776 asmlinkage void do_magic_suspend_1(void)
781 spin_lock_irq(&suspend_pagedir_lock);
/*
 * do_magic_suspend_2 - snapshot memory, then save it or recover.
 *
 * Powers devices down, builds the image with suspend_prepare_image()
 * and releases suspend_pagedir_lock. The visible lines then show two
 * paths: writing the image and powering down, or a recovery path that
 * logs the failure, frees the pagedir under the lock and restores the
 * swap signature.
 *
 * NOTE(review): the test of is_problem that separates the two paths,
 * and the device power-up/resume calls, are not visible in this
 * listing.
 */
784 asmlinkage void do_magic_suspend_2(void)
788 device_power_down(4);
789 is_problem = suspend_prepare_image();
/* Pairs with the spin_lock_irq() in do_magic_suspend_1(). */
791 spin_unlock_irq(&suspend_pagedir_lock);
793 kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
795 suspend_save_image();
796 suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
799 printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
800 MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
804 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
807 free_pages((unsigned long) pagedir_nosave, pagedir_order);
808 spin_unlock_irq(&suspend_pagedir_lock);
811 PRINTK( "Fixing swap signatures... " );
812 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
817 * This is main interface to the outside world. It needs to be
818 * called from process context.
/*
 * software_suspend - entry point for suspending the machine.
 *
 * Bails out when software_suspend_enabled is clear; otherwise the flag
 * is cleared for the duration and set again near the end. Sequence
 * visible here: arch_prepare_suspend(), pm_prepare_console() (failure
 * is only logged), prepare_suspend_processes(), device_suspend(4), and
 * finally pm_restore_console().
 *
 * NOTE(review): the early-return values, the calls made once devices
 * are suspended and the final return are not visible in this listing.
 */
820 int software_suspend(void)
823 if (!software_suspend_enabled)
826 software_suspend_enabled = 0;
829 if (arch_prepare_suspend()) {
830 printk("%sArchitecture failed to prepare\n", name_suspend);
833 if (pm_prepare_console())
834 printk( "%sCan't allocate a console... proceeding\n", name_suspend);
835 if (!prepare_suspend_processes()) {
837 /* At this point, all user processes and "dangerous"
838 kernel threads are stopped. Free some memory, as we
839 need half of memory free. */
843 /* Save state of all device drivers, and stop them. */
844 if ((res = device_suspend(4))==0)
845 /* If stopping device drivers worked, we proceed basically into
846 * suspend_save_image.
848 * do_magic(0) returns after system is resumed.
850 * do_magic() copies all "used" memory to "free" memory, then
851 * unsuspends all device drivers, and writes memory to disk
852 * using normal kernel mechanism.
858 software_suspend_enabled = 1;
860 pm_restore_console();
864 /* More restore stuff */
866 #define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
869 * Returns true if given address/order collides with any orig_address
/*
 * does_collide_order - does [addr, addr + (PAGE_SIZE << order)) contain
 * the orig_address of any of the first nr_copy_pages pagedir entries?
 *
 * NOTE(review): the rest of the parameter list (order) and the return
 * statements are not visible in this listing.
 */
871 static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
875 unsigned long addre = addr + (PAGE_SIZE<<order);
877 for(i=0; i < nr_copy_pages; i++)
878 if((pagedir+i)->orig_address >= addr &&
879 (pagedir+i)->orig_address < addre)
886 * We check here that pagedir & pages it points to won't collide with pages
887 * where we're going to restore from the loaded pages later
/*
 * check_pagedir - give each pagedir entry a non-colliding target page.
 *
 * For every entry a zeroed page is allocated repeatedly until
 * does_collide() reports no collision, and its address is stored in
 * pagedir_nosave[i].address.
 *
 * NOTE(review): the allocation-failure check and the return values are
 * not visible; rejected pages do not appear to be freed in the lines
 * shown.
 */
889 static int check_pagedir(void)
893 for(i=0; i < nr_copy_pages; i++) {
897 addr = get_zeroed_page(GFP_ATOMIC);
900 } while (does_collide(addr));
902 (pagedir_nosave+i)->address = addr;
/*
 * relocate_pagedir - move the pagedir if it overlaps pages that will be
 * restored.
 *
 * If the current pagedir block does not collide with any orig_address,
 * nothing needs to be done. Otherwise blocks of pagedir_order are
 * allocated until one does not collide; the pagedir is copied there
 * with memcpy(), and blocks are released with free_pages().
 *
 * NOTE(review): the bookkeeping that chains rejected blocks through
 * eaten_memory/c/f, the update of pagedir_nosave and the return values
 * are not visible in this listing.
 */
907 static int relocate_pagedir(void)
910 * We have to avoid recursion (not to overflow kernel stack),
911 * and that's why code looks pretty cryptic
913 suspend_pagedir_t *old_pagedir = pagedir_nosave;
914 void **eaten_memory = NULL;
915 void **c = eaten_memory, *m, *f;
918 printk("Relocating pagedir ");
920 if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
921 printk("not necessary\n");
925 while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
926 if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
935 printk("out of memory\n");
939 memcpy(m, old_pagedir, PAGE_SIZE << pagedir_order);
947 free_pages((unsigned long)f, pagedir_order);
954 * Sanity-check whether this image makes sense with this kernel/swap context.
955 * I really don't think that it's foolproof, but it is better than nothing.
/*
 * sanity_check_failed - log @reason prefixed with name_resume.
 * NOTE(review): the return statement is not visible in this listing;
 * sanity_check() returns this function's result on every mismatch.
 */
958 static int sanity_check_failed(char *reason)
960 printk(KERN_ERR "%s%s\n", name_resume, reason);
/*
 * sanity_check - compare a suspend header with the running kernel.
 *
 * Checks, in order: kernel version code, physical page count, machine
 * string (8 bytes), version string (20 bytes), online CPU count and
 * page size. The first mismatch is reported through
 * sanity_check_failed().
 * NOTE(review): the success return is not visible in this listing.
 */
964 static int sanity_check(struct suspend_header *sh)
966 if (sh->version_code != LINUX_VERSION_CODE)
967 return sanity_check_failed("Incorrect kernel version");
968 if (sh->num_physpages != num_physpages)
969 return sanity_check_failed("Incorrect memory size");
/* Same fixed widths as the strncpy() calls in fill_suspend_header(). */
970 if (strncmp(sh->machine, system_utsname.machine, 8))
971 return sanity_check_failed("Incorrect machine type");
972 if (strncmp(sh->version, system_utsname.version, 20))
973 return sanity_check_failed("Incorrect version");
974 if (sh->num_cpus != num_online_cpus())
975 return sanity_check_failed("Incorrect number of cpus");
976 if (sh->page_size != PAGE_SIZE)
977 return sanity_check_failed("Incorrect PAGE_SIZE");
/*
 * bdev_read_page - read one page from @bdev at byte offset @pos into
 * @buf.
 *
 * @pos must be page aligned (BUG_ON otherwise). The block is fetched
 * with __bread() using PAGE_SIZE as the block size and copied out.
 * __read_suspend_image() treats a non-zero return as an I/O error.
 * NOTE(review): the error return and the release of the buffer head
 * are not visible in this listing.
 */
981 static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
983 struct buffer_head *bh;
984 BUG_ON (pos%PAGE_SIZE);
985 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
986 if (!bh || (!bh->b_data)) {
989 memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
990 BUG_ON(!buffer_uptodate(bh));
/*
 * bdev_write_page - write one page to @bdev at byte offset @pos.
 *
 * NOTE(review): this listing shows both a read-modify-write sequence
 * ending in an "unsuccessful" warning and an unconditional-looking
 * "unimplemented" warning. Any conditional-compilation lines that
 * decide which part is built are not visible, so the live behaviour
 * cannot be determined from here.
 */
995 static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
998 struct buffer_head *bh;
999 BUG_ON (pos%PAGE_SIZE);
1000 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1001 if (!bh || (!bh->b_data)) {
1004 memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
1005 BUG_ON(!buffer_uptodate(bh));
1006 generic_make_request(WRITE, bh);
1007 if (!buffer_uptodate(bh))
1008 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1013 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
1017 extern dev_t __init name_to_dev_t(const char *line);
/*
 * __read_suspend_image - load a suspend image from @bdev.
 *
 * @cur:      scratch page used for the signature page and the header.
 * @noresume: see the NOTE below.
 *
 * Reads page 0; a regular swap signature means there is no image. An
 * "S1"/"S2" signature is turned back into the normal swap signature and
 * page 0 is rewritten through bdev_write_page(). Then the header is
 * read and validated by sanity_check(), the pagedir is allocated and
 * read back page by page in reverse order, relocated and checked, and
 * finally every data page is read from its recorded swap_address into
 * its pagedir address.
 *
 * I/O failures return -EIO. NOTE(review): the other return values, the
 * noresume handling and the PREPARENEXT uses inside the loops are not
 * visible in this listing.
 */
1019 static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1022 int i, nr_pgdir_pages;
/*
 * PREPARENEXT: take link.next of the current page and convert its swap
 * offset into a byte position on the device.
 */
1024 #define PREPARENEXT \
1025 { next = cur->link.next; \
1026 next.val = swp_offset(next) * PAGE_SIZE; \
1029 if (bdev_read_page(bdev, 0, cur)) return -EIO;
1031 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1032 (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1033 printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1037 PREPARENEXT; /* We have to read next position before we overwrite it */
1039 if (!memcmp("S1",cur->swh.magic.magic,2))
1040 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1041 else if (!memcmp("S2",cur->swh.magic.magic,2))
1042 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
1046 panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
1047 name_resume, cur->swh.magic.magic);
1050 /* We don't do a sanity check here: we want to restore the swap
1051 whatever version of kernel made the suspend image;
1052 We need to write swap, but swap is *not* enabled so
1053 we must write the device directly */
1054 printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
1055 bdev_write_page(bdev, 0, cur);
1058 printk( "%sSignature found, resuming\n", name_resume );
1061 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1062 if (sanity_check(&cur->sh)) /* Is this same machine? */
/* Take the image geometry from the validated header. */
1066 pagedir_save = cur->sh.suspend_pagedir;
1067 nr_copy_pages = cur->sh.num_pbes;
1068 nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1069 pagedir_order = get_bitmask_order(nr_pgdir_pages);
1071 pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1072 if (!pagedir_nosave)
1075 PRINTK( "%sReading pagedir, ", name_resume );
1077 /* We get pages in reverse order of saving! */
1078 for (i=nr_pgdir_pages-1; i>=0; i--) {
/* Cast binds before "+i": cur steps through the pagedir one page at a time. */
1080 cur = (union diskpage *)((char *) pagedir_nosave)+i;
1081 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1086 if (relocate_pagedir())
1088 if (check_pagedir())
1091 printk( "Reading image data (%d pages): ", nr_copy_pages );
1092 for(i=0; i < nr_copy_pages; i++) {
1093 swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1096 /* You do not need to check for overlaps...
1097 ... check_pagedir already did this work */
1098 if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
/*
 * read_suspend_image - open the resume device and load the image.
 *
 * @specialfile: device name, converted with name_to_dev_t().
 * @noresume:    passed through to __read_suspend_image().
 *
 * Allocates a scratch page, opens the device read-only, sets its block
 * size to PAGE_SIZE and calls __read_suspend_image(). The else arm sets
 * error to -ENOMEM, presumably when the scratch page could not be
 * allocated. The scratch page is freed and a message matching the error
 * is printed.
 *
 * NOTE(review): the condition guarding the device block, the check on
 * bdev before PTR_ERR(), the closing of the device, the lines selecting
 * the message and the return are not visible in this listing.
 */
1105 static int __init read_suspend_image(const char * specialfile, int noresume)
1107 union diskpage *cur;
1108 unsigned long scratch_page = 0;
1110 char b[BDEVNAME_SIZE];
1112 resume_device = name_to_dev_t(specialfile);
1113 scratch_page = get_zeroed_page(GFP_ATOMIC);
1114 cur = (void *) scratch_page;
1116 struct block_device *bdev;
1117 printk("Resuming from device %s\n",
1118 __bdevname(resume_device, b));
1119 bdev = open_by_devnum(resume_device, FMODE_READ);
1121 error = PTR_ERR(bdev);
1123 set_blocksize(bdev, PAGE_SIZE);
1124 error = __read_suspend_image(bdev, cur, noresume);
1127 } else error = -ENOMEM;
1130 free_page(scratch_page);
1133 PRINTK("Reading resume file was successful\n");
1138 printk( "%sI/O error\n", name_resume);
1141 printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1144 printk( "%sNot enough memory\n", name_resume);
1147 printk( "%sError %d resuming\n", name_resume, error );
1154 * software_resume - Resume from a saved image.
1156 * Called as a late_initcall (so all devices are discovered and
1157 * initialized), we call swsusp to see if we have a saved image or not.
1158 * If so, we quiesce devices, then restore the saved image. We will
1159 * return above (in pm_suspend_disk() ) if everything goes well.
1160 * Otherwise, we fail gracefully and return to the normally
1161 * scheduled program.
/*
 * software_resume - late_initcall that tries to resume from an image.
 *
 * With more than one CPU online a warning is printed. Otherwise suspend
 * is enabled. With NORESUME, read_suspend_image() is still called with
 * noresume = 1 and "disabled" is printed. Otherwise the console is
 * prepared and, unless a resume= option left the device name empty, the
 * image is read from resume_file.
 *
 * NOTE(review): the returns, any gotos/labels and the call that jumps
 * into the restored image before panic("This never returns") are not
 * visible in this listing.
 */
1164 static int __init software_resume(void)
1166 if (num_online_cpus() > 1) {
1167 printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
1170 /* We enable the possibility of machine suspend */
1171 software_suspend_enabled = 1;
1175 printk( "%s", name_resume );
1176 if (resume_status == NORESUME) {
1178 read_suspend_image(resume_file, 1);
1179 printk( "disabled\n" );
1184 if (pm_prepare_console())
1185 printk("swsusp: Can't allocate a console... proceeding\n");
1187 if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1188 printk( "suspension device unspecified\n" );
1192 printk( "resuming from %s\n", resume_file);
1193 if (read_suspend_image(resume_file, 0))
1197 panic("This never returns");
1200 pm_restore_console();
/*
 * resume_setup - handler for the "resume=" boot parameter.
 *
 * Takes a different path when resume_status is already NORESUME (the
 * statement itself is not visible in this listing). Otherwise copies at
 * most 255 bytes into the 256-byte resume_file, so the last byte keeps
 * its static-initialisation NUL, and sets resume_status to
 * RESUME_SPECIFIED.
 */
1206 static int __init resume_setup(char *str)
1208 if (resume_status == NORESUME)
1211 strncpy( resume_file, str, 255 );
1212 resume_status = RESUME_SPECIFIED;
/*
 * noresume_setup - handler for the "noresume" boot parameter; sets
 * resume_status to NORESUME.
 */
1217 static int __init noresume_setup(char *str)
1219 resume_status = NORESUME;
1223 __setup("noresume", noresume_setup);
1224 __setup("resume=", resume_setup);
1226 EXPORT_SYMBOL(software_suspend);
1227 EXPORT_SYMBOL(software_suspend_enabled);