2 * linux/kernel/power/swsusp.c
4 * This file implements an architecture-independent
5 * machine suspend feature using almost exclusively high-level routines
7 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
8 * Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
10 * This file is released under the GPLv2.
12 * I'd like to thank the following people for their work:
14 * Pavel Machek <pavel@ucw.cz>:
15 * Modifications, defectiveness pointing, being with me at the very beginning,
16 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
18 * Steve Doddi <dirk@loth.demon.co.uk>:
19 * Support the possibility of hardware state restoring.
21 * Raph <grey.havens@earthling.net>:
22 * Support for preserving states of network devices and virtual console
23 * (including X and svgatextmode)
25 * Kurt Garloff <garloff@suse.de>:
26 * Straightened the critical function in order to prevent compilers from
27 * playing tricks with local variables.
29 * Andreas Mohr <a.mohr@mailto.de>
31 * Alex Badea <vampire@go.ro>:
34 * More state savers are welcome. Especially for the scsi layer...
36 * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39 #include <linux/module.h>
41 #include <linux/suspend.h>
42 #include <linux/smp_lock.h>
43 #include <linux/file.h>
44 #include <linux/utsname.h>
45 #include <linux/version.h>
46 #include <linux/delay.h>
47 #include <linux/reboot.h>
48 #include <linux/bitops.h>
49 #include <linux/vt_kern.h>
50 #include <linux/kbd_kern.h>
51 #include <linux/keyboard.h>
52 #include <linux/spinlock.h>
53 #include <linux/genhd.h>
54 #include <linux/kernel.h>
55 #include <linux/major.h>
56 #include <linux/swap.h>
58 #include <linux/device.h>
59 #include <linux/buffer_head.h>
60 #include <linux/swapops.h>
61 #include <linux/bootmem.h>
62 #include <linux/syscalls.h>
63 #include <linux/console.h>
64 #include <linux/highmem.h>
66 #include <asm/uaccess.h>
67 #include <asm/mmu_context.h>
68 #include <asm/pgtable.h>
/*
 * Gate for software_suspend(): tested and cleared there when a suspend
 * starts, set back to 1 afterwards, and first set to 1 by software_resume().
 */
73 unsigned char software_suspend_enabled = 0;
/* resume_status value stored by resume_setup() once a resume= option was seen. */
76 #define RESUME_SPECIFIED 2
78 /* References to section boundaries */
79 extern char __nosave_begin, __nosave_end;
/* Callers treat a non-zero return value as the number of pages in a free region to skip. */
81 extern int is_head_of_free_region(struct page *);
/*
 * Taken with spin_lock_irq() around the suspend/resume critical sections;
 * the call sites note that it is used to disable interrupts.
 */
84 spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED;
86 /* Variables to be preserved over suspend */
/*
 * Snapshots recorded by suspend_prepare_image() and verified with BUG_ON()
 * in do_magic_resume_2().
 */
87 static int pagedir_order_check;
88 static int nr_copy_pages_check;
/* Set to NORESUME or RESUME_SPECIFIED by the noresume / resume= option handlers. */
90 static int resume_status;
91 static char resume_file[256] = ""; /* For resume= kernel option */
/*
 * Device number obtained from the resume= name in read_suspend_image();
 * compared against swap devices by is_resume_device().
 */
92 static dev_t resume_device;
93 /* Local variables that should not be affected by save */
94 unsigned int nr_copy_pages __nosavedata = 0;
96 /* Suspend pagedir is allocated before final copy, therefore it
97 must be freed after resume
99 Warning: this is evil. There are actually two pagedirs at time of
100 resume. One is "pagedir_save", which is empty frame allocated at
101 time of suspend, that must be freed. Second is "pagedir_nosave",
102 allocated at time of resume, that travels through memory not to
103 collide with anything.
105 Warning: this is even more evil than it seems. Pagedirs this file
106 talks about are completely different from page directories used by
/*
 * pagedir_nosave is the pagedir used for copying and for image I/O.
 * pagedir_save remembers the suspend-time pagedir that do_magic_resume_2()
 * frees after resume.  pagedir_order is the allocation order of the pagedir;
 * it is computed in create_suspend_pagedir() and __read_suspend_image().
 */
109 suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
110 static suspend_pagedir_t *pagedir_save;
111 static int pagedir_order __nosavedata = 0;
114 char dummy[PAGE_SIZE - sizeof(swp_entry_t)];
119 union swap_header swh;
121 struct suspend_header sh;
125 * XXX: We try to keep some more pages free so that I/O operations succeed
126 * without paging. Might this be more?
/* Free pages required on top of the pages to copy; see suspend_prepare_image(). */
128 #define PAGES_FOR_IO 512
/* Prefixes used in the suspend-side and resume-side kernel messages. */
130 static const char name_suspend[] = "Suspend Machine: ";
131 static const char name_resume[] = "Resume Machine: ";
136 #define DEBUG_DEFAULT
139 #define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */
/*
 * PRINTK and MDELAY each have a real and a no-op definition.
 * NOTE(review): presumably selected by debug preprocessor conditionals
 * such as DEBUG_DEFAULT - confirm.
 */
142 # define PRINTK(f, a...) printk(f, ## a)
144 # define PRINTK(f, a...) do { } while(0)
148 #define MDELAY(a) mdelay(a)
150 #define MDELAY(a) do { } while(0)
/*
 * fill_suspend_header - populate the suspend header written to swap.
 * @sh: header to fill; it is zeroed first.
 *
 * Records the properties of the running kernel that sanity_check() compares
 * on resume (version code, physical page count, machine and version strings,
 * online CPU count, page size), plus the pagedir pointer and the number of
 * page backup entries.
 */
157 static __inline__ int fill_suspend_header(struct suspend_header *sh)
159 memset((char *)sh, 0, sizeof(*sh));
161 sh->version_code = LINUX_VERSION_CODE;
162 sh->num_physpages = num_physpages;
/*
 * Only the first 8 and 20 bytes are stored; sanity_check() compares the
 * same lengths with strncmp(), so a missing terminator is not relied upon.
 */
163 strncpy(sh->machine, system_utsname.machine, 8);
164 strncpy(sh->version, system_utsname.version, 20);
165 /* FIXME: Is this bogus? --RR */
166 sh->num_cpus = num_online_cpus();
167 sh->page_size = PAGE_SIZE;
168 sh->suspend_pagedir = pagedir_nosave;
/* suspend_prepare_image() sets both pointers to the same pagedir; anything else is a bug. */
169 BUG_ON (pagedir_save != pagedir_nosave);
170 sh->num_pbes = nr_copy_pages;
171 /* TODO: needed? mounted fs' last mounted date comparison
172 * [so they haven't been mounted since last suspend.
173 * Maybe it isn't.] [we'd need to do this for _all_ fs-es]
178 /* We memorize in swapfile_used what swap devices are used for suspension */
179 #define SWAPFILE_UNUSED 0
180 #define SWAPFILE_SUSPEND 1 /* This is the suspending device */
181 #define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */
/* Per-slot classification filled in by read_swapfiles() and indexed by swap type. */
183 static unsigned short swapfile_used[MAX_SWAPFILES];
/*
 * Swap type whose first page carries the suspend signature; the value
 * 0xFFFF is treated as "ignored" by mark_swapfiles().
 */
184 static unsigned short root_swap;
/* Modes for mark_swapfiles(): write the suspend signature, or restore the normal one. */
185 #define MARK_SWAP_SUSPEND 0
186 #define MARK_SWAP_RESUME 2
/*
 * mark_swapfiles - rewrite the signature in the first page of the suspend
 * swap device.
 * @prev: swap entry stored in link.next when marking for suspend.
 * @mode: MARK_SWAP_SUSPEND or MARK_SWAP_RESUME.
 *
 * The first page of swap type root_swap is read with rw_swap_page_sync(),
 * its magic is patched in memory, and the page is written back.  In resume
 * mode a magic starting with "S1" or "S2" is turned back into "SWAP-SPACE"
 * or "SWAPSPACE2"; otherwise the normal magic is replaced by "S1SUSP...."
 * or "S2SUSP....".  Allocation failure and an unrecognised magic in suspend
 * mode end in panic().
 */
188 static void mark_swapfiles(swp_entry_t prev, int mode)
194 if (root_swap == 0xFFFF) /* ignored */
197 page = alloc_page(GFP_ATOMIC);
199 panic("Out of memory in mark_swapfiles");
200 cur = page_address(page);
201 /* XXX: this is dirty hack to get first page of swap file */
202 entry = swp_entry(root_swap, 0);
203 rw_swap_page_sync(READ, entry, page);
205 if (mode == MARK_SWAP_RESUME) {
/* Only the first two bytes are compared, since the rest of the suspend magic is filler. */
206 if (!memcmp("S1",cur->swh.magic.magic,2))
207 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
208 else if (!memcmp("S2",cur->swh.magic.magic,2))
209 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
210 else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
211 name_resume, cur->swh.magic.magic);
/* Suspend mode: remember which flavour of swap signature was there ("S1" or "S2"). */
213 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)))
214 memcpy(cur->swh.magic.magic,"S1SUSP....",10);
215 else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10)))
216 memcpy(cur->swh.magic.magic,"S2SUSP....",10);
217 else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic);
218 cur->link.next = prev; /* prev is the first/last swap page of the resume area */
219 /* link.next lies *no more* in last 4/8 bytes of magic */
221 rw_swap_page_sync(WRITE, entry, page);
227 * Check whether the swap device is the specified resume
228 * device, irrespective of whether they are specified by
231 * (Thus, device inode aliasing is allowed. You can say /dev/hda4
232 * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
233 * and they'll be considered the same device. This is *necessary* for
234 * devfs, since the resume code can only recognize the form /dev/hda4,
235 * but the suspend code would see the long name.)
/*
 * is_resume_device - test whether a swap area is the configured resume device.
 * @swap_info: swap area to examine.
 *
 * Returns non-zero only if the inode behind the swap file is a block device
 * whose major/minor pair equals resume_device.
 */
237 static int is_resume_device(const struct swap_info_struct *swap_info)
239 struct file *file = swap_info->swap_file;
240 struct inode *inode = file->f_dentry->d_inode;
242 return S_ISBLK(inode->i_mode) &&
243 resume_device == MKDEV(imajor(inode), iminor(inode));
/*
 * read_swapfiles - classify every swap slot in swapfile_used[].
 *
 * Slots whose flags are zero become SWAPFILE_UNUSED.  A slot for which
 * is_resume_device() is true becomes SWAPFILE_SUSPEND and the remaining
 * ones SWAPFILE_IGNORED.  There is also a fallback path that warns that
 * resume= should be used and takes a slot as the suspend device while
 * root_swap is still 0xFFFF.
 * NOTE(review): presumably the fallback is taken when resume_file is empty
 * (len == 0) - confirm.
 */
246 static void read_swapfiles(void) /* This is called before saving image */
250 len=strlen(resume_file);
254 for(i=0; i<MAX_SWAPFILES; i++) {
255 if (swap_info[i].flags == 0) {
256 swapfile_used[i]=SWAPFILE_UNUSED;
259 printk(KERN_WARNING "resume= option should be used to set suspend device" );
260 if(root_swap == 0xFFFF) {
261 swapfile_used[i] = SWAPFILE_SUSPEND;
264 swapfile_used[i] = SWAPFILE_IGNORED;
266 /* we ignore all swap devices that are not the resume_file */
267 if (is_resume_device(&swap_info[i])) {
268 swapfile_used[i] = SWAPFILE_SUSPEND;
271 swapfile_used[i] = SWAPFILE_IGNORED;
/*
 * lock_swapdevices - toggle the usability of the ignored swap devices.
 *
 * XORs the low eight flag bits of every slot marked SWAPFILE_IGNORED.
 * XOR is its own inverse, so calling the function a second time restores
 * the original flags.
 */
279 static void lock_swapdevices(void) /* This is called after saving image so modification
280 will be lost after resume... and that's what we want. */
285 for(i = 0; i< MAX_SWAPFILES; i++)
286 if(swapfile_used[i] == SWAPFILE_IGNORED) {
287 swap_info[i].flags ^= 0xFF; /* we make the device unusable. A new call to
288 lock_swapdevices can unlock the devices. */
294 * write_suspend_image - Write entire image to disk.
296 * After writing suspend signature to the disk, suspend may no
297 * longer fail: we have ready-to-run image in swap, and rollback
298 * would happen on next reboot -- corrupting data.
300 * Note: The buffer we allocate to use to write the suspend header is
301 * not freed; it's not needed since the system is going down anyway
302 * (plus it causes an oops and I'm lazy^H^H^H^Htoo busy).
/*
 * Stages: (1) every data page referenced by the pagedir, (2) the pagedir
 * pages themselves, (3) one header page built by fill_suspend_header().
 * Each page goes to a fresh entry from get_swap_page(), which must lie on
 * the device marked SWAPFILE_SUSPEND; otherwise the function panics.
 * Pagedir pages and the header store a link.next value taken from prev,
 * and mark_swapfiles() finally records prev in the signature page.
 * NOTE(review): confirm that prev is advanced to the entry just written
 * after each pagedir/header page.
 */
304 static int write_suspend_image(void)
307 swp_entry_t entry, prev = { 0 };
308 int nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
309 union diskpage *cur, *buffer = (union diskpage *)get_zeroed_page(GFP_ATOMIC);
310 unsigned long address;
316 printk( "Writing data to swap (%d pages): ", nr_copy_pages );
317 for (i=0; i<nr_copy_pages; i++) {
320 if (!(entry = get_swap_page()).val)
321 panic("\nNot enough swapspace when writing data" );
323 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
324 panic("\nPage %d: not enough swapspace on suspend device", i );
326 address = (pagedir_nosave+i)->address;
327 page = virt_to_page(address);
328 rw_swap_page_sync(WRITE, entry, page);
/* Remember where the copy went so that resume can read it back. */
329 (pagedir_nosave+i)->swap_address = entry;
332 printk( "Writing pagedir (%d pages): ", nr_pgdir_pages);
333 for (i=0; i<nr_pgdir_pages; i++) {
/*
 * The cast binds tighter than '+', so i advances in units of
 * union diskpage; the BUG_ON below checks that this equals one page.
 */
334 cur = (union diskpage *)((char *) pagedir_nosave)+i;
335 BUG_ON ((char *) cur != (((char *) pagedir_nosave) + i*PAGE_SIZE));
337 if (!(entry = get_swap_page()).val) {
338 printk(KERN_CRIT "Not enough swapspace when writing pgdir\n" );
339 panic("Don't know how to recover");
/* NOTE(review): panic() does not return, so this free_page() appears unreachable. */
340 free_page((unsigned long) buffer);
344 if(swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
345 panic("\nNot enough swapspace for pagedir on suspend device" );
347 BUG_ON (sizeof(swp_entry_t) != sizeof(long));
348 BUG_ON (PAGE_SIZE % sizeof(struct pbe));
350 cur->link.next = prev;
351 page = virt_to_page((unsigned long)cur);
352 rw_swap_page_sync(WRITE, entry, page);
/* Layout assumptions for the header page, checked before it is built. */
356 BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t));
357 BUG_ON (sizeof(union diskpage) != PAGE_SIZE);
358 BUG_ON (sizeof(struct link) != PAGE_SIZE);
359 if (!(entry = get_swap_page()).val)
360 panic( "\nNot enough swapspace when writing header" );
361 if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND)
362 panic("\nNot enough swapspace for header on suspend device" );
/* The header is assembled in the zeroed scratch page allocated at the top. */
364 cur = (void *) buffer;
365 if (fill_suspend_header(&cur->sh))
366 BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */
368 cur->link.next = prev;
370 page = virt_to_page((unsigned long)cur);
371 rw_swap_page_sync(WRITE, entry, page);
/* Last step: flip the swap signature so that the next boot sees a suspend image. */
375 mark_swapfiles(prev, MARK_SWAP_SUSPEND);
382 #ifdef CONFIG_HIGHMEM
/*
 * Node of the singly linked list of saved highmem pages.  The functions
 * below use its ->page, ->data and ->next members.
 */
383 struct highmem_page {
386 struct highmem_page *next;
/*
 * Head of that list: save_highmem_zone() links new nodes in front of it
 * and restore_highmem() consumes the list.
 */
389 struct highmem_page *highmem_copy = NULL;
/*
 * save_highmem_zone - back up the pages of one highmem zone.
 * @zone: zone to walk, page frame by page frame.
 *
 * For each page that is saved, a struct highmem_page is allocated with
 * kmalloc(), linked in front of highmem_copy, and given a data page from
 * get_zeroed_page() into which the page contents are copied.  Reserved
 * pages trigger a message and free regions are stepped over.
 * NOTE(review): presumably reserved pages are skipped and allocation
 * failures return an error - confirm.
 */
391 static int save_highmem_zone(struct zone *zone)
393 unsigned long zone_pfn;
394 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
396 struct highmem_page *save;
398 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
405 page = pfn_to_page(pfn);
407 * This condition results from rvmalloc() sans vmalloc_32()
408 * and architectural memory reservations. This should be
409 * corrected eventually when the cases giving rise to this
410 * are better understood.
412 if (PageReserved(page)) {
413 printk("highmem reserved page?!\n");
/*
 * Head of a free region: move both counters to its last page, so the
 * loop increment continues right after the region.
 */
416 if ((chunk_size = is_head_of_free_region(page))) {
417 pfn += chunk_size - 1;
418 zone_pfn += chunk_size - 1;
421 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
424 save->next = highmem_copy;
426 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
/* Map the page temporarily in order to copy its contents. */
431 kaddr = kmap_atomic(page, KM_USER0);
432 memcpy(save->data, kaddr, PAGE_SIZE);
433 kunmap_atomic(kaddr, KM_USER0);
/*
 * save_highmem - run save_highmem_zone() on every highmem zone.
 *
 * The result of each call is stored in res.
 * NOTE(review): presumably a non-zero res stops the walk and is returned
 * - confirm.
 */
439 static int save_highmem(void)
443 for_each_zone(zone) {
444 if (is_highmem(zone))
445 res = save_highmem_zone(zone);
/*
 * restore_highmem - copy the saved highmem contents back.
 *
 * Pops nodes off highmem_copy until the list is empty.  For each node the
 * original page is mapped temporarily, the saved data is copied into it,
 * and the data page is released.
 */
452 static int restore_highmem(void)
454 while (highmem_copy) {
455 struct highmem_page *save = highmem_copy;
/* Unlink the node first; the list head then points at the remaining nodes. */
457 highmem_copy = save->next;
459 kaddr = kmap_atomic(save->page, KM_USER0);
460 memcpy(kaddr, save->data, PAGE_SIZE);
461 kunmap_atomic(kaddr, KM_USER0);
462 free_page((long) save->data);
/*
 * pfn_is_nosave - test whether a page frame belongs to the nosave section.
 * @pfn: page frame number to test.
 *
 * Returns non-zero if pfn lies between the frame containing __nosave_begin
 * (inclusive) and the page-aligned end of __nosave_end (exclusive).
 */
469 static int pfn_is_nosave(unsigned long pfn)
471 unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
/* Round the end up so that a partially used last page is still covered. */
472 unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
473 return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
476 /* if *pagedir_p != NULL it also copies the counted pages */
/*
 * count_and_copy_zone - count (and optionally copy) the saveable pages of
 * one zone.
 * @zone: zone to walk.
 * @pagedir_p: in/out cursor into the pagedir; *pagedir_p may be NULL.
 *
 * Returns the number of pages counted.  Note that the local nr_copy_pages
 * shadows the file-scope variable of the same name.
 * NOTE(review): presumably Nosave pages, reserved pages inside the nosave
 * section and free regions are excluded from the count - confirm.
 */
477 static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p)
479 unsigned long zone_pfn, chunk_size, nr_copy_pages = 0;
480 struct pbe *pbe = *pagedir_p;
481 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
483 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
489 page = pfn_to_page(pfn);
/* A page must never be both reserved and flagged Nosave. */
490 BUG_ON(PageReserved(page) && PageNosave(page));
491 if (PageNosave(page))
493 if (PageReserved(page) && pfn_is_nosave(pfn)) {
494 PRINTK("[nosave pfn 0x%lx]", pfn);
/* Head of a free region: step both counters to the region's last page. */
497 if ((chunk_size = is_head_of_free_region(page))) {
498 pfn += chunk_size - 1;
499 zone_pfn += chunk_size - 1;
/* Copy pass: record where the page lives and duplicate it into the backup page. */
505 pbe->orig_address = (long) page_address(page);
506 copy_page((void *)pbe->address, (void *)pbe->orig_address);
510 return nr_copy_pages;
/*
 * count_and_copy_data_pages - total the saveable pages of all non-highmem
 * zones.
 * @pagedir_p: pagedir to copy into, or NULL to count only.
 *
 * Returns the sum of count_and_copy_zone() over those zones.  The address
 * of pagedir_p is passed down.
 * NOTE(review): presumably so that each zone continues filling the pagedir
 * where the previous one stopped - confirm.
 */
513 static int count_and_copy_data_pages(struct pbe *pagedir_p)
515 int nr_copy_pages = 0;
517 for_each_zone(zone) {
518 if (!is_highmem(zone))
519 nr_copy_pages += count_and_copy_zone(zone, &pagedir_p);
521 return nr_copy_pages;
/*
 * free_suspend_pagedir_zone - clear the Nosave flag on the pages of a zone.
 * @zone: zone to walk.
 * @pagedir: kernel virtual address of the pagedir.
 *
 * Every page has its Nosave flag tested and cleared.  Pages that had the
 * flag set are further distinguished by whether their frame number falls
 * inside the pagedir allocation.
 * NOTE(review): presumably flagged pages outside the pagedir range are
 * freed here, while the pagedir itself is left to free_suspend_pagedir()
 * - confirm.
 */
524 static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir)
526 unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn;
/* The pagedir occupies 2^pagedir_order contiguous pages starting at pagedir. */
527 pagedir_end = pagedir + (PAGE_SIZE << pagedir_order);
528 pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT;
529 pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT;
530 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
532 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
535 page = pfn_to_page(pfn);
536 if (!TestClearPageNosave(page))
538 else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn)
/*
 * free_suspend_pagedir - release a suspend pagedir.
 * @this_pagedir: kernel virtual address of the pagedir.
 *
 * Runs free_suspend_pagedir_zone() over every non-highmem zone and then
 * frees the pagedir allocation itself with free_pages().
 */
544 static void free_suspend_pagedir(unsigned long this_pagedir)
547 for_each_zone(zone) {
548 if (!is_highmem(zone))
549 free_suspend_pagedir_zone(zone, this_pagedir);
551 free_pages(this_pagedir, pagedir_order);
/*
 * create_suspend_pagedir - allocate the pagedir and one backup page per entry.
 * @nr_copy_pages: number of entries needed (shadows the file-scope variable).
 *
 * Sets pagedir_order from the number of pagedir pages, allocates the
 * pagedir as one contiguous block and flags its pages Nosave.  A zeroed
 * page is then allocated for each entry, stored in ->address and flagged
 * Nosave as well.  free_suspend_pagedir() is used on the error path.
 * NOTE(review): presumably NULL is returned when any allocation fails -
 * confirm.
 */
554 static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages)
557 suspend_pagedir_t *pagedir;
561 pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages));
563 p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order);
/* Flag all 2^pagedir_order pages of the pagedir itself. */
567 page = virt_to_page(pagedir);
568 for(i=0; i < 1<<pagedir_order; i++)
569 SetPageNosave(page++);
571 while(nr_copy_pages--) {
572 p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
574 free_suspend_pagedir((unsigned long) pagedir);
577 SetPageNosave(virt_to_page(p->address));
/*
 * prepare_suspend_processes - sync the filesystems and freeze the tasks.
 *
 * software_suspend() continues only when this returns 0.  A failure of
 * freeze_processes() is reported with an error message.
 */
584 static int prepare_suspend_processes(void)
586 sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */
587 if (freeze_processes()) {
588 printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" );
596 * Try to free as much memory as possible, but do not OOM-kill anyone
598 * Notice: all userland should be stopped at this point, or livelock is possible.
/*
 * free_some_memory - shrink memory until nothing more can be reclaimed.
 *
 * Calls shrink_all_memory(10000) repeatedly until it returns 0.
 */
600 static void free_some_memory(void)
602 printk("Freeing memory: ");
603 while (shrink_all_memory(10000))
/*
 * suspend_prepare_image - build the in-memory copy of the system image.
 *
 * Steps: save highmem (if configured); count the pages to copy; check that
 * free memory and free swap both cover the count plus PAGES_FOR_IO;
 * allocate the pagedir; record the *_check snapshots; run the pass that
 * copies the pages.
 * Its caller, do_magic_suspend_2(), stores the result in is_problem.
 * NOTE(review): presumably non-zero means failure - confirm.
 */
608 static int suspend_prepare_image(void)
611 unsigned int nr_needed_pages = 0;
613 pagedir_nosave = NULL;
614 printk( "/critical section: ");
615 #ifdef CONFIG_HIGHMEM
616 printk( "handling highmem" );
617 if (save_highmem()) {
618 printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend);
624 printk("counting pages to copy" );
/* First pass: with a NULL pagedir the pages are only counted. */
626 nr_copy_pages = count_and_copy_data_pages(NULL);
627 nr_needed_pages = nr_copy_pages + PAGES_FOR_IO;
629 printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages());
630 if(nr_free_pages() < nr_needed_pages) {
631 printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n",
632 name_suspend, nr_needed_pages-nr_free_pages());
636 si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information.
637 We should only consider resume_device. */
638 if (i.freeswap < nr_needed_pages) {
639 printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n",
640 name_suspend, nr_needed_pages-i.freeswap);
644 PRINTK( "Alloc pagedir\n" );
/* Both pointers refer to the same pagedir at suspend time. */
645 pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages);
646 if (!pagedir_nosave) {
647 /* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */
648 printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend);
/* Snapshots verified with BUG_ON() in do_magic_resume_2(). */
651 nr_copy_pages_check = nr_copy_pages;
652 pagedir_order_check = pagedir_order;
654 drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */
/* Second pass: copy for real; the count has to match the first pass. */
655 if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */
659 * End of critical section. From now on, we can write to memory,
660 * but we should not touch disk. This specially means we must _not_
661 * touch swap space! Except we must write out our image of course.
664 printk( "critical section/: done (%d pages copied)\n", nr_copy_pages );
/*
 * suspend_save_image - write the prepared image to swap.
 *
 * Calls write_suspend_image() and then lock_swapdevices() once more.
 * NOTE(review): the int result of write_suspend_image() is not checked here.
 */
668 static void suspend_save_image(void)
673 write_suspend_image();
674 lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */
676 /* It is important _NOT_ to umount filesystems at this point. We want
677 * them synced (in case something goes wrong) but we DO not want to mark
678 * filesystem clean: it is not. (And it does not matter, if we resume
679 * correctly, we'll mark system clean, anyway.)
/*
 * suspend_power_down - take the machine down after the image was written.
 *
 * Announces the power-down attempt, restarts the machine under the
 * condition explained below, and otherwise reports that the system is
 * probably not capable of powering down.
 */
683 static void suspend_power_down(void)
687 printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": "");
689 PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state);
/*
 * Restart when exactly one of TEST_SWSUSP and the KG_CTRL bit of
 * shift_state is set; "!!" normalises the bit to 0 or 1 before the XOR.
 */
691 if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL))))
692 machine_restart(NULL);
700 printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend);
/*
 * do_magic_resume_1 - first C step of resume.
 *
 * Disables interrupts by taking suspend_pagedir_lock, powers the devices
 * down and waits one second so that outstanding DMA can finish.  The lock
 * is released later, in do_magic_resume_2().
 */
710 asmlinkage void do_magic_resume_1(void)
714 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
716 device_power_down(4);
717 PRINTK( "Waiting for DMAs to settle down...\n");
718 mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right?
719 Do it with disabled interrupts for best effect. That way, if some
720 driver scheduled DMA, we have good chance for DMA to finish ;-). */
/*
 * do_magic_resume_2 - second C step of resume.
 *
 * Verifies that the page count and pagedir order match the snapshots taken
 * at suspend time, flushes the TLB, frees the suspend-time pagedir, drops
 * suspend_pagedir_lock and restores the normal swap signature.
 */
723 asmlinkage void do_magic_resume_2(void)
/* The *_check values were recorded by suspend_prepare_image(). */
725 BUG_ON (nr_copy_pages_check != nr_copy_pages);
726 BUG_ON (pagedir_order_check != pagedir_order);
728 __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */
730 PRINTK( "Freeing prev allocated pagedir\n" );
731 free_suspend_pagedir((unsigned long) pagedir_save);
733 #ifdef CONFIG_HIGHMEM
734 printk( "Restoring highmem\n" );
737 printk("done, devices\n");
/* Pairs with the spin_lock_irq() in do_magic_resume_1(). */
740 spin_unlock_irq(&suspend_pagedir_lock);
743 /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */
744 PRINTK( "Fixing swap signatures... " );
745 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
748 #ifdef SUSPEND_CONSOLE
749 acquire_console_sem();
750 update_screen(fg_console);
751 release_console_sem();
755 /* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does:
758 do_magic_suspend_1();
759 save_processor_state();
761 do_magic_suspend_2();
764 GO_TO_SWAPPER_PAGE_TABLES
768 restore_processor_state();
/*
 * do_magic_suspend_1 - first C step of suspend.
 *
 * Takes suspend_pagedir_lock with interrupts disabled; the lock is dropped
 * in do_magic_suspend_2().
 */
773 asmlinkage void do_magic_suspend_1(void)
778 spin_lock_irq(&suspend_pagedir_lock);
/*
 * do_magic_suspend_2 - second C step of suspend.
 *
 * Powers the devices down, builds the image with suspend_prepare_image()
 * and releases suspend_pagedir_lock.  It then writes the image and powers
 * the machine down.  The remaining statements form the recovery path: the
 * pagedir is freed and the normal swap signature is restored.
 * NOTE(review): presumably saving and power-down run only when is_problem
 * is zero - confirm.
 */
781 asmlinkage void do_magic_suspend_2(void)
785 device_power_down(4);
786 is_problem = suspend_prepare_image();
/* Pairs with the spin_lock_irq() in do_magic_suspend_1(). */
788 spin_unlock_irq(&suspend_pagedir_lock);
790 kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */
792 suspend_save_image();
793 suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */
796 printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend);
797 MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */
801 spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */
804 free_pages((unsigned long) pagedir_nosave, pagedir_order);
805 spin_unlock_irq(&suspend_pagedir_lock);
808 PRINTK( "Fixing swap signatures... " );
809 mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME);
814 * This is main interface to the outside world. It needs to be
815 * called from process context.
/*
 * software_suspend - entry point for suspending the machine to swap.
 *
 * Does nothing useful unless software_suspend_enabled is set; the flag is
 * cleared for the duration of the attempt and set again at the end.
 * Sequence: architecture preparation, console preparation (a failure is
 * only reported), freezing of processes, device_suspend(4) and, if that
 * succeeded, the actual suspend.  The console is restored afterwards.
 */
817 int software_suspend(void)
820 if (!software_suspend_enabled)
/* Cleared here and set again near the end of the function. */
823 software_suspend_enabled = 0;
826 if (arch_prepare_suspend()) {
827 printk("%sArchitecture failed to prepare\n", name_suspend);
830 if (pm_prepare_console())
831 printk( "%sCan't allocate a console... proceeding\n", name_suspend);
832 if (!prepare_suspend_processes()) {
834 /* At this point, all user processes and "dangerous"
835 kernel threads are stopped. Free some memory, as we
836 need half of memory free. */
840 /* Save state of all device drivers, and stop them. */
841 if ((res = device_suspend(4))==0)
842 /* If stopping device drivers worked, we proceed basically into
843 * suspend_save_image.
845 * do_magic(0) returns after system is resumed.
847 * do_magic() copies all "used" memory to "free" memory, then
848 * unsuspends all device drivers, and writes memory to disk
849 * using normal kernel mechanism.
855 software_suspend_enabled = 1;
857 pm_restore_console();
861 /* More restore stuff */
863 /* FIXME: Why not memcpy(to, from, 1<<pagedir_order*PAGE_SIZE)? */
/*
 * copy_pagedir - copy a whole pagedir page by page.
 * @to:   destination pagedir.
 * @from: source pagedir.
 *
 * Copies 2^pagedir_order pages with copy_page(), advancing both byte
 * pointers by PAGE_SIZE after each page.
 */
864 static void copy_pagedir(suspend_pagedir_t *to, suspend_pagedir_t *from)
867 char *topointer=(char *)to, *frompointer=(char *)from;
869 for(i=0; i < 1 << pagedir_order; i++) {
870 copy_page(topointer, frompointer);
871 topointer += PAGE_SIZE;
872 frompointer += PAGE_SIZE;
/* Single-page (order 0) collision test against the current pagedir_nosave. */
876 #define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0)
879 * Returns true if given address/order collides with any orig_address
881 static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr,
/* addre is one past the end of the 2^order page range that starts at addr. */
885 unsigned long addre = addr + (PAGE_SIZE<<order);
/* Linear scan over all nr_copy_pages entries, testing addr <= orig_address < addre. */
887 for(i=0; i < nr_copy_pages; i++)
888 if((pagedir+i)->orig_address >= addr &&
889 (pagedir+i)->orig_address < addre)
896 * We check here that pagedir & pages it points to won't collide with pages
897 * where we're going to restore from the loaded pages later
/*
 * check_pagedir - give every pagedir entry a load address that does not
 * collide with any orig_address.
 *
 * For each entry, zeroed pages are allocated until one is found for which
 * does_collide() is false; that address is stored in ->address.
 * NOTE(review): a failed allocation is presumably reported through the
 * return value, and colliding pages do not appear to be freed here -
 * confirm both.
 */
899 static int check_pagedir(void)
903 for(i=0; i < nr_copy_pages; i++) {
907 addr = get_zeroed_page(GFP_ATOMIC);
910 } while (does_collide(addr));
912 (pagedir_nosave+i)->address = addr;
/*
 * relocate_pagedir - move the pagedir to memory that no orig_address uses.
 *
 * If the current pagedir already avoids every orig_address, nothing is
 * moved.  Otherwise blocks of order pagedir_order are allocated until one
 * is found that does not collide.  pagedir_nosave is pointed at that block
 * and the old contents are copied over.
 * NOTE(review): presumably the colliding blocks are chained through
 * eaten_memory and freed afterwards - confirm.
 */
917 static int relocate_pagedir(void)
920 * We have to avoid recursion (not to overflow kernel stack),
921 * and that's why code looks pretty cryptic
923 suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave;
924 void **eaten_memory = NULL;
925 void **c = eaten_memory, *m, *f;
927 printk("Relocating pagedir");
/* Test the pagedir's own address range against the pages it describes. */
929 if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) {
930 printk("not necessary\n");
934 while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) {
/* Only the first page of the block is cleared here. */
935 memset(m, 0, PAGE_SIZE);
936 if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order))
947 pagedir_nosave = new_pagedir = m;
948 copy_pagedir(new_pagedir, old_pagedir);
956 free_pages((unsigned long)f, pagedir_order);
963 * Sanity check if this image makes sense with this kernel/swap context
964 * I really don't think that it's foolproof but more than nothing..
/*
 * sanity_check_failed - report why the image was rejected.
 * @reason: text printed after the resume prefix.
 *
 * The return value is handed straight back by sanity_check().
 */
967 static int sanity_check_failed(char *reason)
969 printk(KERN_ERR "%s%s\n", name_resume, reason);
/*
 * sanity_check - compare an image header with the running kernel.
 * @sh: header read from the suspend image.
 *
 * Checks, in order: kernel version code, number of physical pages, machine
 * string (8 bytes), version string (20 bytes), number of online CPUs and
 * page size.  The first mismatch is reported via sanity_check_failed().
 * These are the same fields that fill_suspend_header() records.
 */
973 static int sanity_check(struct suspend_header *sh)
975 if (sh->version_code != LINUX_VERSION_CODE)
976 return sanity_check_failed("Incorrect kernel version");
977 if (sh->num_physpages != num_physpages)
978 return sanity_check_failed("Incorrect memory size");
979 if (strncmp(sh->machine, system_utsname.machine, 8))
980 return sanity_check_failed("Incorrect machine type");
981 if (strncmp(sh->version, system_utsname.version, 20))
982 return sanity_check_failed("Incorrect version");
983 if (sh->num_cpus != num_online_cpus())
984 return sanity_check_failed("Incorrect number of cpus");
985 if (sh->page_size != PAGE_SIZE)
986 return sanity_check_failed("Incorrect PAGE_SIZE");
/*
 * bdev_read_page - read one page from a block device into a buffer.
 * @bdev: device to read from.
 * @pos:  byte offset; must be a multiple of PAGE_SIZE.
 * @buf:  destination, at least PAGE_SIZE bytes.
 *
 * Callers treat a non-zero return value as an I/O error.
 */
990 static int bdev_read_page(struct block_device *bdev, long pos, void *buf)
992 struct buffer_head *bh;
993 BUG_ON (pos%PAGE_SIZE);
/* The block number is pos / PAGE_SIZE, with PAGE_SIZE passed as the block size. */
994 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
995 if (!bh || (!bh->b_data)) {
998 memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */
999 BUG_ON(!buffer_uptodate(bh));
/*
 * bdev_write_page - write one page from a buffer to a block device.
 * @bdev: device to write to.
 * @pos:  byte offset; must be a multiple of PAGE_SIZE.
 * @buf:  source, PAGE_SIZE bytes.
 *
 * The block is read first, its data is overwritten from buf, and a write
 * request is issued.  Both an "unsuccessful" and an "unimplemented"
 * warning exist in this function.
 * NOTE(review): presumably parts of the body are compiled out
 * conditionally - confirm which path is live.
 */
1004 static int bdev_write_page(struct block_device *bdev, long pos, void *buf)
1007 struct buffer_head *bh;
1008 BUG_ON (pos%PAGE_SIZE);
1009 bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE);
1010 if (!bh || (!bh->b_data)) {
1013 memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */
1014 BUG_ON(!buffer_uptodate(bh));
1015 generic_make_request(WRITE, bh);
1016 if (!buffer_uptodate(bh))
1017 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file);
1022 printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file);
1026 extern dev_t __init name_to_dev_t(const char *line);
/*
 * __read_suspend_image - load a suspend image from an opened block device.
 * @bdev:     device holding the image.
 * @cur:      one-page scratch buffer.
 * @noresume: flag passed by read_suspend_image(); software_resume() passes
 *            1 when resuming is disabled.
 *
 * Reads the signature page, restores the normal swap magic in the buffer,
 * reads and sanity-checks the header, allocates and reads the pagedir
 * (stored in reverse order), relocates and checks it, and finally reads
 * every data page to the address chosen by check_pagedir().
 * Returns -EIO when a page read fails.
 */
1028 static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume)
1031 int i, nr_pgdir_pages;
/* PREPARENEXT loads the link to the next page and turns it into a byte offset on the device. */
1033 #define PREPARENEXT \
1034 { next = cur->link.next; \
1035 next.val = swp_offset(next) * PAGE_SIZE; \
/* Offset 0 of the device holds the swap signature page. */
1038 if (bdev_read_page(bdev, 0, cur)) return -EIO;
1040 if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) ||
1041 (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) {
1042 printk(KERN_ERR "%sThis is normal swap space\n", name_resume );
1046 PREPARENEXT; /* We have to read next position before we overwrite it */
1048 if (!memcmp("S1",cur->swh.magic.magic,2))
1049 memcpy(cur->swh.magic.magic,"SWAP-SPACE",10);
1050 else if (!memcmp("S2",cur->swh.magic.magic,2))
1051 memcpy(cur->swh.magic.magic,"SWAPSPACE2",10);
1055 panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n",
1056 name_resume, cur->swh.magic.magic);
1059 /* We don't do a sanity check here: we want to restore the swap
1060 whatever version of kernel made the suspend image;
1061 We need to write swap, but swap is *not* enabled so
1062 we must write the device directly */
1063 printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file);
1064 bdev_write_page(bdev, 0, cur);
1067 printk( "%sSignature found, resuming\n", name_resume );
/* next.val now holds the byte offset of the header page. */
1070 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1071 if (sanity_check(&cur->sh)) /* Is this same machine? */
/* Take over the image geometry recorded by fill_suspend_header(). */
1075 pagedir_save = cur->sh.suspend_pagedir;
1076 nr_copy_pages = cur->sh.num_pbes;
1077 nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages);
1078 pagedir_order = get_bitmask_order(nr_pgdir_pages);
1080 pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order);
1081 if (!pagedir_nosave)
1084 PRINTK( "%sReading pagedir, ", name_resume );
1086 /* We get pages in reverse order of saving! */
1087 for (i=nr_pgdir_pages-1; i>=0; i--) {
/* The cast binds tighter than '+', so i steps in units of union diskpage. */
1089 cur = (union diskpage *)((char *) pagedir_nosave)+i;
1090 if (bdev_read_page(bdev, next.val, cur)) return -EIO;
1095 if (relocate_pagedir())
1097 if (check_pagedir())
1100 printk( "Reading image data (%d pages): ", nr_copy_pages );
1101 for(i=0; i < nr_copy_pages; i++) {
1102 swp_entry_t swap_address = (pagedir_nosave+i)->swap_address;
1105 /* You do not need to check for overlaps...
1106 ... check_pagedir already did this work */
1107 if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address)))
/*
 * read_suspend_image - open the resume device and load the image from it.
 * @specialfile: device name, translated with name_to_dev_t().
 * @noresume:    forwarded to __read_suspend_image().
 *
 * Allocates a zeroed scratch page, opens the device read-only by number,
 * sets its block size to PAGE_SIZE and delegates to __read_suspend_image().
 * -ENOMEM is used when the scratch page cannot be allocated.  The scratch
 * page is freed afterwards and the outcome is reported with one of the
 * messages at the end.
 */
1114 static int __init read_suspend_image(const char * specialfile, int noresume)
1116 union diskpage *cur;
1117 unsigned long scratch_page = 0;
1119 char b[BDEVNAME_SIZE];
/* resume_device is also consulted by is_resume_device(). */
1121 resume_device = name_to_dev_t(specialfile);
1122 scratch_page = get_zeroed_page(GFP_ATOMIC);
1123 cur = (void *) scratch_page;
1125 struct block_device *bdev;
1126 printk("Resuming from device %s\n",
1127 __bdevname(resume_device, b));
1128 bdev = open_by_devnum(resume_device, FMODE_READ);
1130 error = PTR_ERR(bdev);
/* Image pages are read in PAGE_SIZE blocks; see bdev_read_page(). */
1132 set_blocksize(bdev, PAGE_SIZE);
1133 error = __read_suspend_image(bdev, cur, noresume);
1136 } else error = -ENOMEM;
1139 free_page(scratch_page);
1142 PRINTK("Reading resume file was successful\n");
1147 printk( "%sI/O error\n", name_resume);
1150 printk( "%s%s: No such file or directory\n", name_resume, specialfile);
1153 printk( "%sNot enough memory\n", name_resume);
1156 printk( "%sError %d resuming\n", name_resume, error );
1163 * software_resume - Resume from a saved image.
1165 * Called as a late_initcall (so all devices are discovered and
1166 * initialized), we call swsusp to see if we have a saved image or not.
1167 * If so, we quiesce devices, then restore the saved image. We will
1168 * return above (in pm_suspend_disk() ) if everything goes well.
1169 * Otherwise, we fail gracefully and return to the normally
1170 * scheduled program.
/*
 * software_resume - boot-time entry point, registered with late_initcall().
 *
 * Software suspend is not enabled when more than one CPU is online;
 * otherwise software_suspend_enabled is set.  With "noresume" the image is
 * still read with noresume set to 1 and "disabled" is printed.  Otherwise
 * the console is prepared, a missing resume device is reported, and the
 * image named by resume_file is read.
 * NOTE(review): presumably a successful read jumps into the restored
 * image, so the panic() is reached only if that jump returns - confirm.
 */
1173 static int __init software_resume(void)
1175 if (num_online_cpus() > 1) {
1176 printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n");
1179 /* We enable the possibility of machine suspend */
1180 software_suspend_enabled = 1;
1184 printk( "%s", name_resume );
1185 if (resume_status == NORESUME) {
1187 read_suspend_image(resume_file, 1);
1188 printk( "disabled\n" );
1193 if (pm_prepare_console())
1194 printk("swsusp: Can't allocate a console... proceeding\n");
/* resume= was given, but with an empty device name. */
1196 if (!resume_file[0] && resume_status == RESUME_SPECIFIED) {
1197 printk( "suspension device unspecified\n" );
1201 printk( "resuming from %s\n", resume_file);
1202 if (read_suspend_image(resume_file, 0))
1206 panic("This never returns");
1209 pm_restore_console();
1213 late_initcall(software_resume);
/*
 * resume_setup - handler for the "resume=" boot option.
 * @str: text following "resume=".
 *
 * Stores the device name in resume_file and marks the resume status as
 * RESUME_SPECIFIED.  A status of NORESUME is tested for first.
 */
1215 static int __init resume_setup(char *str)
1217 if (resume_status == NORESUME)
/*
 * At most 255 bytes are copied into the 256-byte, zero-initialised
 * resume_file, so its last byte is never overwritten.
 */
1220 strncpy( resume_file, str, 255 );
1221 resume_status = RESUME_SPECIFIED;
/*
 * noresume_setup - handler for the "noresume" boot option.
 * @str: unused in the code shown here.
 *
 * Sets resume_status to NORESUME, which software_resume() and
 * resume_setup() both test.
 */
1226 static int __init noresume_setup(char *str)
1228 resume_status = NORESUME;
/* Bind the boot options to their handlers defined in this file. */
1232 __setup("noresume", noresume_setup);
1233 __setup("resume=", resume_setup);
/* Export the suspend entry point and its enable flag. */
1235 EXPORT_SYMBOL(software_suspend);
1236 EXPORT_SYMBOL(software_suspend_enabled);