X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fpower%2Fswsusp.c;h=31aa0390c777d186652fe1b5dc3bfb4aafbaa338;hb=refs%2Fheads%2Fvserver;hp=752f6cdb4382968d7798a4dcb94b0135998c9820;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 752f6cdb4..31aa0390c 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1,21 +1,20 @@ /* * linux/kernel/power/swsusp.c * - * This file is to realize architecture-independent - * machine suspend feature using pretty near only high-level routines + * This file provides code to write suspend image to swap and read it back. * * Copyright (C) 1998-2001 Gabor Kuti - * Copyright (C) 1998,2001-2004 Pavel Machek + * Copyright (C) 1998,2001-2005 Pavel Machek * * This file is released under the GPLv2. * * I'd like to thank the following people for their work: - * + * * Pavel Machek : * Modifications, defectiveness pointing, being with me at the very beginning, * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. * - * Steve Doddi : + * Steve Doddi : * Support the possibility of hardware state restoring. * * Raph : @@ -31,1206 +30,300 @@ * Alex Badea : * Fixed runaway init * + * Rafael J. Wysocki + * Reworked the freeing of memory and the handling of swap + * * More state savers are welcome. Especially for the scsi layer... * * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt */ -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include #include #include #include -#include -#include #include #include #include -#include #include - -#include -#include -#include -#include +#include #include "power.h" -unsigned char software_suspend_enabled = 0; - -#define NORESUME 1 -#define RESUME_SPECIFIED 2 - -/* References to section boundaries */ -extern char __nosave_begin, __nosave_end; - -extern int is_head_of_free_region(struct page *); - -/* Locks */ -spinlock_t suspend_pagedir_lock __nosavedata = SPIN_LOCK_UNLOCKED; - -/* Variables to be preserved over suspend */ -static int pagedir_order_check; -static int nr_copy_pages_check; - -static int resume_status; -static char resume_file[256] = ""; /* For resume= kernel option */ -static dev_t resume_device; -/* Local variables that should not be affected by save */ -unsigned int nr_copy_pages __nosavedata = 0; - -/* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume - - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - - Warning: this is even more evil than it seems. Pagedirs this file - talks about are completely different from page directories used by - MMU hardware. - */ -suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -static suspend_pagedir_t *pagedir_save; -static int pagedir_order __nosavedata = 0; - -struct link { - char dummy[PAGE_SIZE - sizeof(swp_entry_t)]; - swp_entry_t next; -}; - -union diskpage { - union swap_header swh; - struct link link; - struct suspend_header sh; -}; - /* - * XXX: We try to keep some more pages free so that I/O operations succeed - * without paging. Might this be more? + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. */ -#define PAGES_FOR_IO 512 +unsigned long image_size = 500 * 1024 * 1024; -static const char name_suspend[] = "Suspend Machine: "; -static const char name_resume[] = "Resume Machine: "; +int in_suspend __nosavedata = 0; -/* - * Debug - */ -#define DEBUG_DEFAULT -#undef DEBUG_PROCESS -#undef DEBUG_SLOW -#define TEST_SWSUSP 0 /* Set to 1 to reboot instead of halt machine after suspension */ - -#ifdef DEBUG_DEFAULT -# define PRINTK(f, a...) printk(f, ## a) -#else -# define PRINTK(f, a...) do { } while(0) -#endif - -#ifdef DEBUG_SLOW -#define MDELAY(a) mdelay(a) +#ifdef CONFIG_HIGHMEM +unsigned int count_highmem_pages(void); +int restore_highmem(void); #else -#define MDELAY(a) do { } while(0) +static inline int restore_highmem(void) { return 0; } +static inline unsigned int count_highmem_pages(void) { return 0; } #endif -/* - * Saving part... - */ - -static __inline__ int fill_suspend_header(struct suspend_header *sh) -{ - memset((char *)sh, 0, sizeof(*sh)); - - sh->version_code = LINUX_VERSION_CODE; - sh->num_physpages = num_physpages; - strncpy(sh->machine, system_utsname.machine, 8); - strncpy(sh->version, system_utsname.version, 20); - /* FIXME: Is this bogus? --RR */ - sh->num_cpus = num_online_cpus(); - sh->page_size = PAGE_SIZE; - sh->suspend_pagedir = pagedir_nosave; - BUG_ON (pagedir_save != pagedir_nosave); - sh->num_pbes = nr_copy_pages; - /* TODO: needed? mounted fs' last mounted date comparison - * [so they haven't been mounted since last suspend. - * Maybe it isn't.] [we'd need to do this for _all_ fs-es] - */ - return 0; -} - -/* We memorize in swapfile_used what swap devices are used for suspension */ -#define SWAPFILE_UNUSED 0 -#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ -#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ - -static unsigned short swapfile_used[MAX_SWAPFILES]; -static unsigned short root_swap; -#define MARK_SWAP_SUSPEND 0 -#define MARK_SWAP_RESUME 2 - -static void mark_swapfiles(swp_entry_t prev, int mode) -{ - swp_entry_t entry; - union diskpage *cur; - struct page *page; - - if (root_swap == 0xFFFF) /* ignored */ - return; - - page = alloc_page(GFP_ATOMIC); - if (!page) - panic("Out of memory in mark_swapfiles"); - cur = page_address(page); - /* XXX: this is dirty hack to get first page of swap file */ - entry = swp_entry(root_swap, 0); - rw_swap_page_sync(READ, entry, page); - - if (mode == MARK_SWAP_RESUME) { - if (!memcmp("S1",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); - else if (!memcmp("S2",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else printk("%sUnable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); - } else { - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10))) - memcpy(cur->swh.magic.magic,"S1SUSP....",10); - else if ((!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) - memcpy(cur->swh.magic.magic,"S2SUSP....",10); - else panic("\nSwapspace is not swapspace (%.10s)\n", cur->swh.magic.magic); - cur->link.next = prev; /* prev is the first/last swap page of the resume area */ - /* link.next lies *no more* in last 4/8 bytes of magic */ - } - rw_swap_page_sync(WRITE, entry, page); - __free_page(page); -} - - -/* - * Check whether the swap device is the specified resume - * device, irrespective of whether they are specified by - * identical names. - * - * (Thus, device inode aliasing is allowed. You can say /dev/hda4 - * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] - * and they'll be considered the same device. This is *necessary* for - * devfs, since the resume code can only recognize the form /dev/hda4, - * but the suspend code would see the long name.) - */ -static int is_resume_device(const struct swap_info_struct *swap_info) -{ - struct file *file = swap_info->swap_file; - struct inode *inode = file->f_dentry->d_inode; - - return S_ISBLK(inode->i_mode) && - resume_device == MKDEV(imajor(inode), iminor(inode)); -} - -static void read_swapfiles(void) /* This is called before saving image */ -{ - int i, len; - - len=strlen(resume_file); - root_swap = 0xFFFF; - - swap_list_lock(); - for(i=0; iaddress; - page = virt_to_page(address); - rw_swap_page_sync(WRITE, entry, page); - (pagedir_nosave+i)->swap_address = entry; - } - printk( "|\n" ); - printk( "Writing pagedir (%d pages): ", nr_pgdir_pages); - for (i=0; ilink.next = prev; - page = virt_to_page((unsigned long)cur); - rw_swap_page_sync(WRITE, entry, page); - prev = entry; - } - printk("H"); - BUG_ON (sizeof(struct suspend_header) > PAGE_SIZE-sizeof(swp_entry_t)); - BUG_ON (sizeof(union diskpage) != PAGE_SIZE); - BUG_ON (sizeof(struct link) != PAGE_SIZE); - if (!(entry = get_swap_page()).val) - panic( "\nNot enough swapspace when writing header" ); - if (swapfile_used[swp_type(entry)] != SWAPFILE_SUSPEND) - panic("\nNot enough swapspace for header on suspend device" ); - - cur = (void *) buffer; - if (fill_suspend_header(&cur->sh)) - BUG(); /* Not a BUG_ON(): we want fill_suspend_header to be called, always */ - - cur->link.next = prev; - - page = virt_to_page((unsigned long)cur); - rw_swap_page_sync(WRITE, entry, page); - prev = entry; - - printk( "S" ); - mark_swapfiles(prev, MARK_SWAP_SUSPEND); - printk( "|\n" ); - - MDELAY(1000); - return 0; -} - -#ifdef CONFIG_HIGHMEM -struct highmem_page { - char *data; - struct page *page; - struct highmem_page *next; -}; -struct highmem_page *highmem_copy = NULL; - -static int save_highmem_zone(struct zone *zone) +void free_bitmap(struct bitmap_page *bitmap) { - unsigned long zone_pfn; - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - struct highmem_page *save; - void *kaddr; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - int chunk_size; - - if (!(pfn%1000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - /* - * This condition results from rvmalloc() sans vmalloc_32() - * and architectural memory reservations. This should be - * corrected eventually when the cases giving rise to this - * are better understood. - */ - if (PageReserved(page)) { - printk("highmem reserved page?!\n"); - continue; - } - if ((chunk_size = is_head_of_free_region(page))) { - pfn += chunk_size - 1; - zone_pfn += chunk_size - 1; - continue; - } - save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); - if (!save) - return -ENOMEM; - save->next = highmem_copy; - save->page = page; - save->data = (void *) get_zeroed_page(GFP_ATOMIC); - if (!save->data) { - kfree(save); - return -ENOMEM; - } - kaddr = kmap_atomic(page, KM_USER0); - memcpy(save->data, kaddr, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - highmem_copy = save; - } - return 0; -} + struct bitmap_page *bp; -static int save_highmem(void) -{ - struct zone *zone; - int res = 0; - for_each_zone(zone) { - if (is_highmem(zone)) - res = save_highmem_zone(zone); - if (res) - return res; + while (bitmap) { + bp = bitmap->next; + free_page((unsigned long)bitmap); + bitmap = bp; } - return 0; } -static int restore_highmem(void) +struct bitmap_page *alloc_bitmap(unsigned int nr_bits) { - while (highmem_copy) { - struct highmem_page *save = highmem_copy; - void *kaddr; - highmem_copy = save->next; - - kaddr = kmap_atomic(save->page, KM_USER0); - memcpy(kaddr, save->data, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - free_page((long) save->data); - kfree(save); - } - return 0; -} -#endif + struct bitmap_page *bitmap, *bp; + unsigned int n; -static int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} - -/* if *pagedir_p != NULL it also copies the counted pages */ -static int count_and_copy_zone(struct zone *zone, struct pbe **pagedir_p) -{ - unsigned long zone_pfn, chunk_size, nr_copy_pages = 0; - struct pbe *pbe = *pagedir_p; - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - - if (!(pfn%1000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - BUG_ON(PageReserved(page) && PageNosave(page)); - if (PageNosave(page)) - continue; - if (PageReserved(page) && pfn_is_nosave(pfn)) { - PRINTK("[nosave pfn 0x%lx]", pfn); - continue; - } - if ((chunk_size = is_head_of_free_region(page))) { - pfn += chunk_size - 1; - zone_pfn += chunk_size - 1; - continue; - } - nr_copy_pages++; - if (!pbe) - continue; - pbe->orig_address = (long) page_address(page); - copy_page((void *)pbe->address, (void *)pbe->orig_address); - pbe++; - } - *pagedir_p = pbe; - return nr_copy_pages; -} - -static int count_and_copy_data_pages(struct pbe *pagedir_p) -{ - int nr_copy_pages = 0; - struct zone *zone; - for_each_zone(zone) { - if (!is_highmem(zone)) - nr_copy_pages += count_and_copy_zone(zone, &pagedir_p); - } - return nr_copy_pages; -} - -static void free_suspend_pagedir_zone(struct zone *zone, unsigned long pagedir) -{ - unsigned long zone_pfn, pagedir_end, pagedir_pfn, pagedir_end_pfn; - pagedir_end = pagedir + (PAGE_SIZE << pagedir_order); - pagedir_pfn = __pa(pagedir) >> PAGE_SHIFT; - pagedir_end_pfn = __pa(pagedir_end) >> PAGE_SHIFT; - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - if (!TestClearPageNosave(page)) - continue; - else if (pfn >= pagedir_pfn && pfn < pagedir_end_pfn) - continue; - __free_page(page); - } -} - -static void free_suspend_pagedir(unsigned long this_pagedir) -{ - struct zone *zone; - for_each_zone(zone) { - if (!is_highmem(zone)) - free_suspend_pagedir_zone(zone, this_pagedir); - } - free_pages(this_pagedir, pagedir_order); -} - -static suspend_pagedir_t *create_suspend_pagedir(int nr_copy_pages) -{ - int i; - suspend_pagedir_t *pagedir; - struct pbe *p; - struct page *page; - - pagedir_order = get_bitmask_order(SUSPEND_PD_PAGES(nr_copy_pages)); - - p = pagedir = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC | __GFP_COLD, pagedir_order); - if (!pagedir) + if (!nr_bits) return NULL; - page = virt_to_page(pagedir); - for(i=0; i < 1<address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - if (!p->address) { - free_suspend_pagedir((unsigned long) pagedir); + bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); + bp = bitmap; + for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { + bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); + bp = bp->next; + if (!bp) { + free_bitmap(bitmap); return NULL; } - SetPageNosave(virt_to_page(p->address)); - p->orig_address = 0; - p++; } - return pagedir; + return bitmap; } -static int prepare_suspend_processes(void) +static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) { - sys_sync(); /* Syncing needs pdflushd, so do it before stopping processes */ - if (freeze_processes()) { - printk( KERN_ERR "Suspend failed: Not all processes stopped!\n" ); - thaw_processes(); - return 1; + unsigned int n; + + n = BITMAP_PAGE_BITS; + while (bitmap && n <= bit) { + n += BITMAP_PAGE_BITS; + bitmap = bitmap->next; } + if (!bitmap) + return -EINVAL; + n -= BITMAP_PAGE_BITS; + bit -= n; + n = 0; + while (bit >= BITS_PER_CHUNK) { + bit -= BITS_PER_CHUNK; + n++; + } + bitmap->chunks[n] |= (1UL << bit); return 0; } -/* - * Try to free as much memory as possible, but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or livelock is possible. - */ -static void free_some_memory(void) -{ - printk("Freeing memory: "); - while (shrink_all_memory(10000)) - printk("."); - printk("|\n"); -} - -static int suspend_prepare_image(void) +sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) { - struct sysinfo i; - unsigned int nr_needed_pages = 0; - - pagedir_nosave = NULL; - printk( "/critical section: "); -#ifdef CONFIG_HIGHMEM - printk( "handling highmem" ); - if (save_highmem()) { - printk(KERN_CRIT "%sNot enough free pages for highmem\n", name_suspend); - return -ENOMEM; - } - printk(", "); -#endif + unsigned long offset; - printk("counting pages to copy" ); - drain_local_pages(); - nr_copy_pages = count_and_copy_data_pages(NULL); - nr_needed_pages = nr_copy_pages + PAGES_FOR_IO; - - printk(" (pages needed: %d+%d=%d free: %d)\n",nr_copy_pages,PAGES_FOR_IO,nr_needed_pages,nr_free_pages()); - if(nr_free_pages() < nr_needed_pages) { - printk(KERN_CRIT "%sCouldn't get enough free pages, on %d pages short\n", - name_suspend, nr_needed_pages-nr_free_pages()); - root_swap = 0xFFFF; - return -ENOMEM; + offset = swp_offset(get_swap_page_of_type(swap)); + if (offset) { + if (bitmap_set(bitmap, offset)) + swap_free(swp_entry(swap, offset)); + else + return swapdev_block(swap, offset); } - si_swapinfo(&i); /* FIXME: si_swapinfo(&i) returns all swap devices information. - We should only consider resume_device. */ - if (i.freeswap < nr_needed_pages) { - printk(KERN_CRIT "%sThere's not enough swap space available, on %ld pages short\n", - name_suspend, nr_needed_pages-i.freeswap); - return -ENOSPC; - } - - PRINTK( "Alloc pagedir\n" ); - pagedir_save = pagedir_nosave = create_suspend_pagedir(nr_copy_pages); - if (!pagedir_nosave) { - /* Pagedir is big, one-chunk allocation. It is easily possible for this allocation to fail */ - printk(KERN_CRIT "%sCouldn't allocate continuous pagedir\n", name_suspend); - return -ENOMEM; - } - nr_copy_pages_check = nr_copy_pages; - pagedir_order_check = pagedir_order; - - drain_local_pages(); /* During allocating of suspend pagedir, new cold pages may appear. Kill them */ - if (nr_copy_pages != count_and_copy_data_pages(pagedir_nosave)) /* copy */ - BUG(); - - /* - * End of critical section. From now on, we can write to memory, - * but we should not touch disk. This specially means we must _not_ - * touch swap space! Except we must write out our image of course. - */ - - printk( "critical section/: done (%d pages copied)\n", nr_copy_pages ); return 0; } -static void suspend_save_image(void) +void free_all_swap_pages(int swap, struct bitmap_page *bitmap) { - device_resume(); - - lock_swapdevices(); - write_suspend_image(); - lock_swapdevices(); /* This will unlock ignored swap devices since writing is finished */ + unsigned int bit, n; + unsigned long test; - /* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ -} - -static void suspend_power_down(void) -{ - extern int C_A_D; - C_A_D = 0; - printk(KERN_EMERG "%s%s Trying to power down.\n", name_suspend, TEST_SWSUSP ? "Disable TEST_SWSUSP. NOT ": ""); -#ifdef CONFIG_VT - PRINTK(KERN_EMERG "shift_state: %04x\n", shift_state); - mdelay(1000); - if (TEST_SWSUSP ^ (!!(shift_state & (1 << KG_CTRL)))) - machine_restart(NULL); - else -#endif - { - device_shutdown(); - machine_power_off(); + bit = 0; + while (bitmap) { + for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) + for (test = 1UL; test; test <<= 1) { + if (bitmap->chunks[n] & test) + swap_free(swp_entry(swap, bit)); + bit++; + } + bitmap = bitmap->next; } - - printk(KERN_EMERG "%sProbably not capable for powerdown. System halted.\n", name_suspend); - machine_halt(); - while (1); - /* NOTREACHED */ } -/* - * Magic happens here - */ - -asmlinkage void do_magic_resume_1(void) -{ - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - - device_power_down(4); - PRINTK( "Waiting for DMAs to settle down...\n"); - mdelay(1000); /* We do not want some readahead with DMA to corrupt our memory, right? - Do it with disabled interrupts for best effect. That way, if some - driver scheduled DMA, we have good chance for DMA to finish ;-). */ -} - -asmlinkage void do_magic_resume_2(void) -{ - BUG_ON (nr_copy_pages_check != nr_copy_pages); - BUG_ON (pagedir_order_check != pagedir_order); - - __flush_tlb_global(); /* Even mappings of "global" things (vmalloc) need to be fixed */ - - PRINTK( "Freeing prev allocated pagedir\n" ); - free_suspend_pagedir((unsigned long) pagedir_save); - -#ifdef CONFIG_HIGHMEM - printk( "Restoring highmem\n" ); - restore_highmem(); -#endif - printk("done, devices\n"); - - device_power_up(); - spin_unlock_irq(&suspend_pagedir_lock); - device_resume(); - - /* Fixme: this is too late; we should do this ASAP to avoid "infinite reboots" problem */ - PRINTK( "Fixing swap signatures... " ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); - -#ifdef SUSPEND_CONSOLE - acquire_console_sem(); - update_screen(fg_console); - release_console_sem(); -#endif -} - -/* do_magic() is implemented in arch/?/kernel/suspend_asm.S, and basically does: - - if (!resume) { - do_magic_suspend_1(); - save_processor_state(); - SAVE_REGISTERS - do_magic_suspend_2(); - return; - } - GO_TO_SWAPPER_PAGE_TABLES - do_magic_resume_1(); - COPY_PAGES_BACK - RESTORE_REGISTERS - restore_processor_state(); - do_magic_resume_2(); - +/** + * swsusp_show_speed - print the time elapsed between two events represented by + * @start and @stop + * + * @nr_pages - number of pages processed between @start and @stop + * @msg - introductory message to print */ -asmlinkage void do_magic_suspend_1(void) -{ - mb(); - barrier(); - BUG_ON(in_atomic()); - spin_lock_irq(&suspend_pagedir_lock); -} - -asmlinkage void do_magic_suspend_2(void) +void swsusp_show_speed(struct timeval *start, struct timeval *stop, + unsigned nr_pages, char *msg) { - int is_problem; - read_swapfiles(); - device_power_down(4); - is_problem = suspend_prepare_image(); - device_power_up(); - spin_unlock_irq(&suspend_pagedir_lock); - if (!is_problem) { - kernel_fpu_end(); /* save_processor_state() does kernel_fpu_begin, and we need to revert it in order to pass in_atomic() checks */ - BUG_ON(in_atomic()); - suspend_save_image(); - suspend_power_down(); /* FIXME: if suspend_power_down is commented out, console is lost after few suspends ?! */ - } - - printk(KERN_EMERG "%sSuspend failed, trying to recover...\n", name_suspend); - MDELAY(1000); /* So user can wait and report us messages if armageddon comes :-) */ - - barrier(); - mb(); - spin_lock_irq(&suspend_pagedir_lock); /* Done to disable interrupts */ - mdelay(1000); - - free_pages((unsigned long) pagedir_nosave, pagedir_order); - spin_unlock_irq(&suspend_pagedir_lock); + s64 elapsed_centisecs64; + int centisecs; + int k; + int kps; - device_resume(); - PRINTK( "Fixing swap signatures... " ); - mark_swapfiles(((swp_entry_t) {0}), MARK_SWAP_RESUME); - PRINTK( "ok\n" ); + elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); + do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); + centisecs = elapsed_centisecs64; + if (centisecs == 0) + centisecs = 1; /* avoid div-by-zero */ + k = nr_pages * (PAGE_SIZE / 1024); + kps = (k * 100) / centisecs; + printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k, + centisecs / 100, centisecs % 100, + kps / 1000, (kps % 1000) / 10); } -/* - * This is main interface to the outside world. It needs to be - * called from process context. +/** + * swsusp_shrink_memory - Try to free as much memory as needed + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped before it is called, or + * livelock is possible. */ -int software_suspend(void) -{ - int res; - if (!software_suspend_enabled) - return -EAGAIN; - software_suspend_enabled = 0; - might_sleep(); - - if (arch_prepare_suspend()) { - printk("%sArchitecture failed to prepare\n", name_suspend); - return -EPERM; - } - if (pm_prepare_console()) - printk( "%sCan't allocate a console... proceeding\n", name_suspend); - if (!prepare_suspend_processes()) { - - /* At this point, all user processes and "dangerous" - kernel threads are stopped. Free some memory, as we - need half of memory free. */ - - free_some_memory(); - - /* Save state of all device drivers, and stop them. */ - if ((res = device_suspend(4))==0) - /* If stopping device drivers worked, we proceed basically into - * suspend_save_image. - * - * do_magic(0) returns after system is resumed. - * - * do_magic() copies all "used" memory to "free" memory, then - * unsuspends all device drivers, and writes memory to disk - * using normal kernel mechanism. - */ - do_magic(0); - thaw_processes(); - } else - res = -EBUSY; - software_suspend_enabled = 1; - MDELAY(1000); - pm_restore_console(); - return res; -} - -/* More restore stuff */ - -/* FIXME: Why not memcpy(to, from, 1< SHRINK_BITE) + tmp = SHRINK_BITE; + return shrink_all_memory(tmp); } -#define does_collide(addr) does_collide_order(pagedir_nosave, addr, 0) - -/* - * Returns true if given address/order collides with any orig_address - */ -static int does_collide_order(suspend_pagedir_t *pagedir, unsigned long addr, - int order) +int swsusp_shrink_memory(void) { - int i; - unsigned long addre = addr + (PAGE_SIZE<orig_address >= addr && - (pagedir+i)->orig_address < addre) - return 1; - - return 0; -} - -/* - * We check here that pagedir & pages it points to won't collide with pages - * where we're going to restore from the loaded pages later - */ -static int check_pagedir(void) -{ - int i; + long tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; + struct timeval start, stop; + + printk("Shrinking memory... "); + do_gettimeofday(&start); + do { + long size, highmem_size; + + highmem_size = count_highmem_pages(); + size = count_data_pages() + PAGES_FOR_IO; + tmp = size; + size += highmem_size; + for_each_zone (zone) + if (populated_zone(zone)) { + if (is_highmem(zone)) { + highmem_size -= zone->free_pages; + } else { + tmp -= zone->free_pages; + tmp += zone->lowmem_reserve[ZONE_NORMAL]; + tmp += snapshot_additional_pages(zone); + } + } - for(i=0; i < nr_copy_pages; i++) { - unsigned long addr; + if (highmem_size < 0) + highmem_size = 0; - do { - addr = get_zeroed_page(GFP_ATOMIC); - if(!addr) + tmp += highmem_size; + if (tmp > 0) { + tmp = __shrink_memory(tmp); + if (!tmp) return -ENOMEM; - } while (does_collide(addr)); - - (pagedir_nosave+i)->address = addr; - } - return 0; -} - -static int relocate_pagedir(void) -{ - /* - * We have to avoid recursion (not to overflow kernel stack), - * and that's why code looks pretty cryptic - */ - suspend_pagedir_t *new_pagedir, *old_pagedir = pagedir_nosave; - void **eaten_memory = NULL; - void **c = eaten_memory, *m, *f; - - printk("Relocating pagedir"); - - if(!does_collide_order(old_pagedir, (unsigned long)old_pagedir, pagedir_order)) { - printk("not necessary\n"); - return 0; - } - - while ((m = (void *) __get_free_pages(GFP_ATOMIC, pagedir_order))) { - memset(m, 0, PAGE_SIZE); - if (!does_collide_order(old_pagedir, (unsigned long)m, pagedir_order)) - break; - eaten_memory = m; - printk( "." ); - *eaten_memory = c; - c = eaten_memory; - } - - if (!m) - return -ENOMEM; - - pagedir_nosave = new_pagedir = m; - copy_pagedir(new_pagedir, old_pagedir); - - c = eaten_memory; - while(c) { - printk(":"); - f = *c; - c = *c; - if (f) - free_pages((unsigned long)f, pagedir_order); - } - printk("|\n"); - return 0; -} - -/* - * Sanity check if this image makes sense with this kernel/swap context - * I really don't think that it's foolproof but more than nothing.. - */ - -static int sanity_check_failed(char *reason) -{ - printk(KERN_ERR "%s%s\n", name_resume, reason); - return -EPERM; -} - -static int sanity_check(struct suspend_header *sh) -{ - if (sh->version_code != LINUX_VERSION_CODE) - return sanity_check_failed("Incorrect kernel version"); - if (sh->num_physpages != num_physpages) - return sanity_check_failed("Incorrect memory size"); - if (strncmp(sh->machine, system_utsname.machine, 8)) - return sanity_check_failed("Incorrect machine type"); - if (strncmp(sh->version, system_utsname.version, 20)) - return sanity_check_failed("Incorrect version"); - if (sh->num_cpus != num_online_cpus()) - return sanity_check_failed("Incorrect number of cpus"); - if (sh->page_size != PAGE_SIZE) - return sanity_check_failed("Incorrect PAGE_SIZE"); - return 0; -} - -static int bdev_read_page(struct block_device *bdev, long pos, void *buf) -{ - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(buf, bh->b_data, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - brelse(bh); - return 0; -} + pages += tmp; + } else if (size > image_size / PAGE_SIZE) { + tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); + pages += tmp; + } + printk("\b%c", p[i++%4]); + } while (tmp > 0); + do_gettimeofday(&stop); + printk("\bdone (%lu pages freed)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Freed"); -static int bdev_write_page(struct block_device *bdev, long pos, void *buf) -{ -#if 0 - struct buffer_head *bh; - BUG_ON (pos%PAGE_SIZE); - bh = __bread(bdev, pos/PAGE_SIZE, PAGE_SIZE); - if (!bh || (!bh->b_data)) { - return -1; - } - memcpy(bh->b_data, buf, PAGE_SIZE); /* FIXME: may need kmap() */ - BUG_ON(!buffer_uptodate(bh)); - generic_make_request(WRITE, bh); - if (!buffer_uptodate(bh)) - printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unsuccessful...\n", name_resume, resume_file); - wait_on_buffer(bh); - brelse(bh); - return 0; -#endif - printk(KERN_CRIT "%sWarning %s: Fixing swap signatures unimplemented...\n", name_resume, resume_file); return 0; } -extern dev_t __init name_to_dev_t(const char *line); - -static int __init __read_suspend_image(struct block_device *bdev, union diskpage *cur, int noresume) +int swsusp_suspend(void) { - swp_entry_t next; - int i, nr_pgdir_pages; - -#define PREPARENEXT \ - { next = cur->link.next; \ - next.val = swp_offset(next) * PAGE_SIZE; \ - } - - if (bdev_read_page(bdev, 0, cur)) return -EIO; - - if ((!memcmp("SWAP-SPACE",cur->swh.magic.magic,10)) || - (!memcmp("SWAPSPACE2",cur->swh.magic.magic,10))) { - printk(KERN_ERR "%sThis is normal swap space\n", name_resume ); - return -EINVAL; - } - - PREPARENEXT; /* We have to read next position before we overwrite it */ - - if (!memcmp("S1",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAP-SPACE",10); - else if (!memcmp("S2",cur->swh.magic.magic,2)) - memcpy(cur->swh.magic.magic,"SWAPSPACE2",10); - else { - if (noresume) - return -EINVAL; - panic("%sUnable to find suspended-data signature (%.10s - misspelled?\n", - name_resume, cur->swh.magic.magic); - } - if (noresume) { - /* We don't do a sanity check here: we want to restore the swap - whatever version of kernel made the suspend image; - We need to write swap, but swap is *not* enabled so - we must write the device directly */ - printk("%s: Fixing swap signatures %s...\n", name_resume, resume_file); - bdev_write_page(bdev, 0, cur); - } - - printk( "%sSignature found, resuming\n", name_resume ); - MDELAY(1000); - - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - if (sanity_check(&cur->sh)) /* Is this same machine? */ - return -EPERM; - PREPARENEXT; - - pagedir_save = cur->sh.suspend_pagedir; - nr_copy_pages = cur->sh.num_pbes; - nr_pgdir_pages = SUSPEND_PD_PAGES(nr_copy_pages); - pagedir_order = get_bitmask_order(nr_pgdir_pages); - - pagedir_nosave = (suspend_pagedir_t *)__get_free_pages(GFP_ATOMIC, pagedir_order); - if (!pagedir_nosave) - return -ENOMEM; + int error; - PRINTK( "%sReading pagedir, ", name_resume ); + if ((error = arch_prepare_suspend())) + return error; - /* We get pages in reverse order of saving! */ - for (i=nr_pgdir_pages-1; i>=0; i--) { - BUG_ON (!next.val); - cur = (union diskpage *)((char *) pagedir_nosave)+i; - if (bdev_read_page(bdev, next.val, cur)) return -EIO; - PREPARENEXT; + local_irq_disable(); + /* At this point, device_suspend() has been called, but *not* + * device_power_down(). We *must* device_power_down() now. + * Otherwise, drivers for some devices (e.g. interrupt controllers) + * become desynchronized with the actual state of the hardware + * at resume time, and evil weirdness ensues. + */ + if ((error = device_power_down(PMSG_FREEZE))) { + printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); + goto Enable_irqs; } - BUG_ON (next.val); - - if (relocate_pagedir()) - return -ENOMEM; - if (check_pagedir()) - return -ENOMEM; - printk( "Reading image data (%d pages): ", nr_copy_pages ); - for(i=0; i < nr_copy_pages; i++) { - swp_entry_t swap_address = (pagedir_nosave+i)->swap_address; - if (!(i%100)) - printk( "." ); - /* You do not need to check for overlaps... - ... check_pagedir already did this work */ - if (bdev_read_page(bdev, swp_offset(swap_address) * PAGE_SIZE, (char *)((pagedir_nosave+i)->address))) - return -EIO; - } - printk( "|\n" ); - return 0; + save_processor_state(); + if ((error = swsusp_arch_suspend())) + printk(KERN_ERR "Error %d suspending\n", error); + /* Restore control flow magically appears here */ + restore_processor_state(); + /* NOTE: device_power_up() is just a resume() for devices + * that suspended with irqs off ... no overall powerup. + */ + device_power_up(); + Enable_irqs: + local_irq_enable(); + return error; } -static int __init read_suspend_image(const char * specialfile, int noresume) +int swsusp_resume(void) { - union diskpage *cur; - unsigned long scratch_page = 0; int error; - char b[BDEVNAME_SIZE]; - resume_device = name_to_dev_t(specialfile); - scratch_page = get_zeroed_page(GFP_ATOMIC); - cur = (void *) scratch_page; - if (cur) { - struct block_device *bdev; - printk("Resuming from device %s\n", - __bdevname(resume_device, b)); - bdev = open_by_devnum(resume_device, FMODE_READ); - if (IS_ERR(bdev)) { - error = PTR_ERR(bdev); - } else { - set_blocksize(bdev, PAGE_SIZE); - error = __read_suspend_image(bdev, cur, noresume); - blkdev_put(bdev); - } - } else error = -ENOMEM; - - if (scratch_page) - free_page(scratch_page); - switch (error) { - case 0: - PRINTK("Reading resume file was successful\n"); - break; - case -EINVAL: - break; - case -EIO: - printk( "%sI/O error\n", name_resume); - break; - case -ENOENT: - printk( "%s%s: No such file or directory\n", name_resume, specialfile); - break; - case -ENOMEM: - printk( "%sNot enough memory\n", name_resume); - break; - default: - printk( "%sError %d resuming\n", name_resume, error ); - } - MDELAY(1000); + local_irq_disable(); + /* NOTE: device_power_down() is just a suspend() with irqs off; + * it has no special "power things down" semantics + */ + if (device_power_down(PMSG_PRETHAW)) + printk(KERN_ERR "Some devices failed to power down, very bad\n"); + /* We'll ignore saved state, but this gets preempt count (etc) right */ + save_processor_state(); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* The code below is only ever reached in case of a failure. + * Otherwise execution continues at place where + * swsusp_arch_suspend() was called + */ + BUG_ON(!error); + /* This call to restore_highmem() undos the previous one */ + restore_highmem(); + } + /* The only reason why swsusp_arch_resume() can fail is memory being + * very tight, so we have to free it as soon as we can to avoid + * subsequent failures + */ + swsusp_free(); + restore_processor_state(); + touch_softlockup_watchdog(); + device_power_up(); + local_irq_enable(); return error; } - -/** - * software_resume - Resume from a saved image. - * - * Called as a late_initcall (so all devices are discovered and - * initialized), we call swsusp to see if we have a saved image or not. - * If so, we quiesce devices, then restore the saved image. We will - * return above (in pm_suspend_disk() ) if everything goes well. - * Otherwise, we fail gracefully and return to the normally - * scheduled program. - * - */ -static int __init software_resume(void) -{ - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Software Suspend has malfunctioning SMP support. Disabled :(\n"); - return -EINVAL; - } - /* We enable the possibility of machine suspend */ - software_suspend_enabled = 1; - if (!resume_status) - return 0; - - printk( "%s", name_resume ); - if (resume_status == NORESUME) { - if(resume_file[0]) - read_suspend_image(resume_file, 1); - printk( "disabled\n" ); - return 0; - } - MDELAY(1000); - - if (pm_prepare_console()) - printk("swsusp: Can't allocate a console... proceeding\n"); - - if (!resume_file[0] && resume_status == RESUME_SPECIFIED) { - printk( "suspension device unspecified\n" ); - return -EINVAL; - } - - printk( "resuming from %s\n", resume_file); - if (read_suspend_image(resume_file, 0)) - goto read_failure; - device_suspend(4); - do_magic(1); - panic("This never returns"); - -read_failure: - pm_restore_console(); - return 0; -} - -late_initcall(software_resume); - -static int __init resume_setup(char *str) -{ - if (resume_status == NORESUME) - return 1; - - strncpy( resume_file, str, 255 ); - resume_status = RESUME_SPECIFIED; - - return 1; -} - -static int __init noresume_setup(char *str) -{ - resume_status = NORESUME; - return 1; -} - -__setup("noresume", noresume_setup); -__setup("resume=", resume_setup); - -EXPORT_SYMBOL(software_suspend); -EXPORT_SYMBOL(software_suspend_enabled);