X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fpower%2Fswsusp.c;h=c4016cbbd3e05611b7b7dccd9898be340365f31f;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=90b3b68dee3f71df12ad9b7b370e9b001a403630;hpb=cee37fe97739d85991964371c1f3a745c00dd236;p=linux-2.6.git diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 90b3b68de..c4016cbbd 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1,21 +1,20 @@ /* * linux/kernel/power/swsusp.c * - * This file is to realize architecture-independent - * machine suspend feature using pretty near only high-level routines + * This file provides code to write suspend image to swap and read it back. * * Copyright (C) 1998-2001 Gabor Kuti - * Copyright (C) 1998,2001-2004 Pavel Machek + * Copyright (C) 1998,2001-2005 Pavel Machek * * This file is released under the GPLv2. * * I'd like to thank the following people for their work: - * + * * Pavel Machek : * Modifications, defectiveness pointing, being with me at the very beginning, * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17. * - * Steve Doddi : + * Steve Doddi : * Support the possibility of hardware state restoring. * * Raph : @@ -31,857 +30,190 @@ * Alex Badea : * Fixed runaway init * + * Rafael J. Wysocki + * Reworked the freeing of memory and the handling of swap + * * More state savers are welcome. Especially for the scsi layer... * * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt */ -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include #include #include #include -#include -#include #include #include #include -#include #include -#include - -#include -#include -#include -#include -#include #include "power.h" -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - -/* Variables to be preserved over suspend */ -static int nr_copy_pages_check; - -extern char resume_file[]; - -/* Local variables that should not be affected by save */ -unsigned int nr_copy_pages __nosavedata = 0; - -/* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume - - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - - Warning: this is even more evil than it seems. Pagedirs this file - talks about are completely different from page directories used by - MMU hardware. - */ -suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -static suspend_pagedir_t *pagedir_save; - -#define SWSUSP_SIG "S1SUSPEND" - -static struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; - swp_entry_t swsusp_info; - char orig_sig[10]; - char sig[10]; -} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; - -static struct swsusp_info swsusp_info; - /* - * XXX: We try to keep some more pages free so that I/O operations succeed - * without paging. Might this be more? - */ -#define PAGES_FOR_IO 512 - -/* - * Saving part... - */ - -/* We memorize in swapfile_used what swap devices are used for suspension */ -#define SWAPFILE_UNUSED 0 -#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ -#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ - -static unsigned short swapfile_used[MAX_SWAPFILES]; -static unsigned short root_swap; - -static int mark_swapfiles(swp_entry_t prev) -{ - int error; - - rw_swap_page_sync(READ, - swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header)); - if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || - !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { - memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); - memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.swsusp_info = prev; - error = rw_swap_page_sync(WRITE, - swp_entry(root_swap, 0), - virt_to_page((unsigned long) - &swsusp_header)); - } else { - pr_debug("swsusp: Partition is not swap space.\n"); - error = -ENODEV; - } - return error; -} - -/* - * Check whether the swap device is the specified resume - * device, irrespective of whether they are specified by - * identical names. - * - * (Thus, device inode aliasing is allowed. You can say /dev/hda4 - * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs] - * and they'll be considered the same device. This is *necessary* for - * devfs, since the resume code can only recognize the form /dev/hda4, - * but the suspend code would see the long name.) - */ -static int is_resume_device(const struct swap_info_struct *swap_info) -{ - struct file *file = swap_info->swap_file; - struct inode *inode = file->f_dentry->d_inode; - - return S_ISBLK(inode->i_mode) && - swsusp_resume_device == MKDEV(imajor(inode), iminor(inode)); -} - -static int swsusp_swap_check(void) /* This is called before saving image */ -{ - int i, len; - - len=strlen(resume_file); - root_swap = 0xFFFF; - - swap_list_lock(); - for(i=0; iswap_address; - if (entry.val) - swap_free(entry); - else - break; - (pagedir_nosave + i)->swap_address = (swp_entry_t){0}; - } -} - -/** - * data_write - Write saved image to swap. - * - * Walk the list of pages in the image and sync each one to swap. + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. */ -static int data_write(void) -{ - int error = 0, i = 0; - unsigned int mod = nr_copy_pages / 100; - struct pbe *p; - - if (!mod) - mod = 1; - - printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); - for_each_pbe(p, pagedir_nosave) { - if (!(i%mod)) - printk( "\b\b\b\b%3d%%", i / mod ); - if ((error = write_page(p->address, &(p->swap_address)))) - return error; - i++; - } - printk("\b\b\b\bdone\n"); - return error; -} - -static void dump_info(void) -{ - pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code); - pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages); - pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname); - pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename); - pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release); - pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version); - pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine); - pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); - pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); - pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); - pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); -} - -static void init_header(void) -{ - memset(&swsusp_info, 0, sizeof(swsusp_info)); - swsusp_info.version_code = LINUX_VERSION_CODE; - swsusp_info.num_physpages = num_physpages; - memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); - - swsusp_info.suspend_pagedir = pagedir_nosave; - swsusp_info.cpus = num_online_cpus(); - swsusp_info.image_pages = nr_copy_pages; -} - -static int close_swap(void) -{ - swp_entry_t entry; - int error; - - dump_info(); - error = write_page((unsigned long)&swsusp_info, &entry); - if (!error) { - printk( "S" ); - error = mark_swapfiles(entry); - printk( "|\n" ); - } - return error; -} - -/** - * free_pagedir_entries - Free pages used by the page directory. - * - * This is used during suspend for error recovery. - */ - -static void free_pagedir_entries(void) -{ - int i; - - for (i = 0; i < swsusp_info.pagedir_pages; i++) - swap_free(swsusp_info.pagedir[i]); -} - - -/** - * write_pagedir - Write the array of pages holding the page directory. - * @last: Last swap entry we write (needed for header). - */ - -static int write_pagedir(void) -{ - int error = 0; - unsigned n = 0; - struct pbe * pbe; - - printk( "Writing pagedir..."); - for_each_pb_page(pbe, pagedir_nosave) { - if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) - return error; - } - - swsusp_info.pagedir_pages = n; - printk("done (%u pages)\n", n); - return error; -} - -/** - * write_suspend_image - Write entire image and metadata. - * - */ - -static int write_suspend_image(void) -{ - int error; - - init_header(); - if ((error = data_write())) - goto FreeData; - - if ((error = write_pagedir())) - goto FreePagedir; - - if ((error = close_swap())) - goto FreePagedir; - Done: - return error; - FreePagedir: - free_pagedir_entries(); - FreeData: - data_free(); - goto Done; -} +unsigned long image_size = 500 * 1024 * 1024; +int in_suspend __nosavedata = 0; #ifdef CONFIG_HIGHMEM -struct highmem_page { - char *data; - struct page *page; - struct highmem_page *next; -}; - -static struct highmem_page *highmem_copy; - -static int save_highmem_zone(struct zone *zone) -{ - unsigned long zone_pfn; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - struct highmem_page *save; - void *kaddr; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - - if (!(pfn%1000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - /* - * This condition results from rvmalloc() sans vmalloc_32() - * and architectural memory reservations. This should be - * corrected eventually when the cases giving rise to this - * are better understood. - */ - if (PageReserved(page)) { - printk("highmem reserved page?!\n"); - continue; - } - BUG_ON(PageNosave(page)); - if (PageNosaveFree(page)) - continue; - save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); - if (!save) - return -ENOMEM; - save->next = highmem_copy; - save->page = page; - save->data = (void *) get_zeroed_page(GFP_ATOMIC); - if (!save->data) { - kfree(save); - return -ENOMEM; - } - kaddr = kmap_atomic(page, KM_USER0); - memcpy(save->data, kaddr, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - highmem_copy = save; - } - return 0; -} -#endif /* CONFIG_HIGHMEM */ - - -static int save_highmem(void) -{ -#ifdef CONFIG_HIGHMEM - struct zone *zone; - int res = 0; - - pr_debug("swsusp: Saving Highmem\n"); - for_each_zone(zone) { - if (is_highmem(zone)) - res = save_highmem_zone(zone); - if (res) - return res; - } +unsigned int count_highmem_pages(void); +int save_highmem(void); +int restore_highmem(void); +#else +static int save_highmem(void) { return 0; } +static int restore_highmem(void) { return 0; } +static unsigned int count_highmem_pages(void) { return 0; } #endif - return 0; -} - -static int restore_highmem(void) -{ -#ifdef CONFIG_HIGHMEM - printk("swsusp: Restoring Highmem\n"); - while (highmem_copy) { - struct highmem_page *save = highmem_copy; - void *kaddr; - highmem_copy = save->next; - - kaddr = kmap_atomic(save->page, KM_USER0); - memcpy(kaddr, save->data, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - free_page((long) save->data); - kfree(save); - } -#endif - return 0; -} - - -static int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} /** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page + * The following functions are used for tracing the allocated + * swap pages, so that they can be freed in case of an error. * - * We save a page if it's Reserved, and not in the range of pages - * statically defined as 'unsaveable', or if it isn't reserved, and - * isn't part of a free chunk of pages. + * The functions operate on a linked bitmap structure defined + * in power.h */ -static int saveable(struct zone * zone, unsigned long * zone_pfn) +void free_bitmap(struct bitmap_page *bitmap) { - unsigned long pfn = *zone_pfn + zone->zone_start_pfn; - struct page * page; - - if (!pfn_valid(pfn)) - return 0; + struct bitmap_page *bp; - page = pfn_to_page(pfn); - BUG_ON(PageReserved(page) && PageNosave(page)); - if (PageNosave(page)) - return 0; - if (PageReserved(page) && pfn_is_nosave(pfn)) { - pr_debug("[nosave pfn 0x%lx]", pfn); - return 0; + while (bitmap) { + bp = bitmap->next; + free_page((unsigned long)bitmap); + bitmap = bp; } - if (PageNosaveFree(page)) - return 0; - - return 1; } -static void count_data_pages(void) +struct bitmap_page *alloc_bitmap(unsigned int nr_bits) { - struct zone *zone; - unsigned long zone_pfn; - - nr_copy_pages = 0; - - for_each_zone(zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - nr_copy_pages += saveable(zone, &zone_pfn); - } -} + struct bitmap_page *bitmap, *bp; + unsigned int n; + if (!nr_bits) + return NULL; -static void copy_data_pages(void) -{ - struct zone *zone; - unsigned long zone_pfn; - struct pbe * pbe = pagedir_nosave; - - pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages); - for_each_zone(zone) { - if (is_highmem(zone)) - continue; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - if (saveable(zone, &zone_pfn)) { - struct page * page; - page = pfn_to_page(zone_pfn + zone->zone_start_pfn); - BUG_ON(!pbe); - pbe->orig_address = (long) page_address(page); - /* copy_page is not usable for copying task structs. */ - memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE); - pbe = pbe->next; - } + bitmap = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); + bp = bitmap; + for (n = BITMAP_PAGE_BITS; n < nr_bits; n += BITMAP_PAGE_BITS) { + bp->next = (struct bitmap_page *)get_zeroed_page(GFP_KERNEL); + bp = bp->next; + if (!bp) { + free_bitmap(bitmap); + return NULL; } } - BUG_ON(pbe); -} - - -/** - * calc_nr - Determine the number of pages needed for a pbe list. - */ - -static int calc_nr(int nr_copy) -{ - int extra = 0; - int mod = !!(nr_copy % PBES_PER_PAGE); - int diff = (nr_copy / PBES_PER_PAGE) + mod; - - do { - extra += diff; - nr_copy += diff; - mod = !!(nr_copy % PBES_PER_PAGE); - diff = (nr_copy / PBES_PER_PAGE) + mod - extra; - } while (diff > 0); - - return nr_copy; + return bitmap; } -/** - * free_pagedir - free pages allocated with alloc_pagedir() - */ - -static inline void free_pagedir(struct pbe *pblist) +static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) { - struct pbe *pbe; + unsigned int n; - while (pblist) { - pbe = (pblist + PB_PAGE_SKIP)->next; - free_page((unsigned long)pblist); - pblist = pbe; + n = BITMAP_PAGE_BITS; + while (bitmap && n <= bit) { + n += BITMAP_PAGE_BITS; + bitmap = bitmap->next; } + if (!bitmap) + return -EINVAL; + n -= BITMAP_PAGE_BITS; + bit -= n; + n = 0; + while (bit >= BITS_PER_CHUNK) { + bit -= BITS_PER_CHUNK; + n++; + } + bitmap->chunks[n] |= (1UL << bit); + return 0; } -/** - * fill_pb_page - Create a list of PBEs on a given memory page - */ - -static inline void fill_pb_page(struct pbe *pbpage) -{ - struct pbe *p; - - p = pbpage; - pbpage += PB_PAGE_SKIP; - do - p->next = p + 1; - while (++p < pbpage); -} - -/** - * create_pbe_list - Create a list of PBEs on top of a given chain - * of memory pages allocated with alloc_pagedir() - */ - -static void create_pbe_list(struct pbe *pblist, unsigned nr_pages) -{ - struct pbe *pbpage, *p; - unsigned num = PBES_PER_PAGE; - - for_each_pb_page (pbpage, pblist) { - if (num >= nr_pages) - break; - - fill_pb_page(pbpage); - num += PBES_PER_PAGE; - } - if (pbpage) { - for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++) - p->next = p + 1; - p->next = NULL; - } - pr_debug("create_pbe_list(): initialized %d PBEs\n", num); -} - -/** - * alloc_pagedir - Allocate the page directory. - * - * First, determine exactly how many pages we need and - * allocate them. - * - * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE - * struct pbe elements (pbes) and the last element in the page points - * to the next page. - * - * On each page we set up a list of struct_pbe elements. - */ - -static struct pbe * alloc_pagedir(unsigned nr_pages) -{ - unsigned num; - struct pbe *pblist, *pbe; - - if (!nr_pages) - return NULL; - - pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); - pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; - pbe = pbe->next, num += PBES_PER_PAGE) { - pbe += PB_PAGE_SKIP; - pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - } - if (!pbe) { /* get_zeroed_page() failed */ - free_pagedir(pblist); - pblist = NULL; - } - return pblist; -} - -/** - * free_image_pages - Free pages allocated for snapshot - */ - -static void free_image_pages(void) +unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap) { - struct pbe * p; + unsigned long offset; - for_each_pbe(p, pagedir_save) { - if (p->address) { - ClearPageNosave(virt_to_page(p->address)); - free_page(p->address); - p->address = 0; + offset = swp_offset(get_swap_page_of_type(swap)); + if (offset) { + if (bitmap_set(bitmap, offset)) { + swap_free(swp_entry(swap, offset)); + offset = 0; } } + return offset; } -/** - * alloc_image_pages - Allocate pages for the snapshot. - */ - -static int alloc_image_pages(void) -{ - struct pbe * p; - - for_each_pbe(p, pagedir_save) { - p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD); - if (!p->address) - return -ENOMEM; - SetPageNosave(virt_to_page(p->address)); - } - return 0; -} - -void swsusp_free(void) +void free_all_swap_pages(int swap, struct bitmap_page *bitmap) { - BUG_ON(PageNosave(virt_to_page(pagedir_save))); - BUG_ON(PageNosaveFree(virt_to_page(pagedir_save))); - free_image_pages(); - free_pagedir(pagedir_save); -} - - -/** - * enough_free_mem - Make sure we enough free memory to snapshot. - * - * Returns TRUE or FALSE after checking the number of available - * free pages. - */ + unsigned int bit, n; + unsigned long test; -static int enough_free_mem(void) -{ - if (nr_free_pages() < (nr_copy_pages + PAGES_FOR_IO)) { - pr_debug("swsusp: Not enough free pages: Have %d\n", - nr_free_pages()); - return 0; + bit = 0; + while (bitmap) { + for (n = 0; n < BITMAP_PAGE_CHUNKS; n++) + for (test = 1UL; test; test <<= 1) { + if (bitmap->chunks[n] & test) + swap_free(swp_entry(swap, bit)); + bit++; + } + bitmap = bitmap->next; } - return 1; } - /** - * enough_swap - Make sure we have enough swap to save the image. + * swsusp_shrink_memory - Try to free as much memory as needed * - * Returns TRUE or FALSE after checking the total amount of swap - * space avaiable. + * ... but do not OOM-kill anyone * - * FIXME: si_swapinfo(&i) returns all swap devices information. - * We should only consider resume_device. + * Notice: all userland should be stopped before it is called, or + * livelock is possible. */ -static int enough_swap(void) -{ - struct sysinfo i; - - si_swapinfo(&i); - if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) { - pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap); - return 0; - } - return 1; -} - -static int swsusp_alloc(void) -{ - int error; - - pr_debug("suspend: (pages needed: %d + %d free: %d)\n", - nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); +#define SHRINK_BITE 10000 - pagedir_nosave = NULL; - if (!enough_free_mem()) - return -ENOMEM; - - if (!enough_swap()) - return -ENOSPC; - - nr_copy_pages = calc_nr(nr_copy_pages); - - if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { - printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); - return -ENOMEM; - } - create_pbe_list(pagedir_save, nr_copy_pages); - pagedir_nosave = pagedir_save; - if ((error = alloc_image_pages())) { - printk(KERN_ERR "suspend: Allocating image pages failed.\n"); - swsusp_free(); - return error; - } - - nr_copy_pages_check = nr_copy_pages; - return 0; -} - -static int suspend_prepare_image(void) +int swsusp_shrink_memory(void) { - int error; - - pr_debug("swsusp: critical section: \n"); - if (save_highmem()) { - printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n"); - restore_highmem(); - return -ENOMEM; - } - - drain_local_pages(); - count_data_pages(); - printk("swsusp: Need to copy %u pages\n", nr_copy_pages); - - error = swsusp_alloc(); - if (error) - return error; - - /* During allocating of suspend pagedir, new cold pages may appear. - * Kill them. - */ - drain_local_pages(); - copy_data_pages(); + long size, tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; - /* - * End of critical section. From now on, we can write to memory, - * but we should not touch disk. This specially means we must _not_ - * touch swap space! Except we must write out our image of course. - */ + printk("Shrinking memory... "); + do { + size = 2 * count_highmem_pages(); + size += size / 50 + count_data_pages(); + size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + + PAGES_FOR_IO; + tmp = size; + for_each_zone (zone) + if (!is_highmem(zone)) + tmp -= zone->free_pages; + if (tmp > 0) { + tmp = shrink_all_memory(SHRINK_BITE); + if (!tmp) + return -ENOMEM; + pages += tmp; + } else if (size > image_size / PAGE_SIZE) { + tmp = shrink_all_memory(SHRINK_BITE); + pages += tmp; + } + printk("\b%c", p[i++%4]); + } while (tmp > 0); + printk("\bdone (%lu pages freed)\n", pages); - printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages ); return 0; } - -/* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) - */ -int swsusp_write(void) -{ - int error; - device_resume(); - lock_swapdevices(); - error = write_suspend_image(); - /* This will unlock ignored swap devices since writing is finished */ - lock_swapdevices(); - return error; - -} - - -extern asmlinkage int swsusp_arch_suspend(void); -extern asmlinkage int swsusp_arch_resume(void); - - -asmlinkage int swsusp_save(void) -{ - int error = 0; - - if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try " - "swapon -a!\n"); - return error; - } - return suspend_prepare_image(); -} - int swsusp_suspend(void) { int error; + if ((error = arch_prepare_suspend())) return error; local_irq_disable(); @@ -893,18 +225,23 @@ int swsusp_suspend(void) */ if ((error = device_power_down(PMSG_FREEZE))) { printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); - local_irq_enable(); - swsusp_free(); - return error; + goto Enable_irqs; + } + + if ((error = save_highmem())) { + printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); + goto Restore_highmem; } + save_processor_state(); if ((error = swsusp_arch_suspend())) - swsusp_free(); + printk(KERN_ERR "Error %d suspending\n", error); /* Restore control flow magically appears here */ restore_processor_state(); - BUG_ON (nr_copy_pages_check != nr_copy_pages); +Restore_highmem: restore_highmem(); device_power_up(); +Enable_irqs: local_irq_enable(); return error; } @@ -922,512 +259,15 @@ int swsusp_resume(void) * execution continues at place where swsusp_arch_suspend was called */ BUG_ON(!error); + /* The only reason why swsusp_arch_resume() can fail is memory being + * very tight, so we have to free it as soon as we can to avoid + * subsequent failures + */ + swsusp_free(); restore_processor_state(); restore_highmem(); + touch_softlockup_watchdog(); device_power_up(); local_irq_enable(); return error; } - -/* More restore stuff */ - -/* - * Returns true if given address/order collides with any orig_address - */ -static int does_collide_order(unsigned long addr, int order) -{ - int i; - - for (i=0; i < (1<address = 0UL; - - for_each_pbe (p, pblist) { - p->address = get_usable_page(GFP_ATOMIC); - if (!p->address) - return -ENOMEM; - } - return 0; -} - -/** - * swsusp_pagedir_relocate - It is possible, that some memory pages - * occupied by the list of PBEs collide with pages where we're going to - * restore from the loaded pages later. We relocate them here. - */ - -static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) -{ - struct zone *zone; - unsigned long zone_pfn; - struct pbe *pbpage, *tail, *p; - void *m; - int rel = 0, error = 0; - - if (!pblist) /* a sanity check */ - return NULL; - - pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", - swsusp_info.pagedir_pages); - - /* Set page flags */ - - for_each_zone(zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - SetPageNosaveFree(pfn_to_page(zone_pfn + - zone->zone_start_pfn)); - } - - /* Clear orig addresses */ - - for_each_pbe (p, pblist) - ClearPageNosaveFree(virt_to_page(p->orig_address)); - - tail = pblist + PB_PAGE_SKIP; - - /* Relocate colliding pages */ - - for_each_pb_page (pbpage, pblist) { - if (does_collide_order((unsigned long)pbpage, 0)) { - m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD); - if (!m) { - error = -ENOMEM; - break; - } - memcpy(m, (void *)pbpage, PAGE_SIZE); - if (pbpage == pblist) - pblist = (struct pbe *)m; - else - tail->next = (struct pbe *)m; - - eat_page((void *)pbpage); - pbpage = (struct pbe *)m; - - /* We have to link the PBEs again */ - - for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) - if (p->next) /* needed to save the end */ - p->next = p + 1; - - rel++; - } - tail = pbpage + PB_PAGE_SKIP; - } - - if (error) { - printk("\nswsusp: Out of memory\n\n"); - free_pagedir(pblist); - free_eaten_memory(); - pblist = NULL; - } - else - printk("swsusp: Relocated %d pages\n", rel); - - return pblist; -} - -/* - * Using bio to read from swap. - * This code requires a bit more work than just using buffer heads - * but, it is the recommended way for 2.5/2.6. - * The following are to signal the beginning and end of I/O. Bios - * finish asynchronously, while we want them to happen synchronously. - * A simple atomic_t, and a wait loop take care of this problem. - */ - -static atomic_t io_done = ATOMIC_INIT(0); - -static int end_io(struct bio * bio, unsigned int num, int err) -{ - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - panic("I/O error reading memory image"); - atomic_set(&io_done, 0); - return 0; -} - -static struct block_device * resume_bdev; - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * - * Straight from the textbook - allocate and initialize the bio. - * If we're writing, make sure the page is marked as dirty. - * Then submit it and wait. - */ - -static int submit(int rw, pgoff_t page_off, void * page) -{ - int error = 0; - struct bio * bio; - - bio = bio_alloc(GFP_ATOMIC, 1); - if (!bio) - return -ENOMEM; - bio->bi_sector = page_off * (PAGE_SIZE >> 9); - bio_get(bio); - bio->bi_bdev = resume_bdev; - bio->bi_end_io = end_io; - - if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) { - printk("swsusp: ERROR: adding page to bio at %ld\n",page_off); - error = -EFAULT; - goto Done; - } - - if (rw == WRITE) - bio_set_pages_dirty(bio); - - atomic_set(&io_done, 1); - submit_bio(rw | (1 << BIO_RW_SYNC), bio); - while (atomic_read(&io_done)) - yield(); - - Done: - bio_put(bio); - return error; -} - -static int bio_read_page(pgoff_t page_off, void * page) -{ - return submit(READ, page_off, page); -} - -static int bio_write_page(pgoff_t page_off, void * page) -{ - return submit(WRITE, page_off, page); -} - -/* - * Sanity check if this image makes sense with this kernel/swap context - * I really don't think that it's foolproof but more than nothing.. - */ - -static const char * sanity_check(void) -{ - dump_info(); - if(swsusp_info.version_code != LINUX_VERSION_CODE) - return "kernel version"; - if(swsusp_info.num_physpages != num_physpages) - return "memory size"; - if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) - return "system type"; - if (strcmp(swsusp_info.uts.release,system_utsname.release)) - return "kernel release"; - if (strcmp(swsusp_info.uts.version,system_utsname.version)) - return "version"; - if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) - return "machine"; - if(swsusp_info.cpus != num_online_cpus()) - return "number of cpus"; - return NULL; -} - - -static int check_header(void) -{ - const char * reason = NULL; - int error; - - if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) - return error; - - /* Is this same machine? */ - if ((reason = sanity_check())) { - printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); - return -EPERM; - } - nr_copy_pages = swsusp_info.image_pages; - return error; -} - -static int check_sig(void) -{ - int error; - - memset(&swsusp_header, 0, sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header))) - return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); - - /* - * Reset swap signature now. - */ - error = bio_write_page(0, &swsusp_header); - } else { - printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n"); - return -EINVAL; - } - if (!error) - pr_debug("swsusp: Signature found, resuming\n"); - return error; -} - -/** - * data_read - Read image pages from swap. - * - * You do not need to check for overlaps, check_pagedir() - * already did that. - */ - -static int data_read(struct pbe *pblist) -{ - struct pbe * p; - int error = 0; - int i = 0; - int mod = swsusp_info.image_pages / 100; - - if (!mod) - mod = 1; - - printk("swsusp: Reading image data (%lu pages): ", - swsusp_info.image_pages); - - for_each_pbe (p, pblist) { - if (!(i % mod)) - printk("\b\b\b\b%3d%%", i / mod); - - error = bio_read_page(swp_offset(p->swap_address), - (void *)p->address); - if (error) - return error; - - i++; - } - printk("\b\b\b\bdone\n"); - return error; -} - -extern dev_t name_to_dev_t(const char *line); - -/** - * read_pagedir - Read page backup list pages from swap - */ - -static int read_pagedir(struct pbe *pblist) -{ - struct pbe *pbpage, *p; - unsigned i = 0; - int error; - - if (!pblist) - return -EFAULT; - - printk("swsusp: Reading pagedir (%lu pages)\n", - swsusp_info.pagedir_pages); - - for_each_pb_page (pbpage, pblist) { - unsigned long offset = swp_offset(swsusp_info.pagedir[i++]); - - error = -EFAULT; - if (offset) { - p = (pbpage + PB_PAGE_SKIP)->next; - error = bio_read_page(offset, (void *)pbpage); - (pbpage + PB_PAGE_SKIP)->next = p; - } - if (error) - break; - } - - if (error) - free_page((unsigned long)pblist); - - BUG_ON(i != swsusp_info.pagedir_pages); - - return error; -} - - -static int check_suspend_image(void) -{ - int error = 0; - - if ((error = check_sig())) - return error; - - if ((error = check_header())) - return error; - - return 0; -} - -static int read_suspend_image(void) -{ - int error = 0; - struct pbe *p; - - if (!(p = alloc_pagedir(nr_copy_pages))) - return -ENOMEM; - - if ((error = read_pagedir(p))) - return error; - - create_pbe_list(p, nr_copy_pages); - - if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) - return -ENOMEM; - - /* Allocate memory for the image and read the data from swap */ - - error = check_pagedir(pagedir_nosave); - free_eaten_memory(); - if (!error) - error = data_read(pagedir_nosave); - - if (error) { /* We fail cleanly */ - for_each_pbe (p, pagedir_nosave) - if (p->address) { - free_page(p->address); - p->address = 0UL; - } - free_pagedir(pagedir_nosave); - } - return error; -} - -/** - * swsusp_check - Check for saved image in swap - */ - -int swsusp_check(void) -{ - int error; - - if (!swsusp_resume_device) { - if (!strlen(resume_file)) - return -ENOENT; - swsusp_resume_device = name_to_dev_t(resume_file); - pr_debug("swsusp: Resume From Partition %s\n", resume_file); - } else { - pr_debug("swsusp: Resume From Partition %d:%d\n", - MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - } - - resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); - if (!IS_ERR(resume_bdev)) { - set_blocksize(resume_bdev, PAGE_SIZE); - error = check_suspend_image(); - if (error) - blkdev_put(resume_bdev); - } else - error = PTR_ERR(resume_bdev); - - if (!error) - pr_debug("swsusp: resume file found\n"); - else - pr_debug("swsusp: Error %d check for resume file\n", error); - return error; -} - -/** - * swsusp_read - Read saved image from swap. - */ - -int swsusp_read(void) -{ - int error; - - if (IS_ERR(resume_bdev)) { - pr_debug("swsusp: block device not initialised\n"); - return PTR_ERR(resume_bdev); - } - - error = read_suspend_image(); - blkdev_put(resume_bdev); - - if (!error) - pr_debug("swsusp: Reading resume file was successful\n"); - else - pr_debug("swsusp: Error %d resuming\n", error); - return error; -} - -/** - * swsusp_close - close swap device. - */ - -void swsusp_close(void) -{ - if (IS_ERR(resume_bdev)) { - pr_debug("swsusp: block device not initialised\n"); - return; - } - - blkdev_put(resume_bdev); -}