linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / kernel / power / swsusp.c
1 /*
2  * linux/kernel/power/swsusp.c
3  *
4  * This file provides code to write suspend image to swap and read it back.
5  *
6  * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
7  * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
8  *
9  * This file is released under the GPLv2.
10  *
11  * I'd like to thank the following people for their work:
12  *
13  * Pavel Machek <pavel@ucw.cz>:
14  * Modifications, defectiveness pointing, being with me at the very beginning,
15  * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
16  *
17  * Steve Doddi <dirk@loth.demon.co.uk>:
18  * Support the possibility of hardware state restoring.
19  *
20  * Raph <grey.havens@earthling.net>:
21  * Support for preserving states of network devices and virtual console
22  * (including X and svgatextmode)
23  *
24  * Kurt Garloff <garloff@suse.de>:
25  * Straightened the critical function in order to prevent compilers from
26  * playing tricks with local variables.
27  *
28  * Andreas Mohr <a.mohr@mailto.de>
29  *
30  * Alex Badea <vampire@go.ro>:
31  * Fixed runaway init
32  *
33  * Rafael J. Wysocki <rjw@sisk.pl>
34  * Added the swap map data structure and reworked the handling of swap
35  *
36  * More state savers are welcome. Especially for the scsi layer...
37  *
38  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
39  */
40
41 #include <linux/module.h>
42 #include <linux/mm.h>
43 #include <linux/suspend.h>
44 #include <linux/smp_lock.h>
45 #include <linux/file.h>
46 #include <linux/utsname.h>
47 #include <linux/version.h>
48 #include <linux/delay.h>
49 #include <linux/bitops.h>
50 #include <linux/spinlock.h>
51 #include <linux/genhd.h>
52 #include <linux/kernel.h>
53 #include <linux/major.h>
54 #include <linux/swap.h>
55 #include <linux/pm.h>
56 #include <linux/device.h>
57 #include <linux/buffer_head.h>
58 #include <linux/swapops.h>
59 #include <linux/bootmem.h>
60 #include <linux/syscalls.h>
61 #include <linux/highmem.h>
62 #include <linux/bio.h>
63
64 #include <asm/uaccess.h>
65 #include <asm/mmu_context.h>
66 #include <asm/pgtable.h>
67 #include <asm/tlbflush.h>
68 #include <asm/io.h>
69
70 #include "power.h"
71
72 /*
73  * Preferred image size in bytes (tunable via /sys/power/image_size).
74  * When it is set to N, swsusp will do its best to ensure the image
75  * size will not exceed N bytes, but if that is impossible, it will
76  * try to create the smallest image possible.
77  */
/* Default limit: 500 * 1024 * 1024 bytes. */
unsigned long image_size = 500UL * 1024 * 1024;
79
/*
 * Highmem helpers.  Without CONFIG_HIGHMEM they collapse to local stubs
 * that report zero highmem pages and always succeed; with it they are only
 * declared here and must be provided elsewhere.
 */
#ifndef CONFIG_HIGHMEM
static unsigned int count_highmem_pages(void) { return 0; }
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
#else
unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#endif
89
90 extern char resume_file[];
91
92 #define SWSUSP_SIG      "S1SUSPEND"
93
94 static struct swsusp_header {
95         char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
96         swp_entry_t image;
97         char    orig_sig[10];
98         char    sig[10];
99 } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;
100
101 static struct swsusp_info swsusp_info;
102
103 /*
104  * Saving part...
105  */
106
107 static unsigned short root_swap = 0xffff;
108
109 static int mark_swapfiles(swp_entry_t start)
110 {
111         int error;
112
113         rw_swap_page_sync(READ,
114                           swp_entry(root_swap, 0),
115                           virt_to_page((unsigned long)&swsusp_header));
116         if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
117             !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
118                 memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
119                 memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
120                 swsusp_header.image = start;
121                 error = rw_swap_page_sync(WRITE,
122                                           swp_entry(root_swap, 0),
123                                           virt_to_page((unsigned long)
124                                                        &swsusp_header));
125         } else {
126                 pr_debug("swsusp: Partition is not swap space.\n");
127                 error = -ENODEV;
128         }
129         return error;
130 }
131
132 /*
133  * Check whether the swap device is the specified resume
134  * device, irrespective of whether they are specified by
135  * identical names.
136  *
137  * (Thus, device inode aliasing is allowed.  You can say /dev/hda4
138  * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
139  * and they'll be considered the same device.  This is *necessary* for
140  * devfs, since the resume code can only recognize the form /dev/hda4,
141  * but the suspend code would see the long name.)
142  */
143 static inline int is_resume_device(const struct swap_info_struct *swap_info)
144 {
145         struct file *file = swap_info->swap_file;
146         struct inode *inode = file->f_dentry->d_inode;
147
148         return S_ISBLK(inode->i_mode) &&
149                 swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
150 }
151
152 static int swsusp_swap_check(void) /* This is called before saving image */
153 {
154         int i;
155
156         spin_lock(&swap_lock);
157         for (i = 0; i < MAX_SWAPFILES; i++) {
158                 if (!(swap_info[i].flags & SWP_WRITEOK))
159                         continue;
160                 if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
161                         spin_unlock(&swap_lock);
162                         root_swap = i;
163                         return 0;
164                 }
165         }
166         spin_unlock(&swap_lock);
167         return -ENODEV;
168 }
169
170 /**
171  *      write_page - Write one page to a fresh swap location.
172  *      @addr:  Address we're writing.
173  *      @loc:   Place to store the entry we used.
174  *
175  *      Allocate a new swap entry and 'sync' it. Note we discard -EIO
176  *      errors. That is an artifact left over from swsusp. It did not
177  *      check the return of rw_swap_page_sync() at all, since most pages
178  *      written back to swap would return -EIO.
179  *      This is a partial improvement, since we will at least return other
180  *      errors, though we need to eventually fix the damn code.
181  */
182 static int write_page(unsigned long addr, swp_entry_t *loc)
183 {
184         swp_entry_t entry;
185         int error = -ENOSPC;
186
187         entry = get_swap_page_of_type(root_swap);
188         if (swp_offset(entry)) {
189                 error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
190                 if (!error || error == -EIO)
191                         *loc = entry;
192         }
193         return error;
194 }
195
/**
 *      Swap map-handling functions
 *
 *      The swap map is a data structure used for keeping track of each page
 *      written to the swap.  It consists of many swap_map_page structures,
 *      each of which contains an array of MAP_PAGE_SIZE swap entries.
 *      These structures are linked together with the help of either the
 *      .next (in memory) or the .next_swap (in swap) member.
 *
 *      The swap map is created during suspend.  At that time we need to keep
 *      it in memory, because we have to free all of the allocated swap
 *      entries if an error occurs.  The memory needed is preallocated
 *      so that we know in advance if there's enough of it.
 *
 *      The first swap_map_page structure is filled with the swap entries that
 *      correspond to the first MAP_PAGE_SIZE data pages written to swap and
 *      so on.  After all of the data pages have been written, the order
 *      of the swap_map_page structures in the map is reversed so that they
 *      can be read from swap in the original order.  This causes the data
 *      pages to be loaded in exactly the same order in which they have been
 *      saved.
 *
 *      During resume we only need to use one swap_map_page structure
 *      at a time, which means that we only need to use two memory pages for
 *      reading the image - one for reading the swap_map_page structures
 *      and the second for reading the data pages from swap.
 */
223
224 #define MAP_PAGE_SIZE   ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
225                         / sizeof(swp_entry_t))
226
227 struct swap_map_page {
228         swp_entry_t             entries[MAP_PAGE_SIZE];
229         swp_entry_t             next_swap;
230         struct swap_map_page    *next;
231 };
232
233 static inline void free_swap_map(struct swap_map_page *swap_map)
234 {
235         struct swap_map_page *swp;
236
237         while (swap_map) {
238                 swp = swap_map->next;
239                 free_page((unsigned long)swap_map);
240                 swap_map = swp;
241         }
242 }
243
244 static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
245 {
246         struct swap_map_page *swap_map, *swp;
247         unsigned n = 0;
248
249         if (!nr_pages)
250                 return NULL;
251
252         pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
253         swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
254         swp = swap_map;
255         for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
256                 swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
257                 swp = swp->next;
258                 if (!swp) {
259                         free_swap_map(swap_map);
260                         return NULL;
261                 }
262         }
263         return swap_map;
264 }
265
266 /**
267  *      reverse_swap_map - reverse the order of pages in the swap map
268  *      @swap_map
269  */
270
271 static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
272 {
273         struct swap_map_page *prev, *next;
274
275         prev = NULL;
276         while (swap_map) {
277                 next = swap_map->next;
278                 swap_map->next = prev;
279                 prev = swap_map;
280                 swap_map = next;
281         }
282         return prev;
283 }
284
285 /**
286  *      free_swap_map_entries - free the swap entries allocated to store
287  *      the swap map @swap_map (this is only called in case of an error)
288  */
289 static inline void free_swap_map_entries(struct swap_map_page *swap_map)
290 {
291         while (swap_map) {
292                 if (swap_map->next_swap.val)
293                         swap_free(swap_map->next_swap);
294                 swap_map = swap_map->next;
295         }
296 }
297
298 /**
299  *      save_swap_map - save the swap map used for tracing the data pages
300  *      stored in the swap
301  */
302
303 static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
304 {
305         swp_entry_t entry = (swp_entry_t){0};
306         int error;
307
308         while (swap_map) {
309                 swap_map->next_swap = entry;
310                 if ((error = write_page((unsigned long)swap_map, &entry)))
311                         return error;
312                 swap_map = swap_map->next;
313         }
314         *start = entry;
315         return 0;
316 }
317
318 /**
319  *      free_image_entries - free the swap entries allocated to store
320  *      the image data pages (this is only called in case of an error)
321  */
322
323 static inline void free_image_entries(struct swap_map_page *swp)
324 {
325         unsigned k;
326
327         while (swp) {
328                 for (k = 0; k < MAP_PAGE_SIZE; k++)
329                         if (swp->entries[k].val)
330                                 swap_free(swp->entries[k]);
331                 swp = swp->next;
332         }
333 }
334
/**
 *      The swap_map_handle structure is a cursor over the swap map that
 *      lets it be written and read sequentially, in a file-alike way
 */

struct swap_map_handle {
        struct swap_map_page *cur;      /* map page currently in use */
        unsigned int k;                 /* next slot of cur->entries[] */
};
344
345 static inline void init_swap_map_handle(struct swap_map_handle *handle,
346                                         struct swap_map_page *map)
347 {
348         handle->cur = map;
349         handle->k = 0;
350 }
351
352 static inline int swap_map_write_page(struct swap_map_handle *handle,
353                                       unsigned long addr)
354 {
355         int error;
356
357         error = write_page(addr, handle->cur->entries + handle->k);
358         if (error)
359                 return error;
360         if (++handle->k >= MAP_PAGE_SIZE) {
361                 handle->cur = handle->cur->next;
362                 handle->k = 0;
363         }
364         return 0;
365 }
366
367 /**
368  *      save_image_data - save the data pages pointed to by the PBEs
369  *      from the list @pblist using the swap map handle @handle
370  *      (assume there are @nr_pages data pages to save)
371  */
372
373 static int save_image_data(struct pbe *pblist,
374                            struct swap_map_handle *handle,
375                            unsigned int nr_pages)
376 {
377         unsigned int m;
378         struct pbe *p;
379         int error = 0;
380
381         printk("Saving image data pages (%u pages) ...     ", nr_pages);
382         m = nr_pages / 100;
383         if (!m)
384                 m = 1;
385         nr_pages = 0;
386         for_each_pbe (p, pblist) {
387                 error = swap_map_write_page(handle, p->address);
388                 if (error)
389                         break;
390                 if (!(nr_pages % m))
391                         printk("\b\b\b\b%3d%%", nr_pages / m);
392                 nr_pages++;
393         }
394         if (!error)
395                 printk("\b\b\b\bdone\n");
396         return error;
397 }
398
399 static void dump_info(void)
400 {
401         pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
402         pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
403         pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
404         pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
405         pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
406         pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
407         pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
408         pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
409         pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
410         pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
411         pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
412 }
413
414 static void init_header(unsigned int nr_pages)
415 {
416         memset(&swsusp_info, 0, sizeof(swsusp_info));
417         swsusp_info.version_code = LINUX_VERSION_CODE;
418         swsusp_info.num_physpages = num_physpages;
419         memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
420
421         swsusp_info.cpus = num_online_cpus();
422         swsusp_info.image_pages = nr_pages;
423         swsusp_info.pages = nr_pages +
424                 ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
425 }
426
427 /**
428  *      pack_orig_addresses - the .orig_address fields of the PBEs from the
429  *      list starting at @pbe are stored in the array @buf[] (1 page)
430  */
431
432 static inline struct pbe *pack_orig_addresses(unsigned long *buf,
433                                               struct pbe *pbe)
434 {
435         int j;
436
437         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
438                 buf[j] = pbe->orig_address;
439                 pbe = pbe->next;
440         }
441         if (!pbe)
442                 for (; j < PAGE_SIZE / sizeof(long); j++)
443                         buf[j] = 0;
444         return pbe;
445 }
446
447 /**
448  *      save_image_metadata - save the .orig_address fields of the PBEs
449  *      from the list @pblist using the swap map handle @handle
450  */
451
452 static int save_image_metadata(struct pbe *pblist,
453                                struct swap_map_handle *handle)
454 {
455         unsigned long *buf;
456         unsigned int n = 0;
457         struct pbe *p;
458         int error = 0;
459
460         printk("Saving image metadata ... ");
461         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
462         if (!buf)
463                 return -ENOMEM;
464         p = pblist;
465         while (p) {
466                 p = pack_orig_addresses(buf, p);
467                 error = swap_map_write_page(handle, (unsigned long)buf);
468                 if (error)
469                         break;
470                 n++;
471         }
472         free_page((unsigned long)buf);
473         if (!error)
474                 printk("done (%u pages saved)\n", n);
475         return error;
476 }
477
478 /**
479  *      enough_swap - Make sure we have enough swap to save the image.
480  *
481  *      Returns TRUE or FALSE after checking the total amount of swap
482  *      space avaiable from the resume partition.
483  */
484
485 static int enough_swap(unsigned int nr_pages)
486 {
487         unsigned int free_swap = swap_info[root_swap].pages -
488                 swap_info[root_swap].inuse_pages;
489
490         pr_debug("swsusp: free swap pages: %u\n", free_swap);
491         return free_swap > (nr_pages + PAGES_FOR_IO +
492                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
493 }
494
495 /**
496  *      swsusp_write - Write entire image and metadata.
497  *
498  *      It is important _NOT_ to umount filesystems at this point. We want
499  *      them synced (in case something goes wrong) but we DO not want to mark
500  *      filesystem clean: it is not. (And it does not matter, if we resume
501  *      correctly, we'll mark system clean, anyway.)
502  */
503
504 int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
505 {
506         struct swap_map_page *swap_map;
507         struct swap_map_handle handle;
508         swp_entry_t start;
509         int error;
510
511         if ((error = swsusp_swap_check())) {
512                 printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
513                 return error;
514         }
515         if (!enough_swap(nr_pages)) {
516                 printk(KERN_ERR "swsusp: Not enough free swap\n");
517                 return -ENOSPC;
518         }
519
520         init_header(nr_pages);
521         swap_map = alloc_swap_map(swsusp_info.pages);
522         if (!swap_map)
523                 return -ENOMEM;
524         init_swap_map_handle(&handle, swap_map);
525
526         error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
527         if (!error)
528                 error = save_image_metadata(pblist, &handle);
529         if (!error)
530                 error = save_image_data(pblist, &handle, nr_pages);
531         if (error)
532                 goto Free_image_entries;
533
534         swap_map = reverse_swap_map(swap_map);
535         error = save_swap_map(swap_map, &start);
536         if (error)
537                 goto Free_map_entries;
538
539         dump_info();
540         printk( "S" );
541         error = mark_swapfiles(start);
542         printk( "|\n" );
543         if (error)
544                 goto Free_map_entries;
545
546 Free_swap_map:
547         free_swap_map(swap_map);
548         return error;
549
550 Free_map_entries:
551         free_swap_map_entries(swap_map);
552 Free_image_entries:
553         free_image_entries(swap_map);
554         goto Free_swap_map;
555 }
556
557 /**
558  *      swsusp_shrink_memory -  Try to free as much memory as needed
559  *
560  *      ... but do not OOM-kill anyone
561  *
562  *      Notice: all userland should be stopped before it is called, or
563  *      livelock is possible.
564  */
565
566 #define SHRINK_BITE     10000
567
568 int swsusp_shrink_memory(void)
569 {
570         long size, tmp;
571         struct zone *zone;
572         unsigned long pages = 0;
573         unsigned int i = 0;
574         char *p = "-\\|/";
575
576         printk("Shrinking memory...  ");
577         do {
578                 size = 2 * count_highmem_pages();
579                 size += size / 50 + count_data_pages();
580                 size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
581                         PAGES_FOR_IO;
582                 tmp = size;
583                 for_each_zone (zone)
584                         if (!is_highmem(zone))
585                                 tmp -= zone->free_pages;
586                 if (tmp > 0) {
587                         tmp = shrink_all_memory(SHRINK_BITE);
588                         if (!tmp)
589                                 return -ENOMEM;
590                         pages += tmp;
591                 } else if (size > image_size / PAGE_SIZE) {
592                         tmp = shrink_all_memory(SHRINK_BITE);
593                         pages += tmp;
594                 }
595                 printk("\b%c", p[i++%4]);
596         } while (tmp > 0);
597         printk("\bdone (%lu pages freed)\n", pages);
598
599         return 0;
600 }
601
602 int swsusp_suspend(void)
603 {
604         int error;
605
606         if ((error = arch_prepare_suspend()))
607                 return error;
608         local_irq_disable();
609         /* At this point, device_suspend() has been called, but *not*
610          * device_power_down(). We *must* device_power_down() now.
611          * Otherwise, drivers for some devices (e.g. interrupt controllers)
612          * become desynchronized with the actual state of the hardware
613          * at resume time, and evil weirdness ensues.
614          */
615         if ((error = device_power_down(PMSG_FREEZE))) {
616                 printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
617                 goto Enable_irqs;
618         }
619
620         if ((error = save_highmem())) {
621                 printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
622                 goto Restore_highmem;
623         }
624
625         save_processor_state();
626         if ((error = swsusp_arch_suspend()))
627                 printk(KERN_ERR "Error %d suspending\n", error);
628         /* Restore control flow magically appears here */
629         restore_processor_state();
630 Restore_highmem:
631         restore_highmem();
632         device_power_up();
633 Enable_irqs:
634         local_irq_enable();
635         return error;
636 }
637
638 int swsusp_resume(void)
639 {
640         int error;
641         local_irq_disable();
642         if (device_power_down(PMSG_FREEZE))
643                 printk(KERN_ERR "Some devices failed to power down, very bad\n");
644         /* We'll ignore saved state, but this gets preempt count (etc) right */
645         save_processor_state();
646         error = swsusp_arch_resume();
647         /* Code below is only ever reached in case of failure. Otherwise
648          * execution continues at place where swsusp_arch_suspend was called
649          */
650         BUG_ON(!error);
651         /* The only reason why swsusp_arch_resume() can fail is memory being
652          * very tight, so we have to free it as soon as we can to avoid
653          * subsequent failures
654          */
655         swsusp_free();
656         restore_processor_state();
657         restore_highmem();
658         touch_softlockup_watchdog();
659         device_power_up();
660         local_irq_enable();
661         return error;
662 }
663
664 /**
665  *      mark_unsafe_pages - mark the pages that cannot be used for storing
666  *      the image during resume, because they conflict with the pages that
667  *      had been used before suspend
668  */
669
670 static void mark_unsafe_pages(struct pbe *pblist)
671 {
672         struct zone *zone;
673         unsigned long zone_pfn;
674         struct pbe *p;
675
676         if (!pblist) /* a sanity check */
677                 return;
678
679         /* Clear page flags */
680         for_each_zone (zone) {
681                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
682                         if (pfn_valid(zone_pfn + zone->zone_start_pfn))
683                                 ClearPageNosaveFree(pfn_to_page(zone_pfn +
684                                         zone->zone_start_pfn));
685         }
686
687         /* Mark orig addresses */
688         for_each_pbe (p, pblist)
689                 SetPageNosaveFree(virt_to_page(p->orig_address));
690
691 }
692
693 static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
694 {
695         /* We assume both lists contain the same number of elements */
696         while (src) {
697                 dst->orig_address = src->orig_address;
698                 dst = dst->next;
699                 src = src->next;
700         }
701 }
702
703 /*
704  *      Using bio to read from swap.
705  *      This code requires a bit more work than just using buffer heads
706  *      but, it is the recommended way for 2.5/2.6.
707  *      The following are to signal the beginning and end of I/O. Bios
708  *      finish asynchronously, while we want them to happen synchronously.
709  *      A simple atomic_t, and a wait loop take care of this problem.
710  */
711
712 static atomic_t io_done = ATOMIC_INIT(0);
713
714 static int end_io(struct bio *bio, unsigned int num, int err)
715 {
716         if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
717                 panic("I/O error reading memory image");
718         atomic_set(&io_done, 0);
719         return 0;
720 }
721
722 static struct block_device *resume_bdev;
723
724 /**
725  *      submit - submit BIO request.
726  *      @rw:    READ or WRITE.
727  *      @off    physical offset of page.
728  *      @page:  page we're reading or writing.
729  *
730  *      Straight from the textbook - allocate and initialize the bio.
731  *      If we're writing, make sure the page is marked as dirty.
732  *      Then submit it and wait.
733  */
734
735 static int submit(int rw, pgoff_t page_off, void *page)
736 {
737         int error = 0;
738         struct bio *bio;
739
740         bio = bio_alloc(GFP_ATOMIC, 1);
741         if (!bio)
742                 return -ENOMEM;
743         bio->bi_sector = page_off * (PAGE_SIZE >> 9);
744         bio->bi_bdev = resume_bdev;
745         bio->bi_end_io = end_io;
746
747         if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
748                 printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
749                 error = -EFAULT;
750                 goto Done;
751         }
752
753
754         atomic_set(&io_done, 1);
755         submit_bio(rw | (1 << BIO_RW_SYNC), bio);
756         while (atomic_read(&io_done))
757                 yield();
758         if (rw == READ)
759                 bio_set_pages_dirty(bio);
760  Done:
761         bio_put(bio);
762         return error;
763 }
764
765 static int bio_read_page(pgoff_t page_off, void *page)
766 {
767         return submit(READ, page_off, page);
768 }
769
770 static int bio_write_page(pgoff_t page_off, void *page)
771 {
772         return submit(WRITE, page_off, page);
773 }
774
775 /**
776  *      The following functions allow us to read data using a swap map
777  *      in a file-alike way
778  */
779
780 static inline void release_swap_map_reader(struct swap_map_handle *handle)
781 {
782         if (handle->cur)
783                 free_page((unsigned long)handle->cur);
784         handle->cur = NULL;
785 }
786
787 static inline int get_swap_map_reader(struct swap_map_handle *handle,
788                                       swp_entry_t start)
789 {
790         int error;
791
792         if (!swp_offset(start))
793                 return -EINVAL;
794         handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
795         if (!handle->cur)
796                 return -ENOMEM;
797         error = bio_read_page(swp_offset(start), handle->cur);
798         if (error) {
799                 release_swap_map_reader(handle);
800                 return error;
801         }
802         handle->k = 0;
803         return 0;
804 }
805
806 static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
807 {
808         unsigned long offset;
809         int error;
810
811         if (!handle->cur)
812                 return -EINVAL;
813         offset = swp_offset(handle->cur->entries[handle->k]);
814         if (!offset)
815                 return -EINVAL;
816         error = bio_read_page(offset, buf);
817         if (error)
818                 return error;
819         if (++handle->k >= MAP_PAGE_SIZE) {
820                 handle->k = 0;
821                 offset = swp_offset(handle->cur->next_swap);
822                 if (!offset)
823                         release_swap_map_reader(handle);
824                 else
825                         error = bio_read_page(offset, handle->cur);
826         }
827         return error;
828 }
829
830 static int check_header(void)
831 {
832         char *reason = NULL;
833
834         dump_info();
835         if (swsusp_info.version_code != LINUX_VERSION_CODE)
836                 reason = "kernel version";
837         if (swsusp_info.num_physpages != num_physpages)
838                 reason = "memory size";
839         if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
840                 reason = "system type";
841         if (strcmp(swsusp_info.uts.release,system_utsname.release))
842                 reason = "kernel release";
843         if (strcmp(swsusp_info.uts.version,system_utsname.version))
844                 reason = "version";
845         if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
846                 reason = "machine";
847         if (reason) {
848                 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
849                 return -EPERM;
850         }
851         return 0;
852 }
853
854 /**
855  *      load_image_data - load the image data using the swap map handle
856  *      @handle and store them using the page backup list @pblist
857  *      (assume there are @nr_pages pages to load)
858  */
859
860 static int load_image_data(struct pbe *pblist,
861                            struct swap_map_handle *handle,
862                            unsigned int nr_pages)
863 {
864         int error;
865         unsigned int m;
866         struct pbe *p;
867
868         if (!pblist)
869                 return -EINVAL;
870         printk("Loading image data pages (%u pages) ...     ", nr_pages);
871         m = nr_pages / 100;
872         if (!m)
873                 m = 1;
874         nr_pages = 0;
875         p = pblist;
876         while (p) {
877                 error = swap_map_read_page(handle, (void *)p->address);
878                 if (error)
879                         break;
880                 p = p->next;
881                 if (!(nr_pages % m))
882                         printk("\b\b\b\b%3d%%", nr_pages / m);
883                 nr_pages++;
884         }
885         if (!error)
886                 printk("\b\b\b\bdone\n");
887         return error;
888 }
889
890 /**
891  *      unpack_orig_addresses - copy the elements of @buf[] (1 page) to
892  *      the PBEs in the list starting at @pbe
893  */
894
895 static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
896                                                 struct pbe *pbe)
897 {
898         int j;
899
900         for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
901                 pbe->orig_address = buf[j];
902                 pbe = pbe->next;
903         }
904         return pbe;
905 }
906
907 /**
908  *      load_image_metadata - load the image metadata using the swap map
909  *      handle @handle and put them into the PBEs in the list @pblist
910  */
911
912 static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
913 {
914         struct pbe *p;
915         unsigned long *buf;
916         unsigned int n = 0;
917         int error = 0;
918
919         printk("Loading image metadata ... ");
920         buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
921         if (!buf)
922                 return -ENOMEM;
923         p = pblist;
924         while (p) {
925                 error = swap_map_read_page(handle, buf);
926                 if (error)
927                         break;
928                 p = unpack_orig_addresses(buf, p);
929                 n++;
930         }
931         free_page((unsigned long)buf);
932         if (!error)
933                 printk("done (%u pages loaded)\n", n);
934         return error;
935 }
936
937 int swsusp_read(struct pbe **pblist_ptr)
938 {
939         int error;
940         struct pbe *p, *pblist;
941         struct swap_map_handle handle;
942         unsigned int nr_pages;
943
944         if (IS_ERR(resume_bdev)) {
945                 pr_debug("swsusp: block device not initialised\n");
946                 return PTR_ERR(resume_bdev);
947         }
948
949         error = get_swap_map_reader(&handle, swsusp_header.image);
950         if (!error)
951                 error = swap_map_read_page(&handle, &swsusp_info);
952         if (!error)
953                 error = check_header();
954         if (error)
955                 return error;
956         nr_pages = swsusp_info.image_pages;
957         p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
958         if (!p)
959                 return -ENOMEM;
960         error = load_image_metadata(p, &handle);
961         if (!error) {
962                 mark_unsafe_pages(p);
963                 pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
964                 if (pblist)
965                         copy_page_backup_list(pblist, p);
966                 free_pagedir(p);
967                 if (!pblist)
968                         error = -ENOMEM;
969
970                 /* Allocate memory for the image and read the data from swap */
971                 if (!error)
972                         error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
973                 if (!error) {
974                         release_eaten_pages();
975                         error = load_image_data(pblist, &handle, nr_pages);
976                 }
977                 if (!error)
978                         *pblist_ptr = pblist;
979         }
980         release_swap_map_reader(&handle);
981
982         blkdev_put(resume_bdev);
983
984         if (!error)
985                 pr_debug("swsusp: Reading resume file was successful\n");
986         else
987                 pr_debug("swsusp: Error %d resuming\n", error);
988         return error;
989 }
990
991 /**
992  *      swsusp_check - Check for swsusp signature in the resume device
993  */
994
995 int swsusp_check(void)
996 {
997         int error;
998
999         resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
1000         if (!IS_ERR(resume_bdev)) {
1001                 set_blocksize(resume_bdev, PAGE_SIZE);
1002                 memset(&swsusp_header, 0, sizeof(swsusp_header));
1003                 if ((error = bio_read_page(0, &swsusp_header)))
1004                         return error;
1005                 if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
1006                         memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
1007                         /* Reset swap signature now */
1008                         error = bio_write_page(0, &swsusp_header);
1009                 } else {
1010                         return -EINVAL;
1011                 }
1012                 if (error)
1013                         blkdev_put(resume_bdev);
1014                 else
1015                         pr_debug("swsusp: Signature found, resuming\n");
1016         } else {
1017                 error = PTR_ERR(resume_bdev);
1018         }
1019
1020         if (error)
1021                 pr_debug("swsusp: Error %d check for resume file\n", error);
1022
1023         return error;
1024 }
1025
1026 /**
1027  *      swsusp_close - close swap device.
1028  */
1029
1030 void swsusp_close(void)
1031 {
1032         if (IS_ERR(resume_bdev)) {
1033                 pr_debug("swsusp: block device not initialised\n");
1034                 return;
1035         }
1036
1037         blkdev_put(resume_bdev);
1038 }