linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / kernel / power / snapshot.c
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  *
8  * This file is released under the GPLv2, and is based on swsusp.c.
9  *
10  */
11
12
13 #include <linux/module.h>
14 #include <linux/mm.h>
15 #include <linux/suspend.h>
16 #include <linux/smp_lock.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/bootmem.h>
24 #include <linux/syscalls.h>
25 #include <linux/console.h>
26 #include <linux/highmem.h>
27
28 #include <asm/uaccess.h>
29 #include <asm/mmu_context.h>
30 #include <asm/pgtable.h>
31 #include <asm/tlbflush.h>
32 #include <asm/io.h>
33
34 #include "power.h"
35
/* Head of the PBE list describing the snapshot image; set by swsusp_save(). */
struct pbe *pagedir_nosave;
/* Number of pages copied into the image; set by swsusp_save(). */
unsigned int nr_copy_pages;
38
39 #ifdef CONFIG_HIGHMEM
40 unsigned int count_highmem_pages(void)
41 {
42         struct zone *zone;
43         unsigned long zone_pfn;
44         unsigned int n = 0;
45
46         for_each_zone (zone)
47                 if (is_highmem(zone)) {
48                         mark_free_pages(zone);
49                         for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
50                                 struct page *page;
51                                 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
52                                 if (!pfn_valid(pfn))
53                                         continue;
54                                 page = pfn_to_page(pfn);
55                                 if (PageReserved(page))
56                                         continue;
57                                 if (PageNosaveFree(page))
58                                         continue;
59                                 n++;
60                         }
61                 }
62         return n;
63 }
64
/*
 * One saved highmem page.  @data is the buffer holding the copy, @page is
 * the highmem page the copy was taken from and @next links the entries
 * into a singly linked list.
 */
struct highmem_page {
	char *data;
	struct page *page;
	struct highmem_page *next;
};

/*
 * Head of the list of saved highmem pages: entries are pushed by
 * save_highmem_zone() and popped and freed by restore_highmem().
 */
static struct highmem_page *highmem_copy;
72
73 static int save_highmem_zone(struct zone *zone)
74 {
75         unsigned long zone_pfn;
76         mark_free_pages(zone);
77         for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
78                 struct page *page;
79                 struct highmem_page *save;
80                 void *kaddr;
81                 unsigned long pfn = zone_pfn + zone->zone_start_pfn;
82
83                 if (!(pfn%1000))
84                         printk(".");
85                 if (!pfn_valid(pfn))
86                         continue;
87                 page = pfn_to_page(pfn);
88                 /*
89                  * This condition results from rvmalloc() sans vmalloc_32()
90                  * and architectural memory reservations. This should be
91                  * corrected eventually when the cases giving rise to this
92                  * are better understood.
93                  */
94                 if (PageReserved(page))
95                         continue;
96                 BUG_ON(PageNosave(page));
97                 if (PageNosaveFree(page))
98                         continue;
99                 save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
100                 if (!save)
101                         return -ENOMEM;
102                 save->next = highmem_copy;
103                 save->page = page;
104                 save->data = (void *) get_zeroed_page(GFP_ATOMIC);
105                 if (!save->data) {
106                         kfree(save);
107                         return -ENOMEM;
108                 }
109                 kaddr = kmap_atomic(page, KM_USER0);
110                 memcpy(save->data, kaddr, PAGE_SIZE);
111                 kunmap_atomic(kaddr, KM_USER0);
112                 highmem_copy = save;
113         }
114         return 0;
115 }
116
117 int save_highmem(void)
118 {
119         struct zone *zone;
120         int res = 0;
121
122         pr_debug("swsusp: Saving Highmem\n");
123         for_each_zone (zone) {
124                 if (is_highmem(zone))
125                         res = save_highmem_zone(zone);
126                 if (res)
127                         return res;
128         }
129         return 0;
130 }
131
132 int restore_highmem(void)
133 {
134         printk("swsusp: Restoring Highmem\n");
135         while (highmem_copy) {
136                 struct highmem_page *save = highmem_copy;
137                 void *kaddr;
138                 highmem_copy = save->next;
139
140                 kaddr = kmap_atomic(save->page, KM_USER0);
141                 memcpy(kaddr, save->data, PAGE_SIZE);
142                 kunmap_atomic(kaddr, KM_USER0);
143                 free_page((long) save->data);
144                 kfree(save);
145         }
146         return 0;
147 }
148 #endif
149
150 static int pfn_is_nosave(unsigned long pfn)
151 {
152         unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
153         unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
154         return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
155 }
156
157 /**
158  *      saveable - Determine whether a page should be cloned or not.
159  *      @pfn:   The page
160  *
161  *      We save a page if it's Reserved, and not in the range of pages
162  *      statically defined as 'unsaveable', or if it isn't reserved, and
163  *      isn't part of a free chunk of pages.
164  */
165
166 static int saveable(struct zone *zone, unsigned long *zone_pfn)
167 {
168         unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
169         struct page *page;
170
171         if (!pfn_valid(pfn))
172                 return 0;
173
174         page = pfn_to_page(pfn);
175         BUG_ON(PageReserved(page) && PageNosave(page));
176         if (PageNosave(page))
177                 return 0;
178         if (PageReserved(page) && pfn_is_nosave(pfn))
179                 return 0;
180         if (PageNosaveFree(page))
181                 return 0;
182
183         return 1;
184 }
185
186 unsigned int count_data_pages(void)
187 {
188         struct zone *zone;
189         unsigned long zone_pfn;
190         unsigned int n = 0;
191
192         for_each_zone (zone) {
193                 if (is_highmem(zone))
194                         continue;
195                 mark_free_pages(zone);
196                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
197                         n += saveable(zone, &zone_pfn);
198         }
199         return n;
200 }
201
202 static void copy_data_pages(struct pbe *pblist)
203 {
204         struct zone *zone;
205         unsigned long zone_pfn;
206         struct pbe *pbe, *p;
207
208         pbe = pblist;
209         for_each_zone (zone) {
210                 if (is_highmem(zone))
211                         continue;
212                 mark_free_pages(zone);
213                 /* This is necessary for swsusp_free() */
214                 for_each_pb_page (p, pblist)
215                         SetPageNosaveFree(virt_to_page(p));
216                 for_each_pbe (p, pblist)
217                         SetPageNosaveFree(virt_to_page(p->address));
218                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
219                         if (saveable(zone, &zone_pfn)) {
220                                 struct page *page;
221                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
222                                 BUG_ON(!pbe);
223                                 pbe->orig_address = (unsigned long)page_address(page);
224                                 /* copy_page is not usable for copying task structs. */
225                                 memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
226                                 pbe = pbe->next;
227                         }
228                 }
229         }
230         BUG_ON(pbe);
231 }
232
233
234 /**
235  *      free_pagedir - free pages allocated with alloc_pagedir()
236  */
237
238 void free_pagedir(struct pbe *pblist)
239 {
240         struct pbe *pbe;
241
242         while (pblist) {
243                 pbe = (pblist + PB_PAGE_SKIP)->next;
244                 ClearPageNosave(virt_to_page(pblist));
245                 ClearPageNosaveFree(virt_to_page(pblist));
246                 free_page((unsigned long)pblist);
247                 pblist = pbe;
248         }
249 }
250
251 /**
252  *      fill_pb_page - Create a list of PBEs on a given memory page
253  */
254
255 static inline void fill_pb_page(struct pbe *pbpage)
256 {
257         struct pbe *p;
258
259         p = pbpage;
260         pbpage += PB_PAGE_SKIP;
261         do
262                 p->next = p + 1;
263         while (++p < pbpage);
264 }
265
266 /**
267  *      create_pbe_list - Create a list of PBEs on top of a given chain
268  *      of memory pages allocated with alloc_pagedir()
269  */
270
271 static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
272 {
273         struct pbe *pbpage, *p;
274         unsigned int num = PBES_PER_PAGE;
275
276         for_each_pb_page (pbpage, pblist) {
277                 if (num >= nr_pages)
278                         break;
279
280                 fill_pb_page(pbpage);
281                 num += PBES_PER_PAGE;
282         }
283         if (pbpage) {
284                 for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
285                         p->next = p + 1;
286                 p->next = NULL;
287         }
288 }
289
/**
 *      On resume it is necessary to trace and eventually free the unsafe
 *      pages that have been allocated, because they are needed for I/O
 *      (on x86-64 we likely will "eat" these pages once again while
 *      creating the temporary page translation tables)
 */

/*
 * Overlay for an eaten page: the list link is stored in the page itself
 * and the padding brings the structure up to PAGE_SIZE bytes.
 */
struct eaten_page {
        struct eaten_page *next;
        char padding[PAGE_SIZE - sizeof(void *)];
};

/*
 * Head of the list of eaten pages; pushed to by alloc_image_page() and
 * emptied by release_eaten_pages().
 */
static struct eaten_page *eaten_pages = NULL;
303
304 void release_eaten_pages(void)
305 {
306         struct eaten_page *p, *q;
307
308         p = eaten_pages;
309         while (p) {
310                 q = p->next;
311                 /* We don't want swsusp_free() to free this page again */
312                 ClearPageNosave(virt_to_page(p));
313                 free_page((unsigned long)p);
314                 p = q;
315         }
316         eaten_pages = NULL;
317 }
318
319 /**
320  *      @safe_needed - on resume, for storing the PBE list and the image,
321  *      we can only use memory pages that do not conflict with the pages
322  *      which had been used before suspend.
323  *
324  *      The unsafe pages are marked with the PG_nosave_free flag
325  *
326  *      Allocated but unusable (ie eaten) memory pages should be marked
327  *      so that swsusp_free() can release them
328  */
329
330 static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
331 {
332         void *res;
333
334         if (safe_needed)
335                 do {
336                         res = (void *)get_zeroed_page(gfp_mask);
337                         if (res && PageNosaveFree(virt_to_page(res))) {
338                                 /* This is for swsusp_free() */
339                                 SetPageNosave(virt_to_page(res));
340                                 ((struct eaten_page *)res)->next = eaten_pages;
341                                 eaten_pages = res;
342                         }
343                 } while (res && PageNosaveFree(virt_to_page(res)));
344         else
345                 res = (void *)get_zeroed_page(gfp_mask);
346         if (res) {
347                 SetPageNosave(virt_to_page(res));
348                 SetPageNosaveFree(virt_to_page(res));
349         }
350         return res;
351 }
352
353 unsigned long get_safe_page(gfp_t gfp_mask)
354 {
355         return (unsigned long)alloc_image_page(gfp_mask, 1);
356 }
357
358 /**
359  *      alloc_pagedir - Allocate the page directory.
360  *
361  *      First, determine exactly how many pages we need and
362  *      allocate them.
363  *
364  *      We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
365  *      struct pbe elements (pbes) and the last element in the page points
366  *      to the next page.
367  *
368  *      On each page we set up a list of struct_pbe elements.
369  */
370
371 struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
372 {
373         unsigned int num;
374         struct pbe *pblist, *pbe;
375
376         if (!nr_pages)
377                 return NULL;
378
379         pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
380         pblist = alloc_image_page(gfp_mask, safe_needed);
381         /* FIXME: rewrite this ugly loop */
382         for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
383                         pbe = pbe->next, num += PBES_PER_PAGE) {
384                 pbe += PB_PAGE_SKIP;
385                 pbe->next = alloc_image_page(gfp_mask, safe_needed);
386         }
387         if (!pbe) { /* get_zeroed_page() failed */
388                 free_pagedir(pblist);
389                 pblist = NULL;
390         } else
391                 create_pbe_list(pblist, nr_pages);
392         return pblist;
393 }
394
395 /**
396  * Free pages we allocated for suspend. Suspend pages are alocated
397  * before atomic copy, so we need to free them after resume.
398  */
399
400 void swsusp_free(void)
401 {
402         struct zone *zone;
403         unsigned long zone_pfn;
404
405         for_each_zone(zone) {
406                 for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
407                         if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
408                                 struct page *page;
409                                 page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
410                                 if (PageNosave(page) && PageNosaveFree(page)) {
411                                         ClearPageNosave(page);
412                                         ClearPageNosaveFree(page);
413                                         free_page((long) page_address(page));
414                                 }
415                         }
416         }
417 }
418
419
420 /**
421  *      enough_free_mem - Make sure we enough free memory to snapshot.
422  *
423  *      Returns TRUE or FALSE after checking the number of available
424  *      free pages.
425  */
426
427 static int enough_free_mem(unsigned int nr_pages)
428 {
429         struct zone *zone;
430         unsigned int n = 0;
431
432         for_each_zone (zone)
433                 if (!is_highmem(zone))
434                         n += zone->free_pages;
435         pr_debug("swsusp: available memory: %u pages\n", n);
436         return n > (nr_pages + PAGES_FOR_IO +
437                 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
438 }
439
440 int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
441 {
442         struct pbe *p;
443
444         for_each_pbe (p, pblist) {
445                 p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed);
446                 if (!p->address)
447                         return -ENOMEM;
448         }
449         return 0;
450 }
451
452 static struct pbe *swsusp_alloc(unsigned int nr_pages)
453 {
454         struct pbe *pblist;
455
456         if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) {
457                 printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
458                 return NULL;
459         }
460
461         if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
462                 printk(KERN_ERR "suspend: Allocating image pages failed.\n");
463                 swsusp_free();
464                 return NULL;
465         }
466
467         return pblist;
468 }
469
470 asmlinkage int swsusp_save(void)
471 {
472         unsigned int nr_pages;
473
474         pr_debug("swsusp: critical section: \n");
475
476         drain_local_pages();
477         nr_pages = count_data_pages();
478         printk("swsusp: Need to copy %u pages\n", nr_pages);
479
480         pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
481                  nr_pages,
482                  (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
483                  PAGES_FOR_IO, nr_free_pages());
484
485         if (!enough_free_mem(nr_pages)) {
486                 printk(KERN_ERR "swsusp: Not enough free memory\n");
487                 return -ENOMEM;
488         }
489
490         pagedir_nosave = swsusp_alloc(nr_pages);
491         if (!pagedir_nosave)
492                 return -ENOMEM;
493
494         /* During allocating of suspend pagedir, new cold pages may appear.
495          * Kill them.
496          */
497         drain_local_pages();
498         copy_data_pages(pagedir_nosave);
499
500         /*
501          * End of critical section. From now on, we can write to memory,
502          * but we should not touch disk. This specially means we must _not_
503          * touch swap space! Except we must write out our image of course.
504          */
505
506         nr_copy_pages = nr_pages;
507
508         printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
509         return 0;
510 }