Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / kernel / power / snapshot.c
index 3eeedbb..81fe8de 100644 (file)
@@ -150,9 +150,13 @@ int restore_highmem(void)
        }
        return 0;
 }
+#else
+static inline unsigned int count_highmem_pages(void) {return 0;}
+static inline int save_highmem(void) {return 0;}
+static inline int restore_highmem(void) {return 0;}
 #endif
 
-static int pfn_is_nosave(unsigned long pfn)
+static inline int pfn_is_nosave(unsigned long pfn)
 {
        unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
        unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
@@ -163,43 +167,43 @@ static int pfn_is_nosave(unsigned long pfn)
  *     saveable - Determine whether a page should be cloned or not.
  *     @pfn:   The page
  *
- *     We save a page if it's Reserved, and not in the range of pages
- *     statically defined as 'unsaveable', or if it isn't reserved, and
- *     isn't part of a free chunk of pages.
+ *     We save a page if it isn't Nosave, and is not in the range of pages
+ *     statically defined as 'unsaveable', and it
+ *     isn't part of a free chunk of pages.
  */
 
-static int saveable(struct zone *zone, unsigned long *zone_pfn)
+static struct page *saveable_page(unsigned long pfn)
 {
-       unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
        struct page *page;
 
        if (!pfn_valid(pfn))
-               return 0;
+               return NULL;
 
        page = pfn_to_page(pfn);
-       BUG_ON(PageReserved(page) && PageNosave(page));
+
        if (PageNosave(page))
-               return 0;
+               return NULL;
        if (PageReserved(page) && pfn_is_nosave(pfn))
-               return 0;
+               return NULL;
        if (PageNosaveFree(page))
-               return 0;
+               return NULL;
 
-       return 1;
+       return page;
 }
 
 unsigned int count_data_pages(void)
 {
        struct zone *zone;
-       unsigned long zone_pfn;
+       unsigned long pfn, max_zone_pfn;
        unsigned int n = 0;
 
        for_each_zone (zone) {
                if (is_highmem(zone))
                        continue;
                mark_free_pages(zone);
-               for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
-                       n += saveable(zone, &zone_pfn);
+               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+                       n += !!saveable_page(pfn);
        }
        return n;
 }
@@ -207,7 +211,7 @@ unsigned int count_data_pages(void)
 static void copy_data_pages(struct pbe *pblist)
 {
        struct zone *zone;
-       unsigned long zone_pfn;
+       unsigned long pfn, max_zone_pfn;
        struct pbe *pbe, *p;
 
        pbe = pblist;
@@ -220,14 +224,21 @@ static void copy_data_pages(struct pbe *pblist)
                        SetPageNosaveFree(virt_to_page(p));
                for_each_pbe (p, pblist)
                        SetPageNosaveFree(virt_to_page(p->address));
-               for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
-                       if (saveable(zone, &zone_pfn)) {
-                               struct page *page;
-                               page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+                       struct page *page = saveable_page(pfn);
+
+                       if (page) {
+                               long *src, *dst;
+                               int n;
+
                                BUG_ON(!pbe);
                                pbe->orig_address = (unsigned long)page_address(page);
-                               /* copy_page is not usable for copying task structs. */
-                               memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
+                               /* copy_page and memcpy are not usable for copying task structs. */
+                               dst = (long *)pbe->address;
+                               src = (long *)pbe->orig_address;
+                               for (n = PAGE_SIZE / sizeof(long); n; n--)
+                                       *dst++ = *src++;
                                pbe = pbe->next;
                        }
                }
@@ -293,62 +304,29 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
        }
 }
 
-/**
- *     On resume it is necessary to trace and eventually free the unsafe
- *     pages that have been allocated, because they are needed for I/O
- *     (on x86-64 we likely will "eat" these pages once again while
- *     creating the temporary page translation tables)
- */
-
-struct eaten_page {
-       struct eaten_page *next;
-       char padding[PAGE_SIZE - sizeof(void *)];
-};
-
-static struct eaten_page *eaten_pages = NULL;
-
-static void release_eaten_pages(void)
-{
-       struct eaten_page *p, *q;
-
-       p = eaten_pages;
-       while (p) {
-               q = p->next;
-               /* We don't want swsusp_free() to free this page again */
-               ClearPageNosave(virt_to_page(p));
-               free_page((unsigned long)p);
-               p = q;
-       }
-       eaten_pages = NULL;
-}
+static unsigned int unsafe_pages;
 
 /**
  *     @safe_needed - on resume, for storing the PBE list and the image,
  *     we can only use memory pages that do not conflict with the pages
- *     which had been used before suspend.
+ *     used before suspend.
  *
  *     The unsafe pages are marked with the PG_nosave_free flag
- *
- *     Allocated but unusable (ie eaten) memory pages should be marked
- *     so that swsusp_free() can release them
+ *     and we count them using unsafe_pages
  */
 
-static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
+static void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 {
        void *res;
 
+       res = (void *)get_zeroed_page(gfp_mask);
        if (safe_needed)
-               do {
+               while (res && PageNosaveFree(virt_to_page(res))) {
+                       /* The page is unsafe, mark it for swsusp_free() */
+                       SetPageNosave(virt_to_page(res));
+                       unsafe_pages++;
                        res = (void *)get_zeroed_page(gfp_mask);
-                       if (res && PageNosaveFree(virt_to_page(res))) {
-                               /* This is for swsusp_free() */
-                               SetPageNosave(virt_to_page(res));
-                               ((struct eaten_page *)res)->next = eaten_pages;
-                               eaten_pages = res;
-                       }
-               } while (res && PageNosaveFree(virt_to_page(res)));
-       else
-               res = (void *)get_zeroed_page(gfp_mask);
+               }
        if (res) {
                SetPageNosave(virt_to_page(res));
                SetPageNosaveFree(virt_to_page(res));
@@ -374,7 +352,8 @@ unsigned long get_safe_page(gfp_t gfp_mask)
  *     On each page we set up a list of struct_pbe elements.
  */
 
-struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
+static struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask,
+                                int safe_needed)
 {
        unsigned int num;
        struct pbe *pblist, *pbe;
@@ -405,13 +384,14 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
 void swsusp_free(void)
 {
        struct zone *zone;
-       unsigned long zone_pfn;
+       unsigned long pfn, max_zone_pfn;
 
        for_each_zone(zone) {
-               for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
-                       if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
-                               struct page *page;
-                               page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
+               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+                       if (pfn_valid(pfn)) {
+                               struct page *page = pfn_to_page(pfn);
+
                                if (PageNosave(page) && PageNosaveFree(page)) {
                                        ClearPageNosave(page);
                                        ClearPageNosaveFree(page);
@@ -575,7 +555,7 @@ static inline struct pbe *pack_orig_addresses(unsigned long *buf, struct pbe *pb
 
 int snapshot_read_next(struct snapshot_handle *handle, size_t count)
 {
-       if (handle->page > nr_meta_pages + nr_copy_pages)
+       if (handle->cur > nr_meta_pages + nr_copy_pages)
                return 0;
        if (!buffer) {
                /* This makes the buffer be freed by swsusp_free() */
@@ -588,8 +568,8 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
                handle->buffer = buffer;
                handle->pbe = pagedir_nosave;
        }
-       if (handle->prev < handle->page) {
-               if (handle->page <= nr_meta_pages) {
+       if (handle->prev < handle->cur) {
+               if (handle->cur <= nr_meta_pages) {
                        handle->pbe = pack_orig_addresses(buffer, handle->pbe);
                        if (!handle->pbe)
                                handle->pbe = pagedir_nosave;
@@ -597,15 +577,15 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
                        handle->buffer = (void *)handle->pbe->address;
                        handle->pbe = handle->pbe->next;
                }
-               handle->prev = handle->page;
+               handle->prev = handle->cur;
        }
-       handle->buf_offset = handle->page_offset;
-       if (handle->page_offset + count >= PAGE_SIZE) {
-               count = PAGE_SIZE - handle->page_offset;
-               handle->page_offset = 0;
-               handle->page++;
+       handle->buf_offset = handle->cur_offset;
+       if (handle->cur_offset + count >= PAGE_SIZE) {
+               count = PAGE_SIZE - handle->cur_offset;
+               handle->cur_offset = 0;
+               handle->cur++;
        } else {
-               handle->page_offset += count;
+               handle->cur_offset += count;
        }
        handle->offset += count;
        return count;
@@ -620,7 +600,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
 static int mark_unsafe_pages(struct pbe *pblist)
 {
        struct zone *zone;
-       unsigned long zone_pfn;
+       unsigned long pfn, max_zone_pfn;
        struct pbe *p;
 
        if (!pblist) /* a sanity check */
@@ -628,10 +608,10 @@ static int mark_unsafe_pages(struct pbe *pblist)
 
        /* Clear page flags */
        for_each_zone (zone) {
-               for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
-                       if (pfn_valid(zone_pfn + zone->zone_start_pfn))
-                               ClearPageNosaveFree(pfn_to_page(zone_pfn +
-                                       zone->zone_start_pfn));
+               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+                       if (pfn_valid(pfn))
+                               ClearPageNosaveFree(pfn_to_page(pfn));
        }
 
        /* Mark orig addresses */
@@ -642,6 +622,8 @@ static int mark_unsafe_pages(struct pbe *pblist)
                        return -EFAULT;
        }
 
+       unsafe_pages = 0;
+
        return 0;
 }
 
@@ -719,42 +701,99 @@ static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
 }
 
 /**
- *     create_image - use metadata contained in the PBE list
+ *     prepare_image - use metadata contained in the PBE list
  *     pointed to by pagedir_nosave to mark the pages that will
  *     be overwritten in the process of restoring the system
- *     memory state from the image and allocate memory for
- *     the image avoiding these pages
+ *     memory state from the image ("unsafe" pages) and allocate
+ *     memory for the image
+ *
+ *     The idea is to allocate the PBE list first and then
+ *     allocate as many pages as are needed for the image data,
+ *     but not to assign these pages to the PBEs initially.
+ *     Instead, we just mark them as allocated and create a list
+ *     of "safe" pages which will be used later
  */
 
-static int create_image(struct snapshot_handle *handle)
+struct safe_page {
+       struct safe_page *next;
+       char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct safe_page *safe_pages;
+
+static int prepare_image(struct snapshot_handle *handle)
 {
        int error = 0;
-       struct pbe *p, *pblist;
+       unsigned int nr_pages = nr_copy_pages;
+       struct pbe *p, *pblist = NULL;
 
        p = pagedir_nosave;
        error = mark_unsafe_pages(p);
        if (!error) {
-               pblist = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
+               pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
                if (pblist)
                        copy_page_backup_list(pblist, p);
                free_pagedir(p, 0);
                if (!pblist)
                        error = -ENOMEM;
        }
-       if (!error)
-               error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
+       safe_pages = NULL;
+       if (!error && nr_pages > unsafe_pages) {
+               nr_pages -= unsafe_pages;
+               while (nr_pages--) {
+                       struct safe_page *ptr;
+
+                       ptr = (struct safe_page *)get_zeroed_page(GFP_ATOMIC);
+                       if (!ptr) {
+                               error = -ENOMEM;
+                               break;
+                       }
+                       if (!PageNosaveFree(virt_to_page(ptr))) {
+                               /* The page is "safe", add it to the list */
+                               ptr->next = safe_pages;
+                               safe_pages = ptr;
+                       }
+                       /* Mark the page as allocated */
+                       SetPageNosave(virt_to_page(ptr));
+                       SetPageNosaveFree(virt_to_page(ptr));
+               }
+       }
        if (!error) {
-               release_eaten_pages();
                pagedir_nosave = pblist;
        } else {
-               pagedir_nosave = NULL;
                handle->pbe = NULL;
-               nr_copy_pages = 0;
-               nr_meta_pages = 0;
+               swsusp_free();
        }
        return error;
 }
 
+static void *get_buffer(struct snapshot_handle *handle)
+{
+       struct pbe *pbe = handle->pbe, *last = handle->last_pbe;
+       struct page *page = virt_to_page(pbe->orig_address);
+
+       if (PageNosave(page) && PageNosaveFree(page)) {
+               /*
+                * We have allocated the "original" page frame and we can
+                * use it directly to store the read page
+                */
+               pbe->address = 0;
+               if (last && last->next)
+                       last->next = NULL;
+               return (void *)pbe->orig_address;
+       }
+       /*
+        * The "original" page frame has not been allocated and we have to
+        * use a "safe" page frame to store the read page
+        */
+       pbe->address = (unsigned long)safe_pages;
+       safe_pages = safe_pages->next;
+       if (last)
+               last->next = pbe;
+       handle->last_pbe = pbe;
+       return (void *)pbe->address;
+}
+
 /**
  *     snapshot_write_next - used for writing the system memory snapshot.
  *
@@ -781,7 +820,7 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 {
        int error = 0;
 
-       if (handle->prev && handle->page > nr_meta_pages + nr_copy_pages)
+       if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
                return 0;
        if (!buffer) {
                /* This makes the buffer be freed by swsusp_free() */
@@ -791,33 +830,39 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
        }
        if (!handle->offset)
                handle->buffer = buffer;
-       if (handle->prev < handle->page) {
+       handle->sync_read = 1;
+       if (handle->prev < handle->cur) {
                if (!handle->prev) {
-                       error = load_header(handle, (struct swsusp_info *)buffer);
+                       error = load_header(handle,
+                                       (struct swsusp_info *)buffer);
                        if (error)
                                return error;
                } else if (handle->prev <= nr_meta_pages) {
-                       handle->pbe = unpack_orig_addresses(buffer, handle->pbe);
+                       handle->pbe = unpack_orig_addresses(buffer,
+                                                       handle->pbe);
                        if (!handle->pbe) {
-                               error = create_image(handle);
+                               error = prepare_image(handle);
                                if (error)
                                        return error;
                                handle->pbe = pagedir_nosave;
-                               handle->buffer = (void *)handle->pbe->address;
+                               handle->last_pbe = NULL;
+                               handle->buffer = get_buffer(handle);
+                               handle->sync_read = 0;
                        }
                } else {
                        handle->pbe = handle->pbe->next;
-                       handle->buffer = (void *)handle->pbe->address;
+                       handle->buffer = get_buffer(handle);
+                       handle->sync_read = 0;
                }
-               handle->prev = handle->page;
+               handle->prev = handle->cur;
        }
-       handle->buf_offset = handle->page_offset;
-       if (handle->page_offset + count >= PAGE_SIZE) {
-               count = PAGE_SIZE - handle->page_offset;
-               handle->page_offset = 0;
-               handle->page++;
+       handle->buf_offset = handle->cur_offset;
+       if (handle->cur_offset + count >= PAGE_SIZE) {
+               count = PAGE_SIZE - handle->cur_offset;
+               handle->cur_offset = 0;
+               handle->cur++;
        } else {
-               handle->page_offset += count;
+               handle->cur_offset += count;
        }
        handle->offset += count;
        return count;
@@ -826,5 +871,5 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
        return !(!handle->pbe || handle->pbe->next || !nr_copy_pages ||
-               handle->page <= nr_meta_pages + nr_copy_pages);
+               handle->cur <= nr_meta_pages + nr_copy_pages);
 }