vserver 1.9.3
[linux-2.6.git] / mm / readahead.c
index 71bf246..a5e6906 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 
-void default_unplug_io_fn(struct backing_dev_info *bdi)
+void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
 EXPORT_SYMBOL(default_unplug_io_fn);
@@ -28,16 +28,15 @@ struct backing_dev_info default_backing_dev_info = {
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 /*
- * Initialise a struct file's readahead state
+ * Initialise a struct file's readahead state.  Assumes that the caller has
+ * memset *ra to zero.
  */
 void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 {
-       memset(ra, 0, sizeof(*ra));
        ra->ra_pages = mapping->backing_dev_info->ra_pages;
        ra->average = ra->ra_pages / 2;
 }
-EXPORT_SYMBOL(file_ra_state_init);
 
 /*
  * Return max readahead size for this inode in number-of-pages.
@@ -349,11 +348,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                        struct file *filp, unsigned long offset)
 {
        unsigned max;
-       unsigned min;
        unsigned orig_next_size;
        unsigned actual;
        int first_access=0;
-       unsigned long preoffset=0;
+       unsigned long average;
 
        /*
         * Here we detect the case where the application is performing
@@ -374,7 +372,6 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
        if (max == 0)
                goto out;       /* No readahead */
 
-       min = get_min_readahead(ra);
        orig_next_size = ra->next_size;
 
        if (ra->next_size == 0) {
@@ -386,18 +383,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                first_access=1;
                ra->next_size = max / 2;
                ra->prev_page = offset;
-               ra->serial_cnt++;
+               ra->currnt_wnd_hit++;
                goto do_io;
        }
 
-       if (offset == ra->prev_page + 1) {
-               if (ra->serial_cnt <= (max * 2))
-                       ra->serial_cnt++;
-       } else {
-               ra->average = (ra->average + ra->serial_cnt) / 2;
-               ra->serial_cnt = 1;
-       }
-       preoffset = ra->prev_page;
        ra->prev_page = offset;
 
        if (offset >= ra->start && offset <= (ra->start + ra->size)) {
@@ -406,12 +395,22 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                 * page beyond the end.  Expand the next readahead size.
                 */
                ra->next_size += 2;
+
+               if (ra->currnt_wnd_hit <= (max * 2))
+                       ra->currnt_wnd_hit++;
        } else {
                /*
                 * A miss - lseek, pagefault, pread, etc.  Shrink the readahead
                 * window.
                 */
                ra->next_size -= 2;
+
+               average = ra->average;
+               if (average < ra->currnt_wnd_hit) {
+                       average++;
+               }
+               ra->average = (average + ra->currnt_wnd_hit) / 2;
+               ra->currnt_wnd_hit = 1;
        }
 
        if ((long)ra->next_size > (long)max)
@@ -457,18 +456,17 @@ do_io:
                 * ahead window and get some I/O underway for the new
                 * current window.
                 */
-               if (!first_access && preoffset >= ra->start &&
-                               preoffset < (ra->start + ra->size)) {
-                        /* Heuristic:  If 'n' pages were
-                         * accessed in the current window, there
-                         * is a high probability that around 'n' pages
-                         * shall be used in the next current window.
-                         *
-                         * To minimize lazy-readahead triggered
-                         * in the next current window, read in
-                         * an extra page.
+               if (!first_access) {
+                        /* Heuristic: there is a high probability
+                         * that around ra->average pages (raised by
+                         * the current window's hit count, capped at
+                         * max) will be accessed in the next window.
                          */
-                       ra->next_size = preoffset - ra->start + 2;
+                       average = ra->average;
+                       if (ra->currnt_wnd_hit > average)
+                               average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
+
+                       ra->next_size = min(average , (unsigned long)max);
                }
                ra->start = offset;
                ra->size = ra->next_size;
@@ -492,21 +490,19 @@ do_io:
                 */
                if (ra->ahead_start == 0) {
                        /*
-                        * if the average io-size is less than maximum
+                        * If the average io-size is more than maximum
                         * readahead size of the file the io pattern is
                         * sequential. Hence  bring in the readahead window
                         * immediately.
-                        * Else the i/o pattern is random. Bring
-                        * in the readahead window only if the last page of
-                        * the current window is accessed (lazy readahead).
+                        * Otherwise (average io-size not more than the
+                        * maximum readahead size) the io pattern is
+                        * treated as random: don't bother to read ahead.
                         */
-                       unsigned long average = ra->average;
-
-                       if (ra->serial_cnt > average)
-                               average = (ra->serial_cnt + ra->average) / 2;
+                       average = ra->average;
+                       if (ra->currnt_wnd_hit > average)
+                               average = (ra->currnt_wnd_hit + ra->average + 1) / 2;
 
-                       if ((average >= max) || (offset == (ra->start +
-                                                       ra->size - 1))) {
+                       if (average > max) {
                                ra->ahead_start = ra->start + ra->size;
                                ra->ahead_size = ra->next_size;
                                actual = do_page_cache_readahead(mapping, filp,
@@ -552,6 +548,7 @@ void handle_ra_miss(struct address_space *mapping,
                                ra->size = max;
                                ra->ahead_start = 0;
                                ra->ahead_size = 0;
+                               ra->average = max / 2;
                        }
                }
                ra->prev_page = offset;
@@ -574,6 +571,6 @@ unsigned long max_sane_readahead(unsigned long nr)
        unsigned long inactive;
        unsigned long free;
 
-       get_zone_counts(&active, &inactive, &free);
+       __get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
        return min(nr, (inactive + free) / 2);
 }