X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=mm%2Fpage_alloc.c;h=5581dd1a489b501a6168e7a934e19ad081df0b23;hb=9464c7cf61b9433057924c36e6e02f303a00e768;hp=7de09d07dd14c4cbdf04df402b73f972b8d9b1d3;hpb=41689045f6a3cbe0550e1d34e9cc20d2e8c432ba;p=linux-2.6.git diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7de09d07d..5581dd1a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -14,6 +14,7 @@ * (lots of bits borrowed from Ingo Molnar & Andrew Morton) */ +#include #include #include #include @@ -36,8 +37,6 @@ #include #include #include -#include -#include #include #include @@ -85,8 +84,8 @@ EXPORT_SYMBOL(zone_table); static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; int min_free_kbytes = 1024; -unsigned long __meminitdata nr_kernel_pages; -unsigned long __meminitdata nr_all_pages; +unsigned long __initdata nr_kernel_pages; +unsigned long __initdata nr_all_pages; #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) @@ -267,7 +266,7 @@ static inline void rmv_page_order(struct page *page) * satisfies the following equation: * P = B & ~(1 << O) * - * Assumption: *_mem_map is contiguous at least up to MAX_ORDER + * Assumption: *_mem_map is contigious at least up to MAX_ORDER */ static inline struct page * __page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order) @@ -288,27 +287,22 @@ __find_combined_index(unsigned long page_idx, unsigned int order) * we can do coalesce a page and its buddy if * (a) the buddy is not in a hole && * (b) the buddy is in the buddy system && - * (c) a page and its buddy have the same order && - * (d) a page and its buddy are in the same zone. + * (c) a page and its buddy have the same order. * * For recording whether a page is in the buddy system, we use PG_buddy. * Setting, clearing, and testing PG_buddy is serialized by zone->lock. * * For recording page's order, we use page_private(page). */ -static inline int page_is_buddy(struct page *page, struct page *buddy, - int order) +static inline int page_is_buddy(struct page *page, int order) { #ifdef CONFIG_HOLES_IN_ZONE - if (!pfn_valid(page_to_pfn(buddy))) + if (!pfn_valid(page_to_pfn(page))) return 0; #endif - if (page_zone_id(page) != page_zone_id(buddy)) - return 0; - - if (PageBuddy(buddy) && page_order(buddy) == order) { - BUG_ON(page_count(buddy) != 0); + if (PageBuddy(page) && page_order(page) == order) { + BUG_ON(page_count(page) != 0); return 1; } return 0; @@ -359,7 +353,7 @@ static inline void __free_one_page(struct page *page, struct page *buddy; buddy = __page_find_buddy(page, page_idx, order); - if (!page_is_buddy(page, buddy, order)) + if (!page_is_buddy(buddy, order)) break; /* Move the buddy up one level. 
*/ list_del(&buddy->lru); @@ -448,8 +442,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (arch_free_page(page, order)) return; if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), - PAGE_SIZE<flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | - 1 << PG_fs_misc | 1 << PG_mappedtodisk); + 1 << PG_checked | 1 << PG_mappedtodisk); set_page_private(page, 0); set_page_refcounted(page); kernel_map_pages(page, 1 << order, 1); @@ -711,6 +705,27 @@ void drain_local_pages(void) } #endif /* CONFIG_PM */ +static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu) +{ +#ifdef CONFIG_NUMA + pg_data_t *pg = z->zone_pgdat; + pg_data_t *orig = zonelist->zones[0]->zone_pgdat; + struct per_cpu_pageset *p; + + p = zone_pcp(z, cpu); + if (pg == orig) { + p->numa_hit++; + } else { + p->numa_miss++; + zone_pcp(zonelist->zones[0], cpu)->numa_foreign++; + } + if (pg == NODE_DATA(numa_node_id())) + p->local_node++; + else + p->other_node++; +#endif +} + /* * Free a 0-order page */ @@ -732,7 +747,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; local_irq_save(flags); - __count_vm_event(PGFREE); + __inc_page_state(pgfree); list_add(&page->lru, &pcp->list); pcp->count++; if (pcp->count >= pcp->high) { @@ -808,8 +823,8 @@ again: goto failed; } - __count_zone_vm_events(PGALLOC, zone, 1 << order); - zone_statistics(zonelist, zone); + __mod_page_state_zone(zone, pgalloc, 1 << order); + zone_statistics(zonelist, zone, cpu); local_irq_restore(flags); put_cpu(); @@ -939,7 +954,8 @@ restart: goto got_pg; do { - wakeup_kswapd(*z, order); + if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL)) + wakeup_kswapd(*z, order); } while (*(++z)); /* @@ -1213,6 +1229,141 @@ static void show_node(struct zone *zone) #define show_node(zone) do { } while (0) #endif +/* + * Accumulate the page_state information across all CPUs. + * The result is unavoidably approximate - it can change + * during and after execution of this function. 
+ */ +static DEFINE_PER_CPU(struct page_state, page_states) = {0}; + +atomic_t nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(long, nr_pagecache_local) = 0; +#endif + +static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) +{ + unsigned cpu; + + memset(ret, 0, nr * sizeof(unsigned long)); + cpus_and(*cpumask, *cpumask, cpu_online_map); + + for_each_cpu_mask(cpu, *cpumask) { + unsigned long *in; + unsigned long *out; + unsigned off; + unsigned next_cpu; + + in = (unsigned long *)&per_cpu(page_states, cpu); + + next_cpu = next_cpu(cpu, *cpumask); + if (likely(next_cpu < NR_CPUS)) + prefetch(&per_cpu(page_states, next_cpu)); + + out = (unsigned long *)ret; + for (off = 0; off < nr; off++) + *out++ += *in++; + } +} + +void get_page_state_node(struct page_state *ret, int node) +{ + int nr; + cpumask_t mask = node_to_cpumask(node); + + nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); + nr /= sizeof(unsigned long); + + __get_page_state(ret, nr+1, &mask); +} + +void get_page_state(struct page_state *ret) +{ + int nr; + cpumask_t mask = CPU_MASK_ALL; + + nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); + nr /= sizeof(unsigned long); + + __get_page_state(ret, nr + 1, &mask); +} + +void get_full_page_state(struct page_state *ret) +{ + cpumask_t mask = CPU_MASK_ALL; + + __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); +} + +unsigned long read_page_state_offset(unsigned long offset) +{ + unsigned long ret = 0; + int cpu; + + for_each_online_cpu(cpu) { + unsigned long in; + + in = (unsigned long)&per_cpu(page_states, cpu) + offset; + ret += *((unsigned long *)in); + } + return ret; +} + +void __mod_page_state_offset(unsigned long offset, unsigned long delta) +{ + void *ptr; + + ptr = &__get_cpu_var(page_states); + *(unsigned long *)(ptr + offset) += delta; +} +EXPORT_SYMBOL(__mod_page_state_offset); + +void mod_page_state_offset(unsigned long offset, unsigned long delta) +{ + unsigned long flags; + void *ptr; + + local_irq_save(flags); + ptr = &__get_cpu_var(page_states); + *(unsigned long *)(ptr + offset) += delta; + local_irq_restore(flags); +} +EXPORT_SYMBOL(mod_page_state_offset); + +void __get_zone_counts(unsigned long *active, unsigned long *inactive, + unsigned long *free, struct pglist_data *pgdat) +{ + struct zone *zones = pgdat->node_zones; + int i; + + *active = 0; + *inactive = 0; + *free = 0; + for (i = 0; i < MAX_NR_ZONES; i++) { + *active += zones[i].nr_active; + *inactive += zones[i].nr_inactive; + *free += zones[i].free_pages; + } +} + +void get_zone_counts(unsigned long *active, + unsigned long *inactive, unsigned long *free) +{ + struct pglist_data *pgdat; + + *active = 0; + *inactive = 0; + *free = 0; + for_each_online_pgdat(pgdat) { + unsigned long l, m, n; + __get_zone_counts(&l, &m, &n, pgdat); + *active += l; + *inactive += m; + *free += n; + } +} + void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages; @@ -1257,6 +1408,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) */ void show_free_areas(void) { + struct page_state ps; int cpu, temperature; unsigned long active; unsigned long inactive; @@ -1288,6 +1440,7 @@ void show_free_areas(void) } } + get_page_state(&ps); get_zone_counts(&active, &inactive, &free); printk("Free pages: %11ukB (%ukB HighMem)\n", @@ -1298,13 +1451,13 @@ void show_free_areas(void) "unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n", active, inactive, - global_page_state(NR_FILE_DIRTY), - global_page_state(NR_WRITEBACK), - 
global_page_state(NR_UNSTABLE_NFS), + ps.nr_dirty, + ps.nr_writeback, + ps.nr_unstable, nr_free_pages(), - global_page_state(NR_SLAB), - global_page_state(NR_FILE_MAPPED), - global_page_state(NR_PAGETABLE)); + ps.nr_slab, + ps.nr_mapped, + ps.nr_page_table_pages); for_each_zone(zone) { int i; @@ -1339,7 +1492,7 @@ void show_free_areas(void) } for_each_zone(zone) { - unsigned long nr[MAX_ORDER], flags, order, total = 0; + unsigned long nr, flags, order, total = 0; show_node(zone); printk("%s: ", zone->name); @@ -1350,12 +1503,11 @@ void show_free_areas(void) spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { - nr[order] = zone->free_area[order].nr_free; - total += nr[order] << order; + nr = zone->free_area[order].nr_free; + total += nr << order; + printk("%lu*%lukB ", nr, K(1UL) << order); } spin_unlock_irqrestore(&zone->lock, flags); - for (order = 0; order < MAX_ORDER; order++) - printk("%lu*%lukB ", nr[order], K(1UL) << order); printk("= %lukB\n", K(total)); } @@ -1367,7 +1519,7 @@ void show_free_areas(void) * * Add all populated zones of a node to the zonelist. */ -static int __meminit build_zonelists_node(pg_data_t *pgdat, +static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int nr_zones, int zone_type) { struct zone *zone; @@ -1403,7 +1555,7 @@ static inline int highest_zone(int zone_bits) #ifdef CONFIG_NUMA #define MAX_NODE_LOAD (num_online_nodes()) -static int __meminitdata node_load[MAX_NUMNODES]; +static int __initdata node_load[MAX_NUMNODES]; /** * find_next_best_node - find the next node that should appear in a given node's fallback list * @node: node whose fallback list we're appending @@ -1418,7 +1570,7 @@ static int __meminitdata node_load[MAX_NUMNODES]; * on them otherwise. * It returns -1 if no node is found. */ -static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) +static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { int n, val; int min_val = INT_MAX; @@ -1464,7 +1616,7 @@ static int __meminit find_next_best_node(int node, nodemask_t *used_node_mask) return best_node; } -static void __meminit build_zonelists(pg_data_t *pgdat) +static void __init build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; int prev_node, load; @@ -1516,7 +1668,7 @@ static void __meminit build_zonelists(pg_data_t *pgdat) #else /* CONFIG_NUMA */ -static void __meminit build_zonelists(pg_data_t *pgdat) +static void __init build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; @@ -1554,29 +1706,14 @@ static void __meminit build_zonelists(pg_data_t *pgdat) #endif /* CONFIG_NUMA */ -/* return values int ....just for stop_machine_run() */ -static int __meminit __build_all_zonelists(void *dummy) +void __init build_all_zonelists(void) { - int nid; - for_each_online_node(nid) - build_zonelists(NODE_DATA(nid)); - return 0; -} + int i; -void __meminit build_all_zonelists(void) -{ - if (system_state == SYSTEM_BOOTING) { - __build_all_zonelists(0); - cpuset_init_current_mems_allowed(); - } else { - /* we have to stop all cpus to guaranntee there is no user - of zonelist */ - stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); - /* cpuset refresh routine should be here */ - } - vm_total_pages = nr_free_pagecache_pages(); - printk("Built %i zonelists. 
Total pages: %ld\n", - num_online_nodes(), vm_total_pages); + for_each_online_node(i) + build_zonelists(NODE_DATA(i)); + printk("Built %i zonelists\n", num_online_nodes()); + cpuset_init_current_mems_allowed(); } /* @@ -1592,8 +1729,7 @@ void __meminit build_all_zonelists(void) */ #define PAGES_PER_WAITQUEUE 256 -#ifndef CONFIG_MEMORY_HOTPLUG -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) +static inline unsigned long wait_table_size(unsigned long pages) { unsigned long size = 1; @@ -1611,29 +1747,6 @@ static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) return max(size, 4UL); } -#else -/* - * A zone's size might be changed by hot-add, so it is not possible to determine - * a suitable size for its wait_table. So we use the maximum size now. - * - * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie: - * - * i386 (preemption config) : 4096 x 16 = 64Kbyte. - * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte. - * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte. - * - * The maximum entries are prepared when a zone's memory is (512K + 256) pages - * or more by the traditional way. (See above). It equals: - * - * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte. - * ia64(16K page size) : = ( 8G + 4M)byte. - * powerpc (64K page size) : = (32G +16M)byte. - */ -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) -{ - return 4096UL; -} -#endif /* * This is an integer logarithm so that shifts can be used later @@ -1681,8 +1794,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; - if (!early_pfn_in_nid(pfn, nid)) - continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); @@ -1855,14 +1966,12 @@ static inline void free_zone_pagesets(int cpu) for_each_zone(zone) { struct per_cpu_pageset *pset = zone_pcp(zone, cpu); - /* Free per_cpu_pageset if it is slab allocated */ - if (pset != &boot_pageset[cpu]) - kfree(pset); zone_pcp(zone, cpu) = NULL; + kfree(pset); } } -static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, +static int pageset_cpuup_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1884,7 +1993,7 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, return ret; } -static struct notifier_block __cpuinitdata pageset_notifier = +static struct notifier_block pageset_notifier = { &pageset_cpuup_callback, NULL, 0 }; void __init setup_per_cpu_pageset(void) @@ -1903,46 +2012,23 @@ void __init setup_per_cpu_pageset(void) #endif static __meminit -int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) +void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) { int i; struct pglist_data *pgdat = zone->zone_pgdat; - size_t alloc_size; /* * The per-page waitqueue mechanism uses hashed waitqueues * per zone. */ - zone->wait_table_hash_nr_entries = - wait_table_hash_nr_entries(zone_size_pages); - zone->wait_table_bits = - wait_table_bits(zone->wait_table_hash_nr_entries); - alloc_size = zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t); - - if (system_state == SYSTEM_BOOTING) { - zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); - } else { - /* - * This case means that a zone whose size was 0 gets new memory - * via memory hot-add. - * But it may be the case that a new node was hot-added. 
In - * this case vmalloc() will not be able to use this new node's - * memory - this wait_table must be initialized to use this new - * node itself as well. - * To use this new node's memory, further consideration will be - * necessary. - */ - zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size); - } - if (!zone->wait_table) - return -ENOMEM; + zone->wait_table_size = wait_table_size(zone_size_pages); + zone->wait_table_bits = wait_table_bits(zone->wait_table_size); + zone->wait_table = (wait_queue_head_t *) + alloc_bootmem_node(pgdat, zone->wait_table_size + * sizeof(wait_queue_head_t)); - for(i = 0; i < zone->wait_table_hash_nr_entries; ++i) + for(i = 0; i < zone->wait_table_size; ++i) init_waitqueue_head(zone->wait_table + i); - - return 0; } static __meminit void zone_pcp_init(struct zone *zone) @@ -1964,15 +2050,12 @@ static __meminit void zone_pcp_init(struct zone *zone) zone->name, zone->present_pages, batch); } -__meminit int init_currently_empty_zone(struct zone *zone, - unsigned long zone_start_pfn, - unsigned long size) +static __meminit void init_currently_empty_zone(struct zone *zone, + unsigned long zone_start_pfn, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; - int ret; - ret = zone_wait_table_init(zone, size); - if (ret) - return ret; + + zone_wait_table_init(zone, size); pgdat->nr_zones = zone_idx(zone) + 1; zone->zone_start_pfn = zone_start_pfn; @@ -1980,8 +2063,6 @@ __meminit int init_currently_empty_zone(struct zone *zone, memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); zone_init_free_lists(pgdat, zone, zone->spanned_pages); - - return 0; } /* @@ -1990,13 +2071,12 @@ __meminit int init_currently_empty_zone(struct zone *zone, * - mark all memory queues empty * - clear the memory bitmaps */ -static void __meminit free_area_init_core(struct pglist_data *pgdat, +static void __init free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long j; int nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; - int ret; pgdat_resize_init(pgdat); pgdat->nr_zones = 0; @@ -2017,11 +2097,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->spanned_pages = size; zone->present_pages = realsize; -#ifdef CONFIG_NUMA - zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio) - / 100; - zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; -#endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); @@ -2029,7 +2104,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone->free_pages = 0; - zone->prev_priority = DEF_PRIORITY; + zone->temp_priority = zone->prev_priority = DEF_PRIORITY; zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); @@ -2038,14 +2113,12 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->nr_scan_inactive = 0; zone->nr_active = 0; zone->nr_inactive = 0; - zap_zone_vm_stats(zone); atomic_set(&zone->reclaim_in_progress, 0); if (!size) continue; zonetable_add(zone, nid, j, zone_start_pfn, size); - ret = init_currently_empty_zone(zone, zone_start_pfn, size); - BUG_ON(ret); + init_currently_empty_zone(zone, zone_start_pfn, size); zone_start_pfn += size; } } @@ -2086,7 +2159,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) #endif /* CONFIG_FLAT_NODE_MEM_MAP */ } -void __meminit free_area_init_node(int nid, struct pglist_data *pgdat, +void __init free_area_init_node(int nid, 
struct pglist_data *pgdat, unsigned long *zones_size, unsigned long node_start_pfn, unsigned long *zholes_size) { @@ -2112,18 +2185,307 @@ void __init free_area_init(unsigned long *zones_size) __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } +#ifdef CONFIG_PROC_FS + +#include + +static void *frag_start(struct seq_file *m, loff_t *pos) +{ + pg_data_t *pgdat; + loff_t node = *pos; + for (pgdat = first_online_pgdat(); + pgdat && node; + pgdat = next_online_pgdat(pgdat)) + --node; + + return pgdat; +} + +static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) +{ + pg_data_t *pgdat = (pg_data_t *)arg; + + (*pos)++; + return next_online_pgdat(pgdat); +} + +static void frag_stop(struct seq_file *m, void *arg) +{ +} + +/* + * This walks the free areas for each zone. + */ +static int frag_show(struct seq_file *m, void *arg) +{ + pg_data_t *pgdat = (pg_data_t *)arg; + struct zone *zone; + struct zone *node_zones = pgdat->node_zones; + unsigned long flags; + int order; + + for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { + if (!populated_zone(zone)) + continue; + + spin_lock_irqsave(&zone->lock, flags); + seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); + for (order = 0; order < MAX_ORDER; ++order) + seq_printf(m, "%6lu ", zone->free_area[order].nr_free); + spin_unlock_irqrestore(&zone->lock, flags); + seq_putc(m, '\n'); + } + return 0; +} + +struct seq_operations fragmentation_op = { + .start = frag_start, + .next = frag_next, + .stop = frag_stop, + .show = frag_show, +}; + +/* + * Output information about zones in @pgdat. + */ +static int zoneinfo_show(struct seq_file *m, void *arg) +{ + pg_data_t *pgdat = arg; + struct zone *zone; + struct zone *node_zones = pgdat->node_zones; + unsigned long flags; + + for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { + int i; + + if (!populated_zone(zone)) + continue; + + spin_lock_irqsave(&zone->lock, flags); + seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); + seq_printf(m, + "\n pages free %lu" + "\n min %lu" + "\n low %lu" + "\n high %lu" + "\n active %lu" + "\n inactive %lu" + "\n scanned %lu (a: %lu i: %lu)" + "\n spanned %lu" + "\n present %lu", + zone->free_pages, + zone->pages_min, + zone->pages_low, + zone->pages_high, + zone->nr_active, + zone->nr_inactive, + zone->pages_scanned, + zone->nr_scan_active, zone->nr_scan_inactive, + zone->spanned_pages, + zone->present_pages); + seq_printf(m, + "\n protection: (%lu", + zone->lowmem_reserve[0]); + for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) + seq_printf(m, ", %lu", zone->lowmem_reserve[i]); + seq_printf(m, + ")" + "\n pagesets"); + for_each_online_cpu(i) { + struct per_cpu_pageset *pageset; + int j; + + pageset = zone_pcp(zone, i); + for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { + if (pageset->pcp[j].count) + break; + } + if (j == ARRAY_SIZE(pageset->pcp)) + continue; + for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { + seq_printf(m, + "\n cpu: %i pcp: %i" + "\n count: %i" + "\n high: %i" + "\n batch: %i", + i, j, + pageset->pcp[j].count, + pageset->pcp[j].high, + pageset->pcp[j].batch); + } +#ifdef CONFIG_NUMA + seq_printf(m, + "\n numa_hit: %lu" + "\n numa_miss: %lu" + "\n numa_foreign: %lu" + "\n interleave_hit: %lu" + "\n local_node: %lu" + "\n other_node: %lu", + pageset->numa_hit, + pageset->numa_miss, + pageset->numa_foreign, + pageset->interleave_hit, + pageset->local_node, + pageset->other_node); +#endif + } + seq_printf(m, + "\n all_unreclaimable: %u" + "\n prev_priority: %i" + "\n temp_priority: %i" + "\n 
start_pfn: %lu", + zone->all_unreclaimable, + zone->prev_priority, + zone->temp_priority, + zone->zone_start_pfn); + spin_unlock_irqrestore(&zone->lock, flags); + seq_putc(m, '\n'); + } + return 0; +} + +struct seq_operations zoneinfo_op = { + .start = frag_start, /* iterate over all zones. The same as in + * fragmentation. */ + .next = frag_next, + .stop = frag_stop, + .show = zoneinfo_show, +}; + +static char *vmstat_text[] = { + "nr_dirty", + "nr_writeback", + "nr_unstable", + "nr_page_table_pages", + "nr_mapped", + "nr_slab", + + "pgpgin", + "pgpgout", + "pswpin", + "pswpout", + + "pgalloc_high", + "pgalloc_normal", + "pgalloc_dma32", + "pgalloc_dma", + + "pgfree", + "pgactivate", + "pgdeactivate", + + "pgfault", + "pgmajfault", + + "pgrefill_high", + "pgrefill_normal", + "pgrefill_dma32", + "pgrefill_dma", + + "pgsteal_high", + "pgsteal_normal", + "pgsteal_dma32", + "pgsteal_dma", + + "pgscan_kswapd_high", + "pgscan_kswapd_normal", + "pgscan_kswapd_dma32", + "pgscan_kswapd_dma", + + "pgscan_direct_high", + "pgscan_direct_normal", + "pgscan_direct_dma32", + "pgscan_direct_dma", + + "pginodesteal", + "slabs_scanned", + "kswapd_steal", + "kswapd_inodesteal", + "pageoutrun", + "allocstall", + + "pgrotated", + "nr_bounce", +}; + +static void *vmstat_start(struct seq_file *m, loff_t *pos) +{ + struct page_state *ps; + + if (*pos >= ARRAY_SIZE(vmstat_text)) + return NULL; + + ps = kmalloc(sizeof(*ps), GFP_KERNEL); + m->private = ps; + if (!ps) + return ERR_PTR(-ENOMEM); + get_full_page_state(ps); + ps->pgpgin /= 2; /* sectors -> kbytes */ + ps->pgpgout /= 2; + return (unsigned long *)ps + *pos; +} + +static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) +{ + (*pos)++; + if (*pos >= ARRAY_SIZE(vmstat_text)) + return NULL; + return (unsigned long *)m->private + *pos; +} + +static int vmstat_show(struct seq_file *m, void *arg) +{ + unsigned long *l = arg; + unsigned long off = l - (unsigned long *)m->private; + + seq_printf(m, "%s %lu\n", vmstat_text[off], *l); + return 0; +} + +static void vmstat_stop(struct seq_file *m, void *arg) +{ + kfree(m->private); + m->private = NULL; +} + +struct seq_operations vmstat_op = { + .start = vmstat_start, + .next = vmstat_next, + .stop = vmstat_stop, + .show = vmstat_show, +}; + +#endif /* CONFIG_PROC_FS */ + #ifdef CONFIG_HOTPLUG_CPU static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (unsigned long)hcpu; + long *count; + unsigned long *src, *dest; if (action == CPU_DEAD) { + int i; + + /* Drain local pagecache count. */ + count = &per_cpu(nr_pagecache_local, cpu); + atomic_add(*count, &nr_pagecache); + *count = 0; local_irq_disable(); __drain_pages(cpu); - vm_events_fold_cpu(cpu); + + /* Add dead cpu's page_states to our own. 
*/ + dest = (unsigned long *)&__get_cpu_var(page_states); + src = (unsigned long *)&per_cpu(page_states, cpu); + + for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long); + i++) { + dest[i] += src[i]; + src[i] = 0; + } + local_irq_enable(); - refresh_cpu_vm_stats(cpu); } return NOTIFY_OK; } @@ -2315,40 +2677,6 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, return 0; } -#ifdef CONFIG_NUMA -int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) -{ - struct zone *zone; - int rc; - - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); - if (rc) - return rc; - - for_each_zone(zone) - zone->min_unmapped_ratio = (zone->present_pages * - sysctl_min_unmapped_ratio) / 100; - return 0; -} - -int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) -{ - struct zone *zone; - int rc; - - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); - if (rc) - return rc; - - for_each_zone(zone) - zone->min_slab_pages = (zone->present_pages * - sysctl_min_slab_ratio) / 100; - return 0; -} -#endif - /* * lowmem_reserve_ratio_sysctl_handler - just a wrapper around * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() @@ -2483,14 +2811,42 @@ void *__init alloc_large_system_hash(const char *tablename, } #ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE +/* + * pfn <-> page translation. out-of-line version. + * (see asm-generic/memory_model.h) + */ +#if defined(CONFIG_FLATMEM) struct page *pfn_to_page(unsigned long pfn) { - return __pfn_to_page(pfn); + return mem_map + (pfn - ARCH_PFN_OFFSET); } unsigned long page_to_pfn(struct page *page) { - return __page_to_pfn(page); + return (page - mem_map) + ARCH_PFN_OFFSET; +} +#elif defined(CONFIG_DISCONTIGMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + int nid = arch_pfn_to_nid(pfn); + return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid); +} +unsigned long page_to_pfn(struct page *page) +{ + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + return (page - pgdat->node_mem_map) + pgdat->node_start_pfn; +} +#elif defined(CONFIG_SPARSEMEM) +struct page *pfn_to_page(unsigned long pfn) +{ + return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn; +} + +unsigned long page_to_pfn(struct page *page) +{ + long section_id = page_to_section(page); + return page - __section_mem_map_addr(__nr_to_section(section_id)); } +#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */ EXPORT_SYMBOL(pfn_to_page); EXPORT_SYMBOL(page_to_pfn); #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
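
The __page_find_buddy()/__find_combined_index() hunks above depend only on the power-of-two index identities quoted in their comments (buddy: B2 = B1 ^ (1 << O); combined parent: P = B & ~(1 << O)). Below is a minimal userspace sketch of that arithmetic and of how a merge walks up one order at a time; it models bare page-frame indices only, and MAX_ORDER plus the demo values are assumptions for illustration, not kernel state.

/* Minimal sketch of the buddy-index arithmetic used by
 * __page_find_buddy()/__find_combined_index() in the patch above.
 * Indices only -- no struct page, no zone locking. */
#include <stdio.h>

#define MAX_ORDER 11    /* illustrative, matches the usual default */

/* Index of the buddy of the block starting at page_idx, at "order". */
static unsigned long buddy_idx(unsigned long page_idx, unsigned int order)
{
        return page_idx ^ (1UL << order);       /* B2 = B1 ^ (1 << O) */
}

/* Index of the combined (parent) block once page and buddy merge. */
static unsigned long combined_idx(unsigned long page_idx, unsigned int order)
{
        return page_idx & ~(1UL << order);      /* P = B & ~(1 << O) */
}

int main(void)
{
        unsigned long idx = 8;          /* block starting at pfn offset 8 */
        unsigned int order;

        for (order = 3; order < 6 && order < MAX_ORDER; order++) {
                unsigned long b = buddy_idx(idx, order);
                printf("order %u: block %lu buddies with %lu, parent %lu\n",
                       order, idx, b, combined_idx(idx, order));
                idx = combined_idx(idx, order); /* pretend the merge succeeded */
        }
        return 0;
}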
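
zone_statistics(), re-added on the allocation fast path, counts a NUMA hit when the page came from the preferred (first) zone's node, otherwise a miss on the allocating node plus a "foreign" on the preferred node, and separately local_node/other_node depending on whether the allocation landed on the running CPU's node. The sketch below keeps only that decision logic; the real counters live in each zone's per-CPU pageset, so the per-node array here is an intentional simplification.

/* Simplified model of the hit/miss/foreign accounting in zone_statistics().
 * Per-node stat buckets stand in for the kernel's per-cpu pagesets. */
#include <stdio.h>

struct numa_stats {
        unsigned long numa_hit, numa_miss, numa_foreign;
        unsigned long local_node, other_node;
};

static void account_alloc(struct numa_stats *stats, int preferred_node,
                          int allocated_node, int cpu_node)
{
        if (allocated_node == preferred_node) {
                stats[allocated_node].numa_hit++;
        } else {
                stats[allocated_node].numa_miss++;
                stats[preferred_node].numa_foreign++;
        }
        if (allocated_node == cpu_node)
                stats[allocated_node].local_node++;
        else
                stats[allocated_node].other_node++;
}

int main(void)
{
        struct numa_stats stats[2] = { { 0 }, { 0 } };

        account_alloc(stats, 0, 0, 0);  /* satisfied from preferred node */
        account_alloc(stats, 0, 1, 0);  /* spilled to node 1: miss + foreign */

        printf("node0: hit=%lu foreign=%lu  node1: miss=%lu other=%lu\n",
               stats[0].numa_hit, stats[0].numa_foreign,
               stats[1].numa_miss, stats[1].other_node);
        return 0;
}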
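
The restored __get_page_state()/get_full_page_state() path sums every CPU's struct page_state by treating each per-CPU copy as a flat array of unsigned long counters. Here is a hedged userspace sketch of that summing pattern; plain arrays stand in for the kernel's per-CPU variables, and the struct layout is an illustrative assumption, not the kernel's definition.

/* Sketch of the "sum a struct of counters as an array of unsigned long"
 * pattern used by __get_page_state(). */
#include <stdio.h>
#include <string.h>

#define NR_CPUS 4                       /* illustrative CPU count */

struct page_state {                     /* stand-in for the kernel's struct */
        unsigned long nr_dirty;
        unsigned long nr_writeback;
        unsigned long pgalloc;
        unsigned long pgfree;
};

static struct page_state per_cpu_states[NR_CPUS];

static void get_page_state(struct page_state *ret)
{
        unsigned int nr = sizeof(*ret) / sizeof(unsigned long);
        unsigned int cpu, off;

        memset(ret, 0, sizeof(*ret));
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                unsigned long *in = (unsigned long *)&per_cpu_states[cpu];
                unsigned long *out = (unsigned long *)ret;

                for (off = 0; off < nr; off++)
                        out[off] += in[off];    /* accumulate per-CPU counts */
        }
}

int main(void)
{
        struct page_state total;
        unsigned int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {   /* fabricate some counts */
                per_cpu_states[cpu].nr_dirty = cpu + 1;
                per_cpu_states[cpu].pgfree = 10 * (cpu + 1);
        }
        get_page_state(&total);
        printf("nr_dirty=%lu pgfree=%lu\n", total.nr_dirty, total.pgfree);
        return 0;
}

In the kernel, get_page_state() only sums the leading counters up to GET_PAGE_STATE_LAST, while get_full_page_state() covers the whole structure; the sketch corresponds to the latter.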
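
The wait_table_size()/wait_table_bits() pair reinstated above sizes each zone's hashed wait-queue table at roughly one entry per PAGES_PER_WAITQUEUE (256) pages, rounded up to a power of two and, per the comments in and around the function, clamped to the range [4, 4096] entries. A small standalone sketch of that computation follows; the sample zone sizes are made up.

/* Sketch of the zone wait-table sizing: power-of-two bucket count in
 * [4, 4096], plus the matching integer log2 used for hashing. */
#include <stdio.h>

#define PAGES_PER_WAITQUEUE 256

static unsigned long wait_table_size(unsigned long pages)
{
        unsigned long size = 1;

        pages /= PAGES_PER_WAITQUEUE;
        while (size < pages)
                size <<= 1;                     /* round up to a power of two */

        if (size > 4096UL)                      /* cap the table */
                size = 4096UL;
        return size < 4UL ? 4UL : size;         /* but never fewer than 4 */
}

static unsigned long wait_table_bits(unsigned long size)
{
        unsigned long bits = 0;

        while ((1UL << bits) < size)            /* size is a power of two */
                bits++;
        return bits;
}

int main(void)
{
        unsigned long pages;

        for (pages = 1024; pages <= 1UL << 22; pages <<= 4)
                printf("%8lu pages -> %4lu entries (%lu bits)\n",
                       pages, wait_table_size(pages),
                       wait_table_bits(wait_table_size(pages)));
        return 0;
}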