X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=include%2Flinux%2Fmmzone.h;fp=include%2Flinux%2Fmmzone.h;h=ebfc238cc243482d9baa9b7bb3bd92391d4f6479;hb=64ba3f394c830ec48a1c31b53dcae312c56f1604;hp=5dfe111897f73e41e642c81aa8ccf6290c5d279f;hpb=be1e6109ac94a859551f8e1774eb9a8469fe055c;p=linux-2.6.git

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5dfe11189..ebfc238cc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+#include 
 #include 
 #include 
 #include 
@@ -12,9 +13,7 @@
 #include 
 #include 
 #include 
-#include 
 #include 
-#include 
 
 /* Free memory management - zoned buddy allocator. */
 #ifndef CONFIG_FORCE_MAX_ZONEORDER
@@ -22,7 +21,6 @@
 #else
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif
-#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
 struct free_area {
 	struct list_head	free_list;
@@ -46,27 +44,6 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
-enum zone_stat_item {
-	NR_ANON_PAGES,	/* Mapped anonymous pages */
-	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
-			   only modified from process context */
-	NR_FILE_PAGES,
-	NR_SLAB,	/* Pages used by slab allocator */
-	NR_PAGETABLE,	/* used for pagetables */
-	NR_FILE_DIRTY,
-	NR_WRITEBACK,
-	NR_UNSTABLE_NFS,	/* NFS unstable pages */
-	NR_BOUNCE,
-#ifdef CONFIG_NUMA
-	NUMA_HIT,		/* allocated in intended node */
-	NUMA_MISS,		/* allocated in non intended node */
-	NUMA_FOREIGN,		/* was intended here, hit elsewhere */
-	NUMA_INTERLEAVE_HIT,	/* interleaver preferred this zone */
-	NUMA_LOCAL,		/* allocation from local node */
-	NUMA_OTHER,		/* allocation from other node */
-#endif
-	NR_VM_ZONE_STAT_ITEMS };
-
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
 	int high;		/* high watermark, emptying needed */
@@ -76,9 +53,13 @@ struct per_cpu_pageset {
 	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
-#ifdef CONFIG_SMP
-	s8 stat_threshold;
-	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+#ifdef CONFIG_NUMA
+	unsigned long numa_hit;		/* allocated in intended node */
+	unsigned long numa_miss;	/* allocated in non intended node */
+	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
+	unsigned long interleave_hit;	/* interleaver prefered this zone */
+	unsigned long local_node;	/* allocation from local node */
+	unsigned long other_node;	/* allocation from other node */
 #endif
 } ____cacheline_aligned_in_smp;
@@ -151,11 +132,6 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
-	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
-	 */
-	unsigned long		min_unmapped_ratio;
-	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -187,8 +163,12 @@ struct zone {
 	/* A count of how many reclaimers are scanning this zone */
 	atomic_t		reclaim_in_progress;
 
-	/* Zone statistics */
-	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
+	/*
+	 * timestamp (in jiffies) of the last zone reclaim that did not
+	 * result in freeing of pages. This is used to avoid repeated scans
+	 * if all memory in the zone is in use.
+	 */
+	unsigned long		last_unsuccessful_zone_reclaim;
 
 	/*
 	 * prev_priority holds the scanning priority for this zone.  It is
@@ -200,9 +180,13 @@ struct zone {
 	 * under - it drives the swappiness decision: whether to unmap mapped
 	 * pages.
 	 *
-	 * Access to both this field is quite racy even on uniprocessor.  But
+	 * temp_priority is used to remember the scanning priority at which
+	 * this zone was successfully refilled to free_pages == pages_high.
+	 *
+	 * Access to both these fields is quite racy even on uniprocessor.  But
 	 * it is expected to average out OK.
 	 */
+	int temp_priority;
 	int prev_priority;
 
@@ -211,7 +195,7 @@ struct zone {
 
 	/*
 	 * wait_table		-- the array holding the hash table
-	 * wait_table_hash_nr_entries	-- the size of the hash table array
+	 * wait_table_size	-- the size of the hash table array
 	 * wait_table_bits	-- wait_table_size == (1 << wait_table_bits)
 	 *
 	 * The purpose of all these is to keep track of the people
@@ -234,13 +218,14 @@ struct zone {
 	 * free_area_init_core() performs the initialization of them.
 	 */
 	wait_queue_head_t	* wait_table;
-	unsigned long		wait_table_hash_nr_entries;
+	unsigned long		wait_table_size;
 	unsigned long		wait_table_bits;
 
 	/*
 	 * Discontig memory support fields.
 	 */
 	struct pglist_data	*zone_pgdat;
+	struct page		*zone_mem_map;
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
@@ -322,6 +307,7 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
+	struct pglist_data *pgdat_next;
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
@@ -338,6 +324,8 @@ typedef struct pglist_data {
 
 #include 
 
+extern struct pglist_data *pgdat_list;
+
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat);
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -347,9 +335,6 @@ void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		int classzone_idx, int alloc_flags);
 
-extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
-				     unsigned long size);
-
 #ifdef CONFIG_HAVE_MEMORY_PRESENT
 void memory_present(int nid, unsigned long start, unsigned long end);
 #else
@@ -365,6 +350,57 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
+/**
+ * for_each_pgdat - helper macro to iterate over all nodes
+ * @pgdat - pointer to a pg_data_t variable
+ *
+ * Meant to help with common loops of the form
+ * pgdat = pgdat_list;
+ * while(pgdat) {
+ *	...
+ *	pgdat = pgdat->pgdat_next;
+ * }
+ */
+#define for_each_pgdat(pgdat) \
+	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
+
+/*
+ * next_zone - helper magic for for_each_zone()
+ * Thanks to William Lee Irwin III for this piece of ingenuity.
+ */
+static inline struct zone *next_zone(struct zone *zone)
+{
+	pg_data_t *pgdat = zone->zone_pgdat;
+
+	if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
+		zone++;
+	else if (pgdat->pgdat_next) {
+		pgdat = pgdat->pgdat_next;
+		zone = pgdat->node_zones;
+	} else
+		zone = NULL;
+
+	return zone;
+}
+
+/**
+ * for_each_zone - helper macro to iterate over all memory zones
+ * @zone - pointer to struct zone variable
+ *
+ * The user only needs to declare the zone variable, for_each_zone
+ * fills it in. This basically means for_each_zone() is an
+ * easier to read version of this piece of code:
+ *
+ * for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
+ *	for (i = 0; i < MAX_NR_ZONES; ++i) {
+ *		struct zone * z = pgdat->node_zones + i;
+ *		...
+ *	}
+ * }
+ */
+#define for_each_zone(zone) \
+	for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
+
 static inline int populated_zone(struct zone *zone)
 {
 	return (!!zone->present_pages);
 }
@@ -416,10 +452,6 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-			struct file *, void __user *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-			struct file *, void __user *, size_t *, loff_t *);
 
 #include 
 /* Returns the number of the current Node. */
@@ -440,30 +472,6 @@ extern struct pglist_data contig_page_data;
 
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
-extern struct pglist_data *first_online_pgdat(void);
-extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
-extern struct zone *next_zone(struct zone *zone);
-
-/**
- * for_each_pgdat - helper macro to iterate over all nodes
- * @pgdat - pointer to a pg_data_t variable
- */
-#define for_each_online_pgdat(pgdat) \
-	for (pgdat = first_online_pgdat();		\
-	     pgdat;					\
-	     pgdat = next_online_pgdat(pgdat))
-/**
- * for_each_zone - helper macro to iterate over all memory zones
- * @zone - pointer to struct zone variable
- *
- * The user only needs to declare the zone variable, for_each_zone
- * fills it in.
- */
-#define for_each_zone(zone) \
-	for (zone = (first_online_pgdat())->node_zones; \
-	     zone;					\
-	     zone = next_zone(zone))
-
 #ifdef CONFIG_SPARSEMEM
 #include 
 #endif
@@ -527,10 +535,6 @@ struct mem_section {
 	 * pages.  However, it is stored with some other magic.
 	 * (see sparse.c::sparse_init_one_section())
 	 *
-	 * Additionally during early boot we encode node id of
-	 * the location of the section here to guide allocation.
-	 * (see sparse.c::memory_present())
-	 *
 	 * Making it a UL at least makes someone do a cast
 	 * before using it wrong.
 	 */
@@ -570,7 +574,6 @@ extern int __section_nr(struct mem_section* ms);
 #define SECTION_HAS_MEM_MAP	(1UL<<1)
 #define SECTION_MAP_LAST_BIT	(1UL<<2)
 #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT	2
 
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
@@ -599,6 +602,17 @@ static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 {
 	return __nr_to_section(pfn_to_section_nr(pfn));
 }
 
+#define pfn_to_page(pfn)					\
+({								\
+	unsigned long __pfn = (pfn);				\
+	__section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \
+})
+#define page_to_pfn(page)					\
+({								\
+	page - __section_mem_map_addr(__nr_to_section(		\
+		page_to_section(page)));			\
+})
+
 static inline int pfn_valid(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
@@ -628,12 +642,6 @@ void sparse_init(void);
 #define sparse_index_init(_sec, _nid)	do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid)	(early_pfn_to_nid(pfn) == (nid))
-#else
-#define early_pfn_in_nid(pfn, nid)	(1)
-#endif
-
 #ifndef early_pfn_valid
 #define early_pfn_valid(pfn)	(1)
 #endif
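A note on the per_cpu_pageset hunk: on the "+" side of this diff the NUMA allocation counters are plain per-cpu fields in struct per_cpu_pageset rather than entries in the zone's vm_stat[] array of atomics. The sketch below shows roughly how such a counter gets bumped on the allocation path; account_numa_hit() is a made-up name for illustration (the real consumer is zone_statistics() in mm/page_alloc.c, and zone_pcp() is the pageset accessor defined elsewhere in this header).

/*
 * Sketch only, not part of the patch: account an allocation that
 * landed in the zone the caller asked for. Preemption must already
 * be disabled, as it is on the page allocator path.
 */
static void account_numa_hit(struct zone *zone)
{
	struct per_cpu_pageset *p = zone_pcp(zone, smp_processor_id());

	p->numa_hit++;				/* allocated in intended node */
	if (zone->zone_pgdat == NODE_DATA(numa_node_id()))
		p->local_node++;		/* intended node is also local */
	else
		p->other_node++;
}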
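The for_each_pgdat()/for_each_zone() helpers reinstated above walk the pgdat_list chain exactly like the open-coded loop quoted in their kerneldoc: next_zone() steps through a node's node_zones[] and hops to the next node via pgdat->pgdat_next. A usage sketch; count_present_pages() is a made-up example, not a function from this patch.

/* Sketch only: sum present_pages over every zone of every node. */
static unsigned long count_present_pages(void)
{
	struct zone *zone;
	unsigned long total = 0;

	for_each_zone(zone)
		if (populated_zone(zone))	/* skip empty zones */
			total += zone->present_pages;

	return total;
}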
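On the SPARSEMEM pfn_to_page()/page_to_pfn() macros restored at the end: they work because sparse_init_one_section() stores each section's mem_map with the section's starting pfn already folded in (the "some other magic" mentioned in the struct mem_section comment), so adding the full pfn yields the right struct page, and page_to_pfn() inverts it via page_to_section(). A round-trip sketch, assuming a SPARSEMEM configuration; check_pfn_roundtrip() is hypothetical.

/* Sketch only: the two macros are inverses for any valid pfn. */
static void check_pfn_roundtrip(unsigned long pfn)
{
	if (!pfn_valid(pfn))	/* section must exist and have a mem_map */
		return;

	BUG_ON(page_to_pfn(pfn_to_page(pfn)) != pfn);
}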