struct free_area {
struct list_head free_list;
- unsigned long *map;
+ unsigned long nr_free;
};
struct pglist_data;
*/
#if defined(CONFIG_SMP)
struct zone_padding {
- int x;
+ char x[0];
} ____cacheline_maxaligned_in_smp;
#define ZONE_PADDING(name) struct zone_padding name;
#else
#define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */
#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */
+
+/*
+ * When a memory allocation must conform to specific limitations (such
+ * as being suitable for DMA) the caller will pass in hints to the
+ * allocator in the gfp_mask, in the zone modifier bits. These bits
+ * are used to select a priority ordered list of memory zones which
+ * match the requested limits. GFP_ZONEMASK defines which bits within
+ * the gfp_mask should be considered as zone modifiers. Each valid
+ * combination of the zone modifier bits has a corresponding list
+ * of zones (in node_zonelists). Thus for two zone modifiers there
+ * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
+ * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible
+ * combinations of zone modifiers in "zone modifier space".
+ */
#define GFP_ZONEMASK 0x03
+/*
+ * As an optimisation any zone modifier bits which are only valid when
+ * no other zone modifier bits are set (loners) should be placed in
+ * the highest order bits of this field. This allows us to reduce the
+ * extent of the zonelists thus saving space. For example in the case
+ * of three zone modifier bits, we could require up to eight zonelists.
+ * If the left most zone modifier is a "loner" then the highest valid
+ * zonelist would be four allowing us to allocate only five zonelists.
+ * Use the first form when the left most bit is not a "loner", otherwise
+ * use the second.
+ */
+/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */
+#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */
/*
* On machines where it is needed (eg PCs) we divide physical memory
*/
struct zone {
- /*
- * Commonly accessed fields:
- */
- spinlock_t lock;
+ /* Fields commonly accessed by the page allocator */
unsigned long free_pages;
unsigned long pages_min, pages_low, pages_high;
/*
- * protection[] is a pre-calculated number of extra pages that must be
- * available in a zone in order for __alloc_pages() to allocate memory
- * from the zone. i.e., for a GFP_KERNEL alloc of "order" there must
- * be "(1<<order) + protection[ZONE_NORMAL]" free pages in the zone
- * for us to choose to allocate the page from that zone.
- *
- * It uses both min_free_kbytes and sysctl_lower_zone_protection.
- * The protection values are recalculated if either of these values
- * change. The array elements are in zonelist order:
- * [0] == GFP_DMA, [1] == GFP_KERNEL, [2] == GFP_HIGHMEM.
+ * We don't know if the memory that we're going to allocate will be freeable
+ * or/and it will be released eventually, so to avoid totally wasting several
+ * GB of ram we must reserve some of the lower zone memory (otherwise we risk
+ * to run OOM on the lower zones despite there's tons of freeable ram
+ * on the higher zones). This array is recalculated at runtime if the
+ * sysctl_lowmem_reserve_ratio sysctl changes.
*/
- unsigned long protection[MAX_NR_ZONES];
+ unsigned long lowmem_reserve[MAX_NR_ZONES];
+
+ struct per_cpu_pageset pageset[NR_CPUS];
+
+ /*
+ * free areas of different sizes
+ */
+ spinlock_t lock;
+ struct free_area free_area[MAX_ORDER];
+
ZONE_PADDING(_pad1_)
+ /* Fields commonly accessed by the page reclaim scanner */
spinlock_t lru_lock;
struct list_head active_list;
struct list_head inactive_list;
- atomic_t nr_scan_active;
- atomic_t nr_scan_inactive;
+ unsigned long nr_scan_active;
+ unsigned long nr_scan_inactive;
unsigned long nr_active;
unsigned long nr_inactive;
- int all_unreclaimable; /* All pages pinned */
unsigned long pages_scanned; /* since last reclaim */
-
- ZONE_PADDING(_pad2_)
+ int all_unreclaimable; /* All pages pinned */
/*
* prev_priority holds the scanning priority for this zone. It is
int temp_priority;
int prev_priority;
- /*
- * free areas of different sizes
- */
- struct free_area free_area[MAX_ORDER];
+
+ ZONE_PADDING(_pad2_)
+ /* Rarely used or read-mostly fields */
/*
* wait_table -- the array holding the hash table
unsigned long wait_table_size;
unsigned long wait_table_bits;
- ZONE_PADDING(_pad3_)
-
- struct per_cpu_pageset pageset[NR_CPUS];
-
/*
* Discontig memory support fields.
*/
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
+ unsigned long spanned_pages; /* total size, including holes */
+ unsigned long present_pages; /* amount of memory (excluding holes) */
+
/*
* rarely used fields:
*/
char *name;
- unsigned long spanned_pages; /* total size, including holes */
- unsigned long present_pages; /* amount of memory (excluding holes) */
} ____cacheline_maxaligned_in_smp;
struct bootmem_data;
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
- struct zonelist node_zonelists[MAX_NR_ZONES];
+ struct zonelist node_zonelists[GFP_ZONETYPES];
int nr_zones;
struct page *node_mem_map;
struct bootmem_data *bdata;
range, including holes */
int node_id;
struct pglist_data *pgdat_next;
- wait_queue_head_t kswapd_wait;
+ wait_queue_head_t kswapd_wait;
struct task_struct *kswapd;
+ int kswapd_max_order;
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
-extern int numnodes;
extern struct pglist_data *pgdat_list;
+void __get_zone_counts(unsigned long *active, unsigned long *inactive,
+ unsigned long *free, struct pglist_data *pgdat);
void get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free);
void build_all_zonelists(void);
-void wakeup_kswapd(struct zone *zone);
+void wakeup_kswapd(struct zone *zone, int order);
+int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int alloc_type, int can_try_harder, int gfp_high);
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
{
pg_data_t *pgdat = zone->zone_pgdat;
- if (zone - pgdat->node_zones < MAX_NR_ZONES - 1)
+ if (zone < pgdat->node_zones + MAX_NR_ZONES - 1)
zone++;
else if (pgdat->pgdat_next) {
pgdat = pgdat->pgdat_next;
#define for_each_zone(zone) \
for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
+static inline int is_highmem_idx(int idx)
+{
+ return (idx == ZONE_HIGHMEM);
+}
+
+static inline int is_normal_idx(int idx)
+{
+ return (idx == ZONE_NORMAL);
+}
/**
* is_highmem - helper function to quickly check if a struct zone is a
* highmem zone or not. This is an attempt to keep references
*/
static inline int is_highmem(struct zone *zone)
{
- return (zone - zone->zone_pgdat->node_zones == ZONE_HIGHMEM);
+ return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
}
static inline int is_normal(struct zone *zone)
{
- return (zone - zone->zone_pgdat->node_zones == ZONE_NORMAL);
+ return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
}
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
struct file;
int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
- void __user *, size_t *);
-int lower_zone_protection_sysctl_handler(struct ctl_table *, int, struct file *,
- void __user *, size_t *);
+ void __user *, size_t *, loff_t *);
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+ void __user *, size_t *, loff_t *);
#include <linux/topology.h>
/* Returns the number of the current Node. */
-#define numa_node_id() (cpu_to_node(smp_processor_id()))
+#define numa_node_id() (cpu_to_node(_smp_processor_id()))
#ifndef CONFIG_DISCONTIGMEM
#error ZONES_SHIFT > MAX_ZONES_SHIFT
#endif
-extern DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
-
-#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_NUMA)
-
-#define node_online(node) test_bit(node, node_online_map)
-#define node_set_online(node) set_bit(node, node_online_map)
-#define node_set_offline(node) clear_bit(node, node_online_map)
-static inline unsigned int num_online_nodes(void)
-{
- int i, num = 0;
-
- for(i = 0; i < MAX_NUMNODES; i++){
- if (node_online(i))
- num++;
- }
- return num;
-}
-
-#else /* !CONFIG_DISCONTIGMEM && !CONFIG_NUMA */
-
-#define node_online(node) \
- ({ BUG_ON((node) != 0); test_bit(node, node_online_map); })
-#define node_set_online(node) \
- ({ BUG_ON((node) != 0); set_bit(node, node_online_map); })
-#define node_set_offline(node) \
- ({ BUG_ON((node) != 0); clear_bit(node, node_online_map); })
-#define num_online_nodes() 1
-
-#endif /* CONFIG_DISCONTIGMEM || CONFIG_NUMA */
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _LINUX_MMZONE_H */