* process policy.
* default Allocate on the local node first, or when on a VMA
* use the process policy. This is what Linux always did
- * in a NUMA aware kernel and still does by, ahem, default.
+ * in a NUMA aware kernel and still does by, ahem, default.
*
* The process policy is applied for most non interrupt memory allocations
* in that process' context. Interrupts ignore the policies and always
/* Check if all specified nodes are online */
static int nodes_online(unsigned long *nodes)
{
- DECLARE_BITMAP(offline, MAX_NUMNODES);
+ DECLARE_BITMAP(online2, MAX_NUMNODES);
- bitmap_copy(offline, node_online_map, MAX_NUMNODES);
- if (bitmap_empty(offline, MAX_NUMNODES))
- set_bit(0, offline);
- bitmap_complement(offline, MAX_NUMNODES);
- bitmap_and(offline, offline, nodes, MAX_NUMNODES);
- if (!bitmap_empty(offline, MAX_NUMNODES))
+ bitmap_copy(online2, node_online_map, MAX_NUMNODES);
+ if (bitmap_empty(online2, MAX_NUMNODES))
+ set_bit(0, online2);
+ if (!bitmap_subset(nodes, online2, MAX_NUMNODES))
return -EINVAL;
return 0;
}
unsigned long endmask;
--maxnode;
+ bitmap_zero(nodes, MAX_NUMNODES);
+ if (maxnode == 0 || !nmask)
+ return 0;
+
nlongs = BITS_TO_LONGS(maxnode);
if ((maxnode % BITS_PER_LONG) == 0)
endmask = ~0UL;
/* When the user specified more nodes than supported just check
if the non supported part is all zero. */
- if (nmask && nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
+ if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
+ if (nlongs > PAGE_SIZE/sizeof(long))
+ return -EINVAL;
for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
unsigned long t;
if (get_user(t, nmask + k))
endmask = ~0UL;
}
- bitmap_zero(nodes, MAX_NUMNODES);
- if (nmask && copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
+ if (copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
return -EFAULT;
nodes[nlongs-1] &= endmask;
return mpol_check_policy(mode, nodes);
err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
if (err >= 0) {
- err = page_zone(p)->zone_pgdat->node_id;
+ err = page_to_nid(p);
put_page(p);
}
return err;
}
#ifdef CONFIG_COMPAT
-/* The other functions are compatible */
+
asmlinkage long compat_get_mempolicy(int __user *policy,
- unsigned __user *nmask, unsigned maxnode,
- unsigned addr, unsigned flags)
+ compat_ulong_t __user *nmask,
+ compat_ulong_t maxnode,
+ compat_ulong_t addr, compat_ulong_t flags)
{
long err;
unsigned long __user *nm = NULL;
+ unsigned long nr_bits, alloc_size;
+ DECLARE_BITMAP(bm, MAX_NUMNODES);
+
+ nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+
if (nmask)
- nm = compat_alloc_user_space(ALIGN(maxnode-1, 64) / 8);
- err = sys_get_mempolicy(policy, nm, maxnode, addr, flags);
- if (!err && copy_in_user(nmask, nm, ALIGN(maxnode-1, 32)/8))
- err = -EFAULT;
+ nm = compat_alloc_user_space(alloc_size);
+
+ err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
+
+ if (!err && nmask) {
+ err = copy_from_user(bm, nm, alloc_size);
+ /* ensure entire bitmap is zeroed */
+ err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
+ err |= compat_put_bitmap(nmask, bm, nr_bits);
+ }
+
return err;
}
+
+asmlinkage long compat_set_mempolicy(int mode, compat_ulong_t __user *nmask,
+ compat_ulong_t maxnode)
+{
+ long err = 0;
+ unsigned long __user *nm = NULL;
+ unsigned long nr_bits, alloc_size;
+ DECLARE_BITMAP(bm, MAX_NUMNODES);
+
+ nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+
+ if (nmask) {
+ err = compat_get_bitmap(bm, nmask, nr_bits);
+ nm = compat_alloc_user_space(alloc_size);
+ err |= copy_to_user(nm, bm, alloc_size);
+ }
+
+ if (err)
+ return -EFAULT;
+
+ return sys_set_mempolicy(mode, nm, nr_bits+1);
+}
+
+asmlinkage long compat_mbind(compat_ulong_t start, compat_ulong_t len,
+ compat_ulong_t mode, compat_ulong_t __user *nmask,
+ compat_ulong_t maxnode, compat_ulong_t flags)
+{
+ long err = 0;
+ unsigned long __user *nm = NULL;
+ unsigned long nr_bits, alloc_size;
+ DECLARE_BITMAP(bm, MAX_NUMNODES);
+
+ nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
+ alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+
+ if (nmask) {
+ err = compat_get_bitmap(bm, nmask, nr_bits);
+ nm = compat_alloc_user_space(alloc_size);
+ err |= copy_to_user(nm, bm, alloc_size);
+ }
+
+ if (err)
+ return -EFAULT;
+
+ return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
+}
+
#endif
/* Return effective policy for a VMA */
/* Allocate a page in interleaved policy.
Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(unsigned gfp, unsigned nid)
+static struct page *alloc_page_interleave(unsigned gfp, unsigned order, unsigned nid)
{
struct zonelist *zl;
struct page *page;
BUG_ON(!test_bit(nid, node_online_map));
zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
- page = __alloc_pages(gfp, 0, zl);
+ page = __alloc_pages(gfp, order, zl);
if (page && page_zone(page) == zl->zones[0]) {
zl->zones[0]->pageset[get_cpu()].interleave_hit++;
put_cpu();
/* fall back to process interleaving */
nid = interleave_nodes(pol);
}
- return alloc_page_interleave(gfp, nid);
+ return alloc_page_interleave(gfp, 0, nid);
}
return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
}
* alloc_pages_current - Allocate pages.
*
* @gfp:
- * %GFP_USER user allocation,
+ * %GFP_USER user allocation,
* %GFP_KERNEL kernel allocation,
* %GFP_HIGHMEM highmem allocation,
* %GFP_FS don't call back into a file system.
if (!pol || in_interrupt())
pol = &default_policy;
- if (pol->policy == MPOL_INTERLEAVE && order == 0)
- return alloc_page_interleave(gfp, interleave_nodes(pol));
+ if (pol->policy == MPOL_INTERLEAVE)
+ return alloc_page_interleave(gfp, order, interleave_nodes(pol));
return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
}
EXPORT_SYMBOL(alloc_pages_current);
up(&p->sem);
}
-static __init int numa_policy_init(void)
+/* assumes fs == KERNEL_DS */
+void __init numa_policy_init(void)
{
policy_cache = kmem_cache_create("numa_policy",
sizeof(struct mempolicy),
sn_cache = kmem_cache_create("shared_policy_node",
sizeof(struct sp_node),
0, SLAB_PANIC, NULL, NULL);
- return 0;
+
+ /* Set interleaving policy for system init. This way not all
+ the data structures allocated at system boot end up in node zero. */
+
+ if (sys_set_mempolicy(MPOL_INTERLEAVE, node_online_map, MAX_NUMNODES) < 0)
+ printk("numa_policy_init: interleaving failed\n");
+}
+
+/* Reset policy of current process to default.
+ * Assumes fs == KERNEL_DS */
+void numa_default_policy(void)
+{
+ sys_set_mempolicy(MPOL_DEFAULT, NULL, 0);
}
-module_init(numa_policy_init);