git://git.onelab.eu / linux-2.6.git / blobdiff
vserver 2.0 rc7
[linux-2.6.git] / mm / mempolicy.c
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8fe9c7e..08c41da 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -66,6 +66,8 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/nodemask.h>
+#include <linux/cpuset.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -74,6 +76,7 @@
 #include <linux/init.h>
 #include <linux/compat.h>
 #include <linux/mempolicy.h>
+#include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
 static kmem_cache_t *policy_cache;
@@ -95,7 +98,7 @@ static int nodes_online(unsigned long *nodes)
 {
 	DECLARE_BITMAP(online2, MAX_NUMNODES);
 
-	bitmap_copy(online2, node_online_map, MAX_NUMNODES);
+	bitmap_copy(online2, nodes_addr(node_online_map), MAX_NUMNODES);
 	if (bitmap_empty(online2, MAX_NUMNODES))
 		set_bit(0, online2);
 	if (!bitmap_subset(nodes, online2, MAX_NUMNODES))
@@ -165,6 +168,10 @@ static int get_nodes(unsigned long *nodes, unsigned long __user *nmask,
 	if (copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
 		return -EFAULT;
 	nodes[nlongs-1] &= endmask;
+	/* Update current mems_allowed */
+	cpuset_update_current_mems_allowed();
+	/* Ignore nodes not set in current->mems_allowed */
+	cpuset_restrict_to_mems_allowed(nodes);
 	return mpol_check_policy(mode, nodes);
 }
@@ -232,18 +239,29 @@ static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
 /* Ensure all existing pages follow the policy. */
 static int
-verify_pages(unsigned long addr, unsigned long end, unsigned long *nodes)
+verify_pages(struct mm_struct *mm,
+	     unsigned long addr, unsigned long end, unsigned long *nodes)
 {
 	while (addr < end) {
 		struct page *p;
 		pte_t *pte;
 		pmd_t *pmd;
-		pgd_t *pgd = pgd_offset_k(addr);
+		pud_t *pud;
+		pgd_t *pgd;
+		pgd = pgd_offset(mm, addr);
 		if (pgd_none(*pgd)) {
-			addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			unsigned long next = (addr + PGDIR_SIZE) & PGDIR_MASK;
+			if (next > addr)
+				break;
+			addr = next;
+			continue;
+		}
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			addr = (addr + PUD_SIZE) & PUD_MASK;
 			continue;
 		}
-		pmd = pmd_offset(pgd, addr);
+		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			addr = (addr + PMD_SIZE) & PMD_MASK;
 			continue;
 		}
@@ -281,7 +299,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		if (prev && prev->vm_end < vma->vm_start)
 			return ERR_PTR(-EFAULT);
 		if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
-			err = verify_pages(vma->vm_start, vma->vm_end, nodes);
+			err = verify_pages(vma->vm_mm,
+					   vma->vm_start, vma->vm_end, nodes);
 			if (err) {
 				first = ERR_PTR(err);
 				break;
@@ -424,7 +443,7 @@ static void get_zonemask(struct mempolicy *p, unsigned long *nodes)
 	case MPOL_PREFERRED:
 		/* or use current node instead of online map? */
 		if (p->v.preferred_node < 0)
-			bitmap_copy(nodes, node_online_map, MAX_NUMNODES);
+			bitmap_copy(nodes, nodes_addr(node_online_map), MAX_NUMNODES);
 		else
 			__set_bit(p->v.preferred_node, nodes);
 		break;
@@ -475,7 +494,7 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 		return -EINVAL;
-	if (nmask != NULL && maxnode < numnodes)
+	if (nmask != NULL && maxnode < MAX_NUMNODES)
 		return -EINVAL;
 	if (flags & MPOL_F_ADDR) {
 		down_read(&mm->mmap_sem);
@@ -510,9 +529,13 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 	} else
 		pval = pol->policy;
 
-	err = -EFAULT;
+	if (vma) {
+		up_read(&current->mm->mmap_sem);
+		vma = NULL;
+	}
+
 	if (policy && put_user(pval, policy))
-		goto out;
+		return -EFAULT;
 
 	err = 0;
 	if (nmask) {
@@ -529,7 +552,7 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 #ifdef CONFIG_COMPAT
 
-asmlinkage long compat_get_mempolicy(int __user *policy,
+asmlinkage long compat_sys_get_mempolicy(int __user *policy,
 				     compat_ulong_t __user *nmask,
 				     compat_ulong_t maxnode,
 				     compat_ulong_t addr, compat_ulong_t flags)
 {
@@ -557,7 +580,7 @@ asmlinkage long compat_get_mempolicy(int __user *policy,
 	return err;
 }
 
-asmlinkage long compat_set_mempolicy(int mode, compat_ulong_t __user *nmask,
+asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
 				     compat_ulong_t maxnode)
 {
 	long err = 0;
@@ -580,7 +603,7 @@ asmlinkage long compat_set_mempolicy(int mode, compat_ulong_t __user *nmask,
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
-asmlinkage long compat_mbind(compat_ulong_t start, compat_ulong_t len,
+asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 			     compat_ulong_t mode, compat_ulong_t __user *nmask,
 			     compat_ulong_t maxnode, compat_ulong_t flags)
 {
@@ -625,7 +648,7 @@ get_vma_policy(struct vm_area_struct *vma, unsigned long addr)
 }
 
 /* Return a zonelist representing a mempolicy */
-static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy)
+static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy)
 {
 	int nd;
@@ -637,8 +660,10 @@ static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy)
 		break;
 	case MPOL_BIND:
 		/* Lower zones don't get a policy applied */
-		if (gfp >= policy_zone)
-			return policy->v.zonelist;
+		/* Careful: current->mems_allowed might have moved */
+		if ((gfp & GFP_ZONEMASK) >= policy_zone)
+			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
+				return policy->v.zonelist;
 		/*FALL THROUGH*/
 	case MPOL_INTERLEAVE: /* should not happen */
 	case MPOL_DEFAULT:
@@ -687,12 +712,12 @@ static unsigned offset_il_node(struct mempolicy *pol,
 
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(unsigned gfp, unsigned order, unsigned nid)
+static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid)
 {
 	struct zonelist *zl;
 	struct page *page;
 
-	BUG_ON(!test_bit(nid, node_online_map));
+	BUG_ON(!node_online(nid));
 	zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
 	page = __alloc_pages(gfp, order, zl);
 	if (page && page_zone(page) == zl->zones[0]) {
@@ -725,10 +750,12 @@ static struct page *alloc_page_interleave(unsigned gfp, unsigned order, unsigned nid)
  *	Should be called with the mm_sem of the vma hold.
  */
 struct page *
-alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr)
+alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(vma, addr);
 
+	cpuset_update_current_mems_allowed();
+
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
 		if (vma) {
@@ -761,11 +788,17 @@ alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr)
  *	Allocate a page from the kernel page pool.  When not in
  *	interrupt context and apply the current process NUMA policy.
  *	Returns NULL when no page can be allocated.
+ *
+ *	Don't call cpuset_update_current_mems_allowed() unless
+ *	1) it's ok to take cpuset_sem (can WAIT), and
+ *	2) allocating for current task (not interrupt).
  */
-struct page *alloc_pages_current(unsigned gfp, unsigned order)
+struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order)
 {
 	struct mempolicy *pol = current->mempolicy;
 
+	if ((gfp & __GFP_WAIT) && !in_interrupt())
+		cpuset_update_current_mems_allowed();
 	if (!pol || in_interrupt())
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
@@ -886,12 +919,12 @@ int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
  *
  * Remember policies even when nobody has shared memory mapped.
  * The policies are kept in Red-Black tree linked from the inode.
- * They are protected by the sp->sem semaphore, which should be held
+ * They are protected by the sp->lock spinlock, which should be held
  * for any accesses to the tree.
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->sem */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -899,13 +932,13 @@ sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 	while (n) {
 		struct sp_node *p = rb_entry(n, struct sp_node, nd);
-		if (start >= p->end) {
+
+		if (start >= p->end)
 			n = n->rb_right;
-		} else if (end < p->start) {
+		else if (end <= p->start)
 			n = n->rb_left;
-		} else {
+		else
 			break;
-		}
 	}
 	if (!n)
 		return NULL;
@@ -923,7 +956,7 @@ sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 }
 
 /* Insert a new shared policy into the list. */
-/* Caller holds sp->sem */
+/* Caller holds sp->lock */
 static void sp_insert(struct shared_policy *sp, struct sp_node *new)
 {
 	struct rb_node **p = &sp->root.rb_node;
@@ -953,13 +986,15 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 	struct mempolicy *pol = NULL;
 	struct sp_node *sn;
 
-	down(&sp->sem);
+	if (!sp->root.rb_node)
+		return NULL;
+	spin_lock(&sp->lock);
 	sn = sp_lookup(sp, idx, idx+1);
 	if (sn) {
 		mpol_get(sn->policy);
 		pol = sn->policy;
 	}
-	up(&sp->sem);
+	spin_unlock(&sp->lock);
 	return pol;
 }
@@ -989,9 +1024,10 @@ sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
 static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 				 unsigned long end, struct sp_node *new)
 {
-	struct sp_node *n, *new2;
+	struct sp_node *n, *new2 = NULL;
 
-	down(&sp->sem);
+restart:
+	spin_lock(&sp->lock);
 	n = sp_lookup(sp, start, end);
 	/* Take care of old policies in the same range. */
 	while (n && n->start < end) {
@@ -1004,16 +1040,18 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 		} else {
 			/* Old policy spanning whole new range. */
 			if (n->end > end) {
-				new2 = sp_alloc(end, n->end, n->policy);
 				if (!new2) {
-					up(&sp->sem);
-					return -ENOMEM;
+					spin_unlock(&sp->lock);
+					new2 = sp_alloc(end, n->end, n->policy);
+					if (!new2)
+						return -ENOMEM;
+					goto restart;
 				}
-				n->end = end;
+				n->end = start;
 				sp_insert(sp, new2);
-			}
-			/* Old crossing beginning, but not end (easy) */
-			if (n->start < start && n->end > start)
+				new2 = NULL;
+				break;
+			} else
 				n->end = start;
 		}
 		if (!next)
@@ -1022,7 +1060,11 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 	}
 	if (new)
 		sp_insert(sp, new);
-	up(&sp->sem);
+	spin_unlock(&sp->lock);
+	if (new2) {
+		mpol_free(new2->policy);
+		kmem_cache_free(sn_cache, new2);
+	}
 	return 0;
 }
@@ -1055,16 +1097,18 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	struct sp_node *n;
 	struct rb_node *next;
 
-	down(&p->sem);
+	if (!p->root.rb_node)
+		return;
+	spin_lock(&p->lock);
 	next = rb_first(&p->root);
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
-		rb_erase(&n->nd, &p->root);
 		mpol_free(n->policy);
 		kmem_cache_free(sn_cache, n);
 	}
-	up(&p->sem);
+	spin_unlock(&p->lock);
+	p->root = RB_ROOT;
 }
 
 /* assumes fs == KERNEL_DS */
@@ -1081,7 +1125,8 @@ void __init numa_policy_init(void)
 	/* Set interleaving policy for system init. This way not all
 	   the data structures allocated at system boot end up in node zero. */
 
-	if (sys_set_mempolicy(MPOL_INTERLEAVE, node_online_map, MAX_NUMNODES) < 0)
+	if (sys_set_mempolicy(MPOL_INTERLEAVE, nodes_addr(node_online_map),
+			      MAX_NUMNODES) < 0)
 		printk("numa_policy_init: interleaving failed\n");
 }