/*
 * Simple NUMA memory policy for the Linux kernel.
 *
 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License, version 2.
 *
 * NUMA policy allows the user to give hints in which node(s) memory should
 * be allocated.
 *
 * Support four policies per VMA and per process:
 *
 * The VMA policy has priority over the process policy for a page fault.
 *
 * interleave   Allocate memory interleaved over a set of nodes,
 *              with normal fallback if it fails.
 *              For VMA based allocations this interleaves based on the
 *              offset into the backing object or offset into the mapping
 *              for anonymous memory. For process policy a process counter
 *              is used.
 *
 * bind         Only allocate memory on a specific set of nodes,
 *              no fallback.
 *
 * preferred    Try a specific node first before normal fallback.
 *              As a special case node -1 here means do the allocation
 *              on the local CPU. This is normally identical to default,
 *              but useful to set in a VMA when you have a non default
 *              process policy.
 *
 * default      Allocate on the local node first, or when on a VMA
 *              use the process policy. This is what Linux always did
 *              in a NUMA aware kernel and still does by, ahem, default.
 *
 * The process policy is applied for most non interrupt memory allocations
 * in that process' context. Interrupts ignore the policies and always
 * try to allocate on the local CPU. The VMA policy is only applied for memory
 * allocations for a VMA in the VM.
 *
 * Currently there are a few corner cases in swapping where the policy
 * is not applied, but the majority should be handled. When process policy
 * is used it is not remembered over swap outs/swap ins.
 *
 * Only the highest zone in the zone hierarchy gets policied. Allocations
 * requesting a lower zone just use default policy. This implies that
 * on systems with highmem kernel lowmem allocations don't get policied.
 * Same with GFP_DMA allocations.
 *
 * For shmfs/tmpfs/hugetlbfs shared memory the policy is shared between
 * all users and remembered even when nobody has memory mapped.
 * (A user-space usage sketch follows this comment block.)
 *
 * TODO:
 * - fix mmap readahead to honour policy and enable policy for any page
 *   cache object
 * - statistics for bigpages
 * - global policy for page cache? currently it uses process policy;
 *   requires the first item above.
 * - handle mremap for shared memory (currently ignored for the policy)
 * - make bind policy root only? It can trigger OOM much faster and the
 *   kernel does not always handle that gracefully.
 * - could replace all the switch()es with a mempolicy_ops structure.
 */
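
/*
 * Illustrative user-space usage of the policy syscalls implemented below
 * (a minimal sketch, not part of this file). It assumes the numactl/libnuma
 * <numaif.h> wrappers set_mempolicy() and mbind(); addr and length stand in
 * for an existing mapping:
 *
 *	#include <numaif.h>
 *
 *	unsigned long il_nodes = (1UL << 0) | (1UL << 1);
 *	unsigned long node0 = 1UL << 0;
 *
 *	// Interleave all further allocations of this process over nodes 0-1.
 *	set_mempolicy(MPOL_INTERLEAVE, &il_nodes, sizeof(il_nodes) * 8);
 *
 *	// Bind an existing mapping to node 0; with MPOL_MF_STRICT the call
 *	// fails if pages in the range already sit on other nodes.
 *	mbind(addr, length, MPOL_BIND, &node0, sizeof(node0) * 8,
 *	      MPOL_MF_STRICT);
 */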

#include <linux/mempolicy.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <asm/uaccess.h>

static kmem_cache_t *policy_cache;
static kmem_cache_t *sn_cache;

#define PDprintk(fmt...)

/* Highest zone. A specific allocation for a zone below that is not
   policied. */
static int policy_zone;

static struct mempolicy default_policy = {
	.refcnt = ATOMIC_INIT(1), /* never free it */
	.policy = MPOL_DEFAULT,

/* Check if all specified nodes are online */
static int nodes_online(unsigned long *nodes)
	DECLARE_BITMAP(offline, MAX_NUMNODES);

	bitmap_copy(offline, node_online_map, MAX_NUMNODES);
	if (bitmap_empty(offline, MAX_NUMNODES))
	bitmap_complement(offline, MAX_NUMNODES);
	bitmap_and(offline, offline, nodes, MAX_NUMNODES);
	if (!bitmap_empty(offline, MAX_NUMNODES))

/* Do sanity checking on a policy */
static int mpol_check_policy(int mode, unsigned long *nodes)
	int empty = bitmap_empty(nodes, MAX_NUMNODES);

	case MPOL_INTERLEAVE:
		/* Preferred will only use the first bit, but allow
		   more for now. */

	return nodes_online(nodes);

/* Copy a node mask from user space. */
static int get_nodes(unsigned long *nodes, unsigned long __user *nmask,
		     unsigned long maxnode, int mode)
	unsigned long nlongs;
	unsigned long endmask;

	nlongs = BITS_TO_LONGS(maxnode);
	if ((maxnode % BITS_PER_LONG) == 0)
	endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;

	/* When the user specified more nodes than supported just check
	   if the non supported part is all zero. */
	if (nmask && nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
			if (get_user(t, nmask + k))
			if (k == nlongs - 1) {
		nlongs = BITS_TO_LONGS(MAX_NUMNODES);

	bitmap_zero(nodes, MAX_NUMNODES);
	if (nmask && copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long)))
	nodes[nlongs-1] &= endmask;
	return mpol_check_policy(mode, nodes);

/* Generate a custom zonelist for the BIND policy. */
static struct zonelist *bind_zonelist(unsigned long *nodes)
	max = 1 + MAX_NR_ZONES * bitmap_weight(nodes, MAX_NUMNODES);
	zl = kmalloc(sizeof(void *) * max, GFP_KERNEL);
	for (nd = find_first_bit(nodes, MAX_NUMNODES);
	     nd = find_next_bit(nodes, MAX_NUMNODES, 1+nd)) {
		for (k = MAX_NR_ZONES-1; k >= 0; k--) {
			struct zone *z = &NODE_DATA(nd)->node_zones[k];
			if (!z->present_pages)
			zl->zones[num++] = z;
	zl->zones[num] = NULL;

/* Create a new policy */
static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
	struct mempolicy *policy;

	PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes[0]);
	if (mode == MPOL_DEFAULT)
	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
	return ERR_PTR(-ENOMEM);
	atomic_set(&policy->refcnt, 1);
	case MPOL_INTERLEAVE:
		bitmap_copy(policy->v.nodes, nodes, MAX_NUMNODES);
		policy->v.preferred_node = find_first_bit(nodes, MAX_NUMNODES);
		if (policy->v.preferred_node >= MAX_NUMNODES)
			policy->v.preferred_node = -1;
		policy->v.zonelist = bind_zonelist(nodes);
		if (policy->v.zonelist == NULL) {
			kmem_cache_free(policy_cache, policy);
			return ERR_PTR(-ENOMEM);
	policy->policy = mode;

/* Ensure all existing pages follow the policy. */
verify_pages(unsigned long addr, unsigned long end, unsigned long *nodes)
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		addr = (addr + PGDIR_SIZE) & PGDIR_MASK;
	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd)) {
		addr = (addr + PMD_SIZE) & PMD_MASK;
	pte = pte_offset_map(pmd, addr);
	if (pte_present(*pte))
	unsigned nid = page_to_nid(p);
	if (!test_bit(nid, nodes))

/* Step 1: check the range */
static struct vm_area_struct *
check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
	    unsigned long *nodes, unsigned long flags)
	struct vm_area_struct *first, *vma, *prev;

	first = find_vma(mm, start);
		return ERR_PTR(-EFAULT);
	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
		if (!vma->vm_next && vma->vm_end < end)
			return ERR_PTR(-EFAULT);
		if (prev && prev->vm_end < vma->vm_start)
			return ERR_PTR(-EFAULT);
		if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
			err = verify_pages(vma->vm_start, vma->vm_end, nodes);
				first = ERR_PTR(err);

/* Apply policy to a single VMA */
static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
	struct mempolicy *old = vma->vm_policy;

	PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
		 vma->vm_ops, vma->vm_file,
		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
	if (vma->vm_ops && vma->vm_ops->set_policy)
		err = vma->vm_ops->set_policy(vma, new);
	vma->vm_policy = new;

/* Step 2: apply policy to a range and do splits. */
static int mbind_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, struct mempolicy *new)
	struct vm_area_struct *next;

	for (; vma && vma->vm_start < end; vma = next) {
		if (vma->vm_start < start)
			err = split_vma(vma->vm_mm, vma, start, 1);
		if (!err && vma->vm_end > end)
			err = split_vma(vma->vm_mm, vma, end, 0);
		err = policy_vma(vma, new);

/* Change policy for a memory range */
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
			  unsigned long __user *nmask, unsigned long maxnode,
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	struct mempolicy *new;
	DECLARE_BITMAP(nodes, MAX_NUMNODES);

	if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
	if (start & ~PAGE_MASK)
	if (mode == MPOL_DEFAULT)
		flags &= ~MPOL_MF_STRICT;
	len = (len + PAGE_SIZE - 1) & PAGE_MASK;
	err = get_nodes(nodes, nmask, maxnode, mode);
	new = mpol_new(mode, nodes);

	PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
	down_write(&mm->mmap_sem);
	vma = check_range(mm, start, end, nodes, flags);
	err = mbind_range(vma, start, end, new);
	up_write(&mm->mmap_sem);

/* Set the process memory policy */
asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
				  unsigned long maxnode)
	struct mempolicy *new;
	DECLARE_BITMAP(nodes, MAX_NUMNODES);

	err = get_nodes(nodes, nmask, maxnode, mode);
	new = mpol_new(mode, nodes);
	mpol_free(current->mempolicy);
	current->mempolicy = new;
	if (new && new->policy == MPOL_INTERLEAVE)
		current->il_next = find_first_bit(new->v.nodes, MAX_NUMNODES);

/* Fill a zone bitmap for a policy */
static void get_zonemask(struct mempolicy *p, unsigned long *nodes)
	bitmap_zero(nodes, MAX_NUMNODES);
		for (i = 0; p->v.zonelist->zones[i]; i++)
			__set_bit(p->v.zonelist->zones[i]->zone_pgdat->node_id, nodes);
	case MPOL_INTERLEAVE:
		bitmap_copy(nodes, p->v.nodes, MAX_NUMNODES);
		/* or use current node instead of online map? */
		if (p->v.preferred_node < 0)
			bitmap_copy(nodes, node_online_map, MAX_NUMNODES);
			__set_bit(p->v.preferred_node, nodes);

static int lookup_node(struct mm_struct *mm, unsigned long addr)
	err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
		err = page_zone(p)->zone_pgdat->node_id;

/* Copy a kernel node mask to user space */
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
			      void *nodes, unsigned nbytes)
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;

	if (copy > PAGE_SIZE)
	if (clear_user((char __user *)mask + nbytes, copy - nbytes))
	return copy_to_user(mask, nodes, copy) ? -EFAULT : 0;

/* Retrieve NUMA policy */
asmlinkage long sys_get_mempolicy(int __user *policy,
				  unsigned long __user *nmask,
				  unsigned long maxnode,
				  unsigned long addr, unsigned long flags)
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = NULL;
	struct mempolicy *pol = current->mempolicy;

	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
	if (nmask != NULL && maxnode < numnodes)
	if (flags & MPOL_F_ADDR) {
		down_read(&mm->mmap_sem);
		vma = find_vma_intersection(mm, addr, addr+1);
			up_read(&mm->mmap_sem);
		if (vma->vm_ops && vma->vm_ops->get_policy)
			pol = vma->vm_ops->get_policy(vma, addr);
			pol = vma->vm_policy;

		pol = &default_policy;

	if (flags & MPOL_F_NODE) {
		if (flags & MPOL_F_ADDR) {
			err = lookup_node(mm, addr);
		} else if (pol == current->mempolicy &&
			   pol->policy == MPOL_INTERLEAVE) {
			pval = current->il_next;

	if (policy && put_user(pval, policy))

	DECLARE_BITMAP(nodes, MAX_NUMNODES);
	get_zonemask(pol, nodes);
	err = copy_nodes_to_user(nmask, maxnode, nodes, sizeof(nodes));

	up_read(&current->mm->mmap_sem);
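
/*
 * Illustrative query from user space (a sketch, assuming the libnuma
 * <numaif.h> wrapper): MPOL_F_NODE|MPOL_F_ADDR asks for the node that
 * currently backs the page at a mapped address instead of the policy mode:
 *
 *	int node;
 *	get_mempolicy(&node, NULL, 0, addr, MPOL_F_NODE | MPOL_F_ADDR);
 */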

/* The other functions are compatible */
asmlinkage long compat_get_mempolicy(int __user *policy,
				     unsigned __user *nmask, unsigned maxnode,
				     unsigned addr, unsigned flags)
	unsigned long __user *nm = NULL;

	nm = compat_alloc_user_space(ALIGN(maxnode-1, 64) / 8);
	err = sys_get_mempolicy(policy, nm, maxnode, addr, flags);
	if (!err && copy_in_user(nmask, nm, ALIGN(maxnode-1, 32)/8))

/* Return effective policy for a VMA */
static struct mempolicy *
get_vma_policy(struct vm_area_struct *vma, unsigned long addr)
	struct mempolicy *pol = current->mempolicy;

	if (vma->vm_ops && vma->vm_ops->get_policy)
		pol = vma->vm_ops->get_policy(vma, addr);
	else if (vma->vm_policy &&
		 vma->vm_policy->policy != MPOL_DEFAULT)
		pol = vma->vm_policy;
	pol = &default_policy;

/* Return a zonelist representing a mempolicy */
static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy)
	switch (policy->policy) {
		nd = policy->v.preferred_node;
		/* Lower zones don't get a policy applied */
		if (gfp >= policy_zone)
			return policy->v.zonelist;
	case MPOL_INTERLEAVE: /* should not happen */
	return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK);

/* Do dynamic interleaving for a process */
static unsigned interleave_nodes(struct mempolicy *policy)
	struct task_struct *me = current;

	BUG_ON(nid >= MAX_NUMNODES);
	next = find_next_bit(policy->v.nodes, MAX_NUMNODES, 1+nid);
	if (next >= MAX_NUMNODES)
		next = find_first_bit(policy->v.nodes, MAX_NUMNODES);

/* Do static interleaving for a VMA with known offset. */
static unsigned offset_il_node(struct mempolicy *pol,
		struct vm_area_struct *vma, unsigned long off)
	unsigned nnodes = bitmap_weight(pol->v.nodes, MAX_NUMNODES);
	unsigned target = (unsigned)off % nnodes;

		nid = find_next_bit(pol->v.nodes, MAX_NUMNODES, nid+1);
	} while (c <= target);
	BUG_ON(nid >= MAX_NUMNODES);
	BUG_ON(!test_bit(nid, pol->v.nodes));
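
/*
 * Worked example for the static interleave above (illustrative): with
 * pol->v.nodes = {0,2,3} and off = 7, nnodes = 3 and target = 7 % 3 = 1,
 * so the walk stops at the second set bit and the page goes to node 2.
 */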

/* Allocate a page in interleaved policy.
   Own path because it needs to do special accounting. */
static struct page *alloc_page_interleave(unsigned gfp, unsigned nid)
	BUG_ON(!test_bit(nid, node_online_map));
	zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
	page = __alloc_pages(gfp, 0, zl);
	if (page && page_zone(page) == zl->zones[0]) {
		zl->zones[0]->pageset[get_cpu()].interleave_hit++;

/**
 * alloc_page_vma - Allocate a page for a VMA.
 *
 * @gfp:
 *	%GFP_USER    user allocation.
 *	%GFP_KERNEL  kernel allocations,
 *	%GFP_HIGHMEM highmem/user allocations,
 *	%GFP_FS      allocation should not call back into a file system.
 *	%GFP_ATOMIC  don't sleep.
 *
 * @vma: Pointer to VMA or NULL if not available.
 * @addr: Virtual Address of the allocation. Must be inside the VMA.
 *
 * This function allocates a page from the kernel page pool and applies
 * a NUMA policy associated with the VMA or the current process.
 * When the VMA is not NULL the caller must hold down_read on the mmap_sem
 * of the VMA's mm_struct to prevent it from going away. Should be used for
 * all allocations of pages that will be mapped into user space.
 * Returns NULL when no page can be allocated.
 *
 * Should be called with the mmap_sem of the vma held.
 */
alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr)
	struct mempolicy *pol = get_vma_policy(vma, addr);

	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
		BUG_ON(addr >= vma->vm_end);
		BUG_ON(addr < vma->vm_start);
		off += (addr - vma->vm_start) >> PAGE_SHIFT;
		nid = offset_il_node(pol, vma, off);
		/* fall back to process interleaving */
		nid = interleave_nodes(pol);
		return alloc_page_interleave(gfp, nid);
	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
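
/*
 * Illustrative caller (a sketch, not from this file): a fault handler that
 * already holds down_read(&mm->mmap_sem) would allocate the new page with
 * the VMA's policy applied, e.g.
 *
 *	struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 *	if (!page)
 *		return VM_FAULT_OOM;
 */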

/**
 * alloc_pages_current - Allocate pages.
 *
 * @gfp:
 *	%GFP_USER    user allocation,
 *	%GFP_KERNEL  kernel allocation,
 *	%GFP_HIGHMEM highmem allocation,
 *	%GFP_FS      don't call back into a file system.
 *	%GFP_ATOMIC  don't sleep.
 * @order: Power of two of allocation size in pages. 0 is a single page.
 *
 * Allocate pages from the kernel page pool. When not in interrupt
 * context, the current process' NUMA policy is applied.
 * Returns NULL when no page can be allocated.
 */
struct page *alloc_pages_current(unsigned gfp, unsigned order)
	struct mempolicy *pol = current->mempolicy;

	if (!pol || in_interrupt())
		pol = &default_policy;
	if (pol->policy == MPOL_INTERLEAVE && order == 0)
		return alloc_page_interleave(gfp, interleave_nodes(pol));
	return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
EXPORT_SYMBOL(alloc_pages_current);
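
/*
 * Note (sketch): on CONFIG_NUMA kernels the generic alloc_pages()/alloc_page()
 * helpers in <linux/gfp.h> end up in alloc_pages_current(), so e.g.
 *
 *	struct page *p = alloc_pages(GFP_KERNEL, 2);	// 4 contiguous pages
 *
 * already honours the process policy set up above; only order-0 allocations
 * are interleaved, and interrupt-context allocations use the default policy.
 */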

/* Slow path of a mempolicy copy */
struct mempolicy *__mpol_copy(struct mempolicy *old)
	struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL);

		return ERR_PTR(-ENOMEM);
	atomic_set(&new->refcnt, 1);
	if (new->policy == MPOL_BIND) {
		int sz = ksize(old->v.zonelist);
		new->v.zonelist = kmalloc(sz, SLAB_KERNEL);
		if (!new->v.zonelist) {
			kmem_cache_free(policy_cache, new);
			return ERR_PTR(-ENOMEM);
		memcpy(new->v.zonelist, old->v.zonelist, sz);

/* Slow path of a mempolicy comparison */
int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
	if (a->policy != b->policy)
	case MPOL_INTERLEAVE:
		return bitmap_equal(a->v.nodes, b->v.nodes, MAX_NUMNODES);
		return a->v.preferred_node == b->v.preferred_node;
		for (i = 0; a->v.zonelist->zones[i]; i++)
			if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
		return b->v.zonelist->zones[i] == NULL;

/* Slow path of a mpol destructor. */
void __mpol_free(struct mempolicy *p)
	if (!atomic_dec_and_test(&p->refcnt))
	if (p->policy == MPOL_BIND)
		kfree(p->v.zonelist);
	p->policy = MPOL_DEFAULT;
	kmem_cache_free(policy_cache, p);

/*
 * Hugetlb policy. Same as above, just works with node numbers instead of
 * zonelists.
 */

/* Find first node suitable for an allocation */
int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
	struct mempolicy *pol = get_vma_policy(vma, addr);

	switch (pol->policy) {
		return numa_node_id();
		return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
	case MPOL_INTERLEAVE:
		return interleave_nodes(pol);
		return pol->v.preferred_node >= 0 ?
			pol->v.preferred_node : numa_node_id();

/* Find secondary valid nodes for an allocation */
int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
	struct mempolicy *pol = get_vma_policy(vma, addr);

	switch (pol->policy) {
	case MPOL_INTERLEAVE:
		for (z = pol->v.zonelist->zones; *z; z++)
			if ((*z)->zone_pgdat->node_id == nid)

/*
 * Shared memory backing store policy support.
 *
 * Remember policies even when nobody has shared memory mapped.
 * The policies are kept in a Red-Black tree linked from the inode.
 * They are protected by the sp->sem semaphore, which should be held
 * for any accesses to the tree.
 */
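
/*
 * Illustrative hookup (a sketch modelled on tmpfs, with hypothetical foo_*
 * and FOO_I() names): a filesystem keeps a struct shared_policy in its
 * per-inode info and wires the VMA operations to it, so policies survive
 * unmap/remap of the object:
 *
 *	static int foo_set_policy(struct vm_area_struct *vma,
 *				  struct mempolicy *new)
 *	{
 *		struct inode *i = vma->vm_file->f_dentry->d_inode;
 *		return mpol_set_shared_policy(&FOO_I(i)->policy, vma, new);
 *	}
 *
 *	static struct mempolicy *foo_get_policy(struct vm_area_struct *vma,
 *						unsigned long addr)
 *	{
 *		struct inode *i = vma->vm_file->f_dentry->d_inode;
 *		unsigned long idx = ((addr - vma->vm_start) >> PAGE_SHIFT)
 *					+ vma->vm_pgoff;
 *		return mpol_shared_policy_lookup(&FOO_I(i)->policy, idx);
 *	}
 */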

/* lookup first element intersecting start-end */
/* Caller holds sp->sem */
static struct sp_node *
sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
	struct rb_node *n = sp->root.rb_node;

		struct sp_node *p = rb_entry(n, struct sp_node, nd);
		if (start >= p->end) {
		} else if (end < p->start) {
		struct sp_node *w = NULL;
		struct rb_node *prev = rb_prev(n);
		w = rb_entry(prev, struct sp_node, nd);
	return rb_entry(n, struct sp_node, nd);

/* Insert a new shared policy into the list. */
/* Caller holds sp->sem */
static void sp_insert(struct shared_policy *sp, struct sp_node *new)
	struct rb_node **p = &sp->root.rb_node;
	struct rb_node *parent = NULL;

		nd = rb_entry(parent, struct sp_node, nd);
		if (new->start < nd->start)
		else if (new->end > nd->end)
	rb_link_node(&new->nd, parent, p);
	rb_insert_color(&new->nd, &sp->root);
	PDprintk("inserting %lx-%lx: %d\n", new->start, new->end,
		 new->policy ? new->policy->policy : 0);

/* Find shared policy intersecting idx */
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
	struct mempolicy *pol = NULL;

	sn = sp_lookup(sp, idx, idx+1);
		mpol_get(sn->policy);

static void sp_delete(struct shared_policy *sp, struct sp_node *n)
	PDprintk("deleting %lx-%lx\n", n->start, n->end);
	rb_erase(&n->nd, &sp->root);
	mpol_free(n->policy);
	kmem_cache_free(sn_cache, n);

sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);

/* Replace a policy range. */
static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
				 unsigned long end, struct sp_node *new)
	struct sp_node *n, *new2;

	n = sp_lookup(sp, start, end);
	/* Take care of old policies in the same range. */
	while (n && n->start < end) {
		struct rb_node *next = rb_next(&n->nd);
		if (n->start >= start) {
			/* Old policy spanning whole new range. */
			new2 = sp_alloc(end, n->end, n->policy);
			/* Old crossing beginning, but not end (easy) */
			if (n->start < start && n->end > start)
		n = rb_entry(next, struct sp_node, nd);

int mpol_set_shared_policy(struct shared_policy *info,
			   struct vm_area_struct *vma, struct mempolicy *npol)
	struct sp_node *new = NULL;
	unsigned long sz = vma_pages(vma);

	PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
		 sz, npol ? npol->policy : -1,
		 npol ? npol->v.nodes[0] : -1);

		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
	err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
		kmem_cache_free(sn_cache, new);

/* Free a backing policy store on inode delete. */
void mpol_free_shared_policy(struct shared_policy *p)
	struct rb_node *next;

	next = rb_first(&p->root);
		n = rb_entry(next, struct sp_node, nd);
		next = rb_next(&n->nd);
		rb_erase(&n->nd, &p->root);
		mpol_free(n->policy);
		kmem_cache_free(sn_cache, n);

static __init int numa_policy_init(void)
	policy_cache = kmem_cache_create("numa_policy",
					 sizeof(struct mempolicy),
					 0, SLAB_PANIC, NULL, NULL);

	sn_cache = kmem_cache_create("shared_policy_node",
				     sizeof(struct sp_node),
				     0, SLAB_PANIC, NULL, NULL);

module_init(numa_policy_init);