#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/page.h>
/*
* DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
#define ARCH_KMALLOC_MINALIGN 0
#endif
+#ifndef ARCH_KMALLOC_FLAGS
+#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
+#endif
+
/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_HWCACHE_ALIGN | \
SLAB_NO_REAP | SLAB_CACHE_DMA | \
SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
- SLAB_RECLAIM_ACCOUNT )
+ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+ SLAB_DESTROY_BY_RCU)
#else
# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
- SLAB_RECLAIM_ACCOUNT)
+ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
+ SLAB_DESTROY_BY_RCU)
#endif
/*
* is less than 512 (PAGE_SIZE<<3), but greater than 256.
*/
-#define BUFCTL_END 0xffffFFFF
-#define BUFCTL_FREE 0xffffFFFE
-#define SLAB_LIMIT 0xffffFFFD
-typedef unsigned int kmem_bufctl_t;
+#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
+#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
+#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2)
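+/*
+ * Deriving these sentinels from kmem_bufctl_t keeps them correct if the
+ * type is ever narrowed or widened: e.g. with a 16-bit kmem_bufctl_t,
+ * BUFCTL_END would become 0xffff instead of 0xffffffff.
+ */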
/* Max number of objs-per-slab for caches which use off-slab slabs.
* Needed to avoid a possible looping condition in cache_grow().
kmem_bufctl_t free;
};
+/*
+ * struct slab_rcu
+ *
+ * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
+ * arrange for kmem_freepages to be called via RCU. This is useful if
+ * we need to approach a kernel structure obliquely, from its address
+ * obtained without the usual locking. We can lock the structure to
+ * stabilize it and check that it is still at the given address, but
+ * only if we can be sure that the memory has not meanwhile been reused
+ * for some other kind of object (which our subsystem's lock might
+ * corrupt).
+ *
+ * rcu_read_lock before reading the address, then rcu_read_unlock after
+ * taking the spinlock within the structure expected at that address.
+ *
+ * We assume struct slab_rcu can overlay struct slab when destroying.
+ */
+struct slab_rcu {
+ struct rcu_head head;
+ kmem_cache_t *cachep;
+ void *addr;
+};
+
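+/*
+ * An illustrative reader of such a cache (struct foo, foo_lookup() and
+ * foo_still_wanted() below are hypothetical, not part of this change):
+ *
+ *	rcu_read_lock();
+ *	foo = foo_lookup(key);
+ *	if (foo) {
+ *		spin_lock(&foo->lock);
+ *		if (!foo_still_wanted(foo, key)) {
+ *			spin_unlock(&foo->lock);
+ *			foo = NULL;
+ *		}
+ *	}
+ *	rcu_read_unlock();
+ *
+ * Inside the read-side section the slab's pages cannot be handed back
+ * to the page allocator, so the memory at foo still holds an object of
+ * this cache (possibly freed or reused for another instance); taking
+ * its spinlock is therefore safe, and the re-check under the lock
+ * decides whether it is still the object that was looked up.
+ */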
/*
* struct array_cache
*
* cachep->objsize - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
* cachep->objsize - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long]
*/
-static inline int obj_dbghead(kmem_cache_t *cachep)
+static int obj_dbghead(kmem_cache_t *cachep)
{
return cachep->dbghead;
}
-static inline int obj_reallen(kmem_cache_t *cachep)
+static int obj_reallen(kmem_cache_t *cachep)
{
return cachep->reallen;
}
BUG_ON(!(cachep->flags & SLAB_STORE_USER));
return (void**)(objp+cachep->objsize-BYTES_PER_WORD);
}
+
#else
-static inline int obj_dbghead(kmem_cache_t *cachep)
-{
- return 0;
-}
-static inline int obj_reallen(kmem_cache_t *cachep)
-{
- return cachep->objsize;
-}
-static inline unsigned long *dbg_redzone1(kmem_cache_t *cachep, void *objp)
-{
- BUG();
- return 0;
-}
-static inline unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp)
-{
- BUG();
- return 0;
-}
-static inline void **dbg_userword(kmem_cache_t *cachep, void *objp)
-{
- BUG();
- return 0;
-}
+
+#define obj_dbghead(x) 0
+#define obj_reallen(cachep) (cachep->objsize)
+#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long *)NULL;})
+#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long *)NULL;})
+#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
+
#endif
/*
EXPORT_SYMBOL(malloc_sizes);
/* Must match cache_sizes above. Out of line to keep cache footprint low. */
-static struct cache_names {
+struct cache_names {
char *name;
char *name_dma;
-} cache_names[] = {
+};
+
+static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
- { 0, }
+ { NULL, }
#undef CACHE
};
-struct arraycache_init initarray_cache __initdata = { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-struct arraycache_init initarray_generic __initdata = { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+static struct arraycache_init initarray_cache __initdata =
+ { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+static struct arraycache_init initarray_generic __initdata =
+ { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
/* internal cache of cache description objs */
static kmem_cache_t cache_cache = {
/* Guard access to the cache-chain. */
static struct semaphore cache_chain_sem;
-
-struct list_head cache_chain;
+static struct list_head cache_chain;
/*
* vm_enough_memory() looks at this to determine how many
* chicken and egg problem: delay the per-cpu array allocation
* until the general caches are up.
*/
-enum {
+static enum {
NONE,
PARTIAL,
FULL
} g_cpucache_up;
-static DEFINE_PER_CPU(struct timer_list, reap_timers);
+static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void reap_timer_fnc(unsigned long data);
static void free_block(kmem_cache_t* cachep, void** objpp, int len);
static void enable_cpucache (kmem_cache_t *cachep);
+static void cache_reap (void *unused);
static inline void ** ac_entry(struct array_cache *ac)
{
return (void**)(ac+1);
}
static inline struct array_cache *ac_data(kmem_cache_t *cachep)
{
return cachep->array[smp_processor_id()];
}
+static kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
+{
+ struct cache_sizes *csizep = malloc_sizes;
+
+ /* This function could be moved to the header file, and
+ * made inline so consumers can quickly determine what
+ * cache pointer they require.
+ */
+ for ( ; csizep->cs_size; csizep++) {
+ if (size > csizep->cs_size)
+ continue;
+ break;
+ }
+ return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+}
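+/*
+ * For example, kmem_find_general_cachep(100, GFP_KERNEL) walks
+ * malloc_sizes[] to the first entry with cs_size >= 100 (normally the
+ * "size-128" cache) and returns its cs_cachep; with GFP_DMA in the
+ * flags the matching cs_dmacachep is returned instead.
+ */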
+
/* Cal the num objs, wastage, and bytes left over for a given slab size. */
static void cache_estimate (unsigned long gfporder, size_t size, size_t align,
int flags, size_t *left_over, unsigned int *num)
}
/*
- * Start the reap timer running on the target CPU. We run at around 1 to 2Hz.
- * Add the CPU number into the expiry time to minimize the possibility of the
- * CPUs getting into lockstep and contending for the global cache chain lock.
+ * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
+ * via the workqueue/eventd.
+ * Add the CPU number into the expiration time to minimize the possibility of
+ * the CPUs getting into lockstep and contending for the global cache chain
+ * lock.
*/
static void __devinit start_cpu_timer(int cpu)
{
- struct timer_list *rt = &per_cpu(reap_timers, cpu);
+ struct work_struct *reap_work = &per_cpu(reap_work, cpu);
- if (rt->function == NULL) {
- init_timer(rt);
- rt->expires = jiffies + HZ + 3*cpu;
- rt->data = cpu;
- rt->function = reap_timer_fnc;
- add_timer_on(rt, cpu);
+ /*
+ * When this gets called from do_initcalls via cpucache_init(),
+ * init_workqueues() has already run, so keventd will be set up
+ * at that time.
+ */
+ if (keventd_up() && reap_work->func == NULL) {
+ INIT_WORK(reap_work, cache_reap, NULL);
+ schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
}
}
-#ifdef CONFIG_HOTPLUG_CPU
-static void stop_cpu_timer(int cpu)
+static struct array_cache *alloc_arraycache(int cpu, int entries, int batchcount)
{
- struct timer_list *rt = &per_cpu(reap_timers, cpu);
+ int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
+ struct array_cache *nc = NULL;
- if (rt->function) {
- del_timer_sync(rt);
- WARN_ON(timer_pending(rt));
- rt->function = NULL;
+ if (cpu != -1) {
+ nc = kmem_cache_alloc_node(kmem_find_general_cachep(memsize,
+ GFP_KERNEL), cpu_to_node(cpu));
+ }
+ if (!nc)
+ nc = kmalloc(memsize, GFP_KERNEL);
+ if (nc) {
+ nc->avail = 0;
+ nc->limit = entries;
+ nc->batchcount = batchcount;
+ nc->touched = 0;
}
+ return nc;
}
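+/*
+ * In alloc_arraycache() above, cpu == -1 means "no node preference"
+ * (used later for the shared array); for a real CPU a node-local
+ * allocation is attempted first, with plain kmalloc() as the fallback.
+ */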
-#endif
static int __devinit cpuup_callback(struct notifier_block *nfb,
unsigned long action,
case CPU_UP_PREPARE:
down(&cache_chain_sem);
list_for_each_entry(cachep, &cache_chain, next) {
- int memsize;
struct array_cache *nc;
- memsize = sizeof(void*)*cachep->limit+sizeof(struct array_cache);
- nc = kmalloc(memsize, GFP_KERNEL);
+ nc = alloc_arraycache(cpu, cachep->limit, cachep->batchcount);
if (!nc)
goto bad;
- nc->avail = 0;
- nc->limit = cachep->limit;
- nc->batchcount = cachep->batchcount;
- nc->touched = 0;
spin_lock_irq(&cachep->spinlock);
cachep->array[cpu] = nc;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
- stop_cpu_timer(cpu);
/* fall thru */
case CPU_UP_CANCELED:
down(&cache_chain_sem);
* eliminates "false sharing".
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches. */
- sizes->cs_cachep = kmem_cache_create(
- names->name, sizes->cs_size,
- ARCH_KMALLOC_MINALIGN, 0, NULL, NULL);
- if (!sizes->cs_cachep)
- BUG();
+ sizes->cs_cachep = kmem_cache_create(names->name,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+ (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
/* Inc off-slab bufctl limit until the ceiling is hit. */
if (!(OFF_SLAB(sizes->cs_cachep))) {
offslab_limit /= sizeof(kmem_bufctl_t);
}
- sizes->cs_dmacachep = kmem_cache_create(
- names->name_dma, sizes->cs_size,
- ARCH_KMALLOC_MINALIGN, SLAB_CACHE_DMA, NULL, NULL);
- if (!sizes->cs_dmacachep)
- BUG();
+ sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
+ sizes->cs_size, ARCH_KMALLOC_MINALIGN,
+ (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
+ NULL, NULL);
sizes++;
names++;
*/
}
-int __init cpucache_init(void)
+static int __init cpucache_init(void)
{
int cpu;
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
-static inline void *kmem_getpages(kmem_cache_t *cachep, unsigned long flags)
+static void *kmem_getpages(kmem_cache_t *cachep, int flags, int nodeid)
{
+ struct page *page;
void *addr;
+ int i;
flags |= cachep->gfpflags;
- addr = (void*)__get_free_pages(flags, cachep->gfporder);
- if (addr) {
- int i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
-
- if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
- atomic_add(i, &slab_reclaim_pages);
- add_page_state(nr_slab, i);
- while (i--) {
- SetPageSlab(page);
- page++;
- }
+ if (likely(nodeid == -1)) {
+ addr = (void*)__get_free_pages(flags, cachep->gfporder);
+ if (!addr)
+ return NULL;
+ page = virt_to_page(addr);
+ } else {
+ page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+ if (!page)
+ return NULL;
+ addr = page_address(page);
+ }
+
+ i = (1 << cachep->gfporder);
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+ atomic_add(i, &slab_reclaim_pages);
+ add_page_state(nr_slab, i);
+ while (i--) {
+ SetPageSlab(page);
+ page++;
}
return addr;
}
/*
* Interface to system's page release.
*/
-static inline void kmem_freepages(kmem_cache_t *cachep, void *addr)
+static void kmem_freepages(kmem_cache_t *cachep, void *addr)
{
unsigned long i = (1<<cachep->gfporder);
struct page *page = virt_to_page(addr);
atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
}
+static void kmem_rcu_free(struct rcu_head *head)
+{
+ struct slab_rcu *slab_rcu = (struct slab_rcu *) head;
+ kmem_cache_t *cachep = slab_rcu->cachep;
+
+ kmem_freepages(cachep, slab_rcu->addr);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slab_rcu);
+}
+
#if DEBUG
#ifdef CONFIG_DEBUG_PAGEALLOC
}
#endif
+#if DEBUG
+
static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines)
{
-#if DEBUG
int i, size;
char *realobj;
}
if (cachep->flags & SLAB_STORE_USER) {
- printk(KERN_ERR "Last user: [<%p>]", *dbg_userword(cachep, objp));
- print_symbol("(%s)", (unsigned long)*dbg_userword(cachep, objp));
+ printk(KERN_ERR "Last user: [<%p>]",
+ *dbg_userword(cachep, objp));
+ print_symbol("(%s)",
+ (unsigned long)*dbg_userword(cachep, objp));
printk("\n");
}
realobj = (char*)objp+obj_dbghead(cachep);
limit = size-i;
dump_line(realobj, i, limit);
}
-#endif
}
-#if DEBUG
-
static void check_poison_obj(kmem_cache_t *cachep, void *objp)
{
char *realobj;
*/
static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
{
+ void *addr = slabp->s_mem - slabp->colouroff;
+
#if DEBUG
int i;
for (i = 0; i < cachep->num; i++) {
}
}
#endif
-
- kmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slabp);
+
+ if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
+ struct slab_rcu *slab_rcu;
+
+ slab_rcu = (struct slab_rcu *) slabp;
+ slab_rcu->cachep = cachep;
+ slab_rcu->addr = addr;
+ call_rcu(&slab_rcu->head, kmem_rcu_free);
+ } else {
+ kmem_freepages(cachep, addr);
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slabp);
+ }
}
/**
* %SLAB_NO_REAP - Don't automatically reap this cache when we're under
* memory pressure.
*
- * %SLAB_HWCACHE_ALIGN - This flag has no effect and will be removed soon.
- *
+ * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
+ * cacheline. This can be beneficial if you're counting cycles as closely
+ * as davem.
*/
kmem_cache_t *
kmem_cache_create (const char *name, size_t size, size_t align,
in_interrupt() ||
(size < BYTES_PER_WORD) ||
(size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
- (dtor && !ctor) ||
- (align < 0)) {
+ (dtor && !ctor)) {
printk(KERN_ERR "%s: Early error in slab %s\n",
__FUNCTION__, name);
BUG();
*/
if ((size < 4096 || fls(size-1) == fls(size-1+3*BYTES_PER_WORD)))
flags |= SLAB_RED_ZONE|SLAB_STORE_USER;
- flags |= SLAB_POISON;
+ if (!(flags & SLAB_DESTROY_BY_RCU))
+ flags |= SLAB_POISON;
#endif
+ if (flags & SLAB_DESTROY_BY_RCU)
+ BUG_ON(flags & SLAB_POISON);
#endif
+ if (flags & SLAB_DESTROY_BY_RCU)
+ BUG_ON(dtor);
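+ /*
+ * Poisoning and destructors are refused for SLAB_DESTROY_BY_RCU
+ * caches because objects may still be read under rcu_read_lock()
+ * after they have been freed; overwriting them with poison or
+ * running a dtor at that point would corrupt what a lockless
+ * reader sees.
+ */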
+
/*
* Always checks flags, a caller might be expecting debug
* support which isn't available.
up(&cache_chain_sem);
unlock_cpu_hotplug();
opps:
+ if (!cachep && (flags & SLAB_PANIC))
+ panic("kmem_cache_create(): failed to create slab `%s'\n",
+ name);
return cachep;
}
-
EXPORT_SYMBOL(kmem_cache_create);
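+/*
+ * With the new flags a caller can, for example, do
+ *
+ *	cachep = kmem_cache_create("foo_cache", sizeof(struct foo), 0,
+ *			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+ *
+ * and rely on SLAB_PANIC instead of checking for NULL, or pass
+ * SLAB_DESTROY_BY_RCU for caches whose objects are found under
+ * rcu_read_lock(); "foo_cache" and struct foo are illustrative only.
+ */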
-static inline void check_irq_off(void)
-{
#if DEBUG
+static void check_irq_off(void)
+{
BUG_ON(!irqs_disabled());
-#endif
}
-static inline void check_irq_on(void)
+static void check_irq_on(void)
{
-#if DEBUG
BUG_ON(irqs_disabled());
-#endif
}
-static inline void check_spinlock_acquired(kmem_cache_t *cachep)
+static void check_spinlock_acquired(kmem_cache_t *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
BUG_ON(spin_trylock(&cachep->spinlock));
#endif
}
+#else
+#define check_irq_off() do { } while(0)
+#define check_irq_on() do { } while(0)
+#define check_spinlock_acquired(x) do { } while(0)
+#endif
/*
* Waits for all CPUs to execute func().
return 1;
}
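+ /*
+ * For SLAB_DESTROY_BY_RCU caches, slabs freed by slab_destroy() may
+ * still be queued on RCU and their kmem_rcu_free() callbacks still
+ * reference cachep; wait for a grace period so those callbacks have
+ * run before the cache itself is torn down.
+ */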
+ if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
+ synchronize_kernel();
+
/* no cpu_online check required here since we clear the percpu
* array on cpu offline and set this to NULL.
*/
EXPORT_SYMBOL(kmem_cache_destroy);
/* Get the memory for a slab management obj. */
-static inline struct slab* alloc_slabmgmt (kmem_cache_t *cachep,
+static struct slab* alloc_slabmgmt (kmem_cache_t *cachep,
void *objp, int colour_off, int local_flags)
{
struct slab *slabp;
}
}
+static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
+{
+ int i;
+ struct page *page;
+
+ /* Nasty!!!!!! I hope this is OK. */
+ i = 1 << cachep->gfporder;
+ page = virt_to_page(objp);
+ do {
+ SET_PAGE_CACHE(page, cachep);
+ SET_PAGE_SLAB(page, slabp);
+ page++;
+ } while (--i);
+}
+
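+/*
+ * set_slab_attr() plants back-pointers in every page struct spanned by
+ * the slab, so that the free paths can get from an object's address to
+ * its cache and slab via virt_to_page() and GET_PAGE_CACHE()/
+ * GET_PAGE_SLAB() (which stash the pointers in otherwise unused fields
+ * of struct page).
+ */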
/*
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
static int cache_grow (kmem_cache_t * cachep, int flags)
{
struct slab *slabp;
- struct page *page;
void *objp;
size_t offset;
- unsigned int i, local_flags;
+ int local_flags;
unsigned long ctor_flags;
/* Be lazy and only check for valid flags here,
/* Get mem for the objs. */
- if (!(objp = kmem_getpages(cachep, flags)))
+ if (!(objp = kmem_getpages(cachep, flags, -1)))
goto failed;
/* Get slab management. */
if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
goto opps1;
- /* Nasty!!!!!! I hope this is OK. */
- i = 1 << cachep->gfporder;
- page = virt_to_page(objp);
- do {
- SET_PAGE_CACHE(page, cachep);
- SET_PAGE_SLAB(page, slabp);
- page++;
- } while (--i);
+ set_slab_attr(cachep, slabp, objp);
cache_init_objs(cachep, slabp, ctor_flags);
return 0;
}
+#if DEBUG
+
/*
* Perform extra freeing checks:
* - detect bad pointers.
* - POISON/RED_ZONE checking
* - destructor calls, for caches with POISON+dtor
*/
-static inline void kfree_debugcheck(const void *objp)
+static void kfree_debugcheck(const void *objp)
{
-#if DEBUG
struct page *page;
if (!virt_addr_valid(objp)) {
printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", (unsigned long)objp);
BUG();
}
-#endif
}
-static inline void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, void *caller)
+static void *cache_free_debugcheck (kmem_cache_t * cachep, void * objp, void *caller)
{
-#if DEBUG
struct page *page;
unsigned int objnr;
struct slab *slabp;
poison_obj(cachep, objp, POISON_FREE);
#endif
}
-#endif
return objp;
}
-static inline void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
+static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
{
-#if DEBUG
int i;
int entries = 0;
printk("\n");
BUG();
}
-#endif
}
+#else
+#define kfree_debugcheck(x) do { } while(0)
+#define cache_free_debugcheck(x,objp,z) (objp)
+#define check_slabp(x,y) do { } while(0)
+#endif
static void* cache_alloc_refill(kmem_cache_t* cachep, int flags)
{
#endif
}
-static inline void *
+#if DEBUG
+static void *
cache_alloc_debugcheck_after(kmem_cache_t *cachep,
unsigned long flags, void *objp, void *caller)
{
-#if DEBUG
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
cachep->ctor(objp, cachep, ctor_flags);
}
-#endif
return objp;
}
+#else
+#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
+#endif
static inline void * __cache_alloc (kmem_cache_t *cachep, int flags)
return 0;
}
+/**
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
+ *
+ * Identical to kmem_cache_alloc, except that this function is slow
+ * and can sleep. It will allocate memory on the given node, which
+ * can improve performance for CPU-bound structures.
+ */
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, int nodeid)
+{
+ size_t offset;
+ void *objp;
+ struct slab *slabp;
+ kmem_bufctl_t next;
+
+ /* The main algorithms are not node aware, thus we have to cheat:
+ * We bypass all caches and allocate a new slab.
+ * The following code is a streamlined copy of cache_grow().
+ */
+
+ /* Get colour for the slab, and update the next value. */
+ spin_lock_irq(&cachep->spinlock);
+ offset = cachep->colour_next;
+ cachep->colour_next++;
+ if (cachep->colour_next >= cachep->colour)
+ cachep->colour_next = 0;
+ offset *= cachep->colour_off;
+ spin_unlock_irq(&cachep->spinlock);
+
+ /* Get mem for the objs. */
+ if (!(objp = kmem_getpages(cachep, GFP_KERNEL, nodeid)))
+ goto failed;
+
+ /* Get slab management. */
+ if (!(slabp = alloc_slabmgmt(cachep, objp, offset, GFP_KERNEL)))
+ goto opps1;
+
+ set_slab_attr(cachep, slabp, objp);
+ cache_init_objs(cachep, slabp, SLAB_CTOR_CONSTRUCTOR);
+
+ /* The first object is ours: */
+ objp = slabp->s_mem + slabp->free*cachep->objsize;
+ slabp->inuse++;
+ next = slab_bufctl(slabp)[slabp->free];
+#if DEBUG
+ slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+#endif
+ slabp->free = next;
+
+ /* add the remaining objects into the cache */
+ spin_lock_irq(&cachep->spinlock);
+ check_slabp(cachep, slabp);
+ STATS_INC_GROWN(cachep);
+ /* Make slab active. */
+ if (slabp->free == BUFCTL_END) {
+ list_add_tail(&slabp->list, &(list3_data(cachep)->slabs_full));
+ } else {
+ list_add_tail(&slabp->list,
+ &(list3_data(cachep)->slabs_partial));
+ list3_data(cachep)->free_objects += cachep->num-1;
+ }
+ spin_unlock_irq(&cachep->spinlock);
+ objp = cache_alloc_debugcheck_after(cachep, GFP_KERNEL, objp,
+ __builtin_return_address(0));
+ return objp;
+opps1:
+ kmem_freepages(cachep, objp);
+failed:
+ return NULL;
+
+}
+EXPORT_SYMBOL(kmem_cache_alloc_node);
+
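+/*
+ * A typical use is pinning a per-CPU structure to the node that will
+ * touch it (struct foo and foo_cachep are illustrative only):
+ *
+ *	struct foo *p = kmem_cache_alloc_node(foo_cachep, cpu_to_node(cpu));
+ *	if (!p)
+ *		return -ENOMEM;
+ *
+ * Every call grows a fresh slab on the target node and may sleep, so
+ * this is meant for setup paths, not fast paths.
+ */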
/**
* kmalloc - allocate memory
* @size: how many bytes of memory are required.
/**
* __alloc_percpu - allocate one copy of the object for every present
* cpu in the system, zeroing them.
- * Objects should be dereferenced using per_cpu_ptr/get_cpu_ptr
- * macros only.
+ * Objects should be dereferenced using the per_cpu_ptr macro only.
*
* @size: how many bytes of memory are required.
* @align: the alignment, which can't be greater than SMP_CACHE_BYTES.
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i))
continue;
- pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
+ pdata->ptrs[i] = kmem_cache_alloc_node(
+ kmem_find_general_cachep(size, GFP_KERNEL),
+ cpu_to_node(i));
+
if (!pdata->ptrs[i])
goto unwind_oom;
memset(pdata->ptrs[i], 0, size);
EXPORT_SYMBOL(kmem_cache_free);
+/**
+ * kcalloc - allocate memory for an array. The memory is set to zero.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate.
+ */
+void *kcalloc(size_t n, size_t size, int flags)
+{
+ void *ret = NULL;
+
+ if (n != 0 && size > INT_MAX / n)
+ return ret;
+
+ ret = kmalloc(n * size, flags);
+ if (ret)
+ memset(ret, 0, n * size);
+ return ret;
+}
+
+EXPORT_SYMBOL(kcalloc);
+
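+/*
+ * The n != 0 && size > INT_MAX / n test rejects products that would
+ * overflow: e.g. kcalloc(65536, 65536, GFP_KERNEL) returns NULL rather
+ * than letting 65536 * 65536 wrap to 0 on a 32-bit size_t and
+ * "succeed" with a zero-sized allocation.
+ */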
/**
* kfree - free previously allocated memory
* @objp: pointer returned by kmalloc.
EXPORT_SYMBOL(kmem_cache_size);
-kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
-{
- struct cache_sizes *csizep = malloc_sizes;
-
- /* This function could be moved to the header file, and
- * made inline so consumers can quickly determine what
- * cache pointer they require.
- */
- for ( ; csizep->cs_size; csizep++) {
- if (size > csizep->cs_size)
- continue;
- break;
- }
- return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
-}
-
-EXPORT_SYMBOL(kmem_find_general_cachep);
-
struct ccupdate_struct {
kmem_cache_t *cachep;
struct array_cache *new[NR_CPUS];
memset(&new.new,0,sizeof(new.new));
for (i = 0; i < NR_CPUS; i++) {
- struct array_cache *ccnew;
-
- ccnew = kmalloc(sizeof(void*)*limit+
- sizeof(struct array_cache), GFP_KERNEL);
- if (!ccnew) {
- for (i--; i >= 0; i--) kfree(new.new[i]);
- return -ENOMEM;
+ if (cpu_online(i)) {
+ new.new[i] = alloc_arraycache(i, limit, batchcount);
+ if (!new.new[i]) {
+ for (i--; i >= 0; i--) kfree(new.new[i]);
+ return -ENOMEM;
+ }
+ } else {
+ new.new[i] = NULL;
}
- ccnew->avail = 0;
- ccnew->limit = limit;
- ccnew->batchcount = batchcount;
- ccnew->touched = 0;
- new.new[i] = ccnew;
}
new.cachep = cachep;
spin_unlock_irq(&cachep->spinlock);
kfree(ccold);
}
- new_shared = kmalloc(sizeof(void*)*batchcount*shared+
- sizeof(struct array_cache), GFP_KERNEL);
+ new_shared = alloc_arraycache(-1, batchcount*shared, 0xbaadf00d);
if (new_shared) {
struct array_cache *old;
- new_shared->avail = 0;
- new_shared->limit = batchcount*shared;
- new_shared->batchcount = 0xbaadf00d;
- new_shared->touched = 0;
spin_lock_irq(&cachep->spinlock);
old = cachep->lists.shared;
cachep->name, -err);
}
-static void drain_array(kmem_cache_t *cachep, struct array_cache *ac)
-{
- int tofree;
-
- check_irq_off();
- if (ac->touched) {
- ac->touched = 0;
- } else if (ac->avail) {
- tofree = (ac->limit+4)/5;
- if (tofree > ac->avail) {
- tofree = (ac->avail+1)/2;
- }
- spin_lock(&cachep->spinlock);
- free_block(cachep, ac_entry(ac), tofree);
- spin_unlock(&cachep->spinlock);
- ac->avail -= tofree;
- memmove(&ac_entry(ac)[0], &ac_entry(ac)[tofree],
- sizeof(void*)*ac->avail);
- }
-}
-
static void drain_array_locked(kmem_cache_t *cachep,
struct array_cache *ac, int force)
{
/**
* cache_reap - Reclaim memory from caches.
*
- * Called from a timer, every few seconds
+ * Called from workqueue/eventd every few seconds.
* Purpose:
* - clear the per-cpu caches for this CPU.
* - return freeable pages to the main free memory pool.
*
* If we cannot acquire the cache chain semaphore then just give up - we'll
- * try again next timer interrupt.
+ * try again on the next iteration.
*/
-static inline void cache_reap (void)
+static void cache_reap(void *unused)
{
struct list_head *walk;
-#if DEBUG
- BUG_ON(!in_interrupt());
- BUG_ON(in_irq());
-#endif
- if (down_trylock(&cache_chain_sem))
+ if (down_trylock(&cache_chain_sem)) {
+ /* Give up. Set up the next iteration. */
+ schedule_delayed_work(&__get_cpu_var(reap_work),
+ REAPTIMEOUT_CPUC + smp_processor_id());
return;
+ }
list_for_each(walk, &cache_chain) {
kmem_cache_t *searchp;
goto next;
check_irq_on();
- local_irq_disable();
- drain_array(searchp, ac_data(searchp));
- if(time_after(searchp->lists.next_reap, jiffies))
- goto next_irqon;
+ spin_lock_irq(&searchp->spinlock);
+
+ drain_array_locked(searchp, ac_data(searchp), 0);
- spin_lock(&searchp->spinlock);
- if(time_after(searchp->lists.next_reap, jiffies)) {
+ if(time_after(searchp->lists.next_reap, jiffies))
goto next_unlock;
- }
+
searchp->lists.next_reap = jiffies + REAPTIMEOUT_LIST3;
if (searchp->lists.shared)
spin_lock_irq(&searchp->spinlock);
} while(--tofree > 0);
next_unlock:
- spin_unlock(&searchp->spinlock);
-next_irqon:
- local_irq_enable();
+ spin_unlock_irq(&searchp->spinlock);
next:
;
}
check_irq_on();
up(&cache_chain_sem);
-}
-
-/*
- * This is a timer handler. There is one per CPU. It is called periodially
- * to shrink this CPU's caches. Otherwise there could be memory tied up
- * for long periods (or for ever) due to load changes.
- */
-static void reap_timer_fnc(unsigned long cpu)
-{
- struct timer_list *rt = &__get_cpu_var(reap_timers);
-
- /* CPU hotplug can drag us off cpu: don't run on wrong CPU */
- if (!cpu_is_offline(cpu)) {
- cache_reap();
- mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu);
- }
+ /* Set up the next iteration */
+ schedule_delayed_work(&__get_cpu_var(reap_work),
+ REAPTIMEOUT_CPUC + smp_processor_id());
}
#ifdef CONFIG_PROC_FS
unsigned long num_slabs;
const char *name;
char *error = NULL;
- mm_segment_t old_fs;
- char tmp;
check_irq_on();
spin_lock_irq(&cachep->spinlock);
error = "free_objects accounting error";
name = cachep->name;
-
- /*
- * Check to see if `name' resides inside a module which has been
- * unloaded (someone forgot to destroy their cache)
- */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- if (__get_user(tmp, name))
- name = "broken";
- set_fs(old_fs);
-
if (error)
printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
return size;
}
-
-void ptrinfo(unsigned long addr)
-{
- struct page *page;
-
- printk("Dumping data about address %p.\n", (void*)addr);
- if (!virt_addr_valid((void*)addr)) {
- printk("virt addr invalid.\n");
- return;
- }
-#ifdef CONFIG_MMU
- do {
- pgd_t *pgd = pgd_offset_k(addr);
- pmd_t *pmd;
- if (pgd_none(*pgd)) {
- printk("No pgd.\n");
- break;
- }
- pmd = pmd_offset(pgd, addr);
- if (pmd_none(*pmd)) {
- printk("No pmd.\n");
- break;
- }
-#ifdef CONFIG_X86
- if (pmd_large(*pmd)) {
- printk("Large page.\n");
- break;
- }
-#endif
- printk("normal page, pte_val 0x%llx\n",
- (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr)));
- } while(0);
-#endif
-
- page = virt_to_page((void*)addr);
- printk("struct page at %p, flags %08lx\n",
- page, (unsigned long)page->flags);
- if (PageSlab(page)) {
- kmem_cache_t *c;
- struct slab *s;
- unsigned long flags;
- int objnr;
- void *objp;
-
- c = GET_PAGE_CACHE(page);
- printk("belongs to cache %s.\n",c->name);
-
- spin_lock_irqsave(&c->spinlock, flags);
- s = GET_PAGE_SLAB(page);
- printk("slabp %p with %d inuse objects (from %d).\n",
- s, s->inuse, c->num);
- check_slabp(c,s);
-
- objnr = (addr-(unsigned long)s->s_mem)/c->objsize;
- objp = s->s_mem+c->objsize*objnr;
- printk("points into object no %d, starting at %p, len %d.\n",
- objnr, objp, c->objsize);
- if (objnr >= c->num) {
- printk("Bad obj number.\n");
- } else {
- kernel_map_pages(virt_to_page(objp),
- c->objsize/PAGE_SIZE, 1);
-
- print_objinfo(c, objp, 2);
- }
- spin_unlock_irqrestore(&c->spinlock, flags);
-
- }
-}