X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fmbcache.c;h=e4fde1ab22cdb0a5af105cdea66cccf9473ac08e;hb=43bc926fffd92024b46cafaf7350d669ba9ca884;hp=dbc4443e6949816ebe4e2d6696ba4dd1b9d5ede1;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git

diff --git a/fs/mbcache.c b/fs/mbcache.c
index dbc4443e6..e4fde1ab2 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -54,6 +54,10 @@
 		printk(KERN_ERR f); \
 		printk("\n"); \
 	} while(0)
+
+#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
+
+static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
 
 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
 MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
@@ -65,15 +69,27 @@ EXPORT_SYMBOL(mb_cache_destroy);
 EXPORT_SYMBOL(mb_cache_entry_alloc);
 EXPORT_SYMBOL(mb_cache_entry_insert);
 EXPORT_SYMBOL(mb_cache_entry_release);
-EXPORT_SYMBOL(mb_cache_entry_takeout);
 EXPORT_SYMBOL(mb_cache_entry_free);
-EXPORT_SYMBOL(mb_cache_entry_dup);
 EXPORT_SYMBOL(mb_cache_entry_get);
 #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
 EXPORT_SYMBOL(mb_cache_entry_find_first);
 EXPORT_SYMBOL(mb_cache_entry_find_next);
 #endif
 
+struct mb_cache {
+	struct list_head		c_cache_list;
+	const char			*c_name;
+	struct mb_cache_op		c_op;
+	atomic_t			c_entry_count;
+	int				c_bucket_bits;
+#ifndef MB_CACHE_INDEXES_COUNT
+	int				c_indexes_count;
+#endif
+	kmem_cache_t			*c_entry_cache;
+	struct list_head		*c_block_hash;
+	struct list_head		*c_indexes_hash[0];
+};
+
 /*
  * Global data: list of all mbcache's, lru list, and a spinlock for
@@ -83,7 +99,7 @@ EXPORT_SYMBOL(mb_cache_entry_find_next);
 
 static LIST_HEAD(mb_cache_list);
 static LIST_HEAD(mb_cache_lru_list);
-static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(mb_cache_spinlock);
 static struct shrinker *mb_shrinker;
 
 static inline int
@@ -100,7 +116,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
+static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
 
 
 static inline int
@@ -110,7 +126,7 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
 }
 
 
-static inline void
+static void
 __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 {
 	int n;
@@ -123,12 +139,12 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 }
 
 
-static inline void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask)
+static void
+__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
 	struct mb_cache *cache = ce->e_cache;
 
-	mb_assert(atomic_read(&ce->e_used) == 0);
+	mb_assert(!(ce->e_used || ce->e_queued));
 	if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) {
 		/* free failed -- put back on the lru list
 		   for freeing later. */
@@ -142,12 +158,19 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 }
 
 
-static inline void
+static void
 __mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
 {
-	if (atomic_dec_and_test(&ce->e_used)) {
+	/* Wake up all processes queuing for this cache entry. */
+	if (ce->e_queued)
+		wake_up_all(&mb_cache_queue);
+	if (ce->e_used >= MB_CACHE_WRITER)
+		ce->e_used -= MB_CACHE_WRITER;
+	ce->e_used--;
+	if (!(ce->e_used || ce->e_queued)) {
 		if (!__mb_cache_entry_is_hashed(ce))
 			goto forget;
+		mb_assert(list_empty(&ce->e_lru_list));
 		list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
 	}
 	spin_unlock(&mb_cache_spinlock);
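The hunks above replace the old atomic_t reference count on each entry with two plain counters, e_used and e_queued, serialized by mb_cache_spinlock and waited on through mb_cache_queue: a reader adds one to e_used, a writer additionally adds MB_CACHE_WRITER, and blocked tasks park on the queue with e_queued raised. The userspace sketch below models only that counter arithmetic; the struct and helper names are invented for the illustration, and the spinlock, the wait queue and the hash/LRU handling are all left out, so this is not kernel code.

#include <assert.h>
#include <stdio.h>

/* Same constant as in the patch: anything at or above this value in
 * e_used means the entry is held by a writer. */
#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

struct entry {
	unsigned short e_used;		/* readers, plus MB_CACHE_WRITER for a writer */
	unsigned short e_queued;	/* tasks sleeping on mb_cache_queue for this entry */
};

/* mb_cache_entry_alloc() now hands the entry back already held for
 * exclusive access: one reference plus the writer bias.  The patched
 * mb_cache_entry_get() reaches the same state after e_used drops to 0. */
static void hold_exclusive(struct entry *ce)
{
	ce->e_used = 1 + MB_CACHE_WRITER;
	ce->e_queued = 0;
}

/* A shared lookup bumps the count by one.  The patch increments before
 * sleeping, which gives readers priority over writers; here we simply
 * assert that no writer is present instead of sleeping. */
static void hold_shared(struct entry *ce)
{
	ce->e_used++;
	assert(ce->e_used < MB_CACHE_WRITER);
}

/* __mb_cache_entry_release_unlock(), reduced to its counter arithmetic:
 * drop the writer bias if present, drop one reference, and report
 * whether the entry is idle again (LRU or forget in the real code). */
static int release(struct entry *ce)
{
	if (ce->e_used >= MB_CACHE_WRITER)
		ce->e_used -= MB_CACHE_WRITER;
	ce->e_used--;
	return !(ce->e_used || ce->e_queued);
}

int main(void)
{
	struct entry ce;

	hold_exclusive(&ce);		/* e_used == 1 + MB_CACHE_WRITER */
	assert(release(&ce) == 1);	/* writer gone, entry idle again */

	hold_shared(&ce);		/* e_used == 1 */
	hold_shared(&ce);		/* e_used == 2: readers share */
	assert(release(&ce) == 0);	/* one reader still holds it */
	assert(release(&ce) == 1);	/* idle again */

	printf("e_used/e_queued transitions behave as described\n");
	return 0;
}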
@@ -170,7 +193,7 @@ forget:
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
+mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
@@ -202,7 +225,7 @@ mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
 						   e_lru_list), gfp_mask);
 	}
 out:
-	return count;
+	return (count / 100) * sysctl_vfs_cache_pressure;
 }
 
 
@@ -231,7 +254,7 @@ mb_cache_create(const char *name, struct mb_cache_op *cache_op,
 	struct mb_cache *cache = NULL;
 
 	if(entry_size < sizeof(struct mb_cache_entry) +
-	   indexes_count * sizeof(struct mb_cache_entry_index))
+	   indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
 		return NULL;
 
 	cache = kmalloc(sizeof(struct mb_cache) +
@@ -265,7 +288,7 @@ mb_cache_create(const char *name, struct mb_cache_op *cache_op,
 			INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
 	}
 	cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
-		SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+		SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL);
 	if (!cache->c_entry_cache)
 		goto fail;
 
@@ -278,8 +301,7 @@ fail:
 	if (cache) {
 		while (--m >= 0)
 			kfree(cache->c_indexes_hash[m]);
-		if (cache->c_block_hash)
-			kfree(cache->c_block_hash);
+		kfree(cache->c_block_hash);
 		kfree(cache);
 	}
 	return NULL;
@@ -289,15 +311,14 @@ fail:
 /*
  * mb_cache_shrink()
  *
- * Removes all cache entires of a device from the cache. All cache entries
+ * Removes all cache entries of a device from the cache. All cache entries
  * currently in use cannot be freed, and thus remain in the cache. All others
  * are freed.
  *
- * @cache: which cache to shrink
  * @bdev: which device's cache entries to shrink
  */
 void
-mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev)
+mb_cache_shrink(struct block_device *bdev)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
@@ -384,7 +405,8 @@ mb_cache_entry_alloc(struct mb_cache *cache)
 		INIT_LIST_HEAD(&ce->e_lru_list);
 		INIT_LIST_HEAD(&ce->e_block_list);
 		ce->e_cache = cache;
-		atomic_set(&ce->e_used, 1);
+		ce->e_used = 1 + MB_CACHE_WRITER;
+		ce->e_queued = 0;
 	}
 	return ce;
 }
@@ -455,23 +477,6 @@ mb_cache_entry_release(struct mb_cache_entry *ce)
 }
 
 
-/*
- * mb_cache_entry_takeout()
- *
- * Take a cache entry out of the cache, making it invalid. The entry can later
- * be re-inserted using mb_cache_entry_insert(), or released using
- * mb_cache_entry_release().
- */
-void
-mb_cache_entry_takeout(struct mb_cache_entry *ce)
-{
-	spin_lock(&mb_cache_spinlock);
-	mb_assert(list_empty(&ce->e_lru_list));
-	__mb_cache_entry_unhash(ce);
-	spin_unlock(&mb_cache_spinlock);
-}
-
-
 /*
  * mb_cache_entry_free()
  *
@@ -488,26 +493,13 @@ mb_cache_entry_free(struct mb_cache_entry *ce)
 }
 
 
-/*
- * mb_cache_entry_dup()
- *
- * Duplicate a handle to a cache entry (does not duplicate the cache entry
- * itself). After the call, both the old and the new handle must be released.
- */
-struct mb_cache_entry *
-mb_cache_entry_dup(struct mb_cache_entry *ce)
-{
-	atomic_inc(&ce->e_used);
-	return ce;
-}
-
-
 /*
  * mb_cache_entry_get()
  *
  * Get a cache entry by device / block number. (There can only be one entry
  * in the cache per device and block.) Returns NULL if no such cache entry
- * exists.
+ * exists. The returned cache entry is locked for exclusive access ("single
+ * writer").
  */
 struct mb_cache_entry *
 mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
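With mb_cache_entry_get() now documented just above as returning the entry locked for exclusive access ("single writer"), a caller that wants to invalidate, or keep, the cached entry for a block would follow roughly the pattern below. This is a usage sketch only: the sector_t type of the block argument and the exact free-versus-release behaviour are assumptions drawn from the surrounding mbcache code, not from the hunks shown here.

#include <linux/fs.h>
#include <linux/mbcache.h>

/* Illustrative sketch, not part of the patch. */
static void forget_cached_block(struct mb_cache *cache,
				struct block_device *bdev, sector_t block)
{
	struct mb_cache_entry *ce;

	/* Comes back held for exclusive access ("single writer");
	 * concurrent readers and writers sleep on mb_cache_queue
	 * until this hold is dropped. */
	ce = mb_cache_entry_get(cache, bdev, block);
	if (!ce)
		return;			/* nothing cached for this block */

	/* ... the block is being reused, so drop the stale entry ... */
	mb_cache_entry_free(ce);	/* unhashes and drops the hold */

	/* To keep the entry cached instead, the caller would use
	 * mb_cache_entry_release(ce) and leave it hashed. */
}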
@@ -523,9 +515,27 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
 	list_for_each(l, &cache->c_block_hash[bucket]) {
 		ce = list_entry(l, struct mb_cache_entry, e_block_list);
 		if (ce->e_bdev == bdev && ce->e_block == block) {
+			DEFINE_WAIT(wait);
+
 			if (!list_empty(&ce->e_lru_list))
 				list_del_init(&ce->e_lru_list);
-			atomic_inc(&ce->e_used);
+
+			while (ce->e_used > 0) {
+				ce->e_queued++;
+				prepare_to_wait(&mb_cache_queue, &wait,
+						TASK_UNINTERRUPTIBLE);
+				spin_unlock(&mb_cache_spinlock);
+				schedule();
+				spin_lock(&mb_cache_spinlock);
+				ce->e_queued--;
+			}
+			finish_wait(&mb_cache_queue, &wait);
+			ce->e_used += 1 + MB_CACHE_WRITER;
+
+			if (!__mb_cache_entry_is_hashed(ce)) {
+				__mb_cache_entry_release_unlock(ce);
+				return NULL;
+			}
 			goto cleanup;
 		}
 	}
@@ -547,9 +557,30 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
 			list_entry(l, struct mb_cache_entry,
 				   e_indexes[index].o_list);
 		if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
+			DEFINE_WAIT(wait);
+
 			if (!list_empty(&ce->e_lru_list))
 				list_del_init(&ce->e_lru_list);
-			atomic_inc(&ce->e_used);
+
+			/* Incrementing before holding the lock gives readers
+			   priority over writers. */
+			ce->e_used++;
+			while (ce->e_used >= MB_CACHE_WRITER) {
+				ce->e_queued++;
+				prepare_to_wait(&mb_cache_queue, &wait,
+						TASK_UNINTERRUPTIBLE);
+				spin_unlock(&mb_cache_spinlock);
+				schedule();
+				spin_lock(&mb_cache_spinlock);
+				ce->e_queued--;
+			}
+			finish_wait(&mb_cache_queue, &wait);
+
+			if (!__mb_cache_entry_is_hashed(ce)) {
+				__mb_cache_entry_release_unlock(ce);
+				spin_lock(&mb_cache_spinlock);
+				return ERR_PTR(-EAGAIN);
+			}
 			return ce;
 		}
 		l = l->next;
@@ -563,7 +594,8 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
  *
  * Find the first cache entry on a given device with a certain key in
  * an additional index. Additonal matches can be found with
- * mb_cache_entry_find_next(). Returns NULL if no match was found.
+ * mb_cache_entry_find_next(). Returns NULL if no match was found. The
+ * returned cache entry is locked for shared access ("multiple readers").
  *
  * @cache: the cache to search
 * @index: the number of the additonal index to search (0<=index
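The last two hunks add the shared ("multiple readers") side: __mb_cache_entry_find() raises e_used before sleeping, so readers get priority over writers, and it returns ERR_PTR(-EAGAIN) if the entry was unhashed while it slept. A lookup loop in the style of the ext2/ext3 xattr callers would then look roughly like the sketch below; the parameter types of mb_cache_entry_find_first()/_next(), and the detail that find_next() itself drops the hold on the entry passed to it, are assumptions based on the surrounding mbcache code, since the header comment above is cut off.

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/mbcache.h>

/* Illustrative sketch, not part of the patch.  'match' stands in for
 * whatever per-entry check the real caller performs. */
static struct mb_cache_entry *
find_matching_entry(struct mb_cache *cache, int index,
		    struct block_device *bdev, unsigned int key,
		    int (*match)(struct mb_cache_entry *ce))
{
	struct mb_cache_entry *ce;

again:
	ce = mb_cache_entry_find_first(cache, index, bdev, key);
	while (ce) {
		if (IS_ERR(ce)) {
			/* The entry was unhashed while __mb_cache_entry_find()
			 * slept on mb_cache_queue; restart the walk. */
			if (PTR_ERR(ce) == -EAGAIN)
				goto again;
			return NULL;
		}
		/* Held for shared access here: other readers may hold the
		 * entry at the same time, a writer has to wait. */
		if (match(ce))
			return ce;	/* released later via mb_cache_entry_release() */
		ce = mb_cache_entry_find_next(ce, index, bdev, key);
	}
	return NULL;
}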