X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fjbd%2Fjournal.c;h=10fff94439387ab98b55dabb8d6f1af1e7a5384a;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=e4b516ac4989ef3ddef852f5c764ef816e0c3a56;hpb=76828883507a47dae78837ab5dec5a5b4513c667;p=linux-2.6.git diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index e4b516ac4..10fff9443 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1,5 +1,5 @@ /* - * linux/fs/journal.c + * linux/fs/jbd/journal.c * * Written by Stephen C. Tweedie , 1998 * @@ -31,11 +31,14 @@ #include #include #include -#include +#include #include +#include +#include +#include + #include #include -#include EXPORT_SYMBOL(journal_start); EXPORT_SYMBOL(journal_restart); @@ -81,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -111,18 +115,15 @@ static void commit_timeout(unsigned long __data) static int kjournald(void *arg) { - journal_t *journal = (journal_t *) arg; + journal_t *journal = arg; transaction_t *transaction; - struct timer_list timer; - daemonize("kjournald"); - - /* Set up an interval timer which can be used to trigger a - commit wakeup after the commit interval expires */ - init_timer(&timer); - timer.data = (unsigned long) current; - timer.function = commit_timeout; - journal->j_commit_timer = &timer; + /* + * Set up an interval timer which can be used to trigger a commit wakeup + * after the commit interval expires + */ + setup_timer(&journal->j_commit_timer, commit_timeout, + (unsigned long)current); /* Record that the journal thread is running */ journal->j_task = current; @@ -146,7 +147,7 @@ loop: if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal_commit_transaction(journal); spin_lock(&journal->j_state_lock); goto loop; @@ -180,7 +181,7 @@ loop: transaction->t_expires)) should_sleep = 0; if (journal->j_flags & JFS_UNMOUNT) - should_sleep = 0; + should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); schedule(); @@ -203,7 +204,7 @@ loop: end_loop: spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); @@ -212,7 +213,7 @@ end_loop: static void journal_start_thread(journal_t *journal) { - kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); + kthread_run(kjournald, journal, "kjournald"); wait_event(journal->j_wait_done_commit, journal->j_task != 0); } @@ -270,7 +271,7 @@ static void journal_kill_thread(journal_t *journal) int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - int blocknr) + unsigned long blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -328,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - kfree(tmp); + jbd_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -577,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, +int journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp) { int err = 0; @@ -659,8 +660,8 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - init_MUTEX(&journal->j_barrier); - init_MUTEX(&journal->j_checkpoint_sem); + mutex_init(&journal->j_barrier); + mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_state_lock); @@ -695,13 +696,13 @@ fail: * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. - * @len: Lenght of the journal in blocks. + * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device * @returns: a newly created journal_t * - * + * * journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. - * + * */ journal_t * journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, @@ -714,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev, if (!journal) return NULL; - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; - journal->j_blocksize = blocksize; - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); @@ -734,15 +725,25 @@ journal_t * journal_init_dev(struct block_device *bdev, __FUNCTION__); kfree(journal); journal = NULL; + goto out; } + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_maxlen = len; + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; +out: return journal; } - -/** + +/** * journal_t * journal_init_inode () - creates a journal which maps to a inode. * @inode: An inode to create the journal in - * + * * journal_init_inode creates a journal which maps an on-disk inode as * the journal. The inode must exist already, must support bmap() and * must have all data blocks preallocated. @@ -762,7 +763,7 @@ journal_t * journal_init_inode (struct inode *inode) journal->j_inode = inode; jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, + journal, inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); @@ -797,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode) return journal; } -/* +/* * If the journal init or create aborts, we need to mark the journal * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. + * back a bogus superblock. */ static void journal_fail_superblock (journal_t *journal) { @@ -819,7 +820,7 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned int first, last; + unsigned long first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); @@ -843,13 +844,13 @@ static int journal_reset(journal_t *journal) return 0; } -/** +/** * int journal_create() - Initialise the new journal file * @journal: Journal to create. This structure must have been initialised - * + * * Given a journal_t structure which tells us which disk blocks we can * use, create a new journal superblock and initialise all of the - * journal fields from scratch. + * journal fields from scratch. **/ int journal_create(journal_t *journal) { @@ -914,7 +915,7 @@ int journal_create(journal_t *journal) return journal_reset(journal); } -/** +/** * void journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. * @wait: Set to '0' if you don't want to wait for IO completion. @@ -938,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait) journal->j_transaction_sequence) { jbd_debug(1,"JBD: Skipping superblock update on recovered sb " "(start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, + journal->j_tail, journal->j_tail_sequence, journal->j_errno); goto out; } @@ -1061,7 +1062,7 @@ static int load_superblock(journal_t *journal) /** * int journal_load() - Read journal from disk. * @journal: Journal to act on. - * + * * Given a journal_t structure which tells us which disk blocks contain * a journal, read the journal from disk to initialise the in-memory * structures. @@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) int journal_load(journal_t *journal) { int err; + journal_superblock_t *sb; err = load_superblock(journal); if (err) return err; + sb = journal->j_superblock; /* If this is a V2 superblock, then we have to check the * features flags on it. */ if (journal->j_format_version >= 2) { - journal_superblock_t *sb = journal->j_superblock; - if ((sb->s_feature_ro_compat & ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || (sb->s_feature_incompat & @@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1164,9 +1172,9 @@ void journal_destroy(journal_t *journal) * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount * @incompat: bitmask of incompatible features - * + * * Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. + * features. Return true (non-zero) if it does. **/ int journal_check_used_features (journal_t *journal, unsigned long compat, @@ -1195,7 +1203,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount * @incompat: bitmask of incompatible features - * + * * Check whether the journaling code supports the use of * all of a given set of features on this journal. Return true * (non-zero) if it can. */ @@ -1233,7 +1241,7 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, * @incompat: bitmask of incompatible features * * Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. + * superblock. Returns true if the requested features could be set. * */ @@ -1319,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal, /** * int journal_flush () - Flush journal * @journal: Journal to act on. - * + * * Flush all data for a given journal to disk and empty the journal. * Filesystems can use this when remounting readonly to ensure that * recovery does not need to happen on remount. @@ -1386,7 +1394,7 @@ int journal_flush(journal_t *journal) * int journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) - * + * * Wipe out all of the contents of a journal, safely. This will produce * a warning if the journal contains any valid recovery information. * Must be called between journal_init_*() and journal_load(). @@ -1441,7 +1449,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) /* * Journal abort has very specific semantics, which we describe - * for journal abort. + * for journal abort. * * Two internal function, which provide abort to te jbd layer * itself are here. @@ -1496,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) * Perform a complete, immediate shutdown of the ENTIRE * journal (not of a single transaction). This operation cannot be * undone without closing and reopening the journal. - * + * * The journal_abort function is intended to support higher level error * recovery mechanisms such as the ext2/ext3 remount-readonly error * mode. @@ -1530,7 +1538,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) * supply an errno; a null errno implies that absolutely no further * writes are done to the journal (unless there are any already in * progress). - * + * */ void journal_abort(journal_t *journal, int errno) @@ -1538,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno) __journal_abort_soft(journal, errno); } -/** +/** * int journal_errno () - returns the journal's error state. * @journal: journal to examine. * @@ -1562,7 +1570,7 @@ int journal_errno(journal_t *journal) return err; } -/** +/** * int journal_clear_err () - clears the journal's error state * @journal: journal to act on. * @@ -1582,7 +1590,7 @@ int journal_clear_err(journal_t *journal) return err; } -/** +/** * void journal_ack_err() - Ack journal err. * @journal: journal to act on. * @@ -1604,17 +1612,88 @@ int journal_blocks_per_page(struct inode *inode) /* * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. + * debug different VM deadlock avoidance strategies. */ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) { return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); } +/* + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. + */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) +{ + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + /* * Journal_head storage management */ -static kmem_cache_t *journal_head_cache; +static struct kmem_cache *journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1676,7 +1755,7 @@ static void journal_free_journal_head(struct journal_head *jh) { #ifdef CONFIG_JBD_DEBUG atomic_dec(&nr_journal_heads); - memset(jh, 0x5b, sizeof(*jh)); + memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif kmem_cache_free(journal_head_cache, jh); } @@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1917,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd_handle_cache; +struct kmem_cache *jbd_handle_cache; static int __init journal_init_handle_cache(void) { @@ -1961,19 +2040,14 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); } static int __init journal_init(void) { int ret; -/* Static check for data structure consistency. There's no code - * invoked --- we'll just get a linker failure if things aren't right. - */ - extern void journal_bad_superblock_size(void); - if (sizeof(struct journal_superblock_s) != 1024) - journal_bad_superblock_size(); - + BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); ret = journal_init_caches(); if (ret != 0)