X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fjbd%2Fjournal.c;h=10fff94439387ab98b55dabb8d6f1af1e7a5384a;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=3c95df4bb817c9feaf5de75834ca7b39d4b2eec7;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 3c95df4bb..10fff9443 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1,5 +1,5 @@ /* - * linux/fs/journal.c + * linux/fs/jbd/journal.c * * Written by Stephen C. Tweedie , 1998 * @@ -31,11 +31,15 @@ #include #include #include -#include +#include #include -#include +#include +#include #include +#include +#include + EXPORT_SYMBOL(journal_start); EXPORT_SYMBOL(journal_restart); EXPORT_SYMBOL(journal_extend); @@ -54,7 +58,6 @@ EXPORT_SYMBOL(journal_sync_buffer); #endif EXPORT_SYMBOL(journal_flush); EXPORT_SYMBOL(journal_revoke); -EXPORT_SYMBOL(journal_callback_set); EXPORT_SYMBOL(journal_init_dev); EXPORT_SYMBOL(journal_init_inode); @@ -65,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_recover); EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); @@ -73,14 +75,16 @@ EXPORT_SYMBOL(journal_ack_err); EXPORT_SYMBOL(journal_clear_err); EXPORT_SYMBOL(log_wait_commit); EXPORT_SYMBOL(journal_start_commit); +EXPORT_SYMBOL(journal_force_commit_nested); EXPORT_SYMBOL(journal_wipe); EXPORT_SYMBOL(journal_blocks_per_page); EXPORT_SYMBOL(journal_invalidatepage); EXPORT_SYMBOL(journal_try_to_free_buffers); -EXPORT_SYMBOL(journal_bmap); EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); +static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -93,16 +97,6 @@ static void commit_timeout(unsigned long __data) wake_up_process(p); } -/* Static check for data structure consistency. There's no code - * invoked --- we'll just get a linker failure if things aren't right. - */ -void __journal_internal_check(void) -{ - extern void journal_bad_superblock_size(void); - if (sizeof(struct journal_superblock_s) != 1024) - journal_bad_superblock_size(); -} - /* * kjournald: The main thread function used to manage a logging device * journal. @@ -119,24 +113,17 @@ void __journal_internal_check(void) * known as checkpointing, and this thread is responsible for that job. */ -journal_t *current_journal; // AKPM: debug - -int kjournald(void *arg) +static int kjournald(void *arg) { - journal_t *journal = (journal_t *) arg; + journal_t *journal = arg; transaction_t *transaction; - struct timer_list timer; - current_journal = journal; - - daemonize("kjournald"); - - /* Set up an interval timer which can be used to trigger a - commit wakeup after the commit interval expires */ - init_timer(&timer); - timer.data = (unsigned long) current; - timer.function = commit_timeout; - journal->j_commit_timer = &timer; + /* + * Set up an interval timer which can be used to trigger a commit wakeup + * after the commit interval expires + */ + setup_timer(&journal->j_commit_timer, commit_timeout, + (unsigned long)current); /* Record that the journal thread is running */ journal->j_task = current; @@ -151,20 +138,23 @@ int kjournald(void *arg) spin_lock(&journal->j_state_lock); loop: + if (journal->j_flags & JFS_UNMOUNT) + goto end_loop; + jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal_commit_transaction(journal); spin_lock(&journal->j_state_lock); - goto end_loop; + goto loop; } wake_up(&journal->j_wait_done_commit); - if (current->flags & PF_FREEZE) { + if (freezing(current)) { /* * The simpler the better. Flushing journal isn't a * good idea, because that depends on threads that may @@ -172,7 +162,7 @@ loop: */ jbd_debug(1, "Now suspending kjournald\n"); spin_unlock(&journal->j_state_lock); - refrigerator(PF_FREEZE); + refrigerator(); spin_lock(&journal->j_state_lock); } else { /* @@ -190,6 +180,8 @@ loop: if (transaction && time_after_eq(jiffies, transaction->t_expires)) should_sleep = 0; + if (journal->j_flags & JFS_UNMOUNT) + should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); schedule(); @@ -208,12 +200,11 @@ loop: journal->j_commit_request = transaction->t_tid; jbd_debug(1, "woke because of timeout\n"); } -end_loop: - if (!(journal->j_flags & JFS_UNMOUNT)) - goto loop; + goto loop; +end_loop: spin_unlock(&journal->j_state_lock); - del_timer_sync(journal->j_commit_timer); + del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); @@ -222,7 +213,7 @@ end_loop: static void journal_start_thread(journal_t *journal) { - kernel_thread(kjournald, journal, CLONE_VM|CLONE_FS|CLONE_FILES); + kthread_run(kjournald, journal, "kjournald"); wait_event(journal->j_wait_done_commit, journal->j_task != 0); } @@ -280,7 +271,7 @@ static void journal_kill_thread(journal_t *journal) int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - int blocknr) + unsigned long blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -324,8 +315,8 @@ repeat: /* * Check for escaping */ - if (*((unsigned int *)(mapped_data + new_offset)) == - htonl(JFS_MAGIC_NUMBER)) { + if (*((__be32 *)(mapped_data + new_offset)) == + cpu_to_be32(JFS_MAGIC_NUMBER)) { need_copy_out = 1; do_escape = 1; } @@ -338,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - kfree(tmp); + jbd_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -464,6 +455,39 @@ int log_start_commit(journal_t *journal, tid_t tid) return ret; } +/* + * Force and wait upon a commit if the calling process is not within + * transaction. This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. + * + * We can only force the running transaction if we don't have an active handle; + * otherwise, we will deadlock. + * + * Returns true if a transaction was started. + */ +int journal_force_commit_nested(journal_t *journal) +{ + transaction_t *transaction = NULL; + tid_t tid; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction && !current->journal_info) { + transaction = journal->j_running_transaction; + __log_start_commit(journal, transaction->t_tid); + } else if (journal->j_committing_transaction) + transaction = journal->j_committing_transaction; + + if (!transaction) { + spin_unlock(&journal->j_state_lock); + return 0; /* Nothing to retry */ + } + + tid = transaction->t_tid; + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); + return 1; +} + /* * Start a commit of the current running transaction (if any). Returns true * if a transaction was started, and fills its tid in at *ptid @@ -554,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, +int journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp) { int err = 0; @@ -585,9 +609,13 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, * We play buffer_head aliasing tricks to write data/metadata blocks to * the journal without copying their contents, but for journal * descriptor blocks we do need to generate bona fide buffers. + * + * After the caller of journal_get_descriptor_buffer() has finished modifying + * the buffer's contents they really should run flush_dcache_page(bh->b_page). + * But we don't bother doing that, so there will be coherency problems with + * mmaps of blockdevs which hold live JBD-controlled filesystems. */ - -struct journal_head * journal_get_descriptor_buffer(journal_t *journal) +struct journal_head *journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long blocknr; @@ -599,8 +627,10 @@ struct journal_head * journal_get_descriptor_buffer(journal_t *journal) return NULL; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); - bh->b_state |= (1 << BH_Dirty); + set_buffer_uptodate(bh); + unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); return journal_add_journal_head(bh); } @@ -630,8 +660,8 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - init_MUTEX(&journal->j_barrier); - init_MUTEX(&journal->j_checkpoint_sem); + mutex_init(&journal->j_barrier); + mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_state_lock); @@ -666,13 +696,13 @@ fail: * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. - * @len: Lenght of the journal in blocks. + * @len: Length of the journal in blocks. * @blocksize: blocksize of journalling device * @returns: a newly created journal_t * - * + * * journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. - * + * */ journal_t * journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, @@ -680,28 +710,40 @@ journal_t * journal_init_dev(struct block_device *bdev, { journal_t *journal = journal_init_common(); struct buffer_head *bh; + int n; if (!journal) return NULL; + /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; + n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_wbufsize = n; + journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); + if (!journal->j_wbuf) { + printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + __FUNCTION__); + kfree(journal); + journal = NULL; + goto out; + } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; journal->j_maxlen = len; - journal->j_blocksize = blocksize; bh = __getblk(journal->j_dev, start, journal->j_blocksize); J_ASSERT(bh != NULL); journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; - +out: return journal; } - -/** + +/** * journal_t * journal_init_inode () - creates a journal which maps to a inode. * @inode: An inode to create the journal in - * + * * journal_init_inode creates a journal which maps an on-disk inode as * the journal. The inode must exist already, must support bmap() and * must have all data blocks preallocated. @@ -711,6 +753,7 @@ journal_t * journal_init_inode (struct inode *inode) struct buffer_head *bh; journal_t *journal = journal_init_common(); int err; + int n; unsigned long blocknr; if (!journal) @@ -720,13 +763,24 @@ journal_t * journal_init_inode (struct inode *inode) journal->j_inode = inode; jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, + journal, inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; journal->j_blocksize = inode->i_sb->s_blocksize; + /* journal descriptor can store up to n blocks -bzzz */ + n = journal->j_blocksize / sizeof(journal_block_tag_t); + journal->j_wbufsize = n; + journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); + if (!journal->j_wbuf) { + printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", + __FUNCTION__); + kfree(journal); + return NULL; + } + err = journal_bmap(journal, 0, &blocknr); /* If that failed, give up */ if (err) { @@ -744,10 +798,10 @@ journal_t * journal_init_inode (struct inode *inode) return journal; } -/* +/* * If the journal init or create aborts, we need to mark the journal * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. + * back a bogus superblock. */ static void journal_fail_superblock (journal_t *journal) { @@ -766,10 +820,10 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned int first, last; + unsigned long first, last; - first = ntohl(sb->s_first); - last = ntohl(sb->s_maxlen); + first = be32_to_cpu(sb->s_first); + last = be32_to_cpu(sb->s_maxlen); journal->j_first = first; journal->j_last = last; @@ -790,13 +844,13 @@ static int journal_reset(journal_t *journal) return 0; } -/** +/** * int journal_create() - Initialise the new journal file * @journal: Journal to create. This structure must have been initialised - * + * * Given a journal_t structure which tells us which disk blocks we can * use, create a new journal superblock and initialise all of the - * journal fields from scratch. + * journal fields from scratch. **/ int journal_create(journal_t *journal) { @@ -846,12 +900,12 @@ int journal_create(journal_t *journal) /* OK, fill in the initial static fields in the new superblock */ sb = journal->j_superblock; - sb->s_header.h_magic = htonl(JFS_MAGIC_NUMBER); - sb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2); + sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); - sb->s_blocksize = htonl(journal->j_blocksize); - sb->s_maxlen = htonl(journal->j_maxlen); - sb->s_first = htonl(1); + sb->s_blocksize = cpu_to_be32(journal->j_blocksize); + sb->s_maxlen = cpu_to_be32(journal->j_maxlen); + sb->s_first = cpu_to_be32(1); journal->j_transaction_sequence = 1; @@ -861,7 +915,7 @@ int journal_create(journal_t *journal) return journal_reset(journal); } -/** +/** * void journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. * @wait: Set to '0' if you don't want to wait for IO completion. @@ -885,7 +939,7 @@ void journal_update_superblock(journal_t *journal, int wait) journal->j_transaction_sequence) { jbd_debug(1,"JBD: Skipping superblock update on recovered sb " "(start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, + journal->j_tail, journal->j_tail_sequence, journal->j_errno); goto out; } @@ -894,9 +948,9 @@ void journal_update_superblock(journal_t *journal, int wait) jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); - sb->s_sequence = htonl(journal->j_tail_sequence); - sb->s_start = htonl(journal->j_tail); - sb->s_errno = htonl(journal->j_errno); + sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); + sb->s_start = cpu_to_be32(journal->j_tail); + sb->s_errno = cpu_to_be32(journal->j_errno); spin_unlock(&journal->j_state_lock); BUFFER_TRACE(bh, "marking dirty"); @@ -904,7 +958,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(SWRITE, 1, &bh); out: /* If we have just flushed the log (by marking s_start==0), then @@ -947,13 +1001,13 @@ static int journal_get_superblock(journal_t *journal) err = -EINVAL; - if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) || - sb->s_blocksize != htonl(journal->j_blocksize)) { + if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) || + sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { printk(KERN_WARNING "JBD: no valid journal superblock found\n"); goto out; } - switch(ntohl(sb->s_header.h_blocktype)) { + switch(be32_to_cpu(sb->s_header.h_blocktype)) { case JFS_SUPERBLOCK_V1: journal->j_format_version = 1; break; @@ -965,9 +1019,9 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (ntohl(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = ntohl(sb->s_maxlen); - else if (ntohl(sb->s_maxlen) > journal->j_maxlen) { + if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) + journal->j_maxlen = be32_to_cpu(sb->s_maxlen); + else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { printk (KERN_WARNING "JBD: journal file too short\n"); goto out; } @@ -995,11 +1049,11 @@ static int load_superblock(journal_t *journal) sb = journal->j_superblock; - journal->j_tail_sequence = ntohl(sb->s_sequence); - journal->j_tail = ntohl(sb->s_start); - journal->j_first = ntohl(sb->s_first); - journal->j_last = ntohl(sb->s_maxlen); - journal->j_errno = ntohl(sb->s_errno); + journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); + journal->j_tail = be32_to_cpu(sb->s_start); + journal->j_first = be32_to_cpu(sb->s_first); + journal->j_last = be32_to_cpu(sb->s_maxlen); + journal->j_errno = be32_to_cpu(sb->s_errno); return 0; } @@ -1008,7 +1062,7 @@ static int load_superblock(journal_t *journal) /** * int journal_load() - Read journal from disk. * @journal: Journal to act on. - * + * * Given a journal_t structure which tells us which disk blocks contain * a journal, read the journal from disk to initialise the in-memory * structures. @@ -1016,17 +1070,17 @@ static int load_superblock(journal_t *journal) int journal_load(journal_t *journal) { int err; + journal_superblock_t *sb; err = load_superblock(journal); if (err) return err; + sb = journal->j_superblock; /* If this is a V2 superblock, then we have to check the * features flags on it. */ if (journal->j_format_version >= 2) { - journal_superblock_t *sb = journal->j_superblock; - if ((sb->s_feature_ro_compat & ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || (sb->s_feature_incompat & @@ -1037,6 +1091,13 @@ int journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1100,15 +1161,20 @@ void journal_destroy(journal_t *journal) iput(journal->j_inode); if (journal->j_revoke) journal_destroy_revoke(journal); + kfree(journal->j_wbuf); kfree(journal); } /** *int journal_check_used_features () - Check if features specified are used. - * + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * * Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. + * features. Return true (non-zero) if it does. **/ int journal_check_used_features (journal_t *journal, unsigned long compat, @@ -1133,7 +1199,11 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, /** * int journal_check_available_features() - Check feature set in journalling layer - * + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * * Check whether the journaling code supports the use of * all of a given set of features on this journal. Return true * (non-zero) if it can. */ @@ -1165,9 +1235,13 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, /** * int journal_set_features () - Mark a given journal feature in the superblock + * @journal: Journal to act on. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features * * Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. + * superblock. Returns true if the requested features could be set. * */ @@ -1197,6 +1271,7 @@ int journal_set_features (journal_t *journal, unsigned long compat, /** * int journal_update_format () - Update on-disk journal structure. + * @journal: Journal to act on. * * Given an initialised but unloaded journal struct, poke about in the * on-disk structure to update it to the most recent supported version. @@ -1212,7 +1287,7 @@ int journal_update_format (journal_t *journal) sb = journal->j_superblock; - switch (ntohl(sb->s_header.h_blocktype)) { + switch (be32_to_cpu(sb->s_header.h_blocktype)) { case JFS_SUPERBLOCK_V2: return 0; case JFS_SUPERBLOCK_V1: @@ -1234,7 +1309,7 @@ static int journal_convert_superblock_v1(journal_t *journal, /* Pre-initialise new fields to zero */ offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); - blocksize = ntohl(sb->s_blocksize); + blocksize = be32_to_cpu(sb->s_blocksize); memset(&sb->s_feature_compat, 0, blocksize-offset); sb->s_nr_users = cpu_to_be32(1); @@ -1252,7 +1327,7 @@ static int journal_convert_superblock_v1(journal_t *journal, /** * int journal_flush () - Flush journal * @journal: Journal to act on. - * + * * Flush all data for a given journal to disk and empty the journal. * Filesystems can use this when remounting readonly to ensure that * recovery does not need to happen on remount. @@ -1319,7 +1394,7 @@ int journal_flush(journal_t *journal) * int journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) - * + * * Wipe out all of the contents of a journal, safely. This will produce * a warning if the journal contains any valid recovery information. * Must be called between journal_init_*() and journal_load(). @@ -1360,7 +1435,7 @@ int journal_wipe(journal_t *journal, int write) * device this journal is present. */ -const char *journal_dev_name(journal_t *journal, char *buffer) +static const char *journal_dev_name(journal_t *journal, char *buffer) { struct block_device *bdev; @@ -1374,7 +1449,7 @@ const char *journal_dev_name(journal_t *journal, char *buffer) /* * Journal abort has very specific semantics, which we describe - * for journal abort. + * for journal abort. * * Two internal function, which provide abort to te jbd layer * itself are here. @@ -1406,7 +1481,7 @@ void __journal_abort_hard(journal_t *journal) /* Soft abort: record the abort error status in the journal superblock, * but don't do any other IO. */ -void __journal_abort_soft (journal_t *journal, int errno) +static void __journal_abort_soft (journal_t *journal, int errno) { if (journal->j_flags & JFS_ABORT) return; @@ -1429,7 +1504,7 @@ void __journal_abort_soft (journal_t *journal, int errno) * Perform a complete, immediate shutdown of the ENTIRE * journal (not of a single transaction). This operation cannot be * undone without closing and reopening the journal. - * + * * The journal_abort function is intended to support higher level error * recovery mechanisms such as the ext2/ext3 remount-readonly error * mode. @@ -1463,7 +1538,7 @@ void __journal_abort_soft (journal_t *journal, int errno) * supply an errno; a null errno implies that absolutely no further * writes are done to the journal (unless there are any already in * progress). - * + * */ void journal_abort(journal_t *journal, int errno) @@ -1471,7 +1546,7 @@ void journal_abort(journal_t *journal, int errno) __journal_abort_soft(journal, errno); } -/** +/** * int journal_errno () - returns the journal's error state. * @journal: journal to examine. * @@ -1495,8 +1570,9 @@ int journal_errno(journal_t *journal) return err; } -/** +/** * int journal_clear_err () - clears the journal's error state + * @journal: journal to act on. * * An error must be cleared or Acked to take a FS out of readonly * mode. @@ -1514,8 +1590,9 @@ int journal_clear_err(journal_t *journal) return err; } -/** +/** * void journal_ack_err() - Ack journal err. + * @journal: journal to act on. * * An error must be cleared or Acked to take a FS out of readonly * mode. @@ -1534,22 +1611,89 @@ int journal_blocks_per_page(struct inode *inode) } /* - * Simple support for retying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. + * Simple support for retrying memory allocations. Introduced to help to + * debug different VM deadlock avoidance strategies. */ +void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) +{ + return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); +} + /* - * Simple support for retying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. */ -void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry) + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) { - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); } /* * Journal_head storage management */ -static kmem_cache_t *journal_head_cache; +static struct kmem_cache *journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1577,7 +1721,7 @@ static void journal_destroy_journal_head_cache(void) { J_ASSERT(journal_head_cache != NULL); kmem_cache_destroy(journal_head_cache); - journal_head_cache = 0; + journal_head_cache = NULL; } /* @@ -1611,7 +1755,7 @@ static void journal_free_journal_head(struct journal_head *jh) { #ifdef CONFIG_JBD_DEBUG atomic_dec(&nr_journal_heads); - memset(jh, 0x5b, sizeof(*jh)); + memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif kmem_cache_free(journal_head_cache, jh); } @@ -1726,6 +1870,7 @@ static void __journal_remove_journal_head(struct buffer_head *bh) if (jh->b_transaction == NULL && jh->b_next_transaction == NULL && jh->b_cp_transaction == NULL) { + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); J_ASSERT_BH(bh, buffer_jbd(bh)); J_ASSERT_BH(bh, jh2bh(jh) == bh); BUFFER_TRACE(bh, "remove journal_head"); @@ -1733,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1802,7 +1947,7 @@ EXPORT_SYMBOL(journal_enable_debug); static struct proc_dir_entry *proc_jbd_debug; -int read_jbd_debug(char *page, char **start, off_t off, +static int read_jbd_debug(char *page, char **start, off_t off, int count, int *eof, void *data) { int ret; @@ -1812,7 +1957,7 @@ int read_jbd_debug(char *page, char **start, off_t off, return ret; } -int write_jbd_debug(struct file *file, const char __user *buffer, +static int write_jbd_debug(struct file *file, const char __user *buffer, unsigned long count, void *data) { char buf[32]; @@ -1851,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd_handle_cache; +struct kmem_cache *jbd_handle_cache; static int __init journal_init_handle_cache(void) { @@ -1895,12 +2040,15 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); } static int __init journal_init(void) { int ret; + BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); + ret = journal_init_caches(); if (ret != 0) journal_destroy_caches();