+ get_fs_excl();
+
+ /* if all the work is already done, get out of here */
+ if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
+ atomic_read(&(jl->j_commit_left)) <= 0) {
+ goto flush_older_and_return;
+ }
+
+ /* start by putting the commit list on disk. This will also flush
+ ** the commit lists of any olders transactions
+ */
+ flush_commit_list(s, jl, 1);
+
+ if (!(jl->j_state & LIST_DIRTY)
+ && !reiserfs_is_journal_aborted(journal))
+ BUG();
+
+ /* are we done now? */
+ if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
+ atomic_read(&(jl->j_commit_left)) <= 0) {
+ goto flush_older_and_return;
+ }
+
+ /* loop through each cnode, see if we need to write it,
+ ** or wait on a more recent transaction, or just ignore it
+ */
+ if (atomic_read(&(journal->j_wcount)) != 0) {
+ reiserfs_panic(s,
+ "journal-844: panic journal list is flushing, wcount is not 0\n");
+ }
+ cn = jl->j_realblock;
+ while (cn) {
+ was_jwait = 0;
+ was_dirty = 0;
+ saved_bh = NULL;
+ /* blocknr of 0 is no longer in the hash, ignore it */
+ if (cn->blocknr == 0) {
+ goto free_cnode;
+ }
+
+ /* This transaction failed commit. Don't write out to the disk */
+ if (!(jl->j_state & LIST_DIRTY))
+ goto free_cnode;
+
+ pjl = find_newer_jl_for_cn(cn);
+ /* the order is important here. We check pjl to make sure we
+ ** don't clear BH_JDirty_wait if we aren't the one writing this
+ ** block to disk
+ */
+ if (!pjl && cn->bh) {
+ saved_bh = cn->bh;
+
+ /* we do this to make sure nobody releases the buffer while
+ ** we are working with it
+ */
+ get_bh(saved_bh);
+
+ if (buffer_journal_dirty(saved_bh)) {
+ BUG_ON(!can_dirty(cn));
+ was_jwait = 1;
+ was_dirty = 1;
+ } else if (can_dirty(cn)) {
+ /* everything with !pjl && jwait should be writable */
+ BUG();
+ }
+ }
+
+ /* if someone has this block in a newer transaction, just make
+ ** sure they are commited, and don't try writing it to disk
+ */
+ if (pjl) {
+ if (atomic_read(&pjl->j_commit_left))
+ flush_commit_list(s, pjl, 1);
+ goto free_cnode;
+ }
+
+ /* bh == NULL when the block got to disk on its own, OR,
+ ** the block got freed in a future transaction
+ */
+ if (saved_bh == NULL) {
+ goto free_cnode;
+ }
+
+ /* this should never happen. kupdate_one_transaction has this list
+ ** locked while it works, so we should never see a buffer here that
+ ** is not marked JDirty_wait
+ */
+ if ((!was_jwait) && !buffer_locked(saved_bh)) {
+ reiserfs_warning(s,
+ "journal-813: BAD! buffer %llu %cdirty %cjwait, "
+ "not in a newer tranasction",
+ (unsigned long long)saved_bh->
+ b_blocknr, was_dirty ? ' ' : '!',
+ was_jwait ? ' ' : '!');
+ }
+ if (was_dirty) {
+ /* we inc again because saved_bh gets decremented at free_cnode */
+ get_bh(saved_bh);
+ set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
+ lock_buffer(saved_bh);
+ BUG_ON(cn->blocknr != saved_bh->b_blocknr);
+ if (buffer_dirty(saved_bh))
+ submit_logged_buffer(saved_bh);
+ else
+ unlock_buffer(saved_bh);
+ count++;
+ } else {
+ reiserfs_warning(s,
+ "clm-2082: Unable to flush buffer %llu in %s",
+ (unsigned long long)saved_bh->
+ b_blocknr, __FUNCTION__);
+ }
+ free_cnode:
+ last = cn;
+ cn = cn->next;
+ if (saved_bh) {
+ /* we incremented this to keep others from taking the buffer head away */
+ put_bh(saved_bh);
+ if (atomic_read(&(saved_bh->b_count)) < 0) {
+ reiserfs_warning(s,
+ "journal-945: saved_bh->b_count < 0");
+ }
+ }
+ }
+ if (count > 0) {
+ cn = jl->j_realblock;
+ while (cn) {
+ if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
+ if (!cn->bh) {
+ reiserfs_panic(s,
+ "journal-1011: cn->bh is NULL\n");
+ }
+ wait_on_buffer(cn->bh);
+ if (!cn->bh) {
+ reiserfs_panic(s,
+ "journal-1012: cn->bh is NULL\n");
+ }
+ if (unlikely(!buffer_uptodate(cn->bh))) {
+#ifdef CONFIG_REISERFS_CHECK
+ reiserfs_warning(s,
+ "journal-949: buffer write failed\n");
+#endif
+ err = -EIO;
+ }
+ /* note, we must clear the JDirty_wait bit after the up to date
+ ** check, otherwise we race against our flushpage routine
+ */
+ BUG_ON(!test_clear_buffer_journal_dirty
+ (cn->bh));
+
+ /* undo the inc from journal_mark_dirty */
+ put_bh(cn->bh);
+ brelse(cn->bh);
+ }
+ cn = cn->next;
+ }
+ }
+
+ if (err)
+ reiserfs_abort(s, -EIO,
+ "Write error while pushing transaction to disk in %s",
+ __FUNCTION__);
+ flush_older_and_return:
+
+ /* before we can update the journal header block, we _must_ flush all
+ ** real blocks from all older transactions to disk. This is because
+ ** once the header block is updated, this transaction will not be
+ ** replayed after a crash
+ */
+ if (flushall) {
+ flush_older_journal_lists(s, jl);
+ }
+
+ err = journal->j_errno;
+ /* before we can remove everything from the hash tables for this
+ ** transaction, we must make sure it can never be replayed
+ **
+ ** since we are only called from do_journal_end, we know for sure there
+ ** are no allocations going on while we are flushing journal lists. So,
+ ** we only need to update the journal header block for the last list
+ ** being flushed
+ */
+ if (!err && flushall) {
+ err =
+ update_journal_header_block(s,
+ (jl->j_start + jl->j_len +
+ 2) % SB_ONDISK_JOURNAL_SIZE(s),
+ jl->j_trans_id);
+ if (err)
+ reiserfs_abort(s, -EIO,
+ "Write error while updating journal header in %s",
+ __FUNCTION__);
+ }
+ remove_all_from_journal_list(s, jl, 0);
+ list_del_init(&jl->j_list);
+ journal->j_num_lists--;
+ del_from_work_list(s, jl);
+
+ if (journal->j_last_flush_id != 0 &&
+ (jl->j_trans_id - journal->j_last_flush_id) != 1) {
+ reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
+ journal->j_last_flush_id, jl->j_trans_id);
+ }
+ journal->j_last_flush_id = jl->j_trans_id;
+
+ /* not strictly required since we are freeing the list, but it should
+ * help find code using dead lists later on
+ */
+ jl->j_len = 0;
+ atomic_set(&(jl->j_nonzerolen), 0);
+ jl->j_start = 0;
+ jl->j_realblock = NULL;
+ jl->j_commit_bh = NULL;
+ jl->j_trans_id = 0;
+ jl->j_state = 0;
+ put_journal_list(s, jl);
+ if (flushall)
+ up(&journal->j_flush_sem);
+ put_fs_excl();
+ return err;
+}
+
+static int write_one_transaction(struct super_block *s,
+ struct reiserfs_journal_list *jl,
+ struct buffer_chunk *chunk)
+{
+ struct reiserfs_journal_cnode *cn;
+ int ret = 0;
+
+ jl->j_state |= LIST_TOUCHED;
+ del_from_work_list(s, jl);
+ if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
+ return 0;
+ }
+
+ cn = jl->j_realblock;
+ while (cn) {
+ /* if the blocknr == 0, this has been cleared from the hash,
+ ** skip it
+ */
+ if (cn->blocknr == 0) {
+ goto next;
+ }
+ if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
+ struct buffer_head *tmp_bh;
+ /* we can race against journal_mark_freed when we try
+ * to lock_buffer(cn->bh), so we have to inc the buffer
+ * count, and recheck things after locking
+ */
+ tmp_bh = cn->bh;
+ get_bh(tmp_bh);
+ lock_buffer(tmp_bh);
+ if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
+ if (!buffer_journal_dirty(tmp_bh) ||
+ buffer_journal_prepared(tmp_bh))
+ BUG();
+ add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
+ ret++;
+ } else {
+ /* note, cn->bh might be null now */
+ unlock_buffer(tmp_bh);
+ }
+ put_bh(tmp_bh);
+ }
+ next:
+ cn = cn->next;
+ cond_resched();
+ }
+ return ret;
+}
+
+/* used by flush_commit_list */
+static int dirty_one_transaction(struct super_block *s,
+ struct reiserfs_journal_list *jl)
+{
+ struct reiserfs_journal_cnode *cn;
+ struct reiserfs_journal_list *pjl;
+ int ret = 0;
+
+ jl->j_state |= LIST_DIRTY;
+ cn = jl->j_realblock;
+ while (cn) {
+ /* look for a more recent transaction that logged this
+ ** buffer. Only the most recent transaction with a buffer in
+ ** it is allowed to send that buffer to disk
+ */
+ pjl = find_newer_jl_for_cn(cn);
+ if (!pjl && cn->blocknr && cn->bh
+ && buffer_journal_dirty(cn->bh)) {
+ BUG_ON(!can_dirty(cn));
+ /* if the buffer is prepared, it will either be logged
+ * or restored. If restored, we need to make sure
+ * it actually gets marked dirty
+ */
+ clear_buffer_journal_new(cn->bh);
+ if (buffer_journal_prepared(cn->bh)) {
+ set_buffer_journal_restore_dirty(cn->bh);
+ } else {
+ set_buffer_journal_test(cn->bh);
+ mark_buffer_dirty(cn->bh);
+ }
+ }
+ cn = cn->next;
+ }
+ return ret;
+}
+
+static int kupdate_transactions(struct super_block *s,
+ struct reiserfs_journal_list *jl,
+ struct reiserfs_journal_list **next_jl,
+ unsigned long *next_trans_id,
+ int num_blocks, int num_trans)
+{
+ int ret = 0;
+ int written = 0;
+ int transactions_flushed = 0;
+ unsigned long orig_trans_id = jl->j_trans_id;
+ struct buffer_chunk chunk;
+ struct list_head *entry;
+ struct reiserfs_journal *journal = SB_JOURNAL(s);
+ chunk.nr = 0;
+
+ down(&journal->j_flush_sem);
+ if (!journal_list_still_alive(s, orig_trans_id)) {
+ goto done;
+ }
+
+ /* we've got j_flush_sem held, nobody is going to delete any
+ * of these lists out from underneath us
+ */
+ while ((num_trans && transactions_flushed < num_trans) ||
+ (!num_trans && written < num_blocks)) {
+
+ if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
+ atomic_read(&jl->j_commit_left)
+ || !(jl->j_state & LIST_DIRTY)) {
+ del_from_work_list(s, jl);
+ break;
+ }
+ ret = write_one_transaction(s, jl, &chunk);
+
+ if (ret < 0)
+ goto done;
+ transactions_flushed++;
+ written += ret;
+ entry = jl->j_list.next;
+
+ /* did we wrap? */
+ if (entry == &journal->j_journal_list) {
+ break;
+ }
+ jl = JOURNAL_LIST_ENTRY(entry);
+
+ /* don't bother with older transactions */
+ if (jl->j_trans_id <= orig_trans_id)
+ break;
+ }
+ if (chunk.nr) {
+ write_chunk(&chunk);
+ }
+
+ done:
+ up(&journal->j_flush_sem);
+ return ret;
+}
+
+/* for o_sync and fsync heavy applications, they tend to use
+** all the journa list slots with tiny transactions. These
+** trigger lots and lots of calls to update the header block, which