2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
5 #include <linux/config.h>
6 #include <linux/time.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/smp_lock.h>
9 #include <linux/pagemap.h>
10 #include <linux/highmem.h>
11 #include <asm/uaccess.h>
12 #include <asm/unaligned.h>
13 #include <linux/buffer_head.h>
14 #include <linux/mpage.h>
15 #include <linux/writeback.h>
17 extern int reiserfs_default_io_size; /* default io size defined in super.c */
19 /* args for the create parameter of reiserfs_get_block */
20 #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
21 #define GET_BLOCK_CREATE 1 /* add anything you need to find block */
22 #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
23 #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
24 #define GET_BLOCK_NO_ISEM 8 /* i_sem is not held, don't preallocate */
25 #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
27 static int reiserfs_get_block (struct inode * inode, sector_t block,
28 struct buffer_head * bh_result, int create);
29 static int reiserfs_commit_write(struct file *f, struct page *page,
30 unsigned from, unsigned to);
32 void reiserfs_delete_inode (struct inode * inode)
34 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2;
35 struct reiserfs_transaction_handle th ;
37 reiserfs_write_lock(inode->i_sb);
39 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
40 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
43 journal_begin(&th, inode->i_sb, jbegin_count) ;
44 reiserfs_update_inode_transaction(inode) ;
46 reiserfs_delete_object (&th, inode);
48 journal_end(&th, inode->i_sb, jbegin_count) ;
52 /* all items of file are deleted, so we can remove "save" link */
53 remove_save_link (inode, 0/* not truncate */);
55 /* no object items are in the tree */
58 clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
60 reiserfs_write_unlock(inode->i_sb);
63 static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
64 loff_t offset, int type, int length )
66 key->version = version;
68 key->on_disk_key.k_dir_id = dirid;
69 key->on_disk_key.k_objectid = objectid;
70 set_cpu_key_k_offset (key, offset);
71 set_cpu_key_k_type (key, type);
72 key->key_length = length;
76 /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
77 offset and type of key */
78 void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset,
79 int type, int length )
81 _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
82 le32_to_cpu (INODE_PKEY (inode)->k_objectid),
83 offset, type, length);
88 // when key is 0, do not set version and short key
90 inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key,
92 loff_t offset, int type, int length,
93 int entry_count/*or ih_free_space*/)
96 ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
97 ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
99 put_ih_version( ih, version );
100 set_le_ih_k_offset (ih, offset);
101 set_le_ih_k_type (ih, type);
102 put_ih_item_len( ih, length );
103 /* set_ih_free_space (ih, 0);*/
104 // for directory items it is entry count, for directs and stat
105 // datas - 0xffff, for indirects - 0
106 put_ih_entry_count( ih, entry_count );
110 // FIXME: we might cache recently accessed indirect item
112 // Ugh. Not too eager for that....
113 // I cut the code until such time as I see a convincing argument (benchmark).
114 // I don't want a bloated inode struct..., and I don't like code complexity....
116 /* cutting the code is fine, since it really isn't in use yet and is easy
117 ** to add back in. But, Vladimir has a really good idea here. Think
118 ** about what happens for reading a file. For each page,
119 ** The VFS layer calls reiserfs_readpage, who searches the tree to find
120 ** an indirect item. This indirect item has X number of pointers, where
121 ** X is a big number if we've done the block allocation right. But,
122 ** we only use one or two of these pointers during each call to readpage,
123 ** needlessly researching again later on.
125 ** The size of the cache could be dynamic based on the size of the file.
127 ** I'd also like to see us cache the location the stat data item, since
128 ** we are needlessly researching for that frequently.
133 /* If this page has a file tail in it, and
134 ** it was read in by get_block_create_0, the page data is valid,
135 ** but tail is still sitting in a direct item, and we can't write to
136 ** it. So, look through this page, and check all the mapped buffers
137 ** to make sure they have valid block numbers. Any that don't need
138 ** to be unmapped, so that block_prepare_write will correctly call
139 ** reiserfs_get_block to convert the tail into an unformatted node
141 static inline void fix_tail_page_for_writing(struct page *page) {
142 struct buffer_head *head, *next, *bh ;
144 if (page && page_has_buffers(page)) {
145 head = page_buffers(page) ;
148 next = bh->b_this_page ;
149 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
150 reiserfs_unmap_buffer(bh) ;
153 } while (bh != head) ;
157 /* reiserfs_get_block does not need to allocate a block only if it has been
158 done already or non-hole position has been found in the indirect item */
159 static inline int allocation_needed (int retval, b_blocknr_t allocated,
160 struct item_head * ih,
161 __u32 * item, int pos_in_item)
165 if (retval == POSITION_FOUND && is_indirect_le_ih (ih) &&
166 get_block_num(item, pos_in_item))
171 static inline int indirect_item_found (int retval, struct item_head * ih)
173 return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
177 static inline void set_block_dev_mapped (struct buffer_head * bh,
178 b_blocknr_t block, struct inode * inode)
180 map_bh(bh, inode->i_sb, block);
185 // files which were created in the earlier version can not be longer,
188 static int file_capable (struct inode * inode, long block)
190 if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file.
191 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
197 /*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
198 struct inode *inode, struct path *path) {
199 struct super_block *s = th->t_super ;
200 int len = th->t_blocks_allocated ;
202 /* we cannot restart while nested */
203 if (th->t_refcount > 1) {
207 reiserfs_update_sd(th, inode) ;
208 journal_end(th, s, len) ;
209 journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ;
210 reiserfs_update_inode_transaction(inode) ;
// Called by reiserfs_get_block when create == 0 (read-only lookup).
// On success it maps bh_result to the device block backing the 'block'-th
// logical block of the file. If the data lives in a direct item (a file
// tail) instead of an unformatted node, the behavior depends on the args:
// when called from bmap it maps nothing (tails have no single device
// block), otherwise it copies the tail bytes into bh_result's page and
// maps the buffer to block 0 to flag it as tail data.
221 static int _get_block_create_0 (struct inode * inode, long block,
222 struct buffer_head * bh_result,
225 INITIALIZE_PATH (path);
227 struct buffer_head * bh;
228 struct item_head * ih, tmp_ih;
235 unsigned long offset ;
237 // prepare the key to look for the 'block'-th block of file
238 make_cpu_key (&key, inode,
239 (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
242 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
245 kunmap(bh_result->b_page) ;
246 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
247 // That there is some MMAPED data associated with it that is yet to be written to disk.
248 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
255 bh = get_last_bh (&path);
257 if (is_indirect_le_ih (ih)) {
258 __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
260 /* FIXME: here we could cache indirect item or part of it in
261 the inode to avoid search_by_key in case of subsequent
263 blocknr = get_block_num(ind_item, path.pos_in_item) ;
266 map_bh(bh_result, inode->i_sb, blocknr);
267 if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
268 set_buffer_boundary(bh_result);
271 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
272 // That there is some MMAPED data associated with it that is yet to be written to disk.
273 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
279 kunmap(bh_result->b_page) ;
283 // requested data are in direct item(s)
284 if (!(args & GET_BLOCK_READ_DIRECT)) {
285 // we are called by bmap. FIXME: we can not map block of file
286 // when it is stored in direct item(s)
289 kunmap(bh_result->b_page) ;
293 /* if we've got a direct item, and the buffer or page was uptodate,
294 ** we don't want to pull data off disk again. skip to the
295 ** end, where we map the buffer and return
297 if (buffer_uptodate(bh_result)) {
301 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
302 ** pages without any buffers. If the page is up to date, we don't want
303 ** read old data off disk. Set the up to date bit on the buffer instead
304 ** and jump to the end
306 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
307 set_buffer_uptodate(bh_result);
311 // read file tail into part of page
312 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
313 fs_gen = get_generation(inode->i_sb) ;
314 copy_item_head (&tmp_ih, ih);
316 /* we only want to kmap if we are reading the tail into the page.
317 ** this is not the common case, so we don't kmap until we are
318 ** sure we need to. But, this means the item might move if
322 p = (char *)kmap(bh_result->b_page) ;
323 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
328 memset (p, 0, inode->i_sb->s_blocksize);
330 if (!is_direct_le_ih (ih)) {
333 /* make sure we don't read more bytes than actually exist in
334 ** the file. This can happen in odd cases where i_size isn't
335 ** correct, and when direct item padding results in a few
336 ** extra bytes at the end of the direct item
338 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
340 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
341 chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
344 chars = ih_item_len(ih) - path.pos_in_item;
346 memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);
353 if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
354 // we done, if read direct item is not the last item of
355 // node FIXME: we could try to check right delimiting key
356 // to see whether direct item continues in the right
357 // neighbor or rely on i_size
360 // update key to look for the next piece
361 set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
362 if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
363 // we read something from tail, even if now we got IO_ERROR
365 bh = get_last_bh (&path);
369 flush_dcache_page(bh_result->b_page) ;
370 kunmap(bh_result->b_page) ;
374 /* this buffer has valid data, but isn't valid for io. mapping it to
375 * block #0 tells the rest of reiserfs it just has a tail in it
377 map_bh(bh_result, inode->i_sb, 0);
378 set_buffer_uptodate (bh_result);
383 // this is called to create file map. So, _get_block_create_0 will not
385 int reiserfs_bmap (struct inode * inode, sector_t block,
386 struct buffer_head * bh_result, int create)
388 if (!file_capable (inode, block))
391 reiserfs_write_lock(inode->i_sb);
392 /* do not read the direct item */
393 _get_block_create_0 (inode, block, bh_result, 0) ;
394 reiserfs_write_unlock(inode->i_sb);
398 /* special version of get_block that is only used by grab_tail_page right
399 ** now. It is sent to block_prepare_write, and when you try to get a
400 ** block past the end of the file (or a block from a hole) it returns
401 ** -ENOENT instead of a valid buffer. block_prepare_write expects to
402 ** be able to do i/o on the buffers returned, unless an error value
405 ** So, this allows block_prepare_write to be used for reading a single block
406 ** in a page. Where it does not produce a valid page for holes, or past the
407 ** end of the file. This turns out to be exactly what we need for reading
408 ** tails for conversion.
410 ** The point of the wrapper is forcing a certain value for create, even
411 ** though the VFS layer is calling this function with create==1. If you
412 ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
413 ** don't use this function.
415 static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block,
416 struct buffer_head * bh_result, int create) {
417 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
420 /* This is special helper for reiserfs_get_block in case we are executing
421 direct_IO request. */
422 static int reiserfs_get_blocks_direct_io(struct inode *inode,
424 unsigned long max_blocks,
425 struct buffer_head *bh_result,
430 bh_result->b_page = NULL;
432 /* We set the b_size before reiserfs_get_block call since it is
433 referenced in convert_tail_for_hole() that may be called from
434 reiserfs_get_block() */
435 bh_result->b_size = (1 << inode->i_blkbits);
437 ret = reiserfs_get_block(inode, iblock, bh_result,
438 create | GET_BLOCK_NO_DANGLE) ;
440 /* don't allow direct io onto tail pages */
441 if (ret == 0 && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
442 /* make sure future calls to the direct io funcs for this offset
443 ** in the file fail by unmapping the buffer
445 clear_buffer_mapped(bh_result);
448 /* Possible unpacked tail. Flush the data before pages have
450 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
452 reiserfs_commit_for_inode(inode);
453 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
461 ** helper function for when reiserfs_get_block is called for a hole
462 ** but the file tail is still in a direct item
463 ** bh_result is the buffer head for the hole
464 ** tail_offset is the offset of the start of the tail in the file
466 ** This calls prepare_write, which will start a new transaction
467 ** you should not be in a transaction, or have any paths held when you
470 static int convert_tail_for_hole(struct inode *inode,
471 struct buffer_head *bh_result,
472 loff_t tail_offset) {
473 unsigned long index ;
474 unsigned long tail_end ;
475 unsigned long tail_start ;
476 struct page * tail_page ;
477 struct page * hole_page = bh_result->b_page ;
480 if ((tail_offset & (bh_result->b_size - 1)) != 1)
483 /* always try to read until the end of the block */
484 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
485 tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ;
487 index = tail_offset >> PAGE_CACHE_SHIFT ;
488 /* hole_page can be zero in case of direct_io, we are sure
489 that we cannot get here if we write with O_DIRECT into
491 if (!hole_page || index != hole_page->index) {
492 tail_page = grab_cache_page(inode->i_mapping, index) ;
498 tail_page = hole_page ;
501 /* we don't have to make sure the conversion did not happen while
502 ** we were locking the page because anyone that could convert
503 ** must first take i_sem.
505 ** We must fix the tail page for writing because it might have buffers
506 ** that are mapped, but have a block number of 0. This indicates tail
507 ** data that has been read directly into the page, and block_prepare_write
508 ** won't trigger a get_block in this case.
510 fix_tail_page_for_writing(tail_page) ;
511 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
515 /* tail conversion might change the data in the page */
516 flush_dcache_page(tail_page) ;
518 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ;
521 if (tail_page != hole_page) {
522 unlock_page(tail_page) ;
523 page_cache_release(tail_page) ;
529 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
532 b_blocknr_t *allocated_block_nr,
536 #ifdef REISERFS_PREALLOCATE
537 if (!(flags & GET_BLOCK_NO_ISEM)) {
538 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
541 return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block);
544 int reiserfs_get_block (struct inode * inode, sector_t block,
545 struct buffer_head * bh_result, int create)
548 b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int
549 INITIALIZE_PATH(path);
552 struct buffer_head * bh, * unbh = 0;
553 struct item_head * ih, tmp_ih;
557 struct reiserfs_transaction_handle *th = NULL;
558 /* space reserved in transaction batch:
559 . 3 balancings in direct->indirect conversion
560 . 1 block involved into reiserfs_update_sd()
561 XXX in practically impossible worst case direct2indirect()
562 can incur (much) more that 3 balancings. */
563 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1;
566 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
569 reiserfs_write_lock(inode->i_sb);
570 version = get_inode_item_key_version (inode);
573 reiserfs_write_unlock(inode->i_sb);
577 if (!file_capable (inode, block)) {
578 reiserfs_write_unlock(inode->i_sb);
582 /* if !create, we aren't changing the FS, so we don't need to
583 ** log anything, so we don't need to start a transaction
585 if (!(create & GET_BLOCK_CREATE)) {
587 /* find number of block-th logical block of the file */
588 ret = _get_block_create_0 (inode, block, bh_result,
589 create | GET_BLOCK_READ_DIRECT) ;
590 reiserfs_write_unlock(inode->i_sb);
594 * if we're already in a transaction, make sure to close
595 * any new transactions we start in this func
597 if ((create & GET_BLOCK_NO_DANGLE) ||
598 reiserfs_transaction_running(inode->i_sb))
601 /* If file is of such a size, that it might have a tail and tails are enabled
602 ** we should mark it as possibly needing tail packing on close
604 if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) ||
605 (have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) )
606 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
608 /* set the key of the first byte in the 'block'-th block of file */
609 make_cpu_key (&key, inode, new_offset,
610 TYPE_ANY, 3/*key length*/);
611 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
613 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
618 reiserfs_update_inode_transaction(inode) ;
622 retval = search_for_position_by_key (inode->i_sb, &key, &path);
623 if (retval == IO_ERROR) {
628 bh = get_last_bh (&path);
630 item = get_item (&path);
631 pos_in_item = path.pos_in_item;
633 fs_gen = get_generation (inode->i_sb);
634 copy_item_head (&tmp_ih, ih);
636 if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
637 /* we have to allocate block for the unformatted node */
643 repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
645 if (repeat == NO_DISK_SPACE) {
646 /* restart the transaction to give the journal a chance to free
647 ** some blocks. releases the path, so we have to go back to
648 ** research if we succeed on the second try
650 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
651 restart_transaction(th, inode, &path) ;
652 repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
654 if (repeat != NO_DISK_SPACE) {
661 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
666 if (indirect_item_found (retval, ih)) {
667 b_blocknr_t unfm_ptr;
668 /* 'block'-th block is in the file already (there is
669 corresponding cell in some indirect item). But it may be
670 zero unformatted node pointer (hole) */
671 unfm_ptr = get_block_num (item, pos_in_item);
673 /* use allocated block to plug the hole */
674 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
675 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
676 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
679 set_buffer_new(bh_result);
680 if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb))
681 reiserfs_add_ordered_list(inode, bh_result);
682 put_block_num(item, pos_in_item, allocated_block_nr) ;
683 unfm_ptr = allocated_block_nr;
684 journal_mark_dirty (th, inode->i_sb, bh);
685 inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
686 reiserfs_update_sd(th, inode) ;
688 set_block_dev_mapped(bh_result, unfm_ptr, inode);
691 reiserfs_end_persistent_transaction(th);
693 reiserfs_write_unlock(inode->i_sb);
695 /* the item was found, so new blocks were not added to the file
696 ** there is no need to make sure the inode is updated with this
707 /* desired position is not found or is in the direct item. We have
708 to append file with holes up to 'block'-th block converting
709 direct items to indirect one if necessary */
712 if (is_statdata_le_ih (ih)) {
714 struct cpu_key tmp_key;
716 /* indirect item has to be inserted */
717 make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
718 UNFM_P_SIZE, 0/* free_space */);
720 if (cpu_key_k_offset (&key) == 1) {
721 /* we are going to add 'block'-th block to the file. Use
722 allocated block for that */
723 unp = cpu_to_le32 (allocated_block_nr);
724 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
725 set_buffer_new(bh_result);
729 set_cpu_key_k_offset (&tmp_key, 1);
730 PATH_LAST_POSITION(&path) ++;
732 retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, (char *)&unp);
734 reiserfs_free_block (th, allocated_block_nr);
735 goto failure; // retval == -ENOSPC or -EIO or -EEXIST
738 inode->i_blocks += inode->i_sb->s_blocksize / 512;
739 //mark_tail_converted (inode);
740 } else if (is_direct_le_ih (ih)) {
741 /* direct item has to be converted */
744 tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
745 if (tail_offset == cpu_key_k_offset (&key)) {
746 /* direct item we just found fits into block we have
747 to map. Convert it into unformatted node: use
748 bh_result for the conversion */
749 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
753 /* we have to padd file tail stored in direct item(s)
754 up to block size and convert it to unformatted
755 node. FIXME: this should also get into page cache */
759 * ugly, but we can only end the transaction if
762 if (th->t_refcount == 1) {
763 reiserfs_end_persistent_transaction(th);
767 retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
769 if ( retval != -ENOSPC )
770 printk("clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ;
771 if (allocated_block_nr) {
772 /* the bitmap, the super, and the stat data == 3 */
774 th = reiserfs_persistent_transaction(inode->i_sb,3);
776 reiserfs_free_block (th, allocated_block_nr);
782 retval = direct2indirect (th, inode, &path, unbh, tail_offset);
784 reiserfs_unmap_buffer(unbh);
785 reiserfs_free_block (th, allocated_block_nr);
788 /* it is important the set_buffer_uptodate is done after
789 ** the direct2indirect. The buffer might contain valid
790 ** data newer than the data on disk (read by readpage, changed,
791 ** and then sent here by writepage). direct2indirect needs
792 ** to know if unbh was already up to date, so it can decide
793 ** if the data in unbh needs to be replaced with data from
796 set_buffer_uptodate (unbh);
798 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
799 buffer will disappear shortly, so it should not be added to
801 if ( unbh->b_page ) {
802 /* we've converted the tail, so we must
803 ** flush unbh before the transaction commits
805 reiserfs_add_tail_list(inode, unbh) ;
807 /* mark it dirty now to prevent commit_write from adding
808 ** this buffer to the inode's dirty buffer list
811 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
812 * It's still atomic, but it sets the page dirty too,
813 * which makes it eligible for writeback at any time by the
814 * VM (which was also the case with __mark_buffer_dirty())
816 mark_buffer_dirty(unbh) ;
819 //inode->i_blocks += inode->i_sb->s_blocksize / 512;
820 //mark_tail_converted (inode);
822 /* append indirect item with holes if needed, when appending
823 pointer to 'block'-th block use block, which is already
825 struct cpu_key tmp_key;
826 unp_t unf_single=0; // We use this in case we need to allocate only
827 // one block which is a fastpath
829 __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
832 RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
833 "vs-804: invalid position for append");
834 /* indirect item has to be appended, set up key of that position */
835 make_cpu_key (&tmp_key, inode,
836 le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
837 //pos_in_item * inode->i_sb->s_blocksize,
838 TYPE_INDIRECT, 3);// key type is unimportant
840 blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
841 RFALSE( blocks_needed < 0, "green-805: invalid offset");
843 if ( blocks_needed == 1 ) {
846 un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
847 GFP_ATOMIC); // We need to avoid scheduling.
853 memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
855 if ( blocks_needed <= max_to_insert) {
856 /* we are going to add target block to the file. Use allocated
858 un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
859 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
860 set_buffer_new(bh_result);
863 /* paste hole to the indirect item */
864 /* If kmalloc failed, max_to_insert becomes zero and it means we
865 only have space for one block */
866 blocks_needed=max_to_insert?max_to_insert:1;
868 retval = reiserfs_paste_into_item (th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
870 if (blocks_needed != 1)
874 reiserfs_free_block (th, allocated_block_nr);
878 inode->i_blocks += inode->i_sb->s_blocksize / 512;
880 /* We need to mark new file size in case this function will be
881 interrupted/aborted later on. And we may do this only for
883 inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
885 //mark_tail_converted (inode);
891 /* this loop could log more blocks than we had originally asked
892 ** for. So, we have to allow the transaction to end if it is
893 ** too big or too full. Update the inode so things are
894 ** consistent if we crash before the function returns
896 ** release the path so that anybody waiting on the path before
897 ** ending their transaction will be able to continue.
899 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
900 restart_transaction(th, inode, &path) ;
902 /* inserting indirect pointers for a hole can take a
903 ** long time. reschedule if needed
907 retval = search_for_position_by_key (inode->i_sb, &key, &path);
908 if (retval == IO_ERROR) {
912 if (retval == POSITION_FOUND) {
913 reiserfs_warning ("vs-825: reiserfs_get_block: "
914 "%K should not be found\n", &key);
916 if (allocated_block_nr)
917 reiserfs_free_block (th, allocated_block_nr);
921 bh = get_last_bh (&path);
923 item = get_item (&path);
924 pos_in_item = path.pos_in_item;
932 reiserfs_update_sd(th, inode) ;
933 reiserfs_end_persistent_transaction(th);
935 reiserfs_write_unlock(inode->i_sb);
936 reiserfs_check_path(&path) ;
941 reiserfs_readpages(struct file *file, struct address_space *mapping,
942 struct list_head *pages, unsigned nr_pages)
944 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
948 // BAD: new directories have stat data of new type and all other items
949 // of old type. Version stored in the inode says about body items, so
950 // in update_stat_data we can not rely on inode, but have to check
951 // item version directly
954 // called by read_locked_inode
955 static void init_inode (struct inode * inode, struct path * path)
957 struct buffer_head * bh;
958 struct item_head * ih;
960 //int version = ITEM_VERSION_1;
962 bh = PATH_PLAST_BUFFER (path);
963 ih = PATH_PITEM_HEAD (path);
966 copy_key (INODE_PKEY (inode), &(ih->ih_key));
967 inode->i_blksize = reiserfs_default_io_size;
969 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list ));
970 REISERFS_I(inode)->i_flags = 0;
971 REISERFS_I(inode)->i_prealloc_block = 0;
972 REISERFS_I(inode)->i_prealloc_count = 0;
973 REISERFS_I(inode)->i_trans_id = 0;
974 REISERFS_I(inode)->i_jl = NULL;
976 if (stat_data_v1 (ih)) {
977 struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
978 unsigned long blocks;
980 set_inode_item_key_version (inode, KEY_FORMAT_3_5);
981 set_inode_sd_version (inode, STAT_DATA_V1);
982 inode->i_mode = sd_v1_mode(sd);
983 inode->i_nlink = sd_v1_nlink(sd);
984 inode->i_uid = sd_v1_uid(sd);
985 inode->i_gid = sd_v1_gid(sd);
986 inode->i_size = sd_v1_size(sd);
987 inode->i_atime.tv_sec = sd_v1_atime(sd);
988 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
989 inode->i_ctime.tv_sec = sd_v1_ctime(sd);
990 inode->i_atime.tv_nsec = 0;
991 inode->i_ctime.tv_nsec = 0;
992 inode->i_mtime.tv_nsec = 0;
994 inode->i_blocks = sd_v1_blocks(sd);
995 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
996 blocks = (inode->i_size + 511) >> 9;
997 blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9);
998 if (inode->i_blocks > blocks) {
999 // there was a bug in <=3.5.23 when i_blocks could take negative
1000 // values. Starting from 3.5.17 this value could even be stored in
1001 // stat data. For such files we set i_blocks based on file
1002 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
1003 // only updated if file's inode will ever change
1004 inode->i_blocks = blocks;
1007 rdev = sd_v1_rdev(sd);
1008 REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd);
1009 /* nopack is initially zero for v1 objects. For v2 objects,
1010 nopack is initialised from sd_attrs */
1011 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1013 // new stat data found, but object may have old items
1014 // (directories and symlinks)
1015 struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
1017 inode->i_mode = sd_v2_mode(sd);
1018 inode->i_nlink = sd_v2_nlink(sd);
1019 inode->i_uid = sd_v2_uid(sd);
1020 inode->i_size = sd_v2_size(sd);
1021 inode->i_gid = sd_v2_gid(sd);
1022 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1023 inode->i_atime.tv_sec = sd_v2_atime(sd);
1024 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1025 inode->i_ctime.tv_nsec = 0;
1026 inode->i_mtime.tv_nsec = 0;
1027 inode->i_atime.tv_nsec = 0;
1028 inode->i_blocks = sd_v2_blocks(sd);
1029 rdev = sd_v2_rdev(sd);
1030 if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
1031 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
1033 inode->i_generation = sd_v2_generation(sd);
1035 if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode))
1036 set_inode_item_key_version (inode, KEY_FORMAT_3_5);
1038 set_inode_item_key_version (inode, KEY_FORMAT_3_6);
1039 REISERFS_I(inode)->i_first_direct_byte = 0;
1040 set_inode_sd_version (inode, STAT_DATA_V2);
1041 /* read persistent inode attributes from sd and initalise
1042 generic inode flags from them */
1043 REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd );
1044 sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
1048 if (S_ISREG (inode->i_mode)) {
1049 inode->i_op = &reiserfs_file_inode_operations;
1050 inode->i_fop = &reiserfs_file_operations;
1051 inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
1052 } else if (S_ISDIR (inode->i_mode)) {
1053 inode->i_op = &reiserfs_dir_inode_operations;
1054 inode->i_fop = &reiserfs_dir_operations;
1055 } else if (S_ISLNK (inode->i_mode)) {
1056 inode->i_op = &page_symlink_inode_operations;
1057 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1059 inode->i_blocks = 0;
1060 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
// update new stat data with inode fields
// Copy the in-core inode into a v2 (format 3.6) on-disk stat data image.
// 'sd' points into a tree buffer; the caller is responsible for journaling
// the buffer after this returns.
static void inode2sd (void * sd, struct inode * inode)
    struct stat_data * sd_v2 = (struct stat_data *)sd;

    set_sd_v2_mode(sd_v2, inode->i_mode );
    set_sd_v2_nlink(sd_v2, inode->i_nlink );
    set_sd_v2_uid(sd_v2, inode->i_uid );
    set_sd_v2_size(sd_v2, inode->i_size );
    set_sd_v2_gid(sd_v2, inode->i_gid );
    set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec );
    set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec );
    set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec );
    set_sd_v2_blocks(sd_v2, inode->i_blocks );
    /* device nodes store the encoded rdev in the slot that otherwise
       holds the generation number (NOTE(review): the else separating the
       two stores is not visible here -- confirm against full source) */
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
        set_sd_v2_generation(sd_v2, inode->i_generation);
    /* persistent reiserfs attributes derived from the generic inode flags */
    flags = REISERFS_I(inode)->i_attrs;
    i_attrs_to_sd_attrs( inode, &flags );
    set_sd_v2_attrs( sd_v2, flags );
// used to copy inode's fields to old stat data
// Copy the in-core inode into a v1 (format 3.5) on-disk stat data image.
// Same contract as inode2sd(): caller logs the containing buffer.
static void inode2sd_v1 (void * sd, struct inode * inode)
    struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;

    set_sd_v1_mode(sd_v1, inode->i_mode );
    set_sd_v1_uid(sd_v1, inode->i_uid );
    set_sd_v1_gid(sd_v1, inode->i_gid );
    set_sd_v1_nlink(sd_v1, inode->i_nlink );
    set_sd_v1_size(sd_v1, inode->i_size );
    set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec );
    set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec );
    set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec );

    /* device nodes store the encoded device number in the stat data
       (NOTE(review): the else separating rdev/blocks is not visible here
       -- confirm against full source) */
    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
        set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
        set_sd_v1_blocks(sd_v1, inode->i_blocks );

    // Sigh. i_first_direct_byte is back: v1 stat data records where the
    // file's direct (tail) bytes begin.
    set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte);
/* NOTE, you must prepare the buffer head before sending it here,
** and then log it after the call.  Copies the in-core inode into the
** stat data item that 'path' points at, picking the v1 or v2 layout
** from the item head.
*/
static void update_stat_data (struct path * path, struct inode * inode)
    struct buffer_head * bh;
    struct item_head * ih;

    bh = PATH_PLAST_BUFFER (path);
    ih = PATH_PITEM_HEAD (path);

    /* the path must point at this inode's stat data item; anything else
       indicates tree corruption or a search bug */
    if (!is_statdata_le_ih (ih))
	reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
			INODE_PKEY (inode), ih);

    if (stat_data_v1 (ih)) {
	// path points to old stat data
	inode2sd_v1 (B_I_PITEM (bh, ih), inode);
	// new (v2) stat data on the elided else branch
	inode2sd (B_I_PITEM (bh, ih), inode);
/* Find this inode's stat data item in the tree, copy the in-core inode
** fields into it, and mark the buffer dirty inside transaction 'th'.
** Loops (via the continue below) if the item moves while we schedule in
** reiserfs_prepare_for_journal().
*/
void reiserfs_update_sd (struct reiserfs_transaction_handle *th,
			 struct inode * inode)
    INITIALIZE_PATH(path);
    struct buffer_head *bh ;
    struct item_head *ih, tmp_ih ;

    make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant

	/* look for the object's stat data */
	retval = search_item (inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
	    reiserfs_warning ("vs-13050: reiserfs_update_sd: "
			      "i/o failure occurred trying to update %K stat data",
	if (retval == ITEM_NOT_FOUND) {
	    pos = PATH_LAST_POSITION (&path);
	    /* nlink == 0 is the legitimate case: the object was deleted
	       while we held the in-core inode */
	    if (inode->i_nlink == 0) {
		/*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/
	    reiserfs_warning ("vs-13060: reiserfs_update_sd: "
			      "stat data of object %k (nlink == %d) not found (pos %d)\n",
			      INODE_PKEY (inode), inode->i_nlink, pos);
	    reiserfs_check_path(&path) ;

	/* sigh, prepare_for_journal might schedule.  When it schedules the
	** FS might change.  We have to detect that, and loop back to the
	** search if the stat data item has moved
	*/
	bh = get_last_bh(&path) ;
	ih = get_ih(&path) ;
	copy_item_head (&tmp_ih, ih);
	fs_gen = get_generation (inode->i_sb);
	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
	if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
	    continue ;	/* Stat_data item has been moved after scheduling. */

    update_stat_data (&path, inode);
    journal_mark_dirty(th, th->t_super, bh) ;
/* reiserfs_read_locked_inode is called to read the inode off disk, and it
** does a make_bad_inode when things go wrong.  But, we need to make sure
** and clear the key in the private portion of the inode, otherwise a
** corresponding iput might try to delete whatever object the inode last
** pointed to.
*/
static void reiserfs_make_bad_inode(struct inode *inode) {
    /* zero the on-disk key first so a later iput cannot resolve this
       bad inode to a stale on-disk object */
    memset(INODE_PKEY(inode), 0, KEY_SIZE);
    make_bad_inode(inode);
// initially this function was derived from minix or ext2's analog and
// evolved as the prototype did
// "init actor" supplied to iget5_locked(): stamp a freshly allocated
// in-core inode with the objectid/dirid from the lookup args.
int reiserfs_init_locked_inode (struct inode * inode, void *p)
    struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ;
    inode->i_ino = args->objectid;
    INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
/* looks for stat data in the tree, and fills up the fields of in-core
   inode stat data fields.  On any failure (I/O error, missing item, or a
   dead inode raced with knfsd) the inode is turned into a bad inode. */
void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args)
    INITIALIZE_PATH (path_to_sd);
    unsigned long dirino;

    dirino = args->dirid ;

    /* set version 1, version 2 could be used too, because stat data
       key is the same in both versions */
    key.version = KEY_FORMAT_3_5;
    key.on_disk_key.k_dir_id = dirino;
    key.on_disk_key.k_objectid = inode->i_ino;
    key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
    key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;

    /* look for the object's stat data */
    retval = search_item (inode->i_sb, &key, &path_to_sd);
    if (retval == IO_ERROR) {
	reiserfs_warning ("vs-13070: reiserfs_read_locked_inode: "
                    "i/o failure occurred trying to find stat data of %K\n",
	reiserfs_make_bad_inode(inode) ;

    if (retval != ITEM_FOUND) {
	/* a stale NFS handle can trigger this without it being an error */
	pathrelse (&path_to_sd);
	reiserfs_make_bad_inode(inode) ;

    /* copy on-disk stat data into the in-core inode */
    init_inode (inode, &path_to_sd);

    /* It is possible that knfsd is trying to access inode of a file
       that is being removed from the disk by some other thread. As we
       update sd on unlink all that is required is to check for nlink
       here. This bug was first found by Sizif when debugging
       SquidNG/Butterfly, forgotten, and found again after Philippe
       Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

       More logical fix would require changes in fs/inode.c:iput() to
       remove inode from hash-table _after_ fs cleaned disk stuff up and
       in iget() to return NULL if I_FREEING inode is found in
       hash-table. */
    /* Currently there is one place where it's ok to meet inode with
       nlink==0: processing of open-unlinked and half-truncated files
       during mount (fs/reiserfs/super.c:finish_unfinished()). */
    if( ( inode -> i_nlink == 0 ) &&
        ! REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) {
	    reiserfs_warning( "vs-13075: reiserfs_read_locked_inode: "
			      "dead inode read from disk %K. "
			      "This is likely to be race with knfsd. Ignore\n",
	    reiserfs_make_bad_inode( inode );

    reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
/**
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
 *
 * @inode: inode from hash table to check
 * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
 *
 * This function is called by iget5_locked() to distinguish reiserfs inodes
 * having the same inode numbers. Such inodes can only exist due to some
 * error condition. One of them should be bad. Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 *
 * Returns nonzero when both objectid and parent dirid match.
 */
int reiserfs_find_actor( struct inode *inode, void *opaque )
    struct reiserfs_iget_args *args;

    /* args is already in CPU order */
    return (inode->i_ino == args->objectid) &&
	(le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
/* Look up (or read in) the in-core inode for the object named by 'key'.
** Uses iget5_locked() with the find/init actors above so inodes sharing
** an objectid are disambiguated by parent dirid.  Returns an ERR_PTR on
** allocation failure; a bad/stale inode is handled below. */
struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key)
    struct inode * inode;
    struct reiserfs_iget_args args ;

    args.objectid = key->on_disk_key.k_objectid ;
    args.dirid = key->on_disk_key.k_dir_id ;
    inode = iget5_locked (s, key->on_disk_key.k_objectid,
		 reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args));
	return ERR_PTR(-ENOMEM) ;

    if (inode->i_state & I_NEW) {
	/* freshly allocated: fill it from the on-disk stat data */
	reiserfs_read_locked_inode(inode, &args);
	unlock_new_inode(inode);

    if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) {
	/* either due to i/o error or a stale NFS handle */
/* NFS export helper: turn an opaque (objectid, dirid, generation) triple
** into a dentry.  data[2] == 0 means "no generation check" (legacy
** handles); a generation mismatch means the object was reused -> ESTALE. */
struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp)
    __u32 *data = vobjp;
    struct cpu_key key ;
    struct dentry *result;
    struct inode *inode;

    key.on_disk_key.k_objectid = data[0] ;
    key.on_disk_key.k_dir_id = data[1] ;
    inode = reiserfs_iget(sb, &key) ;
    if (inode && !IS_ERR(inode) && data[2] != 0 &&
	data[2] != inode->i_generation) {
	    /* stale handle: objectid has been reused since it was encoded */
	    inode = ERR_PTR(-ESTALE);
	return ERR_PTR(PTR_ERR(inode));
    result = d_alloc_anon(inode);
	return ERR_PTR(-ENOMEM);
/* NFS export: decode an on-the-wire file handle into a dentry via
** find_exported_dentry().  The handle layout depends on fhtype (see the
** table below). */
struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data,
                                  int len, int fhtype,
				  int (*acceptable)(void *contect, struct dentry *de),
    __u32 obj[3], parent[3];

    /* fhtype happens to reflect the number of u32s encoded.
     * due to a bug in earlier code, fhtype might indicate there
     * are more u32s than actually fit.
     * so if fhtype seems to be more than len, reduce fhtype.
     * Valid types are:
     *   2 - objectid + dir_id - legacy support
     *   3 - objectid + dir_id + generation
     *   4 - objectid + dir_id + objectid and dirid of parent - legacy
     *   5 - objectid + dir_id + generation + objectid and dirid of parent
     *   6 - as above plus generation of directory
     * 6 does not fit in NFSv2 handles
     */
    if (fhtype != 6 || len != 5)
	    printk(KERN_WARNING "nfsd/reiserfs, fhtype=%d, len=%d - odd\n",

    if (fhtype == 3 || fhtype >= 5)
    else    obj[2] = 0; /* generation number */

    /* parent triple sits after the (2- or 3-word) object part */
    parent[0] = data[fhtype>=5?3:2] ;
    parent[1] = data[fhtype>=5?4:3] ;
	parent[2] = data[5];

    return sb->s_export_op->find_exported_dentry(sb, obj, fhtype < 4 ? NULL : parent,
			       acceptable, context);
/* NFS export: encode this dentry into a file handle.  Writes the object's
** (ino, dirid, generation), and the parent's triple too when there is
** room and the caller asked for it.  Returns the fhtype (word count). */
int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) {
    struct inode *inode = dentry->d_inode ;

    data[0] = inode->i_ino ;
    data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
    data[2] = inode->i_generation ;

    /* no room for directory info? return what we've stored so far */
    if (maxlen < 5 || ! need_parent)

    /* d_lock keeps d_parent stable while we read the parent inode */
    spin_lock(&dentry->d_lock);
    inode = dentry->d_parent->d_inode ;
    data[3] = inode->i_ino ;
    data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
	data[5] = inode->i_generation ;
    spin_unlock(&dentry->d_lock);
/* looks for stat data, then copies fields to it, marks the buffer
   containing stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
** always logs them.  This call allows the VFS inode marking routines
** to properly mark inodes for datasync and such, but only actually
** does something when called for a synchronous update.
*/
void reiserfs_write_inode (struct inode * inode, int do_sync) {
    struct reiserfs_transaction_handle th ;
    int jbegin_count = 1 ;

    if (inode->i_sb->s_flags & MS_RDONLY) {
	/* should never happen: nothing can dirty an inode on a r/o mount */
	reiserfs_warning("clm-6005: writing inode %lu on readonly FS\n",

    /* memory pressure can sometimes initiate write_inode calls with sync == 1,
    ** these cases are just when the system needs ram, not when the
    ** inode needs to reach disk for safety, and they can safely be
    ** ignored because the altered inode has already been logged.
    */
    if (do_sync && !(current->flags & PF_MEMALLOC)) {
	reiserfs_write_lock(inode->i_sb);
	journal_begin(&th, inode->i_sb, jbegin_count) ;
	reiserfs_update_sd (&th, inode);
	/* journal_end_sync waits for the commit to hit disk */
	journal_end_sync(&th, inode->i_sb, jbegin_count) ;
	reiserfs_write_unlock(inode->i_sb);
/* FIXME: no need any more. right? */
/* Flush the in-core inode into its stat data item under an already-open
   transaction.  Thin wrapper around reiserfs_update_sd(). */
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode)
  reiserfs_update_sd (th, inode);
/* stat data of new object is inserted already, this inserts the item
   containing "." and ".." entries.  'ih' is reused as the item head of the
   new empty-directory item (its key was set by reiserfs_new_inode). */
static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
				   struct item_head * ih, struct path * path,
    struct super_block * sb = th->t_super;
    char empty_dir [EMPTY_DIR_SIZE];
    char * body = empty_dir;

    _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
		   le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/);

    /* compose item head for new item. Directories consist of items of
       old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
       is done by reiserfs_new_inode */
    if (old_format_only (sb)) {
	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);

	make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
				INODE_PKEY (dir)->k_dir_id,
				INODE_PKEY (dir)->k_objectid );
	make_le_item_head (ih, 0, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);

	make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
		   	     INODE_PKEY (dir)->k_dir_id,
			     INODE_PKEY (dir)->k_objectid );

    /* look for place in the tree for new item */
    retval = search_item (sb, &key, path);
    if (retval == IO_ERROR) {
	reiserfs_warning ("vs-13080: reiserfs_new_directory: "
			  "i/o failure occurred creating new directory\n");
    if (retval == ITEM_FOUND) {
	/* should be impossible: the objectid was freshly allocated */
	reiserfs_warning ("vs-13070: reiserfs_new_directory: "
			  "object with this key exists (%k)", &(ih->ih_key));

    /* insert item, that is empty directory item */
    return reiserfs_insert_item (th, path, &key, ih, body);
/* stat data of object has been inserted, this inserts the item
   containing the body of symlink.  The body is stored as a single
   direct item at offset 1. */
static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
				 struct item_head * ih,
				 struct path * path, const char * symname, int item_len)
    struct super_block * sb = th->t_super;

    _make_cpu_key (&key, KEY_FORMAT_3_5,
		   le32_to_cpu (ih->ih_key.k_dir_id),
		   le32_to_cpu (ih->ih_key.k_objectid),
		   1, TYPE_DIRECT, 3/*key length*/);

    make_le_item_head (ih, 0, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/);

    /* look for place in the tree for new item */
    retval = search_item (sb, &key, path);
    if (retval == IO_ERROR) {
	/* NOTE(review): "symlinik" typo and reused vs-13080 label below are
	   in the original source; left untouched to keep log greps stable */
	reiserfs_warning ("vs-13080: reiserfs_new_symlinik: "
			  "i/o failure occurred creating new symlink\n");
    if (retval == ITEM_FOUND) {
	reiserfs_warning ("vs-13080: reiserfs_new_symlink: "
			  "object with this key exists (%k)", &(ih->ih_key));

    /* insert item, that is body of symlink */
    return reiserfs_insert_item (th, path, &key, ih, symname);
/* inserts the stat data into the tree, and then calls
   reiserfs_new_directory (to insert ".", ".." item if new object is
   directory) or reiserfs_new_symlink (to insert symlink body if new
   object is symlink) or nothing (if new object is regular file)

   NOTE! uid and gid must already be set in the inode.  If we return
   non-zero due to an error, we have to drop the quota previously allocated
   for the fresh inode.  This can only be done outside a transaction, so
   if we return non-zero, we also end the transaction.  */
int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
			struct inode * dir, int mode,
			const char * symname,
			/* 0 for regular, EMPTY_DIR_SIZE for dirs,
			   strlen (symname) for symlinks)*/
			loff_t i_size, struct dentry *dentry,
			struct inode *inode)
    struct super_block * sb;
    INITIALIZE_PATH (path_to_key);
    struct item_head ih;
    struct stat_data sd;

    /* directory being created in must still be linked */
    if (!dir || !dir->i_nlink) {

    /* item head of new item */
    ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
    ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
    if (!ih.ih_key.k_objectid) {
	/* objectid map exhausted */
	goto out_bad_inode ;
    if (old_format_only (sb))
      /* not a perfect generation count, as object ids can be reused, but
      ** this is as good as reiserfs can do right now.
      ** note that the private part of inode isn't filled in yet, we have
      ** to use the directory.
      */
      inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
#if defined( USE_INODE_GENERATION_COUNTER )
      inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
      inode->i_generation = ++event;

    /* fill stat data */
    inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);

    /* uid and gid must already be set by the caller for quota init */

    /* symlink cannot be immutable or append only, right? */
    if( S_ISLNK( inode -> i_mode ) )
	    inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND );

    inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
    inode->i_size = i_size;
    inode->i_blocks = (inode->i_size + 511) >> 9;
    /* symlinks start with direct bytes at offset 1; everything else has
       no direct bytes yet */
    REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
      U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/;

    INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list ));
    REISERFS_I(inode)->i_flags = 0;
    REISERFS_I(inode)->i_prealloc_block = 0;
    REISERFS_I(inode)->i_prealloc_count = 0;
    REISERFS_I(inode)->i_trans_id = 0;
    REISERFS_I(inode)->i_jl = 0;
    /* inherit persistent attributes from the parent directory */
    REISERFS_I(inode)->i_attrs =
	REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
    sd_attrs_to_i_attrs( REISERFS_I(inode) -> i_attrs, inode );

    if (old_format_only (sb))
	make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
	make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);

    /* key to search for correct place for new stat data */
    _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
		   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);

    /* find proper place for inserting of stat data */
    retval = search_item (sb, &key, &path_to_key);
    if (retval == IO_ERROR) {
    if (retval == ITEM_FOUND) {
	pathrelse (&path_to_key);

    if (old_format_only (sb)) {
	if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
	    pathrelse (&path_to_key);
	    /* i_uid or i_gid is too big to be stored in stat data v3.5 */
	inode2sd_v1 (&sd, inode);
	inode2sd (&sd, inode);

    // these do not go to on-disk stat data
    inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
    inode->i_blksize = reiserfs_default_io_size;

    // store in in-core inode the key of stat data and version all
    // object items will have (directory items will have old offset
    // format, other new objects will consist of new items)
    memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
    if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode))
	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
	set_inode_item_key_version (inode, KEY_FORMAT_3_6);
    if (old_format_only (sb))
	set_inode_sd_version (inode, STAT_DATA_V1);
	set_inode_sd_version (inode, STAT_DATA_V2);

    /* insert the stat data into the tree */
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    if (REISERFS_I(dir)->new_packing_locality)
	th->displace_new_blocks = 1;
    retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
	reiserfs_check_path(&path_to_key) ;

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
    if (!th->displace_new_blocks)
	REISERFS_I(dir)->new_packing_locality = 0;
    if (S_ISDIR(mode)) {
	/* insert item with "." and ".." */
	retval = reiserfs_new_directory (th, &ih, &path_to_key, dir);

    if (S_ISLNK(mode)) {
	/* insert body of symlink */
	if (!old_format_only (sb))
	    i_size = ROUND_UP(i_size);
	retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
	/* error path: undo and close the transaction */
	reiserfs_check_path(&path_to_key) ;
	journal_end(th, th->t_super, th->t_blocks_allocated);
	goto out_inserted_sd;

    insert_inode_hash (inode);
    reiserfs_update_sd(th, inode);
    reiserfs_check_path(&path_to_key) ;

/* it looks like you can easily compress these two goto targets into
 * one.  Keeping it like this doesn't actually hurt anything, and they
 * are place holders for what the quota code actually needs.
 */
    /* Invalidate the object, nothing was inserted yet */
    INODE_PKEY(inode)->k_objectid = 0;

    /* dquot_drop must be done outside a transaction */
    journal_end(th, th->t_super, th->t_blocks_allocated) ;
    make_bad_inode(inode);

    th->t_trans_id = 0 ; /* so the caller can't use this handle later */
/*
** finds the tail page in the page cache,
** reads the last block in.
**
** On success, page_result is set to a locked, pinned page, and bh_result
** is set to an up to date buffer for the last block in the file.  returns 0.
**
** tail conversion is not done, so bh_result might not be valid for writing
** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
** trying to write the block.
**
** on failure, nonzero is returned, page_result and bh_result are untouched.
*/
static int grab_tail_page(struct inode *p_s_inode,
			  struct page **page_result,
			  struct buffer_head **bh_result) {

    /* we want the page with the last byte in the file,
    ** not the page that will hold the next byte for appending
    */
    unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
    unsigned long pos = 0 ;
    unsigned long start = 0 ;
    unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
    unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
    struct buffer_head *bh ;
    struct buffer_head *head ;
    struct page * page ;

    /* we know that we are only called with inode->i_size > 0.
    ** we also know that a file tail can never be as big as a block
    ** If i_size % blocksize == 0, our file is currently block aligned
    ** and it won't need converting or zeroing after a truncate.
    */
    if ((offset & (blocksize - 1)) == 0) {
    page = grab_cache_page(p_s_inode->i_mapping, index) ;

    /* start within the page of the last block in the file */
    start = (offset / blocksize) * blocksize ;

    /* read the tail block in (no tail conversion is performed) */
    error = block_prepare_write(page, start, offset,
				reiserfs_get_block_create_0) ;

    head = page_buffers(page) ;
	bh = bh->b_this_page ;
    } while(bh != head) ;

    if (!buffer_uptodate(bh)) {
	/* note, this should never happen, prepare_write should
	** be taking care of this for us.  If the buffer isn't up to date,
	** I've screwed up the code to find the buffer, or the code to
	** call prepare_write
	*/
	reiserfs_warning("clm-6000: error reading block %lu on dev %s\n",
	                 reiserfs_bdevname (p_s_inode->i_sb)) ;

    *page_result = page ;

    /* error path: drop the page reference taken above */
    page_cache_release(page) ;
/*
** vfs version of truncate file.  Must NOT be called with
** a transaction already started.
**
** some code taken from block_truncate_page
*/
void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
    struct reiserfs_transaction_handle th ;
    /* we want the offset for the first byte after the end of the file */
    unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
    unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
    struct page *page = NULL ;
    struct buffer_head *bh = NULL ;

    reiserfs_write_lock(p_s_inode->i_sb);

    if (p_s_inode->i_size > 0) {
	if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
	    // -ENOENT means we truncated past the end of the file,
	    // and get_block_create_0 could not find a block to read in,
	    if (error != -ENOENT)
	        reiserfs_warning("clm-6001: grab_tail_page failed %d\n", error);

    /* so, if page != NULL, we have a buffer head for the offset at
    ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
    ** then we have an unformatted node.  Otherwise, we have a direct item,
    ** and no zeroing is required on disk.  We zero after the truncate,
    ** because the truncate might pack the item anyway
    ** (it will unmap bh if it packs).
    */
    /* it is enough to reserve space in transaction for 2 balancings:
       one for "save" link adding and another for the first
       cut_from_item. 1 is for update_sd */
    journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
    reiserfs_update_inode_transaction(p_s_inode) ;
    if (update_timestamps)
	    /* we are doing real truncate: if the system crashes before the last
	       transaction of truncating gets committed - on reboot the file
	       either appears truncated properly or not truncated at all */
	add_save_link (&th, p_s_inode, 1);
    reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
    journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;

    if (update_timestamps)
	remove_save_link (p_s_inode, 1/* truncate */);

	/* zero the part of the last page past the new EOF so mmap readers
	   do not see stale bytes */
	length = offset & (blocksize - 1) ;
	/* if we are not on a block boundary */
	    length = blocksize - length ;
	    kaddr = kmap_atomic(page, KM_USER0) ;
	    memset(kaddr + offset, 0, length) ;
	    flush_dcache_page(page) ;
	    kunmap_atomic(kaddr, KM_USER0) ;
	    if (buffer_mapped(bh) && bh->b_blocknr != 0) {
	        mark_buffer_dirty(bh) ;
	page_cache_release(page) ;

    reiserfs_write_unlock(p_s_inode->i_sb);
/* Map 'block' of the page owned by 'bh_result' for writepage.  For
** indirect items this fills in bh_result's device mapping; for direct
** (tail) items it copies the page data straight into the item under a
** transaction; holes fall through to reiserfs_get_block(). */
static int map_block_for_writepage(struct inode *inode,
			       struct buffer_head *bh_result,
                               unsigned long block) {
    struct reiserfs_transaction_handle th ;
    struct item_head tmp_ih ;
    struct item_head *ih ;
    struct buffer_head *bh ;
    struct cpu_key key ;
    INITIALIZE_PATH(path) ;
    int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
    loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
    int use_get_block = 0 ;
    int bytes_copied = 0 ;
    int trans_running = 0;

    /* catch places below that try to log something without starting a trans */

    if (!buffer_uptodate(bh_result)) {

    kmap(bh_result->b_page) ;
    reiserfs_write_lock(inode->i_sb);
    make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;

    retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
    if (retval != POSITION_FOUND) {

    bh = get_last_bh(&path) ;
    ih = get_ih(&path) ;
    item = get_item(&path) ;
    pos_in_item = path.pos_in_item ;

    /* we've found an unformatted node */
    if (indirect_item_found(retval, ih)) {
	if (bytes_copied > 0) {
	    reiserfs_warning("clm-6002: bytes_copied %d\n", bytes_copied) ;
        if (!get_block_num(item, pos_in_item)) {
	    /* crap, we are writing to a hole */
	set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
    } else if (is_direct_le_ih(ih)) {
	/* tail (direct item): copy page bytes into the item itself */
	p = page_address(bh_result->b_page) ;
	p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
	copy_size = ih_item_len(ih) - pos_in_item;

	fs_gen = get_generation(inode->i_sb) ;
	copy_item_head(&tmp_ih, ih) ;

	if (!trans_running) {
	    /* vs-3050 is gone, no need to drop the path */
	    journal_begin(&th, inode->i_sb, jbegin_count) ;
	    reiserfs_update_inode_transaction(inode) ;
	    /* journal_begin may have scheduled; recheck the item */
	    if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
		reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;

	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;

	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
	    reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;

	memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
	journal_mark_dirty(&th, inode->i_sb, bh) ;
	bytes_copied += copy_size ;
	/* blocknr 0 marks "backed by a direct item, not a real block" */
	set_block_dev_mapped(bh_result, 0, inode);

	/* are there still bytes left? */
        if (bytes_copied < bh_result->b_size &&
	    (byte_offset + bytes_copied) < inode->i_size) {
	    set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
	reiserfs_warning("clm-6003: bad item inode %lu, device %s\n", inode->i_ino, reiserfs_bdevname (inode->i_sb)) ;

    if (trans_running) {
	journal_end(&th, inode->i_sb, jbegin_count) ;
    reiserfs_write_unlock(inode->i_sb);

    /* this is where we fill in holes in the file. */
    if (use_get_block) {
	retval = reiserfs_get_block(inode, block, bh_result,
	                            GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM |
				    GET_BLOCK_NO_DANGLE);
	    if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
	        /* get_block failed to find a mapped unformatted node. */

    kunmap(bh_result->b_page) ;

    if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
	/* we've copied data from the page into the direct item, so the
	 * buffer in the page is now clean, mark it to reflect that.
	 */
        lock_buffer(bh_result);
	clear_buffer_dirty(bh_result);
	unlock_buffer(bh_result);
/*
 * mason@suse.com: updated in 2.5.54 to follow the same general io
 * start/recovery path as __block_write_full_page, along with special
 * code to handle reiserfs tails.
 */
static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) {
    struct inode *inode = page->mapping->host ;
    unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
    unsigned long block ;
    struct buffer_head *head, *bh;

    /* The page dirty bit is cleared before writepage is called, which
     * means we have to tell create_empty_buffers to make dirty buffers
     * The page really should be up to date at this point, so tossing
     * in the BH_Uptodate is just a sanity check.
     */
    if (!page_has_buffers(page)) {
	create_empty_buffers(page, inode->i_sb->s_blocksize, 
	                    (1 << BH_Dirty) | (1 << BH_Uptodate));
    head = page_buffers(page) ;

    /* last page in the file, zero out any contents past the
    ** last byte in the file
    */
    if (page->index >= end_index) {
	unsigned last_offset;

        last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
	/* no file contents in this page */
	if (page->index >= end_index + 1 || !last_offset) {
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
	flush_dcache_page(page) ;
	kunmap_atomic(kaddr, KM_USER0) ;
    block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
    /* first map all the buffers, logging any direct items we find */
	if (buffer_dirty(bh) && (!buffer_mapped(bh) ||
	   (buffer_mapped(bh) && bh->b_blocknr == 0))) {
	    /* not mapped yet, or it points to a direct item, search
	     * the btree for the mapping info, and log any direct
	     * items found
	     */
	    if ((error = map_block_for_writepage(inode, bh, block))) {
	bh = bh->b_this_page;
    } while(bh != head) ;

    /* now go through and lock any dirty buffers on the page */
	if (!buffer_mapped(bh))
	if (buffer_mapped(bh) && bh->b_blocknr == 0)

	/* from this point on, we know the buffer is mapped to a
	 * real block and not a direct item
	 */
	if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
	    /* nonblocking writeback: cannot wait for a locked buffer,
	       redirty the page and move on */
	    if (test_set_buffer_locked(bh)) {
	        redirty_page_for_writepage(wbc, page);
	if (test_clear_buffer_dirty(bh)) {
	    mark_buffer_async_write(bh);
    } while((bh = bh->b_this_page) != head);

    BUG_ON(PageWriteback(page));
    set_page_writeback(page);

    /*
     * since any buffer might be the only dirty buffer on the page, 
     * the first submit_bh can bring the page out of writeback.
     * be careful with the buffers.
     */
        struct buffer_head *next = bh->b_this_page;
	if (buffer_async_write(bh)) {
	    submit_bh(WRITE, bh);
    } while(bh != head);

    /*
     * if this page only had a direct item, it is very possible for
     * no io to be required without there being an error.  Or, 
     * someone else could have locked them and sent them down the 
     * pipe without locking the page
     */
	if (!buffer_uptodate(bh)) {
        bh = bh->b_this_page;
    } while(bh != head);
    SetPageUptodate(page);
    end_page_writeback(page);

    /* catches various errors, we need to make sure any valid dirty blocks
     * get to the media.  The page is currently locked and not marked for 
     * writeback
     */
    ClearPageUptodate(page);
	if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
	    mark_buffer_async_write(bh);
	    /*
	     * clear any dirty bits that might have come from getting
	     * attached to a dirty page
	     */
	     clear_buffer_dirty(bh);
	bh = bh->b_this_page;
    } while(bh != head);
    BUG_ON(PageWriteback(page));
    set_page_writeback(page);
	struct buffer_head *next = bh->b_this_page;
	if (buffer_async_write(bh)) {
	    clear_buffer_dirty(bh);
	    submit_bh(WRITE, bh);
    } while(bh != head);
2207 static int reiserfs_readpage (struct file *f, struct page * page)
2209 return block_read_full_page (page, reiserfs_get_block);
2213 static int reiserfs_writepage (struct page * page, struct writeback_control *wbc)
2215 struct inode *inode = page->mapping->host ;
2216 reiserfs_wait_on_write_block(inode->i_sb) ;
2217 return reiserfs_write_full_page(page, wbc) ;
/*
 * reiserfs_prepare_write: ->prepare_write for the reiserfs address_space.
 *
 * Maps (and allocates, if needed) the buffers backing bytes [from, to)
 * of @page via block_prepare_write()/reiserfs_get_block().  Because
 * reiserfs_get_block() may start a persistent journal transaction and
 * leave it running on error, the refcount of any transaction already
 * running on entry is sampled first (old_ref) so the error path can
 * distinguish a transaction get_block started itself from one it merely
 * nested into.
 */
2220 int reiserfs_prepare_write(struct file *f, struct page *page,
2221 unsigned from, unsigned to) {
2222 struct inode *inode = page->mapping->host ;
/* wait out any journal-wide write block before touching the tree */
2226 reiserfs_wait_on_write_block(inode->i_sb) ;
/* a page ending in a packed tail must be converted before writing */
2227 fix_tail_page_for_writing(page) ;
2228 if (reiserfs_transaction_running(inode->i_sb)) {
2229 struct reiserfs_transaction_handle *th;
2230 th = (struct reiserfs_transaction_handle *)current->journal_info;
/* remember the handle's nesting depth on entry; compared below */
2231 old_ref = th->t_refcount;
2235 ret = block_prepare_write(page, from, to, reiserfs_get_block) ;
2236 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2237 struct reiserfs_transaction_handle *th = current->journal_info;
2238 /* this gets a little ugly. If reiserfs_get_block returned an
2239 * error and left a transaction running, we've got to close it,
2240 * and we've got to free handle if it was a persistent transaction.
2242 * But, if we had nested into an existing transaction, we need
2243 * to just drop the ref count on the handle.
2245 * If old_ref == 0, the transaction is from reiserfs_get_block,
2246 * and it was a persistent trans. Otherwise, it was nested above.
2248 if (th->t_refcount > old_ref) {
/* get_block opened this transaction itself: end it under the lock */
2252 reiserfs_write_lock(inode->i_sb);
2253 reiserfs_end_persistent_transaction(th);
2254 reiserfs_write_unlock(inode->i_sb);
2263 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) {
2264 return generic_block_bmap(as, block, reiserfs_bmap) ;
/*
 * reiserfs_commit_write: ->commit_write for the reiserfs address_space.
 *
 * Commits bytes [from, to) of @page after a successful prepare_write.
 * Unlike generic_commit_write(), a reiserfs size extension has to be
 * journalled: when this write grows the file, i_size and the on-disk
 * stat data are updated inside a small dedicated transaction (myth).
 * Any persistent transaction left running by prepare_write/get_block
 * (th, picked up from current->journal_info) is ended here too.
 */
2267 static int reiserfs_commit_write(struct file *f, struct page *page,
2268 unsigned from, unsigned to) {
2269 struct inode *inode = page->mapping->host ;
/* absolute file offset of the end of this write */
2270 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2272 struct reiserfs_transaction_handle *th = NULL;
2274 reiserfs_wait_on_write_block(inode->i_sb) ;
2275 if (reiserfs_transaction_running(inode->i_sb)) {
2276 th = current->journal_info;
2278 reiserfs_commit_page(inode, page, from, to);
2280 /* generic_commit_write does this for us, but does not update the
2281 ** transaction tracking stuff when the size changes. So, we have
2282 ** to do the i_size updates here.
2284 if (pos > inode->i_size) {
2285 struct reiserfs_transaction_handle myth ;
2286 reiserfs_write_lock(inode->i_sb);
2287 /* If the file has grown beyond the border where it
2288 can have a tail, unmark it as needing a tail
2290 if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) ||
2291 (have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
2292 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
/* one-block transaction: only the stat data item needs logging */
2294 journal_begin(&myth, inode->i_sb, 1) ;
2295 reiserfs_update_inode_transaction(inode) ;
2296 inode->i_size = pos ;
2297 reiserfs_update_sd(&myth, inode) ;
2298 journal_end(&myth, inode->i_sb, 1) ;
2299 reiserfs_write_unlock(inode->i_sb);
/* close the persistent transaction inherited from prepare_write */
2302 reiserfs_write_lock(inode->i_sb);
2303 reiserfs_end_persistent_transaction(th);
2304 reiserfs_write_unlock(inode->i_sb);
2307 /* we test for O_SYNC here so we can commit the transaction
2308 ** for any packed tails the file might have had
2310 if (f && (f->f_flags & O_SYNC)) {
2311 reiserfs_write_lock(inode->i_sb);
2312 reiserfs_commit_for_inode(inode) ;
2313 reiserfs_write_unlock(inode->i_sb);
2318 void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
2320 if( reiserfs_attrs( inode -> i_sb ) ) {
2321 if( sd_attrs & REISERFS_SYNC_FL )
2322 inode -> i_flags |= S_SYNC;
2324 inode -> i_flags &= ~S_SYNC;
2325 if( sd_attrs & REISERFS_IMMUTABLE_FL )
2326 inode -> i_flags |= S_IMMUTABLE;
2328 inode -> i_flags &= ~S_IMMUTABLE;
2329 if( sd_attrs & REISERFS_APPEND_FL )
2330 inode -> i_flags |= S_APPEND;
2332 inode -> i_flags &= ~S_APPEND;
2333 if( sd_attrs & REISERFS_NOATIME_FL )
2334 inode -> i_flags |= S_NOATIME;
2336 inode -> i_flags &= ~S_NOATIME;
2337 if( sd_attrs & REISERFS_NOTAIL_FL )
2338 REISERFS_I(inode)->i_flags |= i_nopack_mask;
2340 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2344 void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
2346 if( reiserfs_attrs( inode -> i_sb ) ) {
2347 if( inode -> i_flags & S_IMMUTABLE )
2348 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2350 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2351 if( inode -> i_flags & S_SYNC )
2352 *sd_attrs |= REISERFS_SYNC_FL;
2354 *sd_attrs &= ~REISERFS_SYNC_FL;
2355 if( inode -> i_flags & S_NOATIME )
2356 *sd_attrs |= REISERFS_NOATIME_FL;
2358 *sd_attrs &= ~REISERFS_NOATIME_FL;
2359 if( REISERFS_I(inode)->i_flags & i_nopack_mask )
2360 *sd_attrs |= REISERFS_NOTAIL_FL;
2362 *sd_attrs &= ~REISERFS_NOTAIL_FL;
2366 /* decide if this buffer needs to stay around for data logging or ordered
 * write purposes.  Returns nonzero when the buffer may safely be dropped
 * (freeing its journal head on the way out); returns 0 when the journal
 * still needs it.  Called with the page locked; j_dirty_buffers_lock is
 * held across the whole check so the buffer's journal state cannot
 * change underneath us.
 */
2369 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2372 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
2374 spin_lock(&j->j_dirty_buffers_lock) ;
/* an unmapped buffer carries no journal state: always droppable */
2375 if (!buffer_mapped(bh)) {
2378 /* the page is locked, and the only places that log a data buffer
2379 * also lock the page.
2382 if (reiserfs_file_data_log(inode)) {
2383 /* very conservative, leave the buffer pinned if anyone might need it.
2384 ** this should be changed to drop the buffer if it is only in the
2385 ** current transaction
2387 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
/* not data-logging: only ordered-mode state can pin the buffer */
2392 if (buffer_dirty(bh) || buffer_locked(bh)) {
2393 struct reiserfs_journal_list *jl;
2394 struct reiserfs_jh *jh = bh->b_private;
2396 /* why is this safe?
2397 * reiserfs_setattr updates i_size in the on disk
2398 * stat data before allowing vmtruncate to be called.
2400 * If buffer was put onto the ordered list for this
2401 * transaction, we know for sure either this transaction
2402 * or an older one already has updated i_size on disk,
2403 * and this ordered data won't be referenced in the file
2406 * if the buffer was put onto the ordered list for an older
2407 * transaction, we need to leave it around
2409 if (jh && (jl = jh->jl) && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
/* buffer is being dropped: discard its journal-head bookkeeping too */
2413 if (ret && bh->b_private) {
2414 reiserfs_free_jh(bh);
2416 spin_unlock(&j->j_dirty_buffers_lock) ;
2420 /* clm -- taken from fs/buffer.c:block_invalidate_page */
/*
 * reiserfs_invalidatepage: ->invalidatepage for the reiserfs
 * address_space.  Walks the page's circular buffer ring and unmaps
 * every buffer lying wholly beyond @offset that the journal no longer
 * needs (as judged by invalidatepage_can_drop).  Buffers themselves are
 * only released when the entire page is being invalidated.
 */
2421 static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
2423 struct buffer_head *head, *bh, *next;
2424 struct inode *inode = page->mapping->host;
2425 unsigned int curr_off = 0;
/* callers must hold the page lock */
2428 BUG_ON(!PageLocked(page));
2429 if (!page_has_buffers(page))
2432 head = page_buffers(page);
/* per-buffer pass over the ring hanging off the page */
2435 unsigned int next_off = curr_off + bh->b_size;
2436 next = bh->b_this_page;
2439 * is this block fully invalidated?
2441 if (offset <= curr_off) {
2442 if (invalidatepage_can_drop(inode, bh))
2443 reiserfs_unmap_buffer(bh);
2447 curr_off = next_off;
2449 } while (bh != head);
2452 * We release buffers only if the entire page is being invalidated.
2453 * The get_block cached value has been unconditionally invalidated,
2454 * so real IO is not possible anymore.
2457 ret = try_to_release_page(page, 0);
2463 * Returns 1 if the page's buffers were dropped. The page is locked.
2465 * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
2466 * in the buffers at page_buffers(page).
2468 * even in -o notail mode, we can't be sure an old mount without -o notail
2469 * didn't create files with tails.
2471 static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
2473 struct inode *inode = page->mapping->host ;
2474 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
2475 struct buffer_head *head ;
2476 struct buffer_head *bh ;
2479 spin_lock(&j->j_dirty_buffers_lock) ;
2480 head = page_buffers(page) ;
2483 if (bh->b_private) {
2484 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
2485 reiserfs_free_jh(bh);
2491 bh = bh->b_this_page ;
2492 } while (bh != head) ;
2494 ret = try_to_free_buffers(page) ;
2495 spin_unlock(&j->j_dirty_buffers_lock) ;
2499 /* We thank Mingming Cao for helping us understand in great detail what
2500 to do in this section of the code. */
2501 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
2502 const struct iovec *iov, loff_t offset, unsigned long nr_segs)
2504 struct file *file = iocb->ki_filp;
2505 struct inode *inode = file->f_mapping->host;
2507 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2508 offset, nr_segs, reiserfs_get_blocks_direct_io, NULL);
2512 struct address_space_operations reiserfs_address_space_operations = {
2513 .writepage = reiserfs_writepage,
2514 .readpage = reiserfs_readpage,
2515 .readpages = reiserfs_readpages,
2516 .releasepage = reiserfs_releasepage,
2517 .invalidatepage = reiserfs_invalidatepage,
2518 .sync_page = block_sync_page,
2519 .prepare_write = reiserfs_prepare_write,
2520 .commit_write = reiserfs_commit_write,
2521 .bmap = reiserfs_aop_bmap,
2522 .direct_IO = reiserfs_direct_IO