Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / fs / reiserfs / stree.c
1 /*
2  *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4
5 /*
6  *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7  *  Programm System Institute
8  *  Pereslavl-Zalessky Russia
9  */
10
11 /*
12  *  This file contains functions dealing with S+tree
13  *
14  * B_IS_IN_TREE
15  * copy_item_head
16  * comp_short_keys
17  * comp_keys
18  * comp_short_le_keys
19  * le_key2cpu_key
20  * comp_le_keys
21  * bin_search
22  * get_lkey
23  * get_rkey
24  * key_in_buffer
25  * decrement_bcount
26  * decrement_counters_in_path
27  * reiserfs_check_path
28  * pathrelse_and_restore
29  * pathrelse
30  * search_by_key_reada
31  * search_by_key
32  * search_for_position_by_key
33  * comp_items
34  * prepare_for_direct_item
35  * prepare_for_direntry_item
36  * prepare_for_delete_or_cut
37  * calc_deleted_bytes_number
38  * init_tb_struct
39  * padd_item
40  * reiserfs_delete_item
41  * reiserfs_delete_solid_item
42  * reiserfs_delete_object
43  * maybe_indirect_to_direct
44  * indirect_to_direct_roll_back
45  * reiserfs_cut_from_item
46  * truncate_directory
47  * reiserfs_do_truncate
48  * reiserfs_paste_into_item
49  * reiserfs_insert_item
50  */
51
52 #include <linux/config.h>
53 #include <linux/time.h>
54 #include <linux/string.h>
55 #include <linux/pagemap.h>
56 #include <linux/reiserfs_fs.h>
57 #include <linux/smp_lock.h>
58 #include <linux/buffer_head.h>
59 #include <linux/quotaops.h>
60 #include <linux/vs_dlimit.h>
61
62 /* Does the buffer contain a disk block which is in the tree. */
63 inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
64 {
65
66         RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
67                "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
68
69         return (B_LEVEL(p_s_bh) != FREE_LEVEL);
70 }
71
72 //
73 // to gets item head in le form
74 //
75 inline void copy_item_head(struct item_head *p_v_to,
76                            const struct item_head *p_v_from)
77 {
78         memcpy(p_v_to, p_v_from, IH_SIZE);
79 }
80
81 /* k1 is pointer to on-disk structure which is stored in little-endian
82    form. k2 is pointer to cpu variable. For key of items of the same
83    object this returns 0.
84    Returns: -1 if key1 < key2 
85    0 if key1 == key2
86    1 if key1 > key2 */
87 inline int comp_short_keys(const struct reiserfs_key *le_key,
88                            const struct cpu_key *cpu_key)
89 {
90         __u32 n;
91         n = le32_to_cpu(le_key->k_dir_id);
92         if (n < cpu_key->on_disk_key.k_dir_id)
93                 return -1;
94         if (n > cpu_key->on_disk_key.k_dir_id)
95                 return 1;
96         n = le32_to_cpu(le_key->k_objectid);
97         if (n < cpu_key->on_disk_key.k_objectid)
98                 return -1;
99         if (n > cpu_key->on_disk_key.k_objectid)
100                 return 1;
101         return 0;
102 }
103
104 /* k1 is pointer to on-disk structure which is stored in little-endian
105    form. k2 is pointer to cpu variable.
106    Compare keys using all 4 key fields.
107    Returns: -1 if key1 < key2 0
108    if key1 = key2 1 if key1 > key2 */
109 static inline int comp_keys(const struct reiserfs_key *le_key,
110                             const struct cpu_key *cpu_key)
111 {
112         int retval;
113
114         retval = comp_short_keys(le_key, cpu_key);
115         if (retval)
116                 return retval;
117         if (le_key_k_offset(le_key_version(le_key), le_key) <
118             cpu_key_k_offset(cpu_key))
119                 return -1;
120         if (le_key_k_offset(le_key_version(le_key), le_key) >
121             cpu_key_k_offset(cpu_key))
122                 return 1;
123
124         if (cpu_key->key_length == 3)
125                 return 0;
126
127         /* this part is needed only when tail conversion is in progress */
128         if (le_key_k_type(le_key_version(le_key), le_key) <
129             cpu_key_k_type(cpu_key))
130                 return -1;
131
132         if (le_key_k_type(le_key_version(le_key), le_key) >
133             cpu_key_k_type(cpu_key))
134                 return 1;
135
136         return 0;
137 }
138
139 inline int comp_short_le_keys(const struct reiserfs_key *key1,
140                               const struct reiserfs_key *key2)
141 {
142         __u32 *p_s_1_u32, *p_s_2_u32;
143         int n_key_length = REISERFS_SHORT_KEY_LEN;
144
145         p_s_1_u32 = (__u32 *) key1;
146         p_s_2_u32 = (__u32 *) key2;
147         for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
148                 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
149                         return -1;
150                 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
151                         return 1;
152         }
153         return 0;
154 }
155
156 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
157 {
158         int version;
159         to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
160         to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
161
162         // find out version of the key
163         version = le_key_version(from);
164         to->version = version;
165         to->on_disk_key.k_offset = le_key_k_offset(version, from);
166         to->on_disk_key.k_type = le_key_k_type(version, from);
167 }
168
169 // this does not say which one is bigger, it only returns 1 if keys
170 // are not equal, 0 otherwise
171 inline int comp_le_keys(const struct reiserfs_key *k1,
172                         const struct reiserfs_key *k2)
173 {
174         return memcmp(k1, k2, sizeof(struct reiserfs_key));
175 }
176
177 /**************************************************************************
178  *  Binary search toolkit function                                        *
179  *  Search for an item in the array by the item key                       *
180  *  Returns:    1 if found,  0 if not found;                              *
181  *        *p_n_pos = number of the searched element if found, else the    *
182  *        number of the first element that is larger than p_v_key.        *
183  **************************************************************************/
184 /* For those not familiar with binary search: n_lbound is the leftmost item that it
185  could be, n_rbound the rightmost item that it could be.  We examine the item
186  halfway between n_lbound and n_rbound, and that tells us either that we can increase
187  n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
188  there are no possible items, and we have not found it. With each examination we
189  cut the number of possible items it could be by one more than half rounded down,
190  or we find it. */
191 static inline int bin_search(const void *p_v_key,       /* Key to search for.                   */
192                              const void *p_v_base,      /* First item in the array.             */
193                              int p_n_num,       /* Number of items in the array.        */
194                              int p_n_width,     /* Item size in the array.
195                                                    searched. Lest the reader be
196                                                    confused, note that this is crafted
197                                                    as a general function, and when it
198                                                    is applied specifically to the array
199                                                    of item headers in a node, p_n_width
200                                                    is actually the item header size not
201                                                    the item size.                      */
202                              int *p_n_pos       /* Number of the searched for element. */
203     )
204 {
205         int n_rbound, n_lbound, n_j;
206
207         for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
208              n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
209                 switch (comp_keys
210                         ((struct reiserfs_key *)((char *)p_v_base +
211                                                  n_j * p_n_width),
212                          (struct cpu_key *)p_v_key)) {
213                 case -1:
214                         n_lbound = n_j + 1;
215                         continue;
216                 case 1:
217                         n_rbound = n_j - 1;
218                         continue;
219                 case 0:
220                         *p_n_pos = n_j;
221                         return ITEM_FOUND;      /* Key found in the array.  */
222                 }
223
224         /* bin_search did not find given key, it returns position of key,
225            that is minimal and greater than the given one. */
226         *p_n_pos = n_lbound;
227         return ITEM_NOT_FOUND;
228 }
229
230 #ifdef CONFIG_REISERFS_CHECK
231 extern struct tree_balance *cur_tb;
232 #endif
233
234 /* Minimal possible key. It is never in the tree. */
235 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
236
237 /* Maximal possible key. It is never in the tree. */
238 static const struct reiserfs_key MAX_KEY = {
239         __constant_cpu_to_le32(0xffffffff),
240         __constant_cpu_to_le32(0xffffffff),
241         {{__constant_cpu_to_le32(0xffffffff),
242           __constant_cpu_to_le32(0xffffffff)},}
243 };
244
245 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
246    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
247    the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
248    case we return a special key, either MIN_KEY or MAX_KEY. */
249 static inline const struct reiserfs_key *get_lkey(const struct path
250                                                   *p_s_chk_path,
251                                                   const struct super_block
252                                                   *p_s_sb)
253 {
254         int n_position, n_path_offset = p_s_chk_path->path_length;
255         struct buffer_head *p_s_parent;
256
257         RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
258                "PAP-5010: invalid offset in the path");
259
260         /* While not higher in path than first element. */
261         while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
262
263                 RFALSE(!buffer_uptodate
264                        (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
265                        "PAP-5020: parent is not uptodate");
266
267                 /* Parent at the path is not in the tree now. */
268                 if (!B_IS_IN_TREE
269                     (p_s_parent =
270                      PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
271                         return &MAX_KEY;
272                 /* Check whether position in the parent is correct. */
273                 if ((n_position =
274                      PATH_OFFSET_POSITION(p_s_chk_path,
275                                           n_path_offset)) >
276                     B_NR_ITEMS(p_s_parent))
277                         return &MAX_KEY;
278                 /* Check whether parent at the path really points to the child. */
279                 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
280                     PATH_OFFSET_PBUFFER(p_s_chk_path,
281                                         n_path_offset + 1)->b_blocknr)
282                         return &MAX_KEY;
283                 /* Return delimiting key if position in the parent is not equal to zero. */
284                 if (n_position)
285                         return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
286         }
287         /* Return MIN_KEY if we are in the root of the buffer tree. */
288         if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
289             b_blocknr == SB_ROOT_BLOCK(p_s_sb))
290                 return &MIN_KEY;
291         return &MAX_KEY;
292 }
293
294 /* Get delimiting key of the buffer at the path and its right neighbor. */
295 inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
296                                            const struct super_block *p_s_sb)
297 {
298         int n_position, n_path_offset = p_s_chk_path->path_length;
299         struct buffer_head *p_s_parent;
300
301         RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
302                "PAP-5030: invalid offset in the path");
303
304         while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
305
306                 RFALSE(!buffer_uptodate
307                        (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
308                        "PAP-5040: parent is not uptodate");
309
310                 /* Parent at the path is not in the tree now. */
311                 if (!B_IS_IN_TREE
312                     (p_s_parent =
313                      PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
314                         return &MIN_KEY;
315                 /* Check whether position in the parent is correct. */
316                 if ((n_position =
317                      PATH_OFFSET_POSITION(p_s_chk_path,
318                                           n_path_offset)) >
319                     B_NR_ITEMS(p_s_parent))
320                         return &MIN_KEY;
321                 /* Check whether parent at the path really points to the child. */
322                 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
323                     PATH_OFFSET_PBUFFER(p_s_chk_path,
324                                         n_path_offset + 1)->b_blocknr)
325                         return &MIN_KEY;
326                 /* Return delimiting key if position in the parent is not the last one. */
327                 if (n_position != B_NR_ITEMS(p_s_parent))
328                         return B_N_PDELIM_KEY(p_s_parent, n_position);
329         }
330         /* Return MAX_KEY if we are in the root of the buffer tree. */
331         if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
332             b_blocknr == SB_ROOT_BLOCK(p_s_sb))
333                 return &MAX_KEY;
334         return &MIN_KEY;
335 }
336
337 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
338 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
339    the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
340    buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
341    this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
342 static inline int key_in_buffer(struct path *p_s_chk_path,      /* Path which should be checked.  */
343                                 const struct cpu_key *p_s_key,  /* Key which should be checked.   */
344                                 struct super_block *p_s_sb      /* Super block pointer.           */
345     )
346 {
347
348         RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
349                || p_s_chk_path->path_length > MAX_HEIGHT,
350                "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
351                p_s_key, p_s_chk_path->path_length);
352         RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
353                "PAP-5060: device must not be NODEV");
354
355         if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1)
356                 /* left delimiting key is bigger, that the key we look for */
357                 return 0;
358         //  if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
359         if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1)
360                 /* p_s_key must be less than right delimitiing key */
361                 return 0;
362         return 1;
363 }
364
365 inline void decrement_bcount(struct buffer_head *p_s_bh)
366 {
367         if (p_s_bh) {
368                 if (atomic_read(&(p_s_bh->b_count))) {
369                         put_bh(p_s_bh);
370                         return;
371                 }
372                 reiserfs_panic(NULL,
373                                "PAP-5070: decrement_bcount: trying to free free buffer %b",
374                                p_s_bh);
375         }
376 }
377
378 /* Decrement b_count field of the all buffers in the path. */
379 void decrement_counters_in_path(struct path *p_s_search_path)
380 {
381         int n_path_offset = p_s_search_path->path_length;
382
383         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
384                n_path_offset > EXTENDED_MAX_HEIGHT - 1,
385                "PAP-5080: invalid path offset of %d", n_path_offset);
386
387         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
388                 struct buffer_head *bh;
389
390                 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
391                 decrement_bcount(bh);
392         }
393         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
394 }
395
396 int reiserfs_check_path(struct path *p)
397 {
398         RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
399                "path not properly relsed");
400         return 0;
401 }
402
403 /* Release all buffers in the path. Restore dirty bits clean
404 ** when preparing the buffer for the log
405 **
406 ** only called from fix_nodes()
407 */
408 void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
409 {
410         int n_path_offset = p_s_search_path->path_length;
411
412         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
413                "clm-4000: invalid path offset");
414
415         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
416                 reiserfs_restore_prepared_buffer(s,
417                                                  PATH_OFFSET_PBUFFER
418                                                  (p_s_search_path,
419                                                   n_path_offset));
420                 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
421         }
422         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
423 }
424
425 /* Release all buffers in the path. */
426 void pathrelse(struct path *p_s_search_path)
427 {
428         int n_path_offset = p_s_search_path->path_length;
429
430         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
431                "PAP-5090: invalid path offset");
432
433         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
434                 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
435
436         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
437 }
438
439 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
440 {
441         struct block_head *blkh;
442         struct item_head *ih;
443         int used_space;
444         int prev_location;
445         int i;
446         int nr;
447
448         blkh = (struct block_head *)buf;
449         if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
450                 reiserfs_warning(NULL,
451                                  "is_leaf: this should be caught earlier");
452                 return 0;
453         }
454
455         nr = blkh_nr_item(blkh);
456         if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
457                 /* item number is too big or too small */
458                 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh);
459                 return 0;
460         }
461         ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
462         used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
463         if (used_space != blocksize - blkh_free_space(blkh)) {
464                 /* free space does not match to calculated amount of use space */
465                 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z",
466                                  bh);
467                 return 0;
468         }
469         // FIXME: it is_leaf will hit performance too much - we may have
470         // return 1 here
471
472         /* check tables of item heads */
473         ih = (struct item_head *)(buf + BLKH_SIZE);
474         prev_location = blocksize;
475         for (i = 0; i < nr; i++, ih++) {
476                 if (le_ih_k_type(ih) == TYPE_ANY) {
477                         reiserfs_warning(NULL,
478                                          "is_leaf: wrong item type for item %h",
479                                          ih);
480                         return 0;
481                 }
482                 if (ih_location(ih) >= blocksize
483                     || ih_location(ih) < IH_SIZE * nr) {
484                         reiserfs_warning(NULL,
485                                          "is_leaf: item location seems wrong: %h",
486                                          ih);
487                         return 0;
488                 }
489                 if (ih_item_len(ih) < 1
490                     || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
491                         reiserfs_warning(NULL,
492                                          "is_leaf: item length seems wrong: %h",
493                                          ih);
494                         return 0;
495                 }
496                 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
497                         reiserfs_warning(NULL,
498                                          "is_leaf: item location seems wrong (second one): %h",
499                                          ih);
500                         return 0;
501                 }
502                 prev_location = ih_location(ih);
503         }
504
505         // one may imagine much more checks
506         return 1;
507 }
508
509 /* returns 1 if buf looks like an internal node, 0 otherwise */
510 static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
511 {
512         struct block_head *blkh;
513         int nr;
514         int used_space;
515
516         blkh = (struct block_head *)buf;
517         nr = blkh_level(blkh);
518         if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
519                 /* this level is not possible for internal nodes */
520                 reiserfs_warning(NULL,
521                                  "is_internal: this should be caught earlier");
522                 return 0;
523         }
524
525         nr = blkh_nr_item(blkh);
526         if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
527                 /* for internal which is not root we might check min number of keys */
528                 reiserfs_warning(NULL,
529                                  "is_internal: number of key seems wrong: %z",
530                                  bh);
531                 return 0;
532         }
533
534         used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
535         if (used_space != blocksize - blkh_free_space(blkh)) {
536                 reiserfs_warning(NULL,
537                                  "is_internal: free space seems wrong: %z", bh);
538                 return 0;
539         }
540         // one may imagine much more checks
541         return 1;
542 }
543
544 // make sure that bh contains formatted node of reiserfs tree of
545 // 'level'-th level
546 static int is_tree_node(struct buffer_head *bh, int level)
547 {
548         if (B_LEVEL(bh) != level) {
549                 reiserfs_warning(NULL,
550                                  "is_tree_node: node level %d does not match to the expected one %d",
551                                  B_LEVEL(bh), level);
552                 return 0;
553         }
554         if (level == DISK_LEAF_NODE_LEVEL)
555                 return is_leaf(bh->b_data, bh->b_size, bh);
556
557         return is_internal(bh->b_data, bh->b_size, bh);
558 }
559
560 #define SEARCH_BY_KEY_READA 16
561
562 /* The function is NOT SCHEDULE-SAFE! */
563 static void search_by_key_reada(struct super_block *s,
564                                 struct buffer_head **bh,
565                                 unsigned long *b, int num)
566 {
567         int i, j;
568
569         for (i = 0; i < num; i++) {
570                 bh[i] = sb_getblk(s, b[i]);
571         }
572         for (j = 0; j < i; j++) {
573                 /*
574                  * note, this needs attention if we are getting rid of the BKL
575                  * you have to make sure the prepared bit isn't set on this buffer
576                  */
577                 if (!buffer_uptodate(bh[j]))
578                         ll_rw_block(READA, 1, bh + j);
579                 brelse(bh[j]);
580         }
581 }
582
583 /**************************************************************************
584  * Algorithm   SearchByKey                                                *
585  *             look for item in the Disk S+Tree by its key                *
586  * Input:  p_s_sb   -  super block                                        *
587  *         p_s_key  - pointer to the key to search                        *
588  * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
589  *         p_s_search_path - path from the root to the needed leaf        *
590  **************************************************************************/
591
592 /* This function fills up the path from the root to the leaf as it
593    descends the tree looking for the key.  It uses reiserfs_bread to
594    try to find buffers in the cache given their block number.  If it
595    does not find them in the cache it reads them from disk.  For each
596    node search_by_key finds using reiserfs_bread it then uses
597    bin_search to look through that node.  bin_search will find the
598    position of the block_number of the next node if it is looking
599    through an internal node.  If it is looking through a leaf node
600    bin_search will find the position of the item which has key either
601    equal to given key, or which is the maximal key less than the given
602    key.  search_by_key returns a path that must be checked for the
603    correctness of the top of the path but need not be checked for the
604    correctness of the bottom of the path */
605 /* The function is NOT SCHEDULE-SAFE! */
606 int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,    /* Key to search. */
607                   struct path *p_s_search_path, /* This structure was
608                                                    allocated and initialized
609                                                    by the calling
610                                                    function. It is filled up
611                                                    by this function.  */
612                   int n_stop_level      /* How far down the tree to search. To
613                                            stop at leaf level - set to
614                                            DISK_LEAF_NODE_LEVEL */
615     )
616 {
617         int n_block_number;
618         int expected_level;
619         struct buffer_head *p_s_bh;
620         struct path_element *p_s_last_element;
621         int n_node_level, n_retval;
622         int right_neighbor_of_leaf_node;
623         int fs_gen;
624         struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
625         unsigned long reada_blocks[SEARCH_BY_KEY_READA];
626         int reada_count = 0;
627
628 #ifdef CONFIG_REISERFS_CHECK
629         int n_repeat_counter = 0;
630 #endif
631
632         PROC_INFO_INC(p_s_sb, search_by_key);
633
634         /* As we add each node to a path we increase its count.  This means that
635            we must be careful to release all nodes in a path before we either
636            discard the path struct or re-use the path struct, as we do here. */
637
638         decrement_counters_in_path(p_s_search_path);
639
640         right_neighbor_of_leaf_node = 0;
641
642         /* With each iteration of this loop we search through the items in the
643            current node, and calculate the next current node(next path element)
644            for the next iteration of this loop.. */
645         n_block_number = SB_ROOT_BLOCK(p_s_sb);
646         expected_level = -1;
647         while (1) {
648
649 #ifdef CONFIG_REISERFS_CHECK
650                 if (!(++n_repeat_counter % 50000))
651                         reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:"
652                                          "there were %d iterations of while loop "
653                                          "looking for key %K",
654                                          current->comm, n_repeat_counter,
655                                          p_s_key);
656 #endif
657
658                 /* prep path to have another element added to it. */
659                 p_s_last_element =
660                     PATH_OFFSET_PELEMENT(p_s_search_path,
661                                          ++p_s_search_path->path_length);
662                 fs_gen = get_generation(p_s_sb);
663
664                 /* Read the next tree node, and set the last element in the path to
665                    have a pointer to it. */
666                 if ((p_s_bh = p_s_last_element->pe_buffer =
667                      sb_getblk(p_s_sb, n_block_number))) {
668                         if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
669                                 search_by_key_reada(p_s_sb, reada_bh,
670                                                     reada_blocks, reada_count);
671                         }
672                         ll_rw_block(READ, 1, &p_s_bh);
673                         wait_on_buffer(p_s_bh);
674                         if (!buffer_uptodate(p_s_bh))
675                                 goto io_error;
676                 } else {
677                       io_error:
678                         p_s_search_path->path_length--;
679                         pathrelse(p_s_search_path);
680                         return IO_ERROR;
681                 }
682                 reada_count = 0;
683                 if (expected_level == -1)
684                         expected_level = SB_TREE_HEIGHT(p_s_sb);
685                 expected_level--;
686
687                 /* It is possible that schedule occurred. We must check whether the key
688                    to search is still in the tree rooted from the current buffer. If
689                    not then repeat search from the root. */
690                 if (fs_changed(fs_gen, p_s_sb) &&
691                     (!B_IS_IN_TREE(p_s_bh) ||
692                      B_LEVEL(p_s_bh) != expected_level ||
693                      !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
694                         PROC_INFO_INC(p_s_sb, search_by_key_fs_changed);
695                         PROC_INFO_INC(p_s_sb, search_by_key_restarted);
696                         PROC_INFO_INC(p_s_sb,
697                                       sbk_restarted[expected_level - 1]);
698                         decrement_counters_in_path(p_s_search_path);
699
700                         /* Get the root block number so that we can repeat the search
701                            starting from the root. */
702                         n_block_number = SB_ROOT_BLOCK(p_s_sb);
703                         expected_level = -1;
704                         right_neighbor_of_leaf_node = 0;
705
706                         /* repeat search from the root */
707                         continue;
708                 }
709
710                 /* only check that the key is in the buffer if p_s_key is not
711                    equal to the MAX_KEY. Latter case is only possible in
712                    "finish_unfinished()" processing during mount. */
713                 RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
714                        !key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
715                        "PAP-5130: key is not in the buffer");
716 #ifdef CONFIG_REISERFS_CHECK
717                 if (cur_tb) {
718                         print_cur_tb("5140");
719                         reiserfs_panic(p_s_sb,
720                                        "PAP-5140: search_by_key: schedule occurred in do_balance!");
721                 }
722 #endif
723
724                 // make sure, that the node contents look like a node of
725                 // certain level
726                 if (!is_tree_node(p_s_bh, expected_level)) {
727                         reiserfs_warning(p_s_sb, "vs-5150: search_by_key: "
728                                          "invalid format found in block %ld. Fsck?",
729                                          p_s_bh->b_blocknr);
730                         pathrelse(p_s_search_path);
731                         return IO_ERROR;
732                 }
733
734                 /* ok, we have acquired next formatted node in the tree */
735                 n_node_level = B_LEVEL(p_s_bh);
736
737                 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1);
738
739                 RFALSE(n_node_level < n_stop_level,
740                        "vs-5152: tree level (%d) is less than stop level (%d)",
741                        n_node_level, n_stop_level);
742
743                 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
744                                       B_NR_ITEMS(p_s_bh),
745                                       (n_node_level ==
746                                        DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
747                                       KEY_SIZE,
748                                       &(p_s_last_element->pe_position));
749                 if (n_node_level == n_stop_level) {
750                         return n_retval;
751                 }
752
753                 /* we are not in the stop level */
754                 if (n_retval == ITEM_FOUND)
755                         /* item has been found, so we choose the pointer which is to the right of the found one */
756                         p_s_last_element->pe_position++;
757
758                 /* if item was not found we choose the position which is to
759                    the left of the found item. This requires no code,
760                    bin_search did it already. */
761
762                 /* So we have chosen a position in the current node which is
763                    an internal node.  Now we calculate child block number by
764                    position in the node. */
765                 n_block_number =
766                     B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
767
768                 /* if we are going to read leaf nodes, try for read ahead as well */
769                 if ((p_s_search_path->reada & PATH_READA) &&
770                     n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
771                         int pos = p_s_last_element->pe_position;
772                         int limit = B_NR_ITEMS(p_s_bh);
773                         struct reiserfs_key *le_key;
774
775                         if (p_s_search_path->reada & PATH_READA_BACK)
776                                 limit = 0;
777                         while (reada_count < SEARCH_BY_KEY_READA) {
778                                 if (pos == limit)
779                                         break;
780                                 reada_blocks[reada_count++] =
781                                     B_N_CHILD_NUM(p_s_bh, pos);
782                                 if (p_s_search_path->reada & PATH_READA_BACK)
783                                         pos--;
784                                 else
785                                         pos++;
786
787                                 /*
788                                  * check to make sure we're in the same object
789                                  */
790                                 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
791                                 if (le32_to_cpu(le_key->k_objectid) !=
792                                     p_s_key->on_disk_key.k_objectid) {
793                                         break;
794                                 }
795                         }
796                 }
797         }
798 }
799
800 /* Form the path to an item and position in this item which contains
801    file byte defined by p_s_key. If there is no such item
802    corresponding to the key, we point the path to the item with
803    maximal key less than p_s_key, and *p_n_pos_in_item is set to one
804    past the last entry/byte in the item.  If searching for entry in a
805    directory item, and it is not found, *p_n_pos_in_item is set to one
806    entry more than the entry with maximal key which is less than the
807    sought key.
808
809    Note that if there is no entry in this same node which is one more,
810    then we point to an imaginary entry.  for direct items, the
811    position is in units of bytes, for indirect items the position is
812    in units of blocknr entries, for directory items the position is in
813    units of directory entries.  */
814
815 /* The function is NOT SCHEDULE-SAFE! */
816 int search_for_position_by_key(struct super_block *p_s_sb,      /* Pointer to the super block.          */
817                                const struct cpu_key *p_cpu_key, /* Key to search (cpu variable)         */
818                                struct path *p_s_search_path     /* Filled up by this function.          */
819     )
820 {
821         struct item_head *p_le_ih;      /* pointer to on-disk structure */
822         int n_blk_size;
823         loff_t item_offset, offset;
824         struct reiserfs_dir_entry de;
825         int retval;
826
827         /* If searching for directory entry. */
828         if (is_direntry_cpu_key(p_cpu_key))
829                 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path,
830                                            &de);
831
832         /* If not searching for directory entry. */
833
834         /* If item is found. */
835         retval = search_item(p_s_sb, p_cpu_key, p_s_search_path);
836         if (retval == IO_ERROR)
837                 return retval;
838         if (retval == ITEM_FOUND) {
839
840                 RFALSE(!ih_item_len
841                        (B_N_PITEM_HEAD
842                         (PATH_PLAST_BUFFER(p_s_search_path),
843                          PATH_LAST_POSITION(p_s_search_path))),
844                        "PAP-5165: item length equals zero");
845
846                 pos_in_item(p_s_search_path) = 0;
847                 return POSITION_FOUND;
848         }
849
850         RFALSE(!PATH_LAST_POSITION(p_s_search_path),
851                "PAP-5170: position equals zero");
852
853         /* Item is not found. Set path to the previous item. */
854         p_le_ih =
855             B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
856                            --PATH_LAST_POSITION(p_s_search_path));
857         n_blk_size = p_s_sb->s_blocksize;
858
859         if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
860                 return FILE_NOT_FOUND;
861         }
862         // FIXME: quite ugly this far
863
864         item_offset = le_ih_k_offset(p_le_ih);
865         offset = cpu_key_k_offset(p_cpu_key);
866
867         /* Needed byte is contained in the item pointed to by the path. */
868         if (item_offset <= offset &&
869             item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
870                 pos_in_item(p_s_search_path) = offset - item_offset;
871                 if (is_indirect_le_ih(p_le_ih)) {
872                         pos_in_item(p_s_search_path) /= n_blk_size;
873                 }
874                 return POSITION_FOUND;
875         }
876
877         /* Needed byte is not contained in the item pointed to by the
878            path. Set pos_in_item out of the item. */
879         if (is_indirect_le_ih(p_le_ih))
880                 pos_in_item(p_s_search_path) =
881                     ih_item_len(p_le_ih) / UNFM_P_SIZE;
882         else
883                 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
884
885         return POSITION_NOT_FOUND;
886 }
887
888 /* Compare given item and item pointed to by the path. */
889 int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
890 {
891         struct buffer_head *p_s_bh;
892         struct item_head *ih;
893
894         /* Last buffer at the path is not in the tree. */
895         if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
896                 return 1;
897
898         /* Last path position is invalid. */
899         if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
900                 return 1;
901
902         /* we need only to know, whether it is the same item */
903         ih = get_ih(p_s_path);
904         return memcmp(stored_ih, ih, IH_SIZE);
905 }
906
907 /* unformatted nodes are not logged anymore, ever.  This is safe
908 ** now
909 */
910 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
911
912 // block can not be forgotten as it is in I/O or held by someone
913 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
914
915 // prepare for delete or cut of direct item
916 static inline int prepare_for_direct_item(struct path *path,
917                                           struct item_head *le_ih,
918                                           struct inode *inode,
919                                           loff_t new_file_length, int *cut_size)
920 {
921         loff_t round_len;
922
923         if (new_file_length == max_reiserfs_offset(inode)) {
924                 /* item has to be deleted */
925                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
926                 return M_DELETE;
927         }
928         // new file gets truncated
929         if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
930                 // 
931                 round_len = ROUND_UP(new_file_length);
932                 /* this was n_new_file_length < le_ih ... */
933                 if (round_len < le_ih_k_offset(le_ih)) {
934                         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
935                         return M_DELETE;        /* Delete this item. */
936                 }
937                 /* Calculate first position and size for cutting from item. */
938                 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
939                 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
940
941                 return M_CUT;   /* Cut from this item. */
942         }
943
944         // old file: items may have any length
945
946         if (new_file_length < le_ih_k_offset(le_ih)) {
947                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
948                 return M_DELETE;        /* Delete this item. */
949         }
950         /* Calculate first position and size for cutting from item. */
951         *cut_size = -(ih_item_len(le_ih) -
952                       (pos_in_item(path) =
953                        new_file_length + 1 - le_ih_k_offset(le_ih)));
954         return M_CUT;           /* Cut from this item. */
955 }
956
957 static inline int prepare_for_direntry_item(struct path *path,
958                                             struct item_head *le_ih,
959                                             struct inode *inode,
960                                             loff_t new_file_length,
961                                             int *cut_size)
962 {
963         if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
964             new_file_length == max_reiserfs_offset(inode)) {
965                 RFALSE(ih_entry_count(le_ih) != 2,
966                        "PAP-5220: incorrect empty directory item (%h)", le_ih);
967                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
968                 return M_DELETE;        /* Delete the directory item containing "." and ".." entry. */
969         }
970
971         if (ih_entry_count(le_ih) == 1) {
972                 /* Delete the directory item such as there is one record only
973                    in this item */
974                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
975                 return M_DELETE;
976         }
977
978         /* Cut one record from the directory item. */
979         *cut_size =
980             -(DEH_SIZE +
981               entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
982         return M_CUT;
983 }
984
985 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
986
987 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
988     If the path points to an indirect item, remove some number of its unformatted nodes.
989     In case of file truncate calculate whether this item must be deleted/truncated or last
990     unformatted node of this item will be converted to a direct item.
991     This function returns a determination of what balance mode the calling function should employ. */
992 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed
993                                                                                                                                                                                    from end of the file. */
994                                       int *p_n_cut_size, unsigned long long n_new_file_length   /* MAX_KEY_OFFSET in case of delete. */
995     )
996 {
997         struct super_block *p_s_sb = inode->i_sb;
998         struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
999         struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1000
1001         BUG_ON(!th->t_trans_id);
1002
1003         /* Stat_data item. */
1004         if (is_statdata_le_ih(p_le_ih)) {
1005
1006                 RFALSE(n_new_file_length != max_reiserfs_offset(inode),
1007                        "PAP-5210: mode must be M_DELETE");
1008
1009                 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1010                 return M_DELETE;
1011         }
1012
1013         /* Directory item. */
1014         if (is_direntry_le_ih(p_le_ih))
1015                 return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
1016                                                  n_new_file_length,
1017                                                  p_n_cut_size);
1018
1019         /* Direct item. */
1020         if (is_direct_le_ih(p_le_ih))
1021                 return prepare_for_direct_item(p_s_path, p_le_ih, inode,
1022                                                n_new_file_length, p_n_cut_size);
1023
1024         /* Case of an indirect item. */
1025         {
1026             int blk_size = p_s_sb->s_blocksize;
1027             struct item_head s_ih;
1028             int need_re_search;
1029             int delete = 0;
1030             int result = M_CUT;
1031             int pos = 0;
1032
1033             if ( n_new_file_length == max_reiserfs_offset (inode) ) {
1034                 /* prepare_for_delete_or_cut() is called by
1035                  * reiserfs_delete_item() */
1036                 n_new_file_length = 0;
1037                 delete = 1;
1038             }
1039
1040             do {
1041                 need_re_search = 0;
1042                 *p_n_cut_size = 0;
1043                 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1044                 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1045                 pos = I_UNFM_NUM(&s_ih);
1046
1047                 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
1048                     __u32 *unfm, block;
1049
1050                     /* Each unformatted block deletion may involve one additional
1051                      * bitmap block into the transaction, thereby the initial
1052                      * journal space reservation might not be enough. */
1053                     if (!delete && (*p_n_cut_size) != 0 &&
1054                         reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1055                         break;
1056                     }
1057
1058                     unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
1059                     block = get_block_num(unfm, 0);
1060
1061                     if (block != 0) {
1062                         reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1063                         put_block_num(unfm, 0, 0);
1064                         journal_mark_dirty (th, p_s_sb, p_s_bh);
1065                         reiserfs_free_block(th, inode, block, 1);
1066                     }
1067
1068                     cond_resched();
1069
1070                     if (item_moved (&s_ih, p_s_path))  {
1071                         need_re_search = 1;
1072                         break;
1073                     }
1074
1075                     pos --;
1076                     (*p_n_removed) ++;
1077                     (*p_n_cut_size) -= UNFM_P_SIZE;
1078
1079                     if (pos == 0) {
1080                         (*p_n_cut_size) -= IH_SIZE;
1081                         result = M_DELETE;
1082                         break;
1083                     }
1084                 }
1085                 /* a trick.  If the buffer has been logged, this will do nothing.  If
1086                 ** we've broken the loop without logging it, it will restore the
1087                 ** buffer */
1088                 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1089             } while (need_re_search &&
1090                      search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND);
1091             pos_in_item(p_s_path) = pos * UNFM_P_SIZE;
1092
1093             if (*p_n_cut_size == 0) {
1094                 /* Nothing were cut. maybe convert last unformatted node to the
1095                  * direct item? */
1096                 result = M_CONVERT;
1097             }
1098             return result;
1099         }
1100 }
1101
1102 /* Calculate number of bytes which will be deleted or cut during balance */
1103 static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1104 {
1105         int n_del_size;
1106         struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1107
1108         if (is_statdata_le_ih(p_le_ih))
1109                 return 0;
1110
1111         n_del_size =
1112             (c_mode ==
1113              M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1114         if (is_direntry_le_ih(p_le_ih)) {
1115                 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1116                 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1117                 // empty size.  ick. FIXME, is this right?
1118                 //
1119                 return n_del_size;
1120         }
1121
1122         if (is_indirect_le_ih(p_le_ih))
1123                 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih);
1124         return n_del_size;
1125 }
1126
1127 static void init_tb_struct(struct reiserfs_transaction_handle *th,
1128                            struct tree_balance *p_s_tb,
1129                            struct super_block *p_s_sb,
1130                            struct path *p_s_path, int n_size)
1131 {
1132
1133         BUG_ON(!th->t_trans_id);
1134
1135         memset(p_s_tb, '\0', sizeof(struct tree_balance));
1136         p_s_tb->transaction_handle = th;
1137         p_s_tb->tb_sb = p_s_sb;
1138         p_s_tb->tb_path = p_s_path;
1139         PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1140         PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1141         p_s_tb->insert_size[0] = n_size;
1142 }
1143
1144 void padd_item(char *item, int total_length, int length)
1145 {
1146         int i;
1147
1148         for (i = total_length; i > length;)
1149                 item[--i] = 0;
1150 }
1151
1152 #ifdef REISERQUOTA_DEBUG
1153 char key2type(struct reiserfs_key *ih)
1154 {
1155         if (is_direntry_le_key(2, ih))
1156                 return 'd';
1157         if (is_direct_le_key(2, ih))
1158                 return 'D';
1159         if (is_indirect_le_key(2, ih))
1160                 return 'i';
1161         if (is_statdata_le_key(2, ih))
1162                 return 's';
1163         return 'u';
1164 }
1165
1166 char head2type(struct item_head *ih)
1167 {
1168         if (is_direntry_le_ih(ih))
1169                 return 'd';
1170         if (is_direct_le_ih(ih))
1171                 return 'D';
1172         if (is_indirect_le_ih(ih))
1173                 return 'i';
1174         if (is_statdata_le_ih(ih))
1175                 return 's';
1176         return 'u';
1177 }
1178 #endif
1179
1180 /* Delete object item. */
1181 int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the deleted item. */
1182                          const struct cpu_key *p_s_item_key,    /* Key to search for the deleted item.  */
1183                          struct inode *p_s_inode,       /* inode is here just to update i_blocks and quotas */
1184                          struct buffer_head *p_s_un_bh)
1185 {                               /* NULL or unformatted node pointer.    */
1186         struct super_block *p_s_sb = p_s_inode->i_sb;
1187         struct tree_balance s_del_balance;
1188         struct item_head s_ih;
1189         struct item_head *q_ih;
1190         int quota_cut_bytes;
1191         int n_ret_value, n_del_size, n_removed;
1192
1193 #ifdef CONFIG_REISERFS_CHECK
1194         char c_mode;
1195         int n_iter = 0;
1196 #endif
1197
1198         BUG_ON(!th->t_trans_id);
1199
1200         init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
1201                        0 /*size is unknown */ );
1202
1203         while (1) {
1204                 n_removed = 0;
1205
1206 #ifdef CONFIG_REISERFS_CHECK
1207                 n_iter++;
1208                 c_mode =
1209 #endif
1210                     prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1211                                               p_s_item_key, &n_removed,
1212                                               &n_del_size,
1213                                               max_reiserfs_offset(p_s_inode));
1214
1215                 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1216
1217                 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1218                 s_del_balance.insert_size[0] = n_del_size;
1219
1220                 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1221                 if (n_ret_value != REPEAT_SEARCH)
1222                         break;
1223
1224                 PROC_INFO_INC(p_s_sb, delete_item_restarted);
1225
1226                 // file system changed, repeat search
1227                 n_ret_value =
1228                     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1229                 if (n_ret_value == IO_ERROR)
1230                         break;
1231                 if (n_ret_value == FILE_NOT_FOUND) {
1232                         reiserfs_warning(p_s_sb,
1233                                          "vs-5340: reiserfs_delete_item: "
1234                                          "no items of the file %K found",
1235                                          p_s_item_key);
1236                         break;
1237                 }
1238         }                       /* while (1) */
1239
1240         if (n_ret_value != CARRY_ON) {
1241                 unfix_nodes(&s_del_balance);
1242                 return 0;
1243         }
1244         // reiserfs_delete_item returns item length when success
1245         n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1246         q_ih = get_ih(p_s_path);
1247         quota_cut_bytes = ih_item_len(q_ih);
1248
1249         /* hack so the quota code doesn't have to guess if the file
1250          ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1251          ** We test the offset because the tail might have been
1252          ** split into multiple items, and we only want to decrement for
1253          ** the unfm node once
1254          */
1255         if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1256                 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1257                         quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1258                 } else {
1259                         quota_cut_bytes = 0;
1260                 }
1261         }
1262
1263         if (p_s_un_bh) {
1264                 int off;
1265                 char *data;
1266
1267                 /* We are in direct2indirect conversion, so move tail contents
1268                    to the unformatted node */
1269                 /* note, we do the copy before preparing the buffer because we
1270                  ** don't care about the contents of the unformatted node yet.
1271                  ** the only thing we really care about is the direct item's data
1272                  ** is in the unformatted node.
1273                  **
1274                  ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1275                  ** the unformatted node, which might schedule, meaning we'd have to
1276                  ** loop all the way back up to the start of the while loop.
1277                  **
1278                  ** The unformatted node must be dirtied later on.  We can't be
1279                  ** sure here if the entire tail has been deleted yet.
1280                  **
1281                  ** p_s_un_bh is from the page cache (all unformatted nodes are
1282                  ** from the page cache) and might be a highmem page.  So, we
1283                  ** can't use p_s_un_bh->b_data.
1284                  ** -clm
1285                  */
1286
1287                 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1288                 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1289                 memcpy(data + off,
1290                        B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1291                        n_ret_value);
1292                 kunmap_atomic(data, KM_USER0);
1293         }
1294         /* Perform balancing after all resources have been collected at once. */
1295         do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1296
1297 #ifdef REISERQUOTA_DEBUG
1298         reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1299                        "reiserquota delete_item(): freeing %u, id=%u type=%c",
1300                        quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1301 #endif
1302         DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
1303         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1304
1305         /* Return deleted body length */
1306         return n_ret_value;
1307 }
1308
1309 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1310
1311  deletion of the body of the object is performed by iput(), with the
1312  result that if multiple processes are operating on a file, the
1313  deletion of the body of the file is deferred until the last process
1314  that has an open inode performs its iput().
1315
1316  writes and truncates are protected from collisions by use of
1317  semaphores.
1318
1319  creates, linking, and mknod are protected from collisions with other
1320  processes by making the reiserfs_add_entry() the last step in the
1321  creation, and then rolling back all changes if there was a collision.
1322  - Hans
1323 */
1324
1325 /* this deletes item which never gets split */
1326 void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1327                                 struct inode *inode, struct reiserfs_key *key)
1328 {
1329         struct tree_balance tb;
1330         INITIALIZE_PATH(path);
1331         int item_len = 0;
1332         int tb_init = 0;
1333         struct cpu_key cpu_key;
1334         int retval;
1335         int quota_cut_bytes = 0;
1336
1337         BUG_ON(!th->t_trans_id);
1338
1339         le_key2cpu_key(&cpu_key, key);
1340
1341         while (1) {
1342                 retval = search_item(th->t_super, &cpu_key, &path);
1343                 if (retval == IO_ERROR) {
1344                         reiserfs_warning(th->t_super,
1345                                          "vs-5350: reiserfs_delete_solid_item: "
1346                                          "i/o failure occurred trying to delete %K",
1347                                          &cpu_key);
1348                         break;
1349                 }
1350                 if (retval != ITEM_FOUND) {
1351                         pathrelse(&path);
1352                         // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1353                         if (!
1354                             ((unsigned long long)
1355                              GET_HASH_VALUE(le_key_k_offset
1356                                             (le_key_version(key), key)) == 0
1357                              && (unsigned long long)
1358                              GET_GENERATION_NUMBER(le_key_k_offset
1359                                                    (le_key_version(key),
1360                                                     key)) == 1))
1361                                 reiserfs_warning(th->t_super,
1362                                                  "vs-5355: reiserfs_delete_solid_item: %k not found",
1363                                                  key);
1364                         break;
1365                 }
1366                 if (!tb_init) {
1367                         tb_init = 1;
1368                         item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1369                         init_tb_struct(th, &tb, th->t_super, &path,
1370                                        -(IH_SIZE + item_len));
1371                 }
1372                 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1373
1374                 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1375                 if (retval == REPEAT_SEARCH) {
1376                         PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1377                         continue;
1378                 }
1379
1380                 if (retval == CARRY_ON) {
1381                         do_balance(&tb, NULL, NULL, M_DELETE);
1382                         if (inode) {    /* Should we count quota for item? (we don't count quotas for save-links) */
1383 #ifdef REISERQUOTA_DEBUG
1384                                 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1385                                                "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1386                                                quota_cut_bytes, inode->i_uid,
1387                                                key2type(key));
1388 #endif
1389                                 DLIMIT_FREE_SPACE(inode, quota_cut_bytes);
1390                                 DQUOT_FREE_SPACE_NODIRTY(inode,
1391                                                          quota_cut_bytes);
1392                         }
1393                         break;
1394                 }
1395                 // IO_ERROR, NO_DISK_SPACE, etc
1396                 reiserfs_warning(th->t_super,
1397                                  "vs-5360: reiserfs_delete_solid_item: "
1398                                  "could not delete %K due to fix_nodes failure",
1399                                  &cpu_key);
1400                 unfix_nodes(&tb);
1401                 break;
1402         }
1403
1404         reiserfs_check_path(&path);
1405 }
1406
1407 int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1408                            struct inode *inode)
1409 {
1410         int err;
1411         inode->i_size = 0;
1412         BUG_ON(!th->t_trans_id);
1413
1414         /* for directory this deletes item containing "." and ".." */
1415         err =
1416             reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1417         if (err)
1418                 return err;
1419
1420 #if defined( USE_INODE_GENERATION_COUNTER )
1421         if (!old_format_only(th->t_super)) {
1422                 __le32 *inode_generation;
1423
1424                 inode_generation =
1425                     &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1426                 *inode_generation =
1427                     cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1428         }
1429 /* USE_INODE_GENERATION_COUNTER */
1430 #endif
1431         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1432
1433         return err;
1434 }
1435
1436 static void unmap_buffers(struct page *page, loff_t pos)
1437 {
1438         struct buffer_head *bh;
1439         struct buffer_head *head;
1440         struct buffer_head *next;
1441         unsigned long tail_index;
1442         unsigned long cur_index;
1443
1444         if (page) {
1445                 if (page_has_buffers(page)) {
1446                         tail_index = pos & (PAGE_CACHE_SIZE - 1);
1447                         cur_index = 0;
1448                         head = page_buffers(page);
1449                         bh = head;
1450                         do {
1451                                 next = bh->b_this_page;
1452
1453                                 /* we want to unmap the buffers that contain the tail, and
1454                                  ** all the buffers after it (since the tail must be at the
1455                                  ** end of the file).  We don't want to unmap file data
1456                                  ** before the tail, since it might be dirty and waiting to
1457                                  ** reach disk
1458                                  */
1459                                 cur_index += bh->b_size;
1460                                 if (cur_index > tail_index) {
1461                                         reiserfs_unmap_buffer(bh);
1462                                 }
1463                                 bh = next;
1464                         } while (bh != head);
1465                         if (PAGE_SIZE == bh->b_size) {
1466                                 clear_page_dirty(page);
1467                         }
1468                 }
1469         }
1470 }
1471
1472 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1473                                     struct inode *p_s_inode,
1474                                     struct page *page,
1475                                     struct path *p_s_path,
1476                                     const struct cpu_key *p_s_item_key,
1477                                     loff_t n_new_file_size, char *p_c_mode)
1478 {
1479         struct super_block *p_s_sb = p_s_inode->i_sb;
1480         int n_block_size = p_s_sb->s_blocksize;
1481         int cut_bytes;
1482         BUG_ON(!th->t_trans_id);
1483
1484         if (n_new_file_size != p_s_inode->i_size)
1485                 BUG();
1486
1487         /* the page being sent in could be NULL if there was an i/o error
1488          ** reading in the last block.  The user will hit problems trying to
1489          ** read the file, but for now we just skip the indirect2direct
1490          */
1491         if (atomic_read(&p_s_inode->i_count) > 1 ||
1492             !tail_has_to_be_packed(p_s_inode) ||
1493             !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1494                 // leave tail in an unformatted node    
1495                 *p_c_mode = M_SKIP_BALANCING;
1496                 cut_bytes =
1497                     n_block_size - (n_new_file_size & (n_block_size - 1));
1498                 pathrelse(p_s_path);
1499                 return cut_bytes;
1500         }
1501         /* Permorm the conversion to a direct_item. */
1502         /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1503         return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1504                                n_new_file_size, p_c_mode);
1505 }
1506
1507 /* we did indirect_to_direct conversion. And we have inserted direct
1508    item successesfully, but there were no disk space to cut unfm
1509    pointer being converted. Therefore we have to delete inserted
1510    direct item(s) */
1511 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1512                                          struct inode *inode, struct path *path)
1513 {
1514         struct cpu_key tail_key;
1515         int tail_len;
1516         int removed;
1517         BUG_ON(!th->t_trans_id);
1518
1519         make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);      // !!!!
1520         tail_key.key_length = 4;
1521
1522         tail_len =
1523             (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1524         while (tail_len) {
1525                 /* look for the last byte of the tail */
1526                 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1527                     POSITION_NOT_FOUND)
1528                         reiserfs_panic(inode->i_sb,
1529                                        "vs-5615: indirect_to_direct_roll_back: found invalid item");
1530                 RFALSE(path->pos_in_item !=
1531                        ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1532                        "vs-5616: appended bytes found");
1533                 PATH_LAST_POSITION(path)--;
1534
1535                 removed =
1536                     reiserfs_delete_item(th, path, &tail_key, inode,
1537                                          NULL /*unbh not needed */ );
1538                 RFALSE(removed <= 0
1539                        || removed > tail_len,
1540                        "vs-5617: there was tail %d bytes, removed item length %d bytes",
1541                        tail_len, removed);
1542                 tail_len -= removed;
1543                 set_cpu_key_k_offset(&tail_key,
1544                                      cpu_key_k_offset(&tail_key) - removed);
1545         }
1546         reiserfs_warning(inode->i_sb,
1547                          "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1548         //mark_file_without_tail (inode);
1549         mark_inode_dirty(inode);
1550 }
1551
1552 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1553 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1554                            struct path *p_s_path,
1555                            struct cpu_key *p_s_item_key,
1556                            struct inode *p_s_inode,
1557                            struct page *page, loff_t n_new_file_size)
1558 {
1559         struct super_block *p_s_sb = p_s_inode->i_sb;
1560         /* Every function which is going to call do_balance must first
1561            create a tree_balance structure.  Then it must fill up this
1562            structure by using the init_tb_struct and fix_nodes functions.
1563            After that we can make tree balancing. */
1564         struct tree_balance s_cut_balance;
1565         struct item_head *p_le_ih;
1566         int n_cut_size = 0,     /* Amount to be cut. */
1567             n_ret_value = CARRY_ON, n_removed = 0,      /* Number of the removed unformatted nodes. */
1568             n_is_inode_locked = 0;
1569         char c_mode;            /* Mode of the balance. */
1570         int retval2 = -1;
1571         int quota_cut_bytes;
1572         loff_t tail_pos = 0;
1573
1574         BUG_ON(!th->t_trans_id);
1575
1576         init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1577                        n_cut_size);
1578
1579         /* Repeat this loop until we either cut the item without needing
1580            to balance, or we fix_nodes without schedule occurring */
1581         while (1) {
1582                 /* Determine the balance mode, position of the first byte to
1583                    be cut, and size to be cut.  In case of the indirect item
1584                    free unformatted nodes which are pointed to by the cut
1585                    pointers. */
1586
1587                 c_mode =
1588                     prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1589                                               p_s_item_key, &n_removed,
1590                                               &n_cut_size, n_new_file_size);
1591                 if (c_mode == M_CONVERT) {
1592                         /* convert last unformatted node to direct item or leave
1593                            tail in the unformatted node */
1594                         RFALSE(n_ret_value != CARRY_ON,
1595                                "PAP-5570: can not convert twice");
1596
1597                         n_ret_value =
1598                             maybe_indirect_to_direct(th, p_s_inode, page,
1599                                                      p_s_path, p_s_item_key,
1600                                                      n_new_file_size, &c_mode);
1601                         if (c_mode == M_SKIP_BALANCING)
1602                                 /* tail has been left in the unformatted node */
1603                                 return n_ret_value;
1604
1605                         n_is_inode_locked = 1;
1606
1607                         /* removing of last unformatted node will change value we
1608                            have to return to truncate. Save it */
1609                         retval2 = n_ret_value;
1610                         /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */
1611
1612                         /* So, we have performed the first part of the conversion:
1613                            inserting the new direct item.  Now we are removing the
1614                            last unformatted node pointer. Set key to search for
1615                            it. */
1616                         set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1617                         p_s_item_key->key_length = 4;
1618                         n_new_file_size -=
1619                             (n_new_file_size & (p_s_sb->s_blocksize - 1));
1620                         tail_pos = n_new_file_size;
1621                         set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1622                         if (search_for_position_by_key
1623                             (p_s_sb, p_s_item_key,
1624                              p_s_path) == POSITION_NOT_FOUND) {
1625                                 print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1626                                             PATH_LAST_POSITION(p_s_path) - 1,
1627                                             PATH_LAST_POSITION(p_s_path) + 1);
1628                                 reiserfs_panic(p_s_sb,
1629                                                "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)",
1630                                                p_s_item_key);
1631                         }
1632                         continue;
1633                 }
1634                 if (n_cut_size == 0) {
1635                         pathrelse(p_s_path);
1636                         return 0;
1637                 }
1638
1639                 s_cut_balance.insert_size[0] = n_cut_size;
1640
1641                 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1642                 if (n_ret_value != REPEAT_SEARCH)
1643                         break;
1644
1645                 PROC_INFO_INC(p_s_sb, cut_from_item_restarted);
1646
1647                 n_ret_value =
1648                     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1649                 if (n_ret_value == POSITION_FOUND)
1650                         continue;
1651
1652                 reiserfs_warning(p_s_sb,
1653                                  "PAP-5610: reiserfs_cut_from_item: item %K not found",
1654                                  p_s_item_key);
1655                 unfix_nodes(&s_cut_balance);
1656                 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1657         }                       /* while */
1658
1659         // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1660         if (n_ret_value != CARRY_ON) {
1661                 if (n_is_inode_locked) {
1662                         // FIXME: this seems to be not needed: we are always able
1663                         // to cut item
1664                         indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1665                 }
1666                 if (n_ret_value == NO_DISK_SPACE)
1667                         reiserfs_warning(p_s_sb, "NO_DISK_SPACE");
1668                 unfix_nodes(&s_cut_balance);
1669                 return -EIO;
1670         }
1671
1672         /* go ahead and perform balancing */
1673
1674         RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1675
1676         /* Calculate number of bytes that need to be cut from the item. */
1677         quota_cut_bytes =
1678             (c_mode ==
1679              M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1680             insert_size[0];
1681         if (retval2 == -1)
1682                 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1683         else
1684                 n_ret_value = retval2;
1685
1686         /* For direct items, we only change the quota when deleting the last
1687          ** item.
1688          */
1689         p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1690         if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1691                 if (c_mode == M_DELETE &&
1692                     (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) ==
1693                     1) {
1694                         // FIXME: this is to keep 3.5 happy
1695                         REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1696                         quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1697                 } else {
1698                         quota_cut_bytes = 0;
1699                 }
1700         }
1701 #ifdef CONFIG_REISERFS_CHECK
1702         if (n_is_inode_locked) {
1703                 struct item_head *le_ih =
1704                     PATH_PITEM_HEAD(s_cut_balance.tb_path);
1705                 /* we are going to complete indirect2direct conversion. Make
1706                    sure, that we exactly remove last unformatted node pointer
1707                    of the item */
1708                 if (!is_indirect_le_ih(le_ih))
1709                         reiserfs_panic(p_s_sb,
1710                                        "vs-5652: reiserfs_cut_from_item: "
1711                                        "item must be indirect %h", le_ih);
1712
1713                 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1714                         reiserfs_panic(p_s_sb,
1715                                        "vs-5653: reiserfs_cut_from_item: "
1716                                        "completing indirect2direct conversion indirect item %h "
1717                                        "being deleted must be of 4 byte long",
1718                                        le_ih);
1719
1720                 if (c_mode == M_CUT
1721                     && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1722                         reiserfs_panic(p_s_sb,
1723                                        "vs-5654: reiserfs_cut_from_item: "
1724                                        "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1725                                        le_ih, s_cut_balance.insert_size[0]);
1726                 }
1727                 /* it would be useful to make sure, that right neighboring
1728                    item is direct item of this file */
1729         }
1730 #endif
1731
1732         do_balance(&s_cut_balance, NULL, NULL, c_mode);
1733         if (n_is_inode_locked) {
1734                 /* we've done an indirect->direct conversion.  when the data block
1735                  ** was freed, it was removed from the list of blocks that must
1736                  ** be flushed before the transaction commits, make sure to
1737                  ** unmap and invalidate it
1738                  */
1739                 unmap_buffers(page, tail_pos);
1740                 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1741         }
1742 #ifdef REISERQUOTA_DEBUG
1743         reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1744                        "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1745                        quota_cut_bytes, p_s_inode->i_uid, '?');
1746 #endif
1747         DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
1748         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1749         return n_ret_value;
1750 }
1751
1752 static void truncate_directory(struct reiserfs_transaction_handle *th,
1753                                struct inode *inode)
1754 {
1755         BUG_ON(!th->t_trans_id);
1756         if (inode->i_nlink)
1757                 reiserfs_warning(inode->i_sb,
1758                                  "vs-5655: truncate_directory: link count != 0");
1759
1760         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1761         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1762         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1763         reiserfs_update_sd(th, inode);
1764         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1765         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1766 }
1767
1768 /* Truncate file to the new size. Note, this must be called with a transaction
1769    already started */
1770 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode,       /* ->i_size contains new
1771                                                                                                    size */
1772                          struct page *page,     /* up to date for last block */
1773                          int update_timestamps  /* when it is called by
1774                                                    file_release to convert
1775                                                    the tail - no timestamps
1776                                                    should be updated */
1777     )
1778 {
1779         INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1780         struct item_head *p_le_ih;      /* Pointer to an item header. */
1781         struct cpu_key s_item_key;      /* Key to search for a previous file item. */
1782         loff_t n_file_size,     /* Old file size. */
1783          n_new_file_size;       /* New file size. */
1784         int n_deleted;          /* Number of deleted or truncated bytes. */
1785         int retval;
1786         int err = 0;
1787
1788         BUG_ON(!th->t_trans_id);
1789         if (!
1790             (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1791              || S_ISLNK(p_s_inode->i_mode)))
1792                 return 0;
1793
1794         if (S_ISDIR(p_s_inode->i_mode)) {
1795                 // deletion of directory - no need to update timestamps
1796                 truncate_directory(th, p_s_inode);
1797                 return 0;
1798         }
1799
1800         /* Get new file size. */
1801         n_new_file_size = p_s_inode->i_size;
1802
1803         // FIXME: note, that key type is unimportant here
1804         make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1805                      TYPE_DIRECT, 3);
1806
1807         retval =
1808             search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1809                                        &s_search_path);
1810         if (retval == IO_ERROR) {
1811                 reiserfs_warning(p_s_inode->i_sb,
1812                                  "vs-5657: reiserfs_do_truncate: "
1813                                  "i/o failure occurred trying to truncate %K",
1814                                  &s_item_key);
1815                 err = -EIO;
1816                 goto out;
1817         }
1818         if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1819                 reiserfs_warning(p_s_inode->i_sb,
1820                                  "PAP-5660: reiserfs_do_truncate: "
1821                                  "wrong result %d of search for %K", retval,
1822                                  &s_item_key);
1823
1824                 err = -EIO;
1825                 goto out;
1826         }
1827
1828         s_search_path.pos_in_item--;
1829
1830         /* Get real file size (total length of all file items) */
1831         p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1832         if (is_statdata_le_ih(p_le_ih))
1833                 n_file_size = 0;
1834         else {
1835                 loff_t offset = le_ih_k_offset(p_le_ih);
1836                 int bytes =
1837                     op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1838
1839                 /* this may mismatch with real file size: if last direct item
1840                    had no padding zeros and last unformatted node had no free
1841                    space, this file would have this file size */
1842                 n_file_size = offset + bytes - 1;
1843         }
1844         /*
1845          * are we doing a full truncate or delete, if so
1846          * kick in the reada code
1847          */
1848         if (n_new_file_size == 0)
1849                 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1850
1851         if (n_file_size == 0 || n_file_size < n_new_file_size) {
1852                 goto update_and_out;
1853         }
1854
1855         /* Update key to search for the last file item. */
1856         set_cpu_key_k_offset(&s_item_key, n_file_size);
1857
1858         do {
1859                 /* Cut or delete file item. */
1860                 n_deleted =
1861                     reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1862                                            p_s_inode, page, n_new_file_size);
1863                 if (n_deleted < 0) {
1864                         reiserfs_warning(p_s_inode->i_sb,
1865                                          "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1866                         reiserfs_check_path(&s_search_path);
1867                         return 0;
1868                 }
1869
1870                 RFALSE(n_deleted > n_file_size,
1871                        "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1872                        n_deleted, n_file_size, &s_item_key);
1873
1874                 /* Change key to search the last file item. */
1875                 n_file_size -= n_deleted;
1876
1877                 set_cpu_key_k_offset(&s_item_key, n_file_size);
1878
1879                 /* While there are bytes to truncate and previous file item is presented in the tree. */
1880
1881                 /*
1882                  ** This loop could take a really long time, and could log 
1883                  ** many more blocks than a transaction can hold.  So, we do a polite
1884                  ** journal end here, and if the transaction needs ending, we make
1885                  ** sure the file is consistent before ending the current trans
1886                  ** and starting a new one
1887                  */
1888                 if (journal_transaction_should_end(th, 0) ||
1889                     reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1890                         int orig_len_alloc = th->t_blocks_allocated;
1891                         decrement_counters_in_path(&s_search_path);
1892
1893                         if (update_timestamps) {
1894                                 p_s_inode->i_mtime = p_s_inode->i_ctime =
1895                                     CURRENT_TIME_SEC;
1896                         }
1897                         reiserfs_update_sd(th, p_s_inode);
1898
1899                         err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1900                         if (err)
1901                                 goto out;
1902                         err = journal_begin(th, p_s_inode->i_sb,
1903                                             JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1904                         if (err)
1905                                 goto out;
1906                         reiserfs_update_inode_transaction(p_s_inode);
1907                 }
1908         } while (n_file_size > ROUND_UP(n_new_file_size) &&
1909                  search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1910                                             &s_search_path) == POSITION_FOUND);
1911
1912         RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1913                "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1914                n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1915
1916       update_and_out:
1917         if (update_timestamps) {
1918                 // this is truncate, not file closing
1919                 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1920         }
1921         reiserfs_update_sd(th, p_s_inode);
1922
1923       out:
1924         pathrelse(&s_search_path);
1925         return err;
1926 }
1927
1928 #ifdef CONFIG_REISERFS_CHECK
1929 // this makes sure, that we __append__, not overwrite or add holes
1930 static void check_research_for_paste(struct path *path,
1931                                      const struct cpu_key *p_s_key)
1932 {
1933         struct item_head *found_ih = get_ih(path);
1934
1935         if (is_direct_le_ih(found_ih)) {
1936                 if (le_ih_k_offset(found_ih) +
1937                     op_bytes_number(found_ih,
1938                                     get_last_bh(path)->b_size) !=
1939                     cpu_key_k_offset(p_s_key)
1940                     || op_bytes_number(found_ih,
1941                                        get_last_bh(path)->b_size) !=
1942                     pos_in_item(path))
1943                         reiserfs_panic(NULL,
1944                                        "PAP-5720: check_research_for_paste: "
1945                                        "found direct item %h or position (%d) does not match to key %K",
1946                                        found_ih, pos_in_item(path), p_s_key);
1947         }
1948         if (is_indirect_le_ih(found_ih)) {
1949                 if (le_ih_k_offset(found_ih) +
1950                     op_bytes_number(found_ih,
1951                                     get_last_bh(path)->b_size) !=
1952                     cpu_key_k_offset(p_s_key)
1953                     || I_UNFM_NUM(found_ih) != pos_in_item(path)
1954                     || get_ih_free_space(found_ih) != 0)
1955                         reiserfs_panic(NULL,
1956                                        "PAP-5730: check_research_for_paste: "
1957                                        "found indirect item (%h) or position (%d) does not match to key (%K)",
1958                                        found_ih, pos_in_item(path), p_s_key);
1959         }
1960 }
1961 #endif                          /* config reiserfs check */
1962
1963 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1964 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path,      /* Path to the pasted item.          */
1965                              const struct cpu_key *p_s_key,     /* Key to search for the needed item. */
1966                              struct inode *inode,       /* Inode item belongs to */
1967                              const char *p_c_body,      /* Pointer to the bytes to paste.    */
1968                              int n_pasted_size)
1969 {                               /* Size of pasted bytes.             */
1970         struct tree_balance s_paste_balance;
1971         int retval;
1972         int fs_gen;
1973
1974         BUG_ON(!th->t_trans_id);
1975
1976         fs_gen = get_generation(inode->i_sb);
1977
1978 #ifdef REISERQUOTA_DEBUG
1979         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1980                        "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1981                        n_pasted_size, inode->i_uid,
1982                        key2type(&(p_s_key->on_disk_key)));
1983 #endif
1984
1985         if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1986                 pathrelse(p_s_search_path);
1987                 return -EDQUOT;
1988         }
1989         if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) {
1990                 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1991                 pathrelse(p_s_search_path);
1992                 return -ENOSPC;
1993         }
1994         init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
1995                        n_pasted_size);
1996 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1997         s_paste_balance.key = p_s_key->on_disk_key;
1998 #endif
1999
2000         /* DQUOT_* can schedule, must check before the fix_nodes */
2001         if (fs_changed(fs_gen, inode->i_sb)) {
2002                 goto search_again;
2003         }
2004
2005         while ((retval =
2006                 fix_nodes(M_PASTE, &s_paste_balance, NULL,
2007                           p_c_body)) == REPEAT_SEARCH) {
2008               search_again:
2009                 /* file system changed while we were in the fix_nodes */
2010                 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2011                 retval =
2012                     search_for_position_by_key(th->t_super, p_s_key,
2013                                                p_s_search_path);
2014                 if (retval == IO_ERROR) {
2015                         retval = -EIO;
2016                         goto error_out;
2017                 }
2018                 if (retval == POSITION_FOUND) {
2019                         reiserfs_warning(inode->i_sb,
2020                                          "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists",
2021                                          p_s_key);
2022                         retval = -EEXIST;
2023                         goto error_out;
2024                 }
2025 #ifdef CONFIG_REISERFS_CHECK
2026                 check_research_for_paste(p_s_search_path, p_s_key);
2027 #endif
2028         }
2029
2030         /* Perform balancing after all resources are collected by fix_nodes, and
2031            accessing them will not risk triggering schedule. */
2032         if (retval == CARRY_ON) {
2033                 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
2034                 return 0;
2035         }
2036         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2037       error_out:
2038         /* this also releases the path */
2039         unfix_nodes(&s_paste_balance);
2040 #ifdef REISERQUOTA_DEBUG
2041         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2042                        "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2043                        n_pasted_size, inode->i_uid,
2044                        key2type(&(p_s_key->on_disk_key)));
2045 #endif
2046         DLIMIT_FREE_SPACE(inode, n_pasted_size);
2047         DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
2048         return retval;
2049 }
2050
2051 /* Insert new item into the buffer at the path. */
2052 int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the inserteded item.         */
2053                          const struct cpu_key *key, struct item_head *p_s_ih,   /* Pointer to the item header to insert. */
2054                          struct inode *inode, const char *p_c_body)
2055 {                               /* Pointer to the bytes to insert.      */
2056         struct tree_balance s_ins_balance;
2057         int retval;
2058         int fs_gen = 0;
2059         int quota_bytes = 0;
2060
2061         BUG_ON(!th->t_trans_id);
2062
2063         if (inode) {            /* Do we count quotas for item? */
2064                 fs_gen = get_generation(inode->i_sb);
2065                 quota_bytes = ih_item_len(p_s_ih);
2066
2067                 /* hack so the quota code doesn't have to guess if the file has
2068                  ** a tail, links are always tails, so there's no guessing needed
2069                  */
2070                 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2071                         quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2072                 }
2073 #ifdef REISERQUOTA_DEBUG
2074                 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2075                                "reiserquota insert_item(): allocating %u id=%u type=%c",
2076                                quota_bytes, inode->i_uid, head2type(p_s_ih));
2077 #endif
2078                 /* We can't dirty inode here. It would be immediately written but
2079                  * appropriate stat item isn't inserted yet... */
2080                 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2081                         pathrelse(p_s_path);
2082                         return -EDQUOT;
2083                 }
2084                 if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) {
2085                         DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2086                         pathrelse(p_s_path);
2087                         return -ENOSPC;
2088                 }
2089         }
2090         init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2091                        IH_SIZE + ih_item_len(p_s_ih));
2092 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2093         s_ins_balance.key = key->on_disk_key;
2094 #endif
2095         /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2096         if (inode && fs_changed(fs_gen, inode->i_sb)) {
2097                 goto search_again;
2098         }
2099
2100         while ((retval =
2101                 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2102                           p_c_body)) == REPEAT_SEARCH) {
2103               search_again:
2104                 /* file system changed while we were in the fix_nodes */
2105                 PROC_INFO_INC(th->t_super, insert_item_restarted);
2106                 retval = search_item(th->t_super, key, p_s_path);
2107                 if (retval == IO_ERROR) {
2108                         retval = -EIO;
2109                         goto error_out;
2110                 }
2111                 if (retval == ITEM_FOUND) {
2112                         reiserfs_warning(th->t_super,
2113                                          "PAP-5760: reiserfs_insert_item: "
2114                                          "key %K already exists in the tree",
2115                                          key);
2116                         retval = -EEXIST;
2117                         goto error_out;
2118                 }
2119         }
2120
2121         /* make balancing after all resources will be collected at a time */
2122         if (retval == CARRY_ON) {
2123                 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2124                 return 0;
2125         }
2126
2127         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2128       error_out:
2129         /* also releases the path */
2130         unfix_nodes(&s_ins_balance);
2131 #ifdef REISERQUOTA_DEBUG
2132         reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2133                        "reiserquota insert_item(): freeing %u id=%u type=%c",
2134                        quota_bytes, inode->i_uid, head2type(p_s_ih));
2135 #endif
2136         if (inode) {
2137                 DLIMIT_FREE_SPACE(inode, quota_bytes);
2138                 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2139         }
2140         return retval;
2141 }