linux 2.6.16.38 w/ vs2.0.3-rc1
[linux-2.6.git] / fs / reiserfs / stree.c
1 /*
2  *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4
5 /*
6  *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7  *  Programm System Institute
8  *  Pereslavl-Zalessky Russia
9  */
10
11 /*
12  *  This file contains functions dealing with S+tree
13  *
14  * B_IS_IN_TREE
15  * copy_item_head
16  * comp_short_keys
17  * comp_keys
18  * comp_short_le_keys
19  * le_key2cpu_key
20  * comp_le_keys
21  * bin_search
22  * get_lkey
23  * get_rkey
24  * key_in_buffer
25  * decrement_bcount
26  * decrement_counters_in_path
27  * reiserfs_check_path
28  * pathrelse_and_restore
29  * pathrelse
30  * search_by_key_reada
31  * search_by_key
32  * search_for_position_by_key
33  * comp_items
34  * prepare_for_direct_item
35  * prepare_for_direntry_item
36  * prepare_for_delete_or_cut
37  * calc_deleted_bytes_number
38  * init_tb_struct
39  * padd_item
40  * reiserfs_delete_item
41  * reiserfs_delete_solid_item
42  * reiserfs_delete_object
43  * maybe_indirect_to_direct
44  * indirect_to_direct_roll_back
45  * reiserfs_cut_from_item
46  * truncate_directory
47  * reiserfs_do_truncate
48  * reiserfs_paste_into_item
49  * reiserfs_insert_item
50  */
51
52 #include <linux/config.h>
53 #include <linux/time.h>
54 #include <linux/string.h>
55 #include <linux/pagemap.h>
56 #include <linux/reiserfs_fs.h>
57 #include <linux/smp_lock.h>
58 #include <linux/buffer_head.h>
59 #include <linux/quotaops.h>
60 #include <linux/vs_dlimit.h>
61
62 /* Does the buffer contain a disk block which is in the tree. */
63 inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
64 {
65
66         RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
67                "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
68
69         return (B_LEVEL(p_s_bh) != FREE_LEVEL);
70 }
71
72 //
73 // to gets item head in le form
74 //
75 inline void copy_item_head(struct item_head *p_v_to,
76                            const struct item_head *p_v_from)
77 {
78         memcpy(p_v_to, p_v_from, IH_SIZE);
79 }
80
81 /* k1 is pointer to on-disk structure which is stored in little-endian
82    form. k2 is pointer to cpu variable. For key of items of the same
83    object this returns 0.
84    Returns: -1 if key1 < key2 
85    0 if key1 == key2
86    1 if key1 > key2 */
87 inline int comp_short_keys(const struct reiserfs_key *le_key,
88                            const struct cpu_key *cpu_key)
89 {
90         __u32 n;
91         n = le32_to_cpu(le_key->k_dir_id);
92         if (n < cpu_key->on_disk_key.k_dir_id)
93                 return -1;
94         if (n > cpu_key->on_disk_key.k_dir_id)
95                 return 1;
96         n = le32_to_cpu(le_key->k_objectid);
97         if (n < cpu_key->on_disk_key.k_objectid)
98                 return -1;
99         if (n > cpu_key->on_disk_key.k_objectid)
100                 return 1;
101         return 0;
102 }
103
104 /* k1 is pointer to on-disk structure which is stored in little-endian
105    form. k2 is pointer to cpu variable.
106    Compare keys using all 4 key fields.
107    Returns: -1 if key1 < key2 0
108    if key1 = key2 1 if key1 > key2 */
109 static inline int comp_keys(const struct reiserfs_key *le_key,
110                             const struct cpu_key *cpu_key)
111 {
112         int retval;
113
114         retval = comp_short_keys(le_key, cpu_key);
115         if (retval)
116                 return retval;
117         if (le_key_k_offset(le_key_version(le_key), le_key) <
118             cpu_key_k_offset(cpu_key))
119                 return -1;
120         if (le_key_k_offset(le_key_version(le_key), le_key) >
121             cpu_key_k_offset(cpu_key))
122                 return 1;
123
124         if (cpu_key->key_length == 3)
125                 return 0;
126
127         /* this part is needed only when tail conversion is in progress */
128         if (le_key_k_type(le_key_version(le_key), le_key) <
129             cpu_key_k_type(cpu_key))
130                 return -1;
131
132         if (le_key_k_type(le_key_version(le_key), le_key) >
133             cpu_key_k_type(cpu_key))
134                 return 1;
135
136         return 0;
137 }
138
139 inline int comp_short_le_keys(const struct reiserfs_key *key1,
140                               const struct reiserfs_key *key2)
141 {
142         __u32 *p_s_1_u32, *p_s_2_u32;
143         int n_key_length = REISERFS_SHORT_KEY_LEN;
144
145         p_s_1_u32 = (__u32 *) key1;
146         p_s_2_u32 = (__u32 *) key2;
147         for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
148                 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
149                         return -1;
150                 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
151                         return 1;
152         }
153         return 0;
154 }
155
156 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
157 {
158         int version;
159         to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
160         to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
161
162         // find out version of the key
163         version = le_key_version(from);
164         to->version = version;
165         to->on_disk_key.k_offset = le_key_k_offset(version, from);
166         to->on_disk_key.k_type = le_key_k_type(version, from);
167 }
168
169 // this does not say which one is bigger, it only returns 1 if keys
170 // are not equal, 0 otherwise
171 inline int comp_le_keys(const struct reiserfs_key *k1,
172                         const struct reiserfs_key *k2)
173 {
174         return memcmp(k1, k2, sizeof(struct reiserfs_key));
175 }
176
177 /**************************************************************************
178  *  Binary search toolkit function                                        *
179  *  Search for an item in the array by the item key                       *
180  *  Returns:    1 if found,  0 if not found;                              *
181  *        *p_n_pos = number of the searched element if found, else the    *
182  *        number of the first element that is larger than p_v_key.        *
183  **************************************************************************/
184 /* For those not familiar with binary search: n_lbound is the leftmost item that it
185  could be, n_rbound the rightmost item that it could be.  We examine the item
186  halfway between n_lbound and n_rbound, and that tells us either that we can increase
187  n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
188  there are no possible items, and we have not found it. With each examination we
189  cut the number of possible items it could be by one more than half rounded down,
190  or we find it. */
191 static inline int bin_search(const void *p_v_key,       /* Key to search for.                   */
192                              const void *p_v_base,      /* First item in the array.             */
193                              int p_n_num,       /* Number of items in the array.        */
194                              int p_n_width,     /* Item size in the array.
195                                                    searched. Lest the reader be
196                                                    confused, note that this is crafted
197                                                    as a general function, and when it
198                                                    is applied specifically to the array
199                                                    of item headers in a node, p_n_width
200                                                    is actually the item header size not
201                                                    the item size.                      */
202                              int *p_n_pos       /* Number of the searched for element. */
203     )
204 {
205         int n_rbound, n_lbound, n_j;
206
207         for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
208              n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
209                 switch (comp_keys
210                         ((struct reiserfs_key *)((char *)p_v_base +
211                                                  n_j * p_n_width),
212                          (struct cpu_key *)p_v_key)) {
213                 case -1:
214                         n_lbound = n_j + 1;
215                         continue;
216                 case 1:
217                         n_rbound = n_j - 1;
218                         continue;
219                 case 0:
220                         *p_n_pos = n_j;
221                         return ITEM_FOUND;      /* Key found in the array.  */
222                 }
223
224         /* bin_search did not find given key, it returns position of key,
225            that is minimal and greater than the given one. */
226         *p_n_pos = n_lbound;
227         return ITEM_NOT_FOUND;
228 }
229
230 #ifdef CONFIG_REISERFS_CHECK
231 extern struct tree_balance *cur_tb;
232 #endif
233
234 /* Minimal possible key. It is never in the tree. */
235 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
236
237 /* Maximal possible key. It is never in the tree. */
238 static const struct reiserfs_key MAX_KEY = {
239         __constant_cpu_to_le32(0xffffffff),
240         __constant_cpu_to_le32(0xffffffff),
241         {{__constant_cpu_to_le32(0xffffffff),
242           __constant_cpu_to_le32(0xffffffff)},}
243 };
244
245 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
246    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
247    the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
248    case we return a special key, either MIN_KEY or MAX_KEY. */
249 static inline const struct reiserfs_key *get_lkey(const struct path
250                                                   *p_s_chk_path,
251                                                   const struct super_block
252                                                   *p_s_sb)
253 {
254         int n_position, n_path_offset = p_s_chk_path->path_length;
255         struct buffer_head *p_s_parent;
256
257         RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
258                "PAP-5010: invalid offset in the path");
259
260         /* While not higher in path than first element. */
261         while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
262
263                 RFALSE(!buffer_uptodate
264                        (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
265                        "PAP-5020: parent is not uptodate");
266
267                 /* Parent at the path is not in the tree now. */
268                 if (!B_IS_IN_TREE
269                     (p_s_parent =
270                      PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
271                         return &MAX_KEY;
272                 /* Check whether position in the parent is correct. */
273                 if ((n_position =
274                      PATH_OFFSET_POSITION(p_s_chk_path,
275                                           n_path_offset)) >
276                     B_NR_ITEMS(p_s_parent))
277                         return &MAX_KEY;
278                 /* Check whether parent at the path really points to the child. */
279                 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
280                     PATH_OFFSET_PBUFFER(p_s_chk_path,
281                                         n_path_offset + 1)->b_blocknr)
282                         return &MAX_KEY;
283                 /* Return delimiting key if position in the parent is not equal to zero. */
284                 if (n_position)
285                         return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
286         }
287         /* Return MIN_KEY if we are in the root of the buffer tree. */
288         if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
289             b_blocknr == SB_ROOT_BLOCK(p_s_sb))
290                 return &MIN_KEY;
291         return &MAX_KEY;
292 }
293
294 /* Get delimiting key of the buffer at the path and its right neighbor. */
295 inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
296                                            const struct super_block *p_s_sb)
297 {
298         int n_position, n_path_offset = p_s_chk_path->path_length;
299         struct buffer_head *p_s_parent;
300
301         RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
302                "PAP-5030: invalid offset in the path");
303
304         while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
305
306                 RFALSE(!buffer_uptodate
307                        (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
308                        "PAP-5040: parent is not uptodate");
309
310                 /* Parent at the path is not in the tree now. */
311                 if (!B_IS_IN_TREE
312                     (p_s_parent =
313                      PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
314                         return &MIN_KEY;
315                 /* Check whether position in the parent is correct. */
316                 if ((n_position =
317                      PATH_OFFSET_POSITION(p_s_chk_path,
318                                           n_path_offset)) >
319                     B_NR_ITEMS(p_s_parent))
320                         return &MIN_KEY;
321                 /* Check whether parent at the path really points to the child. */
322                 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
323                     PATH_OFFSET_PBUFFER(p_s_chk_path,
324                                         n_path_offset + 1)->b_blocknr)
325                         return &MIN_KEY;
326                 /* Return delimiting key if position in the parent is not the last one. */
327                 if (n_position != B_NR_ITEMS(p_s_parent))
328                         return B_N_PDELIM_KEY(p_s_parent, n_position);
329         }
330         /* Return MAX_KEY if we are in the root of the buffer tree. */
331         if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
332             b_blocknr == SB_ROOT_BLOCK(p_s_sb))
333                 return &MAX_KEY;
334         return &MIN_KEY;
335 }
336
337 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
338 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
339    the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
340    buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
341    this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
342 static inline int key_in_buffer(struct path *p_s_chk_path,      /* Path which should be checked.  */
343                                 const struct cpu_key *p_s_key,  /* Key which should be checked.   */
344                                 struct super_block *p_s_sb      /* Super block pointer.           */
345     )
346 {
347
348         RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
349                || p_s_chk_path->path_length > MAX_HEIGHT,
350                "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
351                p_s_key, p_s_chk_path->path_length);
352         RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
353                "PAP-5060: device must not be NODEV");
354
355         if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1)
356                 /* left delimiting key is bigger, that the key we look for */
357                 return 0;
358         //  if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
359         if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1)
360                 /* p_s_key must be less than right delimitiing key */
361                 return 0;
362         return 1;
363 }
364
365 inline void decrement_bcount(struct buffer_head *p_s_bh)
366 {
367         if (p_s_bh) {
368                 if (atomic_read(&(p_s_bh->b_count))) {
369                         put_bh(p_s_bh);
370                         return;
371                 }
372                 reiserfs_panic(NULL,
373                                "PAP-5070: decrement_bcount: trying to free free buffer %b",
374                                p_s_bh);
375         }
376 }
377
378 /* Decrement b_count field of the all buffers in the path. */
379 void decrement_counters_in_path(struct path *p_s_search_path)
380 {
381         int n_path_offset = p_s_search_path->path_length;
382
383         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
384                n_path_offset > EXTENDED_MAX_HEIGHT - 1,
385                "PAP-5080: invalid path offset of %d", n_path_offset);
386
387         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
388                 struct buffer_head *bh;
389
390                 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
391                 decrement_bcount(bh);
392         }
393         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
394 }
395
396 int reiserfs_check_path(struct path *p)
397 {
398         RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
399                "path not properly relsed");
400         return 0;
401 }
402
403 /* Release all buffers in the path. Restore dirty bits clean
404 ** when preparing the buffer for the log
405 **
406 ** only called from fix_nodes()
407 */
408 void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
409 {
410         int n_path_offset = p_s_search_path->path_length;
411
412         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
413                "clm-4000: invalid path offset");
414
415         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
416                 reiserfs_restore_prepared_buffer(s,
417                                                  PATH_OFFSET_PBUFFER
418                                                  (p_s_search_path,
419                                                   n_path_offset));
420                 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
421         }
422         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
423 }
424
425 /* Release all buffers in the path. */
426 void pathrelse(struct path *p_s_search_path)
427 {
428         int n_path_offset = p_s_search_path->path_length;
429
430         RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
431                "PAP-5090: invalid path offset");
432
433         while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
434                 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
435
436         p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
437 }
438
439 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
440 {
441         struct block_head *blkh;
442         struct item_head *ih;
443         int used_space;
444         int prev_location;
445         int i;
446         int nr;
447
448         blkh = (struct block_head *)buf;
449         if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
450                 reiserfs_warning(NULL,
451                                  "is_leaf: this should be caught earlier");
452                 return 0;
453         }
454
455         nr = blkh_nr_item(blkh);
456         if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
457                 /* item number is too big or too small */
458                 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh);
459                 return 0;
460         }
461         ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
462         used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
463         if (used_space != blocksize - blkh_free_space(blkh)) {
464                 /* free space does not match to calculated amount of use space */
465                 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z",
466                                  bh);
467                 return 0;
468         }
469         // FIXME: it is_leaf will hit performance too much - we may have
470         // return 1 here
471
472         /* check tables of item heads */
473         ih = (struct item_head *)(buf + BLKH_SIZE);
474         prev_location = blocksize;
475         for (i = 0; i < nr; i++, ih++) {
476                 if (le_ih_k_type(ih) == TYPE_ANY) {
477                         reiserfs_warning(NULL,
478                                          "is_leaf: wrong item type for item %h",
479                                          ih);
480                         return 0;
481                 }
482                 if (ih_location(ih) >= blocksize
483                     || ih_location(ih) < IH_SIZE * nr) {
484                         reiserfs_warning(NULL,
485                                          "is_leaf: item location seems wrong: %h",
486                                          ih);
487                         return 0;
488                 }
489                 if (ih_item_len(ih) < 1
490                     || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
491                         reiserfs_warning(NULL,
492                                          "is_leaf: item length seems wrong: %h",
493                                          ih);
494                         return 0;
495                 }
496                 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
497                         reiserfs_warning(NULL,
498                                          "is_leaf: item location seems wrong (second one): %h",
499                                          ih);
500                         return 0;
501                 }
502                 prev_location = ih_location(ih);
503         }
504
505         // one may imagine much more checks
506         return 1;
507 }
508
509 /* returns 1 if buf looks like an internal node, 0 otherwise */
510 static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
511 {
512         struct block_head *blkh;
513         int nr;
514         int used_space;
515
516         blkh = (struct block_head *)buf;
517         nr = blkh_level(blkh);
518         if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
519                 /* this level is not possible for internal nodes */
520                 reiserfs_warning(NULL,
521                                  "is_internal: this should be caught earlier");
522                 return 0;
523         }
524
525         nr = blkh_nr_item(blkh);
526         if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
527                 /* for internal which is not root we might check min number of keys */
528                 reiserfs_warning(NULL,
529                                  "is_internal: number of key seems wrong: %z",
530                                  bh);
531                 return 0;
532         }
533
534         used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
535         if (used_space != blocksize - blkh_free_space(blkh)) {
536                 reiserfs_warning(NULL,
537                                  "is_internal: free space seems wrong: %z", bh);
538                 return 0;
539         }
540         // one may imagine much more checks
541         return 1;
542 }
543
544 // make sure that bh contains formatted node of reiserfs tree of
545 // 'level'-th level
546 static int is_tree_node(struct buffer_head *bh, int level)
547 {
548         if (B_LEVEL(bh) != level) {
549                 reiserfs_warning(NULL,
550                                  "is_tree_node: node level %d does not match to the expected one %d",
551                                  B_LEVEL(bh), level);
552                 return 0;
553         }
554         if (level == DISK_LEAF_NODE_LEVEL)
555                 return is_leaf(bh->b_data, bh->b_size, bh);
556
557         return is_internal(bh->b_data, bh->b_size, bh);
558 }
559
560 #define SEARCH_BY_KEY_READA 16
561
562 /* The function is NOT SCHEDULE-SAFE! */
563 static void search_by_key_reada(struct super_block *s,
564                                 struct buffer_head **bh,
565                                 unsigned long *b, int num)
566 {
567         int i, j;
568
569         for (i = 0; i < num; i++) {
570                 bh[i] = sb_getblk(s, b[i]);
571         }
572         for (j = 0; j < i; j++) {
573                 /*
574                  * note, this needs attention if we are getting rid of the BKL
575                  * you have to make sure the prepared bit isn't set on this buffer
576                  */
577                 if (!buffer_uptodate(bh[j]))
578                         ll_rw_block(READA, 1, bh + j);
579                 brelse(bh[j]);
580         }
581 }
582
583 /**************************************************************************
584  * Algorithm   SearchByKey                                                *
585  *             look for item in the Disk S+Tree by its key                *
586  * Input:  p_s_sb   -  super block                                        *
587  *         p_s_key  - pointer to the key to search                        *
588  * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
589  *         p_s_search_path - path from the root to the needed leaf        *
590  **************************************************************************/
591
592 /* This function fills up the path from the root to the leaf as it
593    descends the tree looking for the key.  It uses reiserfs_bread to
594    try to find buffers in the cache given their block number.  If it
595    does not find them in the cache it reads them from disk.  For each
596    node search_by_key finds using reiserfs_bread it then uses
597    bin_search to look through that node.  bin_search will find the
598    position of the block_number of the next node if it is looking
599    through an internal node.  If it is looking through a leaf node
600    bin_search will find the position of the item which has key either
601    equal to given key, or which is the maximal key less than the given
602    key.  search_by_key returns a path that must be checked for the
603    correctness of the top of the path but need not be checked for the
604    correctness of the bottom of the path */
605 /* The function is NOT SCHEDULE-SAFE! */
606 int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,    /* Key to search. */
607                   struct path *p_s_search_path, /* This structure was
608                                                    allocated and initialized
609                                                    by the calling
610                                                    function. It is filled up
611                                                    by this function.  */
612                   int n_stop_level      /* How far down the tree to search. To
613                                            stop at leaf level - set to
614                                            DISK_LEAF_NODE_LEVEL */
615     )
616 {
617         int n_block_number;
618         int expected_level;
619         struct buffer_head *p_s_bh;
620         struct path_element *p_s_last_element;
621         int n_node_level, n_retval;
622         int right_neighbor_of_leaf_node;
623         int fs_gen;
624         struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
625         unsigned long reada_blocks[SEARCH_BY_KEY_READA];
626         int reada_count = 0;
627
628 #ifdef CONFIG_REISERFS_CHECK
629         int n_repeat_counter = 0;
630 #endif
631
632         PROC_INFO_INC(p_s_sb, search_by_key);
633
634         /* As we add each node to a path we increase its count.  This means that
635            we must be careful to release all nodes in a path before we either
636            discard the path struct or re-use the path struct, as we do here. */
637
638         decrement_counters_in_path(p_s_search_path);
639
640         right_neighbor_of_leaf_node = 0;
641
642         /* With each iteration of this loop we search through the items in the
643            current node, and calculate the next current node(next path element)
644            for the next iteration of this loop.. */
645         n_block_number = SB_ROOT_BLOCK(p_s_sb);
646         expected_level = -1;
647         while (1) {
648
649 #ifdef CONFIG_REISERFS_CHECK
650                 if (!(++n_repeat_counter % 50000))
651                         reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:"
652                                          "there were %d iterations of while loop "
653                                          "looking for key %K",
654                                          current->comm, n_repeat_counter,
655                                          p_s_key);
656 #endif
657
658                 /* prep path to have another element added to it. */
659                 p_s_last_element =
660                     PATH_OFFSET_PELEMENT(p_s_search_path,
661                                          ++p_s_search_path->path_length);
662                 fs_gen = get_generation(p_s_sb);
663
664                 /* Read the next tree node, and set the last element in the path to
665                    have a pointer to it. */
666                 if ((p_s_bh = p_s_last_element->pe_buffer =
667                      sb_getblk(p_s_sb, n_block_number))) {
668                         if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
669                                 search_by_key_reada(p_s_sb, reada_bh,
670                                                     reada_blocks, reada_count);
671                         }
672                         ll_rw_block(READ, 1, &p_s_bh);
673                         wait_on_buffer(p_s_bh);
674                         if (!buffer_uptodate(p_s_bh))
675                                 goto io_error;
676                 } else {
677                       io_error:
678                         p_s_search_path->path_length--;
679                         pathrelse(p_s_search_path);
680                         return IO_ERROR;
681                 }
682                 reada_count = 0;
683                 if (expected_level == -1)
684                         expected_level = SB_TREE_HEIGHT(p_s_sb);
685                 expected_level--;
686
687                 /* It is possible that schedule occurred. We must check whether the key
688                    to search is still in the tree rooted from the current buffer. If
689                    not then repeat search from the root. */
690                 if (fs_changed(fs_gen, p_s_sb) &&
691                     (!B_IS_IN_TREE(p_s_bh) ||
692                      B_LEVEL(p_s_bh) != expected_level ||
693                      !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
694                         PROC_INFO_INC(p_s_sb, search_by_key_fs_changed);
695                         PROC_INFO_INC(p_s_sb, search_by_key_restarted);
696                         PROC_INFO_INC(p_s_sb,
697                                       sbk_restarted[expected_level - 1]);
698                         decrement_counters_in_path(p_s_search_path);
699
700                         /* Get the root block number so that we can repeat the search
701                            starting from the root. */
702                         n_block_number = SB_ROOT_BLOCK(p_s_sb);
703                         expected_level = -1;
704                         right_neighbor_of_leaf_node = 0;
705
706                         /* repeat search from the root */
707                         continue;
708                 }
709
710                 /* only check that the key is in the buffer if p_s_key is not
711                    equal to the MAX_KEY. Latter case is only possible in
712                    "finish_unfinished()" processing during mount. */
713                 RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
714                        !key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
715                        "PAP-5130: key is not in the buffer");
716 #ifdef CONFIG_REISERFS_CHECK
717                 if (cur_tb) {
718                         print_cur_tb("5140");
719                         reiserfs_panic(p_s_sb,
720                                        "PAP-5140: search_by_key: schedule occurred in do_balance!");
721                 }
722 #endif
723
724                 // make sure, that the node contents look like a node of
725                 // certain level
726                 if (!is_tree_node(p_s_bh, expected_level)) {
727                         reiserfs_warning(p_s_sb, "vs-5150: search_by_key: "
728                                          "invalid format found in block %ld. Fsck?",
729                                          p_s_bh->b_blocknr);
730                         pathrelse(p_s_search_path);
731                         return IO_ERROR;
732                 }
733
734                 /* ok, we have acquired next formatted node in the tree */
735                 n_node_level = B_LEVEL(p_s_bh);
736
737                 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1);
738
739                 RFALSE(n_node_level < n_stop_level,
740                        "vs-5152: tree level (%d) is less than stop level (%d)",
741                        n_node_level, n_stop_level);
742
743                 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
744                                       B_NR_ITEMS(p_s_bh),
745                                       (n_node_level ==
746                                        DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
747                                       KEY_SIZE,
748                                       &(p_s_last_element->pe_position));
749                 if (n_node_level == n_stop_level) {
750                         return n_retval;
751                 }
752
753                 /* we are not in the stop level */
754                 if (n_retval == ITEM_FOUND)
755                         /* item has been found, so we choose the pointer which is to the right of the found one */
756                         p_s_last_element->pe_position++;
757
758                 /* if item was not found we choose the position which is to
759                    the left of the found item. This requires no code,
760                    bin_search did it already. */
761
762                 /* So we have chosen a position in the current node which is
763                    an internal node.  Now we calculate child block number by
764                    position in the node. */
765                 n_block_number =
766                     B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
767
768                 /* if we are going to read leaf nodes, try for read ahead as well */
769                 if ((p_s_search_path->reada & PATH_READA) &&
770                     n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
771                         int pos = p_s_last_element->pe_position;
772                         int limit = B_NR_ITEMS(p_s_bh);
773                         struct reiserfs_key *le_key;
774
775                         if (p_s_search_path->reada & PATH_READA_BACK)
776                                 limit = 0;
777                         while (reada_count < SEARCH_BY_KEY_READA) {
778                                 if (pos == limit)
779                                         break;
780                                 reada_blocks[reada_count++] =
781                                     B_N_CHILD_NUM(p_s_bh, pos);
782                                 if (p_s_search_path->reada & PATH_READA_BACK)
783                                         pos--;
784                                 else
785                                         pos++;
786
787                                 /*
788                                  * check to make sure we're in the same object
789                                  */
790                                 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
791                                 if (le32_to_cpu(le_key->k_objectid) !=
792                                     p_s_key->on_disk_key.k_objectid) {
793                                         break;
794                                 }
795                         }
796                 }
797         }
798 }
799
800 /* Form the path to an item and position in this item which contains
801    file byte defined by p_s_key. If there is no such item
802    corresponding to the key, we point the path to the item with
803    maximal key less than p_s_key, and *p_n_pos_in_item is set to one
804    past the last entry/byte in the item.  If searching for entry in a
805    directory item, and it is not found, *p_n_pos_in_item is set to one
806    entry more than the entry with maximal key which is less than the
807    sought key.
808
809    Note that if there is no entry in this same node which is one more,
810    then we point to an imaginary entry.  for direct items, the
811    position is in units of bytes, for indirect items the position is
812    in units of blocknr entries, for directory items the position is in
813    units of directory entries.  */
814
815 /* The function is NOT SCHEDULE-SAFE! */
816 int search_for_position_by_key(struct super_block *p_s_sb,      /* Pointer to the super block.          */
817                                const struct cpu_key *p_cpu_key, /* Key to search (cpu variable)         */
818                                struct path *p_s_search_path     /* Filled up by this function.          */
819     )
820 {
821         struct item_head *p_le_ih;      /* pointer to on-disk structure */
822         int n_blk_size;
823         loff_t item_offset, offset;
824         struct reiserfs_dir_entry de;
825         int retval;
826
827         /* If searching for directory entry. */
828         if (is_direntry_cpu_key(p_cpu_key))
829                 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path,
830                                            &de);
831
832         /* If not searching for directory entry. */
833
834         /* If item is found. */
835         retval = search_item(p_s_sb, p_cpu_key, p_s_search_path);
836         if (retval == IO_ERROR)
837                 return retval;
838         if (retval == ITEM_FOUND) {
839
840                 RFALSE(!ih_item_len
841                        (B_N_PITEM_HEAD
842                         (PATH_PLAST_BUFFER(p_s_search_path),
843                          PATH_LAST_POSITION(p_s_search_path))),
844                        "PAP-5165: item length equals zero");
845
846                 pos_in_item(p_s_search_path) = 0;
847                 return POSITION_FOUND;
848         }
849
850         RFALSE(!PATH_LAST_POSITION(p_s_search_path),
851                "PAP-5170: position equals zero");
852
853         /* Item is not found. Set path to the previous item. */
854         p_le_ih =
855             B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
856                            --PATH_LAST_POSITION(p_s_search_path));
857         n_blk_size = p_s_sb->s_blocksize;
858
859         if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
860                 return FILE_NOT_FOUND;
861         }
862         // FIXME: quite ugly this far
863
864         item_offset = le_ih_k_offset(p_le_ih);
865         offset = cpu_key_k_offset(p_cpu_key);
866
867         /* Needed byte is contained in the item pointed to by the path. */
868         if (item_offset <= offset &&
869             item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
870                 pos_in_item(p_s_search_path) = offset - item_offset;
871                 if (is_indirect_le_ih(p_le_ih)) {
872                         pos_in_item(p_s_search_path) /= n_blk_size;
873                 }
874                 return POSITION_FOUND;
875         }
876
877         /* Needed byte is not contained in the item pointed to by the
878            path. Set pos_in_item out of the item. */
879         if (is_indirect_le_ih(p_le_ih))
880                 pos_in_item(p_s_search_path) =
881                     ih_item_len(p_le_ih) / UNFM_P_SIZE;
882         else
883                 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
884
885         return POSITION_NOT_FOUND;
886 }
887
888 /* Compare given item and item pointed to by the path. */
889 int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
890 {
891         struct buffer_head *p_s_bh;
892         struct item_head *ih;
893
894         /* Last buffer at the path is not in the tree. */
895         if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
896                 return 1;
897
898         /* Last path position is invalid. */
899         if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
900                 return 1;
901
902         /* we need only to know, whether it is the same item */
903         ih = get_ih(p_s_path);
904         return memcmp(stored_ih, ih, IH_SIZE);
905 }
906
907 /* unformatted nodes are not logged anymore, ever.  This is safe
908 ** now
909 */
910 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
911
912 // block can not be forgotten as it is in I/O or held by someone
913 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
914
915 // prepare for delete or cut of direct item
916 static inline int prepare_for_direct_item(struct path *path,
917                                           struct item_head *le_ih,
918                                           struct inode *inode,
919                                           loff_t new_file_length, int *cut_size)
920 {
921         loff_t round_len;
922
923         if (new_file_length == max_reiserfs_offset(inode)) {
924                 /* item has to be deleted */
925                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
926                 return M_DELETE;
927         }
928         // new file gets truncated
929         if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
930                 // 
931                 round_len = ROUND_UP(new_file_length);
932                 /* this was n_new_file_length < le_ih ... */
933                 if (round_len < le_ih_k_offset(le_ih)) {
934                         *cut_size = -(IH_SIZE + ih_item_len(le_ih));
935                         return M_DELETE;        /* Delete this item. */
936                 }
937                 /* Calculate first position and size for cutting from item. */
938                 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
939                 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
940
941                 return M_CUT;   /* Cut from this item. */
942         }
943
944         // old file: items may have any length
945
946         if (new_file_length < le_ih_k_offset(le_ih)) {
947                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
948                 return M_DELETE;        /* Delete this item. */
949         }
950         /* Calculate first position and size for cutting from item. */
951         *cut_size = -(ih_item_len(le_ih) -
952                       (pos_in_item(path) =
953                        new_file_length + 1 - le_ih_k_offset(le_ih)));
954         return M_CUT;           /* Cut from this item. */
955 }
956
957 static inline int prepare_for_direntry_item(struct path *path,
958                                             struct item_head *le_ih,
959                                             struct inode *inode,
960                                             loff_t new_file_length,
961                                             int *cut_size)
962 {
963         if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
964             new_file_length == max_reiserfs_offset(inode)) {
965                 RFALSE(ih_entry_count(le_ih) != 2,
966                        "PAP-5220: incorrect empty directory item (%h)", le_ih);
967                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
968                 return M_DELETE;        /* Delete the directory item containing "." and ".." entry. */
969         }
970
971         if (ih_entry_count(le_ih) == 1) {
972                 /* Delete the directory item such as there is one record only
973                    in this item */
974                 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
975                 return M_DELETE;
976         }
977
978         /* Cut one record from the directory item. */
979         *cut_size =
980             -(DEH_SIZE +
981               entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
982         return M_CUT;
983 }
984
985 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
986     If the path points to an indirect item, remove some number of its unformatted nodes.
987     In case of file truncate calculate whether this item must be deleted/truncated or last
988     unformatted node of this item will be converted to a direct item.
989     This function returns a determination of what balance mode the calling function should employ. */
990 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed
991                                                                                                                                                                                    from end of the file. */
992                                       int *p_n_cut_size, unsigned long long n_new_file_length   /* MAX_KEY_OFFSET in case of delete. */
993     )
994 {
995         struct super_block *p_s_sb = inode->i_sb;
996         struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
997         struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
998
999         BUG_ON(!th->t_trans_id);
1000
1001         /* Stat_data item. */
1002         if (is_statdata_le_ih(p_le_ih)) {
1003
1004                 RFALSE(n_new_file_length != max_reiserfs_offset(inode),
1005                        "PAP-5210: mode must be M_DELETE");
1006
1007                 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1008                 return M_DELETE;
1009         }
1010
1011         /* Directory item. */
1012         if (is_direntry_le_ih(p_le_ih))
1013                 return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
1014                                                  n_new_file_length,
1015                                                  p_n_cut_size);
1016
1017         /* Direct item. */
1018         if (is_direct_le_ih(p_le_ih))
1019                 return prepare_for_direct_item(p_s_path, p_le_ih, inode,
1020                                                n_new_file_length, p_n_cut_size);
1021
1022         /* Case of an indirect item. */
1023         {
1024                 int n_unfm_number,      /* Number of the item unformatted nodes. */
1025                  n_counter, n_blk_size;
1026                 __le32 *p_n_unfm_pointer;       /* Pointer to the unformatted node number. */
1027                 __u32 tmp;
1028                 struct item_head s_ih;  /* Item header. */
1029                 char c_mode;    /* Returned mode of the balance. */
1030                 int need_research;
1031
1032                 n_blk_size = p_s_sb->s_blocksize;
1033
1034                 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1035                 do {
1036                         need_research = 0;
1037                         p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1038                         /* Copy indirect item header to a temp variable. */
1039                         copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1040                         /* Calculate number of unformatted nodes in this item. */
1041                         n_unfm_number = I_UNFM_NUM(&s_ih);
1042
1043                         RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number ||
1044                                pos_in_item(p_s_path) + 1 != n_unfm_number,
1045                                "PAP-5240: invalid item %h "
1046                                "n_unfm_number = %d *p_n_pos_in_item = %d",
1047                                &s_ih, n_unfm_number, pos_in_item(p_s_path));
1048
1049                         /* Calculate balance mode and position in the item to remove unformatted nodes. */
1050                         if (n_new_file_length == max_reiserfs_offset(inode)) {  /* Case of delete. */
1051                                 pos_in_item(p_s_path) = 0;
1052                                 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1053                                 c_mode = M_DELETE;
1054                         } else {        /* Case of truncate. */
1055                                 if (n_new_file_length < le_ih_k_offset(&s_ih)) {
1056                                         pos_in_item(p_s_path) = 0;
1057                                         *p_n_cut_size =
1058                                             -(IH_SIZE + ih_item_len(&s_ih));
1059                                         c_mode = M_DELETE;      /* Delete this item. */
1060                                 } else {
1061                                         /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1062                                         pos_in_item(p_s_path) =
1063                                             (n_new_file_length + n_blk_size -
1064                                              le_ih_k_offset(&s_ih)) >> p_s_sb->
1065                                             s_blocksize_bits;
1066
1067                                         RFALSE(pos_in_item(p_s_path) >
1068                                                n_unfm_number,
1069                                                "PAP-5250: invalid position in the item");
1070
1071                                         /* Either convert last unformatted node of indirect item to direct item or increase
1072                                            its free space.  */
1073                                         if (pos_in_item(p_s_path) ==
1074                                             n_unfm_number) {
1075                                                 *p_n_cut_size = 0;      /* Nothing to cut. */
1076                                                 return M_CONVERT;       /* Maybe convert last unformatted node to the direct item. */
1077                                         }
1078                                         /* Calculate size to cut. */
1079                                         *p_n_cut_size =
1080                                             -(ih_item_len(&s_ih) -
1081                                               pos_in_item(p_s_path) *
1082                                               UNFM_P_SIZE);
1083
1084                                         c_mode = M_CUT; /* Cut from this indirect item. */
1085                                 }
1086                         }
1087
1088                         RFALSE(n_unfm_number <= pos_in_item(p_s_path),
1089                                "PAP-5260: invalid position in the indirect item");
1090
1091                         /* pointers to be cut */
1092                         n_unfm_number -= pos_in_item(p_s_path);
1093                         /* Set pointer to the last unformatted node pointer that is to be cut. */
1094                         p_n_unfm_pointer =
1095                             (__le32 *) B_I_PITEM(p_s_bh,
1096                                                  &s_ih) + I_UNFM_NUM(&s_ih) -
1097                             1 - *p_n_removed;
1098
1099                         /* We go through the unformatted nodes pointers of the indirect
1100                            item and look for the unformatted nodes in the cache. If we
1101                            found some of them we free it, zero corresponding indirect item
1102                            entry and log buffer containing that indirect item. For this we
1103                            need to prepare last path element for logging. If some
1104                            unformatted node has b_count > 1 we must not free this
1105                            unformatted node since it is in use. */
1106                         reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1107                         // note: path could be changed, first line in for loop takes care
1108                         // of it
1109
1110                         for (n_counter = *p_n_removed;
1111                              n_counter < n_unfm_number;
1112                              n_counter++, p_n_unfm_pointer--) {
1113
1114                                 cond_resched();
1115                                 if (item_moved(&s_ih, p_s_path)) {
1116                                         need_research = 1;
1117                                         break;
1118                                 }
1119                                 RFALSE(p_n_unfm_pointer <
1120                                        (__le32 *) B_I_PITEM(p_s_bh, &s_ih)
1121                                        || p_n_unfm_pointer >
1122                                        (__le32 *) B_I_PITEM(p_s_bh,
1123                                                             &s_ih) +
1124                                        I_UNFM_NUM(&s_ih) - 1,
1125                                        "vs-5265: pointer out of range");
1126
1127                                 /* Hole, nothing to remove. */
1128                                 if (!get_block_num(p_n_unfm_pointer, 0)) {
1129                                         (*p_n_removed)++;
1130                                         continue;
1131                                 }
1132
1133                                 (*p_n_removed)++;
1134
1135                                 tmp = get_block_num(p_n_unfm_pointer, 0);
1136                                 put_block_num(p_n_unfm_pointer, 0, 0);
1137                                 journal_mark_dirty(th, p_s_sb, p_s_bh);
1138                                 reiserfs_free_block(th, inode, tmp, 1);
1139                                 if (item_moved(&s_ih, p_s_path)) {
1140                                         need_research = 1;
1141                                         break;
1142                                 }
1143                         }
1144
1145                         /* a trick.  If the buffer has been logged, this
1146                          ** will do nothing.  If we've broken the loop without
1147                          ** logging it, it will restore the buffer
1148                          **
1149                          */
1150                         reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1151
1152                         /* This loop can be optimized. */
1153                 } while ((*p_n_removed < n_unfm_number || need_research) &&
1154                          search_for_position_by_key(p_s_sb, p_s_item_key,
1155                                                     p_s_path) ==
1156                          POSITION_FOUND);
1157
1158                 RFALSE(*p_n_removed < n_unfm_number,
1159                        "PAP-5310: indirect item is not found");
1160                 RFALSE(item_moved(&s_ih, p_s_path),
1161                        "after while, comp failed, retry");
1162
1163                 if (c_mode == M_CUT)
1164                         pos_in_item(p_s_path) *= UNFM_P_SIZE;
1165                 return c_mode;
1166         }
1167 }
1168
1169 /* Calculate number of bytes which will be deleted or cut during balance */
1170 static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1171 {
1172         int n_del_size;
1173         struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1174
1175         if (is_statdata_le_ih(p_le_ih))
1176                 return 0;
1177
1178         n_del_size =
1179             (c_mode ==
1180              M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1181         if (is_direntry_le_ih(p_le_ih)) {
1182                 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1183                 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1184                 // empty size.  ick. FIXME, is this right?
1185                 //
1186                 return n_del_size;
1187         }
1188
1189         if (is_indirect_le_ih(p_le_ih))
1190                 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih);
1191         return n_del_size;
1192 }
1193
1194 static void init_tb_struct(struct reiserfs_transaction_handle *th,
1195                            struct tree_balance *p_s_tb,
1196                            struct super_block *p_s_sb,
1197                            struct path *p_s_path, int n_size)
1198 {
1199
1200         BUG_ON(!th->t_trans_id);
1201
1202         memset(p_s_tb, '\0', sizeof(struct tree_balance));
1203         p_s_tb->transaction_handle = th;
1204         p_s_tb->tb_sb = p_s_sb;
1205         p_s_tb->tb_path = p_s_path;
1206         PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1207         PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1208         p_s_tb->insert_size[0] = n_size;
1209 }
1210
1211 void padd_item(char *item, int total_length, int length)
1212 {
1213         int i;
1214
1215         for (i = total_length; i > length;)
1216                 item[--i] = 0;
1217 }
1218
1219 #ifdef REISERQUOTA_DEBUG
1220 char key2type(struct reiserfs_key *ih)
1221 {
1222         if (is_direntry_le_key(2, ih))
1223                 return 'd';
1224         if (is_direct_le_key(2, ih))
1225                 return 'D';
1226         if (is_indirect_le_key(2, ih))
1227                 return 'i';
1228         if (is_statdata_le_key(2, ih))
1229                 return 's';
1230         return 'u';
1231 }
1232
1233 char head2type(struct item_head *ih)
1234 {
1235         if (is_direntry_le_ih(ih))
1236                 return 'd';
1237         if (is_direct_le_ih(ih))
1238                 return 'D';
1239         if (is_indirect_le_ih(ih))
1240                 return 'i';
1241         if (is_statdata_le_ih(ih))
1242                 return 's';
1243         return 'u';
1244 }
1245 #endif
1246
1247 /* Delete object item. */
1248 int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the deleted item. */
1249                          const struct cpu_key *p_s_item_key,    /* Key to search for the deleted item.  */
1250                          struct inode *p_s_inode,       /* inode is here just to update i_blocks and quotas */
1251                          struct buffer_head *p_s_un_bh)
1252 {                               /* NULL or unformatted node pointer.    */
1253         struct super_block *p_s_sb = p_s_inode->i_sb;
1254         struct tree_balance s_del_balance;
1255         struct item_head s_ih;
1256         struct item_head *q_ih;
1257         int quota_cut_bytes;
1258         int n_ret_value, n_del_size, n_removed;
1259
1260 #ifdef CONFIG_REISERFS_CHECK
1261         char c_mode;
1262         int n_iter = 0;
1263 #endif
1264
1265         BUG_ON(!th->t_trans_id);
1266
1267         init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
1268                        0 /*size is unknown */ );
1269
1270         while (1) {
1271                 n_removed = 0;
1272
1273 #ifdef CONFIG_REISERFS_CHECK
1274                 n_iter++;
1275                 c_mode =
1276 #endif
1277                     prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1278                                               p_s_item_key, &n_removed,
1279                                               &n_del_size,
1280                                               max_reiserfs_offset(p_s_inode));
1281
1282                 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1283
1284                 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1285                 s_del_balance.insert_size[0] = n_del_size;
1286
1287                 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1288                 if (n_ret_value != REPEAT_SEARCH)
1289                         break;
1290
1291                 PROC_INFO_INC(p_s_sb, delete_item_restarted);
1292
1293                 // file system changed, repeat search
1294                 n_ret_value =
1295                     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1296                 if (n_ret_value == IO_ERROR)
1297                         break;
1298                 if (n_ret_value == FILE_NOT_FOUND) {
1299                         reiserfs_warning(p_s_sb,
1300                                          "vs-5340: reiserfs_delete_item: "
1301                                          "no items of the file %K found",
1302                                          p_s_item_key);
1303                         break;
1304                 }
1305         }                       /* while (1) */
1306
1307         if (n_ret_value != CARRY_ON) {
1308                 unfix_nodes(&s_del_balance);
1309                 return 0;
1310         }
1311         // reiserfs_delete_item returns item length when success
1312         n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1313         q_ih = get_ih(p_s_path);
1314         quota_cut_bytes = ih_item_len(q_ih);
1315
1316         /* hack so the quota code doesn't have to guess if the file
1317          ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1318          ** We test the offset because the tail might have been
1319          ** split into multiple items, and we only want to decrement for
1320          ** the unfm node once
1321          */
1322         if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1323                 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1324                         quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1325                 } else {
1326                         quota_cut_bytes = 0;
1327                 }
1328         }
1329
1330         if (p_s_un_bh) {
1331                 int off;
1332                 char *data;
1333
1334                 /* We are in direct2indirect conversion, so move tail contents
1335                    to the unformatted node */
1336                 /* note, we do the copy before preparing the buffer because we
1337                  ** don't care about the contents of the unformatted node yet.
1338                  ** the only thing we really care about is the direct item's data
1339                  ** is in the unformatted node.
1340                  **
1341                  ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1342                  ** the unformatted node, which might schedule, meaning we'd have to
1343                  ** loop all the way back up to the start of the while loop.
1344                  **
1345                  ** The unformatted node must be dirtied later on.  We can't be
1346                  ** sure here if the entire tail has been deleted yet.
1347                  **
1348                  ** p_s_un_bh is from the page cache (all unformatted nodes are
1349                  ** from the page cache) and might be a highmem page.  So, we
1350                  ** can't use p_s_un_bh->b_data.
1351                  ** -clm
1352                  */
1353
1354                 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1355                 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1356                 memcpy(data + off,
1357                        B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1358                        n_ret_value);
1359                 kunmap_atomic(data, KM_USER0);
1360         }
1361         /* Perform balancing after all resources have been collected at once. */
1362         do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1363
1364 #ifdef REISERQUOTA_DEBUG
1365         reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1366                        "reiserquota delete_item(): freeing %u, id=%u type=%c",
1367                        quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1368 #endif
1369         DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
1370         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1371
1372         /* Return deleted body length */
1373         return n_ret_value;
1374 }
1375
1376 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1377
1378  deletion of the body of the object is performed by iput(), with the
1379  result that if multiple processes are operating on a file, the
1380  deletion of the body of the file is deferred until the last process
1381  that has an open inode performs its iput().
1382
1383  writes and truncates are protected from collisions by use of
1384  semaphores.
1385
1386  creates, linking, and mknod are protected from collisions with other
1387  processes by making the reiserfs_add_entry() the last step in the
1388  creation, and then rolling back all changes if there was a collision.
1389  - Hans
1390 */
1391
1392 /* this deletes item which never gets split */
1393 void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1394                                 struct inode *inode, struct reiserfs_key *key)
1395 {
1396         struct tree_balance tb;
1397         INITIALIZE_PATH(path);
1398         int item_len = 0;
1399         int tb_init = 0;
1400         struct cpu_key cpu_key;
1401         int retval;
1402         int quota_cut_bytes = 0;
1403
1404         BUG_ON(!th->t_trans_id);
1405
1406         le_key2cpu_key(&cpu_key, key);
1407
1408         while (1) {
1409                 retval = search_item(th->t_super, &cpu_key, &path);
1410                 if (retval == IO_ERROR) {
1411                         reiserfs_warning(th->t_super,
1412                                          "vs-5350: reiserfs_delete_solid_item: "
1413                                          "i/o failure occurred trying to delete %K",
1414                                          &cpu_key);
1415                         break;
1416                 }
1417                 if (retval != ITEM_FOUND) {
1418                         pathrelse(&path);
1419                         // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1420                         if (!
1421                             ((unsigned long long)
1422                              GET_HASH_VALUE(le_key_k_offset
1423                                             (le_key_version(key), key)) == 0
1424                              && (unsigned long long)
1425                              GET_GENERATION_NUMBER(le_key_k_offset
1426                                                    (le_key_version(key),
1427                                                     key)) == 1))
1428                                 reiserfs_warning(th->t_super,
1429                                                  "vs-5355: reiserfs_delete_solid_item: %k not found",
1430                                                  key);
1431                         break;
1432                 }
1433                 if (!tb_init) {
1434                         tb_init = 1;
1435                         item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1436                         init_tb_struct(th, &tb, th->t_super, &path,
1437                                        -(IH_SIZE + item_len));
1438                 }
1439                 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1440
1441                 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1442                 if (retval == REPEAT_SEARCH) {
1443                         PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1444                         continue;
1445                 }
1446
1447                 if (retval == CARRY_ON) {
1448                         do_balance(&tb, NULL, NULL, M_DELETE);
1449                         if (inode) {    /* Should we count quota for item? (we don't count quotas for save-links) */
1450 #ifdef REISERQUOTA_DEBUG
1451                                 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1452                                                "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1453                                                quota_cut_bytes, inode->i_uid,
1454                                                key2type(key));
1455 #endif
1456                                 DLIMIT_FREE_SPACE(inode, quota_cut_bytes);
1457                                 DQUOT_FREE_SPACE_NODIRTY(inode,
1458                                                          quota_cut_bytes);
1459                         }
1460                         break;
1461                 }
1462                 // IO_ERROR, NO_DISK_SPACE, etc
1463                 reiserfs_warning(th->t_super,
1464                                  "vs-5360: reiserfs_delete_solid_item: "
1465                                  "could not delete %K due to fix_nodes failure",
1466                                  &cpu_key);
1467                 unfix_nodes(&tb);
1468                 break;
1469         }
1470
1471         reiserfs_check_path(&path);
1472 }
1473
1474 int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1475                            struct inode *inode)
1476 {
1477         int err;
1478         inode->i_size = 0;
1479         BUG_ON(!th->t_trans_id);
1480
1481         /* for directory this deletes item containing "." and ".." */
1482         err =
1483             reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1484         if (err)
1485                 return err;
1486
1487 #if defined( USE_INODE_GENERATION_COUNTER )
1488         if (!old_format_only(th->t_super)) {
1489                 __le32 *inode_generation;
1490
1491                 inode_generation =
1492                     &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1493                 *inode_generation =
1494                     cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1495         }
1496 /* USE_INODE_GENERATION_COUNTER */
1497 #endif
1498         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1499
1500         return err;
1501 }
1502
1503 static void unmap_buffers(struct page *page, loff_t pos)
1504 {
1505         struct buffer_head *bh;
1506         struct buffer_head *head;
1507         struct buffer_head *next;
1508         unsigned long tail_index;
1509         unsigned long cur_index;
1510
1511         if (page) {
1512                 if (page_has_buffers(page)) {
1513                         tail_index = pos & (PAGE_CACHE_SIZE - 1);
1514                         cur_index = 0;
1515                         head = page_buffers(page);
1516                         bh = head;
1517                         do {
1518                                 next = bh->b_this_page;
1519
1520                                 /* we want to unmap the buffers that contain the tail, and
1521                                  ** all the buffers after it (since the tail must be at the
1522                                  ** end of the file).  We don't want to unmap file data
1523                                  ** before the tail, since it might be dirty and waiting to
1524                                  ** reach disk
1525                                  */
1526                                 cur_index += bh->b_size;
1527                                 if (cur_index > tail_index) {
1528                                         reiserfs_unmap_buffer(bh);
1529                                 }
1530                                 bh = next;
1531                         } while (bh != head);
1532                         if (PAGE_SIZE == bh->b_size) {
1533                                 clear_page_dirty(page);
1534                         }
1535                 }
1536         }
1537 }
1538
1539 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1540                                     struct inode *p_s_inode,
1541                                     struct page *page,
1542                                     struct path *p_s_path,
1543                                     const struct cpu_key *p_s_item_key,
1544                                     loff_t n_new_file_size, char *p_c_mode)
1545 {
1546         struct super_block *p_s_sb = p_s_inode->i_sb;
1547         int n_block_size = p_s_sb->s_blocksize;
1548         int cut_bytes;
1549         BUG_ON(!th->t_trans_id);
1550
1551         if (n_new_file_size != p_s_inode->i_size)
1552                 BUG();
1553
1554         /* the page being sent in could be NULL if there was an i/o error
1555          ** reading in the last block.  The user will hit problems trying to
1556          ** read the file, but for now we just skip the indirect2direct
1557          */
1558         if (atomic_read(&p_s_inode->i_count) > 1 ||
1559             !tail_has_to_be_packed(p_s_inode) ||
1560             !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1561                 // leave tail in an unformatted node    
1562                 *p_c_mode = M_SKIP_BALANCING;
1563                 cut_bytes =
1564                     n_block_size - (n_new_file_size & (n_block_size - 1));
1565                 pathrelse(p_s_path);
1566                 return cut_bytes;
1567         }
1568         /* Permorm the conversion to a direct_item. */
1569         /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1570         return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1571                                n_new_file_size, p_c_mode);
1572 }
1573
1574 /* we did indirect_to_direct conversion. And we have inserted direct
1575    item successesfully, but there were no disk space to cut unfm
1576    pointer being converted. Therefore we have to delete inserted
1577    direct item(s) */
1578 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1579                                          struct inode *inode, struct path *path)
1580 {
1581         struct cpu_key tail_key;
1582         int tail_len;
1583         int removed;
1584         BUG_ON(!th->t_trans_id);
1585
1586         make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);      // !!!!
1587         tail_key.key_length = 4;
1588
1589         tail_len =
1590             (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1591         while (tail_len) {
1592                 /* look for the last byte of the tail */
1593                 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1594                     POSITION_NOT_FOUND)
1595                         reiserfs_panic(inode->i_sb,
1596                                        "vs-5615: indirect_to_direct_roll_back: found invalid item");
1597                 RFALSE(path->pos_in_item !=
1598                        ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1599                        "vs-5616: appended bytes found");
1600                 PATH_LAST_POSITION(path)--;
1601
1602                 removed =
1603                     reiserfs_delete_item(th, path, &tail_key, inode,
1604                                          NULL /*unbh not needed */ );
1605                 RFALSE(removed <= 0
1606                        || removed > tail_len,
1607                        "vs-5617: there was tail %d bytes, removed item length %d bytes",
1608                        tail_len, removed);
1609                 tail_len -= removed;
1610                 set_cpu_key_k_offset(&tail_key,
1611                                      cpu_key_k_offset(&tail_key) - removed);
1612         }
1613         reiserfs_warning(inode->i_sb,
1614                          "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1615         //mark_file_without_tail (inode);
1616         mark_inode_dirty(inode);
1617 }
1618
1619 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1620 int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1621                            struct path *p_s_path,
1622                            struct cpu_key *p_s_item_key,
1623                            struct inode *p_s_inode,
1624                            struct page *page, loff_t n_new_file_size)
1625 {
1626         struct super_block *p_s_sb = p_s_inode->i_sb;
1627         /* Every function which is going to call do_balance must first
1628            create a tree_balance structure.  Then it must fill up this
1629            structure by using the init_tb_struct and fix_nodes functions.
1630            After that we can make tree balancing. */
1631         struct tree_balance s_cut_balance;
1632         struct item_head *p_le_ih;
1633         int n_cut_size = 0,     /* Amount to be cut. */
1634             n_ret_value = CARRY_ON, n_removed = 0,      /* Number of the removed unformatted nodes. */
1635             n_is_inode_locked = 0;
1636         char c_mode;            /* Mode of the balance. */
1637         int retval2 = -1;
1638         int quota_cut_bytes;
1639         loff_t tail_pos = 0;
1640
1641         BUG_ON(!th->t_trans_id);
1642
1643         init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1644                        n_cut_size);
1645
1646         /* Repeat this loop until we either cut the item without needing
1647            to balance, or we fix_nodes without schedule occurring */
1648         while (1) {
1649                 /* Determine the balance mode, position of the first byte to
1650                    be cut, and size to be cut.  In case of the indirect item
1651                    free unformatted nodes which are pointed to by the cut
1652                    pointers. */
1653
1654                 c_mode =
1655                     prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1656                                               p_s_item_key, &n_removed,
1657                                               &n_cut_size, n_new_file_size);
1658                 if (c_mode == M_CONVERT) {
1659                         /* convert last unformatted node to direct item or leave
1660                            tail in the unformatted node */
1661                         RFALSE(n_ret_value != CARRY_ON,
1662                                "PAP-5570: can not convert twice");
1663
1664                         n_ret_value =
1665                             maybe_indirect_to_direct(th, p_s_inode, page,
1666                                                      p_s_path, p_s_item_key,
1667                                                      n_new_file_size, &c_mode);
1668                         if (c_mode == M_SKIP_BALANCING)
1669                                 /* tail has been left in the unformatted node */
1670                                 return n_ret_value;
1671
1672                         n_is_inode_locked = 1;
1673
1674                         /* removing of last unformatted node will change value we
1675                            have to return to truncate. Save it */
1676                         retval2 = n_ret_value;
1677                         /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */
1678
1679                         /* So, we have performed the first part of the conversion:
1680                            inserting the new direct item.  Now we are removing the
1681                            last unformatted node pointer. Set key to search for
1682                            it. */
1683                         set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1684                         p_s_item_key->key_length = 4;
1685                         n_new_file_size -=
1686                             (n_new_file_size & (p_s_sb->s_blocksize - 1));
1687                         tail_pos = n_new_file_size;
1688                         set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1689                         if (search_for_position_by_key
1690                             (p_s_sb, p_s_item_key,
1691                              p_s_path) == POSITION_NOT_FOUND) {
1692                                 print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1693                                             PATH_LAST_POSITION(p_s_path) - 1,
1694                                             PATH_LAST_POSITION(p_s_path) + 1);
1695                                 reiserfs_panic(p_s_sb,
1696                                                "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)",
1697                                                p_s_item_key);
1698                         }
1699                         continue;
1700                 }
1701                 if (n_cut_size == 0) {
1702                         pathrelse(p_s_path);
1703                         return 0;
1704                 }
1705
1706                 s_cut_balance.insert_size[0] = n_cut_size;
1707
1708                 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1709                 if (n_ret_value != REPEAT_SEARCH)
1710                         break;
1711
1712                 PROC_INFO_INC(p_s_sb, cut_from_item_restarted);
1713
1714                 n_ret_value =
1715                     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1716                 if (n_ret_value == POSITION_FOUND)
1717                         continue;
1718
1719                 reiserfs_warning(p_s_sb,
1720                                  "PAP-5610: reiserfs_cut_from_item: item %K not found",
1721                                  p_s_item_key);
1722                 unfix_nodes(&s_cut_balance);
1723                 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1724         }                       /* while */
1725
1726         // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1727         if (n_ret_value != CARRY_ON) {
1728                 if (n_is_inode_locked) {
1729                         // FIXME: this seems to be not needed: we are always able
1730                         // to cut item
1731                         indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1732                 }
1733                 if (n_ret_value == NO_DISK_SPACE)
1734                         reiserfs_warning(p_s_sb, "NO_DISK_SPACE");
1735                 unfix_nodes(&s_cut_balance);
1736                 return -EIO;
1737         }
1738
1739         /* go ahead and perform balancing */
1740
1741         RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1742
1743         /* Calculate number of bytes that need to be cut from the item. */
1744         quota_cut_bytes =
1745             (c_mode ==
1746              M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1747             insert_size[0];
1748         if (retval2 == -1)
1749                 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1750         else
1751                 n_ret_value = retval2;
1752
1753         /* For direct items, we only change the quota when deleting the last
1754          ** item.
1755          */
1756         p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1757         if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1758                 if (c_mode == M_DELETE &&
1759                     (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) ==
1760                     1) {
1761                         // FIXME: this is to keep 3.5 happy
1762                         REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1763                         quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1764                 } else {
1765                         quota_cut_bytes = 0;
1766                 }
1767         }
1768 #ifdef CONFIG_REISERFS_CHECK
1769         if (n_is_inode_locked) {
1770                 struct item_head *le_ih =
1771                     PATH_PITEM_HEAD(s_cut_balance.tb_path);
1772                 /* we are going to complete indirect2direct conversion. Make
1773                    sure, that we exactly remove last unformatted node pointer
1774                    of the item */
1775                 if (!is_indirect_le_ih(le_ih))
1776                         reiserfs_panic(p_s_sb,
1777                                        "vs-5652: reiserfs_cut_from_item: "
1778                                        "item must be indirect %h", le_ih);
1779
1780                 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1781                         reiserfs_panic(p_s_sb,
1782                                        "vs-5653: reiserfs_cut_from_item: "
1783                                        "completing indirect2direct conversion indirect item %h "
1784                                        "being deleted must be of 4 byte long",
1785                                        le_ih);
1786
1787                 if (c_mode == M_CUT
1788                     && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1789                         reiserfs_panic(p_s_sb,
1790                                        "vs-5654: reiserfs_cut_from_item: "
1791                                        "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1792                                        le_ih, s_cut_balance.insert_size[0]);
1793                 }
1794                 /* it would be useful to make sure, that right neighboring
1795                    item is direct item of this file */
1796         }
1797 #endif
1798
1799         do_balance(&s_cut_balance, NULL, NULL, c_mode);
1800         if (n_is_inode_locked) {
1801                 /* we've done an indirect->direct conversion.  when the data block
1802                  ** was freed, it was removed from the list of blocks that must
1803                  ** be flushed before the transaction commits, make sure to
1804                  ** unmap and invalidate it
1805                  */
1806                 unmap_buffers(page, tail_pos);
1807                 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1808         }
1809 #ifdef REISERQUOTA_DEBUG
1810         reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1811                        "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1812                        quota_cut_bytes, p_s_inode->i_uid, '?');
1813 #endif
1814         DLIMIT_FREE_SPACE(p_s_inode, quota_cut_bytes);
1815         DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1816         return n_ret_value;
1817 }
1818
1819 static void truncate_directory(struct reiserfs_transaction_handle *th,
1820                                struct inode *inode)
1821 {
1822         BUG_ON(!th->t_trans_id);
1823         if (inode->i_nlink)
1824                 reiserfs_warning(inode->i_sb,
1825                                  "vs-5655: truncate_directory: link count != 0");
1826
1827         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1828         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1829         reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1830         reiserfs_update_sd(th, inode);
1831         set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1832         set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1833 }
1834
1835 /* Truncate file to the new size. Note, this must be called with a transaction
1836    already started */
1837 int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode,       /* ->i_size contains new
1838                                                                                                    size */
1839                          struct page *page,     /* up to date for last block */
1840                          int update_timestamps  /* when it is called by
1841                                                    file_release to convert
1842                                                    the tail - no timestamps
1843                                                    should be updated */
1844     )
1845 {
1846         INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1847         struct item_head *p_le_ih;      /* Pointer to an item header. */
1848         struct cpu_key s_item_key;      /* Key to search for a previous file item. */
1849         loff_t n_file_size,     /* Old file size. */
1850          n_new_file_size;       /* New file size. */
1851         int n_deleted;          /* Number of deleted or truncated bytes. */
1852         int retval;
1853         int err = 0;
1854
1855         BUG_ON(!th->t_trans_id);
1856         if (!
1857             (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1858              || S_ISLNK(p_s_inode->i_mode)))
1859                 return 0;
1860
1861         if (S_ISDIR(p_s_inode->i_mode)) {
1862                 // deletion of directory - no need to update timestamps
1863                 truncate_directory(th, p_s_inode);
1864                 return 0;
1865         }
1866
1867         /* Get new file size. */
1868         n_new_file_size = p_s_inode->i_size;
1869
1870         // FIXME: note, that key type is unimportant here
1871         make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1872                      TYPE_DIRECT, 3);
1873
1874         retval =
1875             search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1876                                        &s_search_path);
1877         if (retval == IO_ERROR) {
1878                 reiserfs_warning(p_s_inode->i_sb,
1879                                  "vs-5657: reiserfs_do_truncate: "
1880                                  "i/o failure occurred trying to truncate %K",
1881                                  &s_item_key);
1882                 err = -EIO;
1883                 goto out;
1884         }
1885         if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1886                 reiserfs_warning(p_s_inode->i_sb,
1887                                  "PAP-5660: reiserfs_do_truncate: "
1888                                  "wrong result %d of search for %K", retval,
1889                                  &s_item_key);
1890
1891                 err = -EIO;
1892                 goto out;
1893         }
1894
1895         s_search_path.pos_in_item--;
1896
1897         /* Get real file size (total length of all file items) */
1898         p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1899         if (is_statdata_le_ih(p_le_ih))
1900                 n_file_size = 0;
1901         else {
1902                 loff_t offset = le_ih_k_offset(p_le_ih);
1903                 int bytes =
1904                     op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1905
1906                 /* this may mismatch with real file size: if last direct item
1907                    had no padding zeros and last unformatted node had no free
1908                    space, this file would have this file size */
1909                 n_file_size = offset + bytes - 1;
1910         }
1911         /*
1912          * are we doing a full truncate or delete, if so
1913          * kick in the reada code
1914          */
1915         if (n_new_file_size == 0)
1916                 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1917
1918         if (n_file_size == 0 || n_file_size < n_new_file_size) {
1919                 goto update_and_out;
1920         }
1921
1922         /* Update key to search for the last file item. */
1923         set_cpu_key_k_offset(&s_item_key, n_file_size);
1924
1925         do {
1926                 /* Cut or delete file item. */
1927                 n_deleted =
1928                     reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1929                                            p_s_inode, page, n_new_file_size);
1930                 if (n_deleted < 0) {
1931                         reiserfs_warning(p_s_inode->i_sb,
1932                                          "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1933                         reiserfs_check_path(&s_search_path);
1934                         return 0;
1935                 }
1936
1937                 RFALSE(n_deleted > n_file_size,
1938                        "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1939                        n_deleted, n_file_size, &s_item_key);
1940
1941                 /* Change key to search the last file item. */
1942                 n_file_size -= n_deleted;
1943
1944                 set_cpu_key_k_offset(&s_item_key, n_file_size);
1945
1946                 /* While there are bytes to truncate and previous file item is presented in the tree. */
1947
1948                 /*
1949                  ** This loop could take a really long time, and could log 
1950                  ** many more blocks than a transaction can hold.  So, we do a polite
1951                  ** journal end here, and if the transaction needs ending, we make
1952                  ** sure the file is consistent before ending the current trans
1953                  ** and starting a new one
1954                  */
1955                 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1956                         int orig_len_alloc = th->t_blocks_allocated;
1957                         decrement_counters_in_path(&s_search_path);
1958
1959                         if (update_timestamps) {
1960                                 p_s_inode->i_mtime = p_s_inode->i_ctime =
1961                                     CURRENT_TIME_SEC;
1962                         }
1963                         reiserfs_update_sd(th, p_s_inode);
1964
1965                         err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1966                         if (err)
1967                                 goto out;
1968                         err = journal_begin(th, p_s_inode->i_sb,
1969                                             JOURNAL_PER_BALANCE_CNT * 6);
1970                         if (err)
1971                                 goto out;
1972                         reiserfs_update_inode_transaction(p_s_inode);
1973                 }
1974         } while (n_file_size > ROUND_UP(n_new_file_size) &&
1975                  search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1976                                             &s_search_path) == POSITION_FOUND);
1977
1978         RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1979                "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1980                n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1981
1982       update_and_out:
1983         if (update_timestamps) {
1984                 // this is truncate, not file closing
1985                 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1986         }
1987         reiserfs_update_sd(th, p_s_inode);
1988
1989       out:
1990         pathrelse(&s_search_path);
1991         return err;
1992 }
1993
1994 #ifdef CONFIG_REISERFS_CHECK
1995 // this makes sure, that we __append__, not overwrite or add holes
1996 static void check_research_for_paste(struct path *path,
1997                                      const struct cpu_key *p_s_key)
1998 {
1999         struct item_head *found_ih = get_ih(path);
2000
2001         if (is_direct_le_ih(found_ih)) {
2002                 if (le_ih_k_offset(found_ih) +
2003                     op_bytes_number(found_ih,
2004                                     get_last_bh(path)->b_size) !=
2005                     cpu_key_k_offset(p_s_key)
2006                     || op_bytes_number(found_ih,
2007                                        get_last_bh(path)->b_size) !=
2008                     pos_in_item(path))
2009                         reiserfs_panic(NULL,
2010                                        "PAP-5720: check_research_for_paste: "
2011                                        "found direct item %h or position (%d) does not match to key %K",
2012                                        found_ih, pos_in_item(path), p_s_key);
2013         }
2014         if (is_indirect_le_ih(found_ih)) {
2015                 if (le_ih_k_offset(found_ih) +
2016                     op_bytes_number(found_ih,
2017                                     get_last_bh(path)->b_size) !=
2018                     cpu_key_k_offset(p_s_key)
2019                     || I_UNFM_NUM(found_ih) != pos_in_item(path)
2020                     || get_ih_free_space(found_ih) != 0)
2021                         reiserfs_panic(NULL,
2022                                        "PAP-5730: check_research_for_paste: "
2023                                        "found indirect item (%h) or position (%d) does not match to key (%K)",
2024                                        found_ih, pos_in_item(path), p_s_key);
2025         }
2026 }
2027 #endif                          /* config reiserfs check */
2028
2029 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
2030 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path,      /* Path to the pasted item.          */
2031                              const struct cpu_key *p_s_key,     /* Key to search for the needed item. */
2032                              struct inode *inode,       /* Inode item belongs to */
2033                              const char *p_c_body,      /* Pointer to the bytes to paste.    */
2034                              int n_pasted_size)
2035 {                               /* Size of pasted bytes.             */
2036         struct tree_balance s_paste_balance;
2037         int retval;
2038         int fs_gen;
2039
2040         BUG_ON(!th->t_trans_id);
2041
2042         fs_gen = get_generation(inode->i_sb);
2043
2044 #ifdef REISERQUOTA_DEBUG
2045         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2046                        "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2047                        n_pasted_size, inode->i_uid,
2048                        key2type(&(p_s_key->on_disk_key)));
2049 #endif
2050
2051         if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
2052                 pathrelse(p_s_search_path);
2053                 return -EDQUOT;
2054         }
2055         if (DLIMIT_ALLOC_SPACE(inode, n_pasted_size)) {
2056                 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
2057                 pathrelse(p_s_search_path);
2058                 return -ENOSPC;
2059         }
2060         init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
2061                        n_pasted_size);
2062 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2063         s_paste_balance.key = p_s_key->on_disk_key;
2064 #endif
2065
2066         /* DQUOT_* can schedule, must check before the fix_nodes */
2067         if (fs_changed(fs_gen, inode->i_sb)) {
2068                 goto search_again;
2069         }
2070
2071         while ((retval =
2072                 fix_nodes(M_PASTE, &s_paste_balance, NULL,
2073                           p_c_body)) == REPEAT_SEARCH) {
2074               search_again:
2075                 /* file system changed while we were in the fix_nodes */
2076                 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2077                 retval =
2078                     search_for_position_by_key(th->t_super, p_s_key,
2079                                                p_s_search_path);
2080                 if (retval == IO_ERROR) {
2081                         retval = -EIO;
2082                         goto error_out;
2083                 }
2084                 if (retval == POSITION_FOUND) {
2085                         reiserfs_warning(inode->i_sb,
2086                                          "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists",
2087                                          p_s_key);
2088                         retval = -EEXIST;
2089                         goto error_out;
2090                 }
2091 #ifdef CONFIG_REISERFS_CHECK
2092                 check_research_for_paste(p_s_search_path, p_s_key);
2093 #endif
2094         }
2095
2096         /* Perform balancing after all resources are collected by fix_nodes, and
2097            accessing them will not risk triggering schedule. */
2098         if (retval == CARRY_ON) {
2099                 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
2100                 return 0;
2101         }
2102         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2103       error_out:
2104         /* this also releases the path */
2105         unfix_nodes(&s_paste_balance);
2106 #ifdef REISERQUOTA_DEBUG
2107         reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2108                        "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2109                        n_pasted_size, inode->i_uid,
2110                        key2type(&(p_s_key->on_disk_key)));
2111 #endif
2112         DLIMIT_FREE_SPACE(inode, n_pasted_size);
2113         DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
2114         return retval;
2115 }
2116
2117 /* Insert new item into the buffer at the path. */
2118 int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the inserteded item.         */
2119                          const struct cpu_key *key, struct item_head *p_s_ih,   /* Pointer to the item header to insert. */
2120                          struct inode *inode, const char *p_c_body)
2121 {                               /* Pointer to the bytes to insert.      */
2122         struct tree_balance s_ins_balance;
2123         int retval;
2124         int fs_gen = 0;
2125         int quota_bytes = 0;
2126
2127         BUG_ON(!th->t_trans_id);
2128
2129         if (inode) {            /* Do we count quotas for item? */
2130                 fs_gen = get_generation(inode->i_sb);
2131                 quota_bytes = ih_item_len(p_s_ih);
2132
2133                 /* hack so the quota code doesn't have to guess if the file has
2134                  ** a tail, links are always tails, so there's no guessing needed
2135                  */
2136                 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2137                         quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2138                 }
2139 #ifdef REISERQUOTA_DEBUG
2140                 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2141                                "reiserquota insert_item(): allocating %u id=%u type=%c",
2142                                quota_bytes, inode->i_uid, head2type(p_s_ih));
2143 #endif
2144                 /* We can't dirty inode here. It would be immediately written but
2145                  * appropriate stat item isn't inserted yet... */
2146                 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2147                         pathrelse(p_s_path);
2148                         return -EDQUOT;
2149                 }
2150                 if (DLIMIT_ALLOC_SPACE(inode, quota_bytes)) {
2151                         DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2152                         pathrelse(p_s_path);
2153                         return -ENOSPC;
2154                 }
2155         }
2156         init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2157                        IH_SIZE + ih_item_len(p_s_ih));
2158 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2159         s_ins_balance.key = key->on_disk_key;
2160 #endif
2161         /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2162         if (inode && fs_changed(fs_gen, inode->i_sb)) {
2163                 goto search_again;
2164         }
2165
2166         while ((retval =
2167                 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2168                           p_c_body)) == REPEAT_SEARCH) {
2169               search_again:
2170                 /* file system changed while we were in the fix_nodes */
2171                 PROC_INFO_INC(th->t_super, insert_item_restarted);
2172                 retval = search_item(th->t_super, key, p_s_path);
2173                 if (retval == IO_ERROR) {
2174                         retval = -EIO;
2175                         goto error_out;
2176                 }
2177                 if (retval == ITEM_FOUND) {
2178                         reiserfs_warning(th->t_super,
2179                                          "PAP-5760: reiserfs_insert_item: "
2180                                          "key %K already exists in the tree",
2181                                          key);
2182                         retval = -EEXIST;
2183                         goto error_out;
2184                 }
2185         }
2186
2187         /* make balancing after all resources will be collected at a time */
2188         if (retval == CARRY_ON) {
2189                 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2190                 return 0;
2191         }
2192
2193         retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2194       error_out:
2195         /* also releases the path */
2196         unfix_nodes(&s_ins_balance);
2197 #ifdef REISERQUOTA_DEBUG
2198         reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2199                        "reiserquota insert_item(): freeing %u id=%u type=%c",
2200                        quota_bytes, inode->i_uid, head2type(p_s_ih));
2201 #endif
2202         if (inode) {
2203                 DLIMIT_FREE_SPACE(inode, quota_bytes);
2204                 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2205         }
2206         return retval;
2207 }