/* linux-2.6.git: fs/xfs/xfs_da_btree.c (vserver 2.0 rc7 tree) */
1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34
35 #include "xfs_macros.h"
36 #include "xfs_types.h"
37 #include "xfs_inum.h"
38 #include "xfs_log.h"
39 #include "xfs_trans.h"
40 #include "xfs_sb.h"
41 #include "xfs_ag.h"
42 #include "xfs_dir.h"
43 #include "xfs_dir2.h"
44 #include "xfs_dmapi.h"
45 #include "xfs_mount.h"
46 #include "xfs_alloc_btree.h"
47 #include "xfs_bmap_btree.h"
48 #include "xfs_ialloc_btree.h"
49 #include "xfs_alloc.h"
50 #include "xfs_btree.h"
51 #include "xfs_attr_sf.h"
52 #include "xfs_dir_sf.h"
53 #include "xfs_dir2_sf.h"
54 #include "xfs_dinode.h"
55 #include "xfs_inode_item.h"
56 #include "xfs_inode.h"
57 #include "xfs_bmap.h"
58 #include "xfs_da_btree.h"
59 #include "xfs_attr.h"
60 #include "xfs_attr_leaf.h"
61 #include "xfs_dir_leaf.h"
62 #include "xfs_dir2_data.h"
63 #include "xfs_dir2_leaf.h"
64 #include "xfs_dir2_block.h"
65 #include "xfs_dir2_node.h"
66 #include "xfs_error.h"
67 #include "xfs_bit.h"
68
69 /*
70  * xfs_da_btree.c
71  *
72  * Routines to implement directories as Btrees of hashed names.
73  */
74
75 /*========================================================================
76  * Function prototypes for the kernel.
77  *========================================================================*/
78
79 /*
80  * Routines used for growing the Btree.
81  */
82 STATIC int xfs_da_root_split(xfs_da_state_t *state,
83                                             xfs_da_state_blk_t *existing_root,
84                                             xfs_da_state_blk_t *new_child);
85 STATIC int xfs_da_node_split(xfs_da_state_t *state,
86                                             xfs_da_state_blk_t *existing_blk,
87                                             xfs_da_state_blk_t *split_blk,
88                                             xfs_da_state_blk_t *blk_to_add,
89                                             int treelevel,
90                                             int *result);
91 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
92                                          xfs_da_state_blk_t *node_blk_1,
93                                          xfs_da_state_blk_t *node_blk_2);
94 STATIC void xfs_da_node_add(xfs_da_state_t *state,
95                                    xfs_da_state_blk_t *old_node_blk,
96                                    xfs_da_state_blk_t *new_node_blk);
97
98 /*
99  * Routines used for shrinking the Btree.
100  */
101 STATIC int xfs_da_root_join(xfs_da_state_t *state,
102                                            xfs_da_state_blk_t *root_blk);
103 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
104 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
105                                               xfs_da_state_blk_t *drop_blk);
106 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
107                                          xfs_da_state_blk_t *src_node_blk,
108                                          xfs_da_state_blk_t *dst_node_blk);
109
110 /*
111  * Utility routines.
112  */
113 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
114 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
115 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
116
117
118 /*========================================================================
119  * Routines used for growing the Btree.
120  *========================================================================*/
121
122 /*
123  * Create the initial contents of an intermediate node.
124  */
125 int
126 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
127                                  xfs_dabuf_t **bpp, int whichfork)
128 {
129         xfs_da_intnode_t *node;
130         xfs_dabuf_t *bp;
131         int error;
132         xfs_trans_t *tp;
133
134         tp = args->trans;
135         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
136         if (error)
137                 return(error);
138         ASSERT(bp != NULL);
139         node = bp->data;
140         node->hdr.info.forw = 0;
141         node->hdr.info.back = 0;
142         INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
143         node->hdr.info.pad = 0;
144         node->hdr.count = 0;
145         INT_SET(node->hdr.level, ARCH_CONVERT, level);
146
147         xfs_da_log_buf(tp, bp,
148                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
149
150         *bpp = bp;
151         return(0);
152 }
153
/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 *
 * Walks from the leaf at the bottom of state->path back up toward the
 * root.  At each level, if the child below us was split (addblk != NULL)
 * we must insert a new entry here, which may in turn force this node to
 * split.  If the loop reaches the top with addblk still set, the root
 * itself is split and sibling pointers of the relocated old root are
 * patched up.  Returns 0 or an errno; on error the tree may be left
 * inconsistent (see GROT comments).
 */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int max, action, error, i;

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));

	/* addblk non-NULL means "a new block must be linked in above". */
	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
#ifndef __KERNEL__
			return(ENOTTY);
#else
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 * The second split's result is remembered in
			 * state->extrablk so the parent can add both.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
#endif
		case XFS_DIR_LEAF_MAGIC:
			/* Version 1 directory leaf: same double-split dance. */
			ASSERT(XFS_DIR_IS_V1(state->mp));
			error = xfs_dir_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: dir is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				error = xfs_dir_leaf_split(state, oldblk,
							   &state->extrablk);
				if (error)
					return(error);	/* GROT: dir incon. */
				addblk = newblk;
			} else {
				state->extraafter = 1;	/* after newblk */
				error = xfs_dir_leaf_split(state, newblk,
							   &state->extrablk);
				if (error)
					return(error);	/* GROT: dir incon. */
				addblk = newblk;
			}
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			/* Version 2 directory leaf: never double-splits. */
			ASSERT(XFS_DIR_IS_V2(state->mp));
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Interior node: insert the child's new block,
			 * splitting this node too if it is full.  The
			 * child's buffer is released here either way.
			 */
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			xfs_da_buf_done(addblk->bp);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
		/*
		 * If we won't need this block again, it's getting dropped
		 * from the active path by the loop control, so we need
		 * to mark it done now.
		 */
		if (i > 0 || !addblk)
			xfs_da_buf_done(oldblk->bp);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		xfs_da_buf_done(oldblk->bp);
		xfs_da_buf_done(addblk->bp);
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	/*
	 * Fix the forward sibling's back pointer to the relocated block.
	 * The sibling is either addblk or the extra block from a double
	 * split.  (Checking forw raw is fine: zero is zero in any byte
	 * order.)
	 */
	node = oldblk->bp->data;
	if (node->hdr.info.forw) {
		if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	/* Same fixup for the backward sibling's forward pointer. */
	node = oldblk->bp->data;
	if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
		if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	xfs_da_buf_done(oldblk->bp);
	xfs_da_buf_done(addblk->bp);
	addblk->bp = NULL;
	return(0);
}
341
/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in process.
 *
 * On return blk1 refers to the relocated copy of the old root and the
 * new root (at the old root's block number) has two entries pointing at
 * blk1 and blk2.  Returns 0 or an errno.
 */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	oldroot = blk1->bp->data;
	/*
	 * Compute how many bytes of the old root are live: up to the end
	 * of its entry array.  The old root is either an interior node or
	 * (V2 directories only) a leafn block laid out as xfs_dir2_leaf_t.
	 */
	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
		size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
			     (char *)oldroot);
	} else {
		ASSERT(XFS_DIR_IS_V2(mp));
		ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_da_log_buf(tp, bp, 0, size - 1);
	/* blk1 now refers to the relocated copy, not the root position. */
	xfs_da_buf_done(blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 * V2 directory data forks root the da btree at m_dirleafblk;
	 * everything else roots it at block 0.
	 */
	error = xfs_da_node_create(args,
		args->whichfork == XFS_DATA_FORK &&
		XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
		INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->data;
	INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
	INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
	INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
	INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
	INT_SET(node->hdr.count, ARCH_CONVERT, 2);

#ifdef DEBUG
	/* V2 dir leaf blocks must live inside the leaf address range. */
	if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));
	xfs_da_buf_done(bp);

	return(0);
}
428
/*
 * Split the node, rebalance, then add the new entry.
 *
 * Inserts addblk's entry (and, for V1 directories/attrs, the extra
 * block from a double leaf split) into oldblk's interior node.  If the
 * node is full it is split into oldblk/newblk first.  *result is set
 * to 1 when a split happened (caller must link newblk in above), else
 * 0.  Returns 0 or an errno.
 */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	node = oldblk->bp->data;
	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);

	/*
	 * With V2 the extra block is data or freespace.
	 * (So only V1 double splits contribute a second entry here.)
	 */
	useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->data;
	/*
	 * The rebalance may have marked oldblk->index invalid
	 * (> count) to indicate the insertion belongs in newblk.
	 */
	if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}
513
/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 *
 * Also records each block's last hashval for upward propagation and
 * redirects blk1->index into blk2 when the pending insertion now
 * belongs in the second block.
 */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 *
	 * NOTE(review): after this swap node1/node2 no longer correspond
	 * to blk1->bp/blk2->bp, yet the xfs_da_log_buf() calls below still
	 * pair node1 with blk1->bp — verify against upstream history that
	 * the logged ranges are correct in the swapped case.
	 */
	if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
	    ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
	     (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
	      INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	/* Move half the imbalance; count > 0 means node1 -> node2. */
	count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));

	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
		memcpy(btree_d, btree_s, tmp);
		INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
		xfs_da_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_da_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
	blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);

	/*
	 * Adjust the expected index for insertion.
	 * If the insertion point moved into blk2, compute its index there
	 * and deliberately invalidate blk1->index (count + 1 is out of
	 * range) so xfs_da_node_split() picks the new block.
	 */
	if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
		blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
		blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;	/* make it invalid */
	}
}
626
/*
 * Add a new entry to an intermediate node.
 *
 * Inserts newblk's (hashval, blkno) pair BEFORE oldblk->index in
 * oldblk's node, shifting later entries up, and logs both the moved
 * range and the header.  Updates oldblk->hashval to the node's new
 * last hashval for upward propagation.
 */
STATIC void
xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
			       xfs_da_state_blk_t *newblk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_mount_t *mp;

	node = oldblk->bp->data;
	mp = state->mp;
	ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
	ASSERT(newblk->blkno != 0);
	/* V2 dir leaf-level blocks must fall in the leaf address range. */
	if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
		ASSERT(newblk->blkno >= mp->m_dirleafblk &&
		       newblk->blkno < mp->m_dirfreeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 * tmp stays 0 when appending at the end (nothing to shift).
	 */
	tmp = 0;
	btree = &node->btree[ oldblk->index ];
	if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
		tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
		memmove(btree + 1, btree, tmp);
	}
	INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
	INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
	/* Log the inserted entry plus everything that was shifted. */
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
	INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
}
670
671 /*========================================================================
672  * Routines used for shrinking the Btree.
673  *========================================================================*/
674
675 /*
676  * Deallocate an empty leaf node, remove it from its parent,
677  * possibly deallocating that block, etc...
678  */
679 int
680 xfs_da_join(xfs_da_state_t *state)
681 {
682         xfs_da_state_blk_t *drop_blk, *save_blk;
683         int action, error;
684
685         action = 0;
686         drop_blk = &state->path.blk[ state->path.active-1 ];
687         save_blk = &state->altpath.blk[ state->path.active-1 ];
688         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
689         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
690                drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
691
692         /*
693          * Walk back up the tree joining/deallocating as necessary.
694          * When we stop dropping blocks, break out.
695          */
696         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
697                  state->path.active--) {
698                 /*
699                  * See if we can combine the block with a neighbor.
700                  *   (action == 0) => no options, just leave
701                  *   (action == 1) => coalesce, then unlink
702                  *   (action == 2) => block empty, unlink it
703                  */
704                 switch (drop_blk->magic) {
705                 case XFS_ATTR_LEAF_MAGIC:
706 #ifndef __KERNEL__
707                         error = ENOTTY;
708 #else
709                         error = xfs_attr_leaf_toosmall(state, &action);
710 #endif
711                         if (error)
712                                 return(error);
713                         if (action == 0)
714                                 return(0);
715 #ifdef __KERNEL__
716                         xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
717 #endif
718                         break;
719                 case XFS_DIR_LEAF_MAGIC:
720                         ASSERT(XFS_DIR_IS_V1(state->mp));
721                         error = xfs_dir_leaf_toosmall(state, &action);
722                         if (error)
723                                 return(error);
724                         if (action == 0)
725                                 return(0);
726                         xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
727                         break;
728                 case XFS_DIR2_LEAFN_MAGIC:
729                         ASSERT(XFS_DIR_IS_V2(state->mp));
730                         error = xfs_dir2_leafn_toosmall(state, &action);
731                         if (error)
732                                 return error;
733                         if (action == 0)
734                                 return 0;
735                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
736                         break;
737                 case XFS_DA_NODE_MAGIC:
738                         /*
739                          * Remove the offending node, fixup hashvals,
740                          * check for a toosmall neighbor.
741                          */
742                         xfs_da_node_remove(state, drop_blk);
743                         xfs_da_fixhashpath(state, &state->path);
744                         error = xfs_da_node_toosmall(state, &action);
745                         if (error)
746                                 return(error);
747                         if (action == 0)
748                                 return 0;
749                         xfs_da_node_unbalance(state, drop_blk, save_blk);
750                         break;
751                 }
752                 xfs_da_fixhashpath(state, &state->altpath);
753                 error = xfs_da_blk_unlink(state, drop_blk, save_blk);
754                 xfs_da_state_kill_altpath(state);
755                 if (error)
756                         return(error);
757                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
758                                                          drop_blk->bp);
759                 drop_blk->bp = NULL;
760                 if (error)
761                         return(error);
762         }
763         /*
764          * We joined all the way to the top.  If it turns out that
765          * we only have one entry in the root, make the child block
766          * the new root.
767          */
768         xfs_da_node_remove(state, drop_blk);
769         xfs_da_fixhashpath(state, &state->path);
770         error = xfs_da_root_join(state, &state->path.blk[0]);
771         return(error);
772 }
773
774 /*
775  * We have only one entry in the root.  Copy the only remaining child of
776  * the old root to block 0 as the new root node.
777  */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;	/* root node being collapsed */
	/* REFERENCED */
	xfs_da_blkinfo_t *blkinfo;	/* child's header; ASSERT-only use */
	xfs_da_args_t *args;
	xfs_dablk_t child;		/* block number of the only child */
	xfs_dabuf_t *bp;		/* buffer holding the child block */
	int error;

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->data;
	ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	/* the root never participates in a sibling chain */
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	blkinfo = bp->data;
	/*
	 * Sanity-check the child's magic: a level-1 root points at leaf
	 * blocks (dir or attr), deeper roots point at another intermediate
	 * node.  Compiled away in non-DEBUG builds.
	 */
	if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
		       INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
	} else {
		ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
	/* overwrite the root with the child's contents and log the whole block */
	memcpy(root_blk->bp->data, bp->data, state->blocksize);
	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	/* the child block is now redundant; return it to the fork */
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}
828
829 /*
830  * Check a node block and its neighbors to see if the block should be
831  * collapsed into one or the other neighbor.  Always keep the block
832  * with the smaller block number.
833  * If the current block is over 50% full, don't try to join it, return 0.
834  * If the block is empty, fill in the state structure and return 2.
835  * If it can be collapsed, fill in the state structure and return 1.
836  * If nothing can be done, return 0.
837  */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;	/* bottom block of state->path */
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;		/* sibling under consideration */
	xfs_dabuf_t *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->data;
	ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	node = (xfs_da_intnode_t *)info;
	count = INT_GET(node->hdr.count, ARCH_CONVERT);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		/*
		 * Raw on-disk (big-endian) value used only as a truth
		 * value here, so no endian conversion is needed: zero is
		 * zero in any byte order.
		 */
		forward = info->forw;
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		/* retval != 0 means there was no sibling in that direction */
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (INT_GET(info->forw, ARCH_CONVERT)
				< INT_GET(info->back, ARCH_CONVERT));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = INT_GET(info->forw, ARCH_CONVERT);
		else
			blkno = INT_GET(info->back, ARCH_CONVERT);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		/*
		 * Budget check: start from the node capacity, keep 25%
		 * slack, then subtract both this block's entries and the
		 * sibling's.  If the result is still non-negative the two
		 * blocks fit together with room to spare.
		 */
		node = (xfs_da_intnode_t *)info;
		count  = state->node_ents;
		count -= state->node_ents >> 2;
		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
		node = bp->data;
		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
		count -= INT_GET(node->hdr.count, ARCH_CONVERT);
		xfs_da_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	/* neither sibling can absorb us */
	if (i >= 2) {
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		/* sibling has the lower number: shift altpath onto it */
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		/* this block has the lower number: shift path onto sibling */
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}
956
957 /*
958  * Walk back up the tree adjusting hash values as necessary,
959  * when we stop making changes, return.
960  */
void
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
{
	xfs_da_state_blk_t *blk;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dahash_t lasthash=0;	/* hashval to propagate upward */
	int level, count;

	/*
	 * Fetch the last hashval from the bottom block of the path;
	 * the accessor depends on the block type.  An empty block means
	 * there is nothing to propagate.
	 */
	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
#ifdef __KERNEL__
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
#endif
	case XFS_DIR_LEAF_MAGIC:
		ASSERT(XFS_DIR_IS_V1(state->mp));
		lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		ASSERT(XFS_DIR_IS_V2(state->mp));
		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da_node_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	/*
	 * Walk up toward the root, rewriting the entry that points at the
	 * level below.  Stop as soon as a level already stores the right
	 * hashval -- nothing above it can have changed either.
	 */
	for (blk--, level--; level >= 0; blk--, level--) {
		node = blk->bp->data;
		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
		btree = &node->btree[ blk->index ];
		if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
			break;
		blk->hashval = lasthash;
		INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
		xfs_da_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));

		/* the value to push up is this node's own last hashval */
		lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
	}
}
1012
1013 /*
1014  * Remove an entry from an intermediate node.
1015  */
1016 STATIC void
1017 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
1018 {
1019         xfs_da_intnode_t *node;
1020         xfs_da_node_entry_t *btree;
1021         int tmp;
1022
1023         node = drop_blk->bp->data;
1024         ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
1025         ASSERT(drop_blk->index >= 0);
1026
1027         /*
1028          * Copy over the offending entry, or just zero it out.
1029          */
1030         btree = &node->btree[drop_blk->index];
1031         if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1032                 tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
1033                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1034                 memmove(btree, btree + 1, tmp);
1035                 xfs_da_log_buf(state->args->trans, drop_blk->bp,
1036                     XFS_DA_LOGRANGE(node, btree, tmp));
1037                 btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
1038         }
1039         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1040         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1041             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1042         INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
1043         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1044             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1045
1046         /*
1047          * Copy the last hash value from the block to propagate upwards.
1048          */
1049         btree--;
1050         drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
1051 }
1052
1053 /*
1054  * Unbalance the btree elements between two intermediate nodes,
1055  * move all Btree elements from one node into another.
1056  */
STATIC void
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				     xfs_da_state_blk_t *save_blk)
{
	xfs_da_intnode_t *drop_node, *save_node;
	xfs_da_node_entry_t *btree;	/* destination slot in save_node */
	int tmp;			/* byte count for the bulk copy */
	xfs_trans_t *tp;

	drop_node = drop_blk->bp->data;
	save_node = save_blk->bp->data;
	ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	/*
	 * The hash comparison decides whether drop_node's entries land
	 * before (hole opened at the front) or after (appended at the
	 * end of) save_node's entries, keeping the result sorted.
	 */
	if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
	    (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
	     INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
	{
		btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
		tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, &save_node->btree[0], tmp);
		btree = &save_node->btree[0];
		/* log the full merged range since everything moved */
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				(INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
				sizeof(xfs_da_node_entry_t)));
	} else {
		btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
		/* only the appended tail needs logging */
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
				sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(btree, &drop_node->btree[0], tmp);
	INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));

	xfs_da_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
			sizeof(save_node->hdr)));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
}
1112
1113 /*========================================================================
1114  * Routines used for finding things in the Btree.
1115  *========================================================================*/
1116
1117 /*
1118  * Walk down the Btree looking for a particular filename, filling
1119  * in the state structure as we go.
1120  *
1121  * We will set the state structure to point to each of the elements
1122  * in each of the nodes where either the hashval is or should be.
1123  *
1124  * We support duplicate hashval's so for each entry in the current
1125  * node that could contain the desired hashval, descend.  This is a
1126  * pruned depth-first tree search.
1127  */
1128 int                                                     /* error */
1129 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1130 {
1131         xfs_da_state_blk_t *blk;
1132         xfs_da_blkinfo_t *curr;
1133         xfs_da_intnode_t *node;
1134         xfs_da_node_entry_t *btree;
1135         xfs_dablk_t blkno;
1136         int probe, span, max, error, retval;
1137         xfs_dahash_t hashval;
1138         xfs_da_args_t *args;
1139
1140         args = state->args;
1141
1142         /*
1143          * Descend thru the B-tree searching each level for the right
1144          * node to use, until the right hashval is found.
1145          */
1146         if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1147                 blkno = state->mp->m_dirleafblk;
1148         else
1149                 blkno = 0;
1150         for (blk = &state->path.blk[0], state->path.active = 1;
1151                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1152                          blk++, state->path.active++) {
1153                 /*
1154                  * Read the next node down in the tree.
1155                  */
1156                 blk->blkno = blkno;
1157                 error = xfs_da_read_buf(args->trans, args->dp, blkno,
1158                                         -1, &blk->bp, args->whichfork);
1159                 if (error) {
1160                         blk->blkno = 0;
1161                         state->path.active--;
1162                         return(error);
1163                 }
1164                 curr = blk->bp->data;
1165                 ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1166                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1167                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1168
1169                 /*
1170                  * Search an intermediate node for a match.
1171                  */
1172                 blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
1173                 if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1174                         node = blk->bp->data;
1175                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1176
1177                         /*
1178                          * Binary search.  (note: small blocks will skip loop)
1179                          */
1180                         max = INT_GET(node->hdr.count, ARCH_CONVERT);
1181                         probe = span = max / 2;
1182                         hashval = args->hashval;
1183                         for (btree = &node->btree[probe]; span > 4;
1184                                    btree = &node->btree[probe]) {
1185                                 span /= 2;
1186                                 if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
1187                                         probe += span;
1188                                 else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
1189                                         probe -= span;
1190                                 else
1191                                         break;
1192                         }
1193                         ASSERT((probe >= 0) && (probe < max));
1194                         ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
1195
1196                         /*
1197                          * Since we may have duplicate hashval's, find the first
1198                          * matching hashval in the node.
1199                          */
1200                         while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
1201                                 btree--;
1202                                 probe--;
1203                         }
1204                         while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
1205                                 btree++;
1206                                 probe++;
1207                         }
1208
1209                         /*
1210                          * Pick the right block to descend on.
1211                          */
1212                         if (probe == max) {
1213                                 blk->index = max-1;
1214                                 blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
1215                         } else {
1216                                 blk->index = probe;
1217                                 blkno = INT_GET(btree->before, ARCH_CONVERT);
1218                         }
1219                 }
1220 #ifdef __KERNEL__
1221                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
1222                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1223                         break;
1224                 }
1225 #endif
1226                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1227                         blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1228                         break;
1229                 }
1230                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1231                         blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1232                         break;
1233                 }
1234         }
1235
1236         /*
1237          * A leaf block that ends in the hashval that we are interested in
1238          * (final hashval == search hashval) means that the next block may
1239          * contain more entries with the same hashval, shift upward to the
1240          * next leaf and keep searching.
1241          */
1242         for (;;) {
1243                 if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1244                         ASSERT(XFS_DIR_IS_V1(state->mp));
1245                         retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1246                                                                   &blk->index);
1247                 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1248                         ASSERT(XFS_DIR_IS_V2(state->mp));
1249                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1250                                                         &blk->index, state);
1251                 }
1252 #ifdef __KERNEL__
1253                 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1254                         retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1255                         blk->index = args->index;
1256                         args->blkno = blk->blkno;
1257                 }
1258 #endif
1259                 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1260                     (blk->hashval == args->hashval)) {
1261                         error = xfs_da_path_shift(state, &state->path, 1, 1,
1262                                                          &retval);
1263                         if (error)
1264                                 return(error);
1265                         if (retval == 0) {
1266                                 continue;
1267                         }
1268 #ifdef __KERNEL__
1269                         else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1270                                 /* path_shift() gives ENOENT */
1271                                 retval = XFS_ERROR(ENOATTR);
1272                         }
1273 #endif
1274                 }
1275                 break;
1276         }
1277         *result = retval;
1278         return(0);
1279 }
1280
1281 /*========================================================================
1282  * Utility routines.
1283  *========================================================================*/
1284
1285 /*
1286  * Link a new block into a doubly linked list of blocks (of whatever type).
1287  */
int							/* error */
xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
			       xfs_da_state_blk_t *new_blk)
{
	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
	xfs_da_args_t *args;
	int before=0, error;	/* before: insert new block ahead of old? */
	xfs_dabuf_t *bp;	/* buffer for the affected neighbor */

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->data;
	new_info = new_blk->bp->data;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
	ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
	ASSERT(old_blk->magic == new_blk->magic);

	/*
	 * Decide ordering by hash value, using the type-specific
	 * "order" predicate for this kind of block.
	 */
	switch (old_blk->magic) {
#ifdef __KERNEL__
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
#endif
	case XFS_DIR_LEAF_MAGIC:
		ASSERT(XFS_DIR_IS_V1(state->mp));
		before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		ASSERT(XFS_DIR_IS_V2(state->mp));
		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
		/* both fields are on-disk endian, so no conversion round-trip */
		new_info->back = old_info->back; /* INT_: direct copy */
		if (INT_GET(old_info->back, ARCH_CONVERT)) {
			/* patch the old predecessor's forward pointer */
			error = xfs_da_read_buf(args->trans, args->dp,
						INT_GET(old_info->back,
							ARCH_CONVERT), -1, &bp,
						args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
			INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
			xfs_da_buf_done(bp);
		}
		INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		new_info->forw = old_info->forw; /* INT_: direct copy */
		INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
		if (INT_GET(old_info->forw, ARCH_CONVERT)) {
			/* patch the old successor's back pointer */
			error = xfs_da_read_buf(args->trans, args->dp,
						INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
						args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
				    == INT_GET(old_info->magic, ARCH_CONVERT));
			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
				    == old_blk->blkno);
			INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
			xfs_da_buf_done(bp);
		}
		INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
	}

	/* log the headers of both blocks we modified directly */
	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return(0);
}
1384
1385 /*
1386  * Compare two intermediate nodes for "order".
1387  */
1388 STATIC int
1389 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1390 {
1391         xfs_da_intnode_t *node1, *node2;
1392
1393         node1 = node1_bp->data;
1394         node2 = node2_bp->data;
1395         ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
1396                (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
1397         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
1398             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
1399               INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1400              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1401               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1402                 return(1);
1403         }
1404         return(0);
1405 }
1406
1407 /*
1408  * Pick up the last hashvalue from an intermediate node.
1409  */
1410 STATIC uint
1411 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1412 {
1413         xfs_da_intnode_t *node;
1414
1415         node = bp->data;
1416         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1417         if (count)
1418                 *count = INT_GET(node->hdr.count, ARCH_CONVERT);
1419         if (!node->hdr.count)
1420                 return(0);
1421         return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1422 }
1423
1424 /*
1425  * Unlink a block from a doubly linked list of blocks.
1426  */
int							/* error */
xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				 xfs_da_state_blk_t *save_blk)
{
	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
	xfs_da_args_t *args;
	xfs_dabuf_t *bp;
	int error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->data;
	drop_info = drop_blk->bp->data;
	/*
	 * Both blocks must carry the same (node or leaf) magic and must
	 * already be adjacent siblings in the doubly linked chain.
	 */
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
	ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
	       (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
	ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
	       (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
		/* drop_blk precedes save_blk: bypass it on the back link. */
		save_info->back = drop_info->back; /* INT_: direct copy */
		if (INT_GET(drop_info->back, ARCH_CONVERT)) {
			/*
			 * Point the dropped block's left sibling forward
			 * at save_blk, then log the change and release
			 * the sibling's buffer.
			 */
			error = xfs_da_read_buf(args->trans, args->dp,
						INT_GET(drop_info->back,
							ARCH_CONVERT), -1, &bp,
						args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
			ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
			INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
			xfs_da_buf_done(bp);
		}
	} else {
		/* drop_blk follows save_blk: bypass it on the forward link. */
		save_info->forw = drop_info->forw; /* INT_: direct copy */
		if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
			/*
			 * Point the dropped block's right sibling back
			 * at save_blk, then log the change and release
			 * the sibling's buffer.
			 */
			error = xfs_da_read_buf(args->trans, args->dp,
						INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
						args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
				    == INT_GET(save_info->magic, ARCH_CONVERT));
			ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
				    == drop_blk->blkno);
			INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
			xfs_da_buf_done(bp);
		}
	}

	/* Log the surviving block's updated sibling pointers. */
	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return(0);
}
1499
1500 /*
1501  * Move a path "forward" or "!forward" one block at the current level.
1502  *
1503  * This routine will adjust a "path" to point to the next block
1504  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1505  * Btree, including updating pointers to the intermediate nodes between
1506  * the new bottom and the root.
1507  */
int							/* error */
xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
				 int forward, int release, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	xfs_da_intnode_t *node;
	xfs_da_args_t *args;
	xfs_dablk_t blkno=0;
	int level, error;

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active-1) - 1;	/* skip bottom layer in path */
	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
		ASSERT(blk->bp != NULL);
		node = blk->bp->data;
		ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
		if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
			/* Room to move right within this node: take it. */
			blk->index++;
			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
			break;
		} else if (!forward && (blk->index > 0)) {
			/* Room to move left within this node: take it. */
			blk->index--;
			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
			break;
		}
	}
	if (level < 0) {
		/* Walked off the root: no sibling in that direction. */
		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
		ASSERT(args->oknoent);
		return(0);
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Release the old block.
		 * (if it's dirty, trans won't actually let go)
		 */
		if (release)
			xfs_da_brelse(args->trans, blk->bp);

		/*
		 * Read the next child block.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
						     &blk->bp, args->whichfork);
		if (error)
			return(error);
		ASSERT(blk->bp != NULL);
		info = blk->bp->data;
		ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
		       INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
		       INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
		blk->magic = INT_GET(info->magic, ARCH_CONVERT);
		if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
			/*
			 * Interior node: descend through the first entry
			 * (forward) or the last entry (backward).
			 */
			node = (xfs_da_intnode_t *)info;
			blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
			if (forward)
				blk->index = 0;
			else
				blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
			blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
		} else {
			/* Leaf level: record its last hashval by leaf type. */
			ASSERT(level == path->active-1);
			blk->index = 0;
			switch(blk->magic) {
#ifdef __KERNEL__
			case XFS_ATTR_LEAF_MAGIC:
				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
								      NULL);
				break;
#endif
			case XFS_DIR_LEAF_MAGIC:
				ASSERT(XFS_DIR_IS_V1(state->mp));
				blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
								     NULL);
				break;
			case XFS_DIR2_LEAFN_MAGIC:
				ASSERT(XFS_DIR_IS_V2(state->mp));
				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
								       NULL);
				break;
			default:
				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
				       blk->magic ==
				       XFS_DIRX_LEAF_MAGIC(state->mp));
				break;
			}
		}
	}
	*result = 0;
	return(0);
}
1614
1615
1616 /*========================================================================
1617  * Utility routines.
1618  *========================================================================*/
1619
1620 /*
1621  * Implement a simple hash on a character string.
1622  * Rotate the hash value by 7 bits, then XOR each character in.
1623  * This is implemented with some source-level loop unrolling.
1624  */
1625 xfs_dahash_t
1626 xfs_da_hashname(uchar_t *name, int namelen)
1627 {
1628         xfs_dahash_t hash;
1629
1630 #ifdef SLOWVERSION
1631         /*
1632          * This is the old one-byte-at-a-time version.
1633          */
1634         for (hash = 0; namelen > 0; namelen--)
1635                 hash = *name++ ^ rol32(hash, 7);
1636
1637         return(hash);
1638 #else
1639         /*
1640          * Do four characters at a time as long as we can.
1641          */
1642         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1643                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1644                        (name[3] << 0) ^ rol32(hash, 7 * 4);
1645
1646         /*
1647          * Now do the rest of the characters.
1648          */
1649         switch (namelen) {
1650         case 3:
1651                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1652                        rol32(hash, 7 * 3);
1653         case 2:
1654                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1655         case 1:
1656                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1657         case 0:
1658                 return hash;
1659         }
1660         /* NOTREACHED */
1661 #endif
1662         return 0; /* keep gcc happy */
1663 }
1664
1665 /*
1666  * Add a block to the btree ahead of the file.
1667  * Return the new block number to the caller.
1668  */
int
xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
{
	xfs_fileoff_t bno, b;
	xfs_bmbt_irec_t map;
	xfs_bmbt_irec_t *mapp;
	xfs_inode_t *dp;
	int nmap, error, w, count, c, got, i, mapi;
	xfs_fsize_t size;
	xfs_trans_t *tp;
	xfs_mount_t *mp;

	dp = args->dp;
	mp = dp->i_mount;
	w = args->whichfork;
	tp = args->trans;
	/*
	 * For new directories adjust the file offset and block count.
	 */
	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
		/* v2 dirs allocate whole dir blocks in the leaf area. */
		bno = mp->m_dirleafblk;
		count = mp->m_dirblkfsbs;
	} else {
		bno = 0;
		count = 1;
	}
	/*
	 * Find a spot in the file space to put the new block.
	 */
	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
		return error;
	}
	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	ASSERT(args->firstblock != NULL);
	if ((error = xfs_bmapi(tp, dp, bno, count,
			XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
			XFS_BMAPI_CONTIG,
			args->firstblock, args->total, &map, &nmap,
			args->flist))) {
		return error;
	}
	ASSERT(nmap <= 1);
	if (nmap == 1) {
		/* Single contiguous allocation succeeded. */
		mapp = &map;
		mapi = 1;
	}
	/*
	 * If we didn't get it and the block might work if fragmented,
	 * try without the CONTIG flag.  Loop until we get it all.
	 */
	else if (nmap == 0 && count > 1) {
		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
		for (b = bno, mapi = 0; b < bno + count; ) {
			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
			c = (int)(bno + count - b);
			if ((error = xfs_bmapi(tp, dp, b, c,
					XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
					XFS_BMAPI_METADATA,
					args->firstblock, args->total,
					&mapp[mapi], &nmap, args->flist))) {
				kmem_free(mapp, sizeof(*mapp) * count);
				return error;
			}
			/* No more space; bail and let the check below fail. */
			if (nmap < 1)
				break;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		/* Nothing allocated (nmap == 0, single-block request). */
		mapi = 0;
		mapp = NULL;
	}
	/*
	 * Count the blocks we got, make sure it matches the total.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    bno + count) {
		/* Incomplete or misplaced mapping: treat as out of space. */
		if (mapp != &map)
			kmem_free(mapp, sizeof(*mapp) * count);
		return XFS_ERROR(ENOSPC);
	}
	if (mapp != &map)
		kmem_free(mapp, sizeof(*mapp) * count);
	*new_blkno = (xfs_dablk_t)bno;
	/*
	 * For version 1 directories, adjust the file size if it changed.
	 */
	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
		ASSERT(mapi == 1);
		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
			return error;
		size = XFS_FSB_TO_B(mp, bno);
		if (size != dp->i_d.di_size) {
			dp->i_d.di_size = size;
			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
		}
	}
	return 0;
}
1777
1778 /*
1779  * Ick.  We need to always be able to remove a btree block, even
1780  * if there's no space reservation because the filesystem is full.
1781  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1782  * It swaps the target block with the last block in the file.  The
1783  * last block in the file can always be removed since it can't cause
1784  * a bmap btree split to do that.
1785  */
STATIC int
xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
		      xfs_dabuf_t **dead_bufp)
{
	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
	xfs_fileoff_t lastoff;
	xfs_inode_t *ip;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	int error, w, entno, level, dead_level;
	xfs_da_blkinfo_t *dead_info, *sib_info;
	xfs_da_intnode_t *par_node, *dead_node;
	xfs_dir_leafblock_t *dead_leaf;
	xfs_dir2_leaf_t *dead_leaf2;
	xfs_dahash_t dead_hash;

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	ip = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);
	mp = ip->i_mount;
	/*
	 * Find the end of the btree space so we can locate the
	 * last block currently in use.
	 */
	if (XFS_DIR_IS_V2(mp)) {
		lastoff = mp->m_dirfreeblk;
		error = xfs_bmap_last_before(tp, ip, &lastoff, w);
	} else
		error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
	if (error)
		return error;
	if (unlikely(lastoff == 0)) {
		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
		return error;
	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
	dead_info = dead_buf->data;
	/*
	 * Get values from the moved block.
	 */
	if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
		/* v1 dir leaf: level 0, last hash from its entries. */
		ASSERT(XFS_DIR_IS_V1(mp));
		dead_leaf = (xfs_dir_leafblock_t *)dead_info;
		dead_level = 0;
		dead_hash =
			INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
	} else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
		/* v2 dir leaf: level 0, last hash from its ents. */
		ASSERT(XFS_DIR_IS_V2(mp));
		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		dead_level = 0;
		dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
	} else {
		/* Interior node: take its level and last hash. */
		ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
		dead_node = (xfs_da_intnode_t *)dead_info;
		dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
		dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
	}
	sib_buf = par_buf = NULL;
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		if (unlikely(
		    INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
		    INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		if (unlikely(
		       INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
		    || INT_GET(sib_info->magic, ARCH_CONVERT)
				!= INT_GET(dead_info->magic, ARCH_CONVERT))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
	level = -1;	/* -1 means "unknown" until we read the root */
	/*
	 * Walk down the tree looking for the parent of the moved block.
	 */
	for (;;) {
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		if (unlikely(
		    INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
		    (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
		/* Find the first entry whose hashval covers dead_hash. */
		for (entno = 0;
		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
		     INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
		     entno++)
			continue;
		if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
		/* Stop one level above the moved block: this is the parent. */
		if (level == dead_level + 1)
			break;
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
	}
	/*
	 * We're in the right parent block.
	 * Look for the right entry.
	 */
	for (;;) {
		for (;
		     entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
		     INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
		     entno++)
			continue;
		if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
			break;
		/* Not in this node: follow its right sibling and rescan. */
		par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
		if (unlikely(par_blkno == 0)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		if (unlikely(
		    INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
		    INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
	xfs_da_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
				sizeof(par_node->btree[entno].before)));
	xfs_da_buf_done(par_buf);
	xfs_da_buf_done(dead_buf);
	/* Hand the (now unused) last block back for removal. */
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	/* Error path: release whatever buffers are still held. */
	if (par_buf)
		xfs_da_brelse(tp, par_buf);
	if (sib_buf)
		xfs_da_brelse(tp, sib_buf);
	xfs_da_brelse(tp, last_buf);
	return error;
}
1989
1990 /*
1991  * Remove a btree block from a directory or attribute.
1992  */
int
xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
		    xfs_dabuf_t *dead_buf)
{
	xfs_inode_t *dp;
	int done, error, w, count;
	xfs_fileoff_t bno;
	xfs_fsize_t size;
	xfs_trans_t *tp;
	xfs_mount_t *mp;

	dp = args->dp;
	w = args->whichfork;
	tp = args->trans;
	mp = dp->i_mount;
	/* v2 directories free a whole dir block; everything else, one fsb. */
	if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
		count = mp->m_dirblkfsbs;
	else
		count = 1;
	for (;;) {
		/*
		 * Remove extents.  If we get ENOSPC for a dir we have to move
		 * the last block to the place we want to kill.
		 */
		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
				XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
				0, args->firstblock, args->flist,
				&done)) == ENOSPC) {
			if (w != XFS_DATA_FORK)
				goto done;
			/*
			 * Swap the target with the last block in the file,
			 * then retry the unmap on the (new) last block.
			 */
			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
					&dead_buf)))
				goto done;
		} else if (error)
			goto done;
		else
			break;
	}
	ASSERT(done);
	xfs_da_binval(tp, dead_buf);
	/*
	 * Adjust the directory size for version 1.
	 */
	if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
		if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
			return error;
		size = XFS_FSB_TO_B(dp->i_mount, bno);
		if (size != dp->i_d.di_size) {
			dp->i_d.di_size = size;
			xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
		}
	}
	return 0;
done:
	/* Error path: still invalidate the dead buffer before returning. */
	xfs_da_binval(tp, dead_buf);
	return error;
}
2050
2051 /*
2052  * See if the mapping(s) for this btree block are valid, i.e.
2053  * don't contain holes, are logically contiguous, and cover the whole range.
2054  */
2055 STATIC int
2056 xfs_da_map_covers_blocks(
2057         int             nmap,
2058         xfs_bmbt_irec_t *mapp,
2059         xfs_dablk_t     bno,
2060         int             count)
2061 {
2062         int             i;
2063         xfs_fileoff_t   off;
2064
2065         for (i = 0, off = bno; i < nmap; i++) {
2066                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2067                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
2068                         return 0;
2069                 }
2070                 if (off != mapp[i].br_startoff) {
2071                         return 0;
2072                 }
2073                 off += mapp[i].br_blockcount;
2074         }
2075         return off == bno + count;
2076 }
2077
/*
 * Make a dabuf.
 * Used for get_buf, read_buf, read_bufr, and reada_buf; the 'caller'
 * argument selects the behavior:
 *	0 = get buffer(s) without reading (xfs_trans_get_buf),
 *	1 = read buffer(s) and verify the block's magic number,
 *	2 = read buffer(s), no verification (userspace builds only),
 *	3 = issue readahead only; no dabuf is built.
 *
 * On input, *mappedbnop is either a known disk address for 'bno', or
 * -1/-2 meaning "no mapping known" (-2: landing in a hole is not an
 * error).  On output it holds the first mapped disk address.
 * Returns 0 or an errno; when bpp is non-NULL, *bpp is set to the new
 * dabuf, or NULL on failure / when no buffers were acquired.
 */
STATIC int
xfs_da_do_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,		/* inode owning the dir/attr fork */
	xfs_dablk_t	bno,		/* logical dir/attr block number */
	xfs_daddr_t	*mappedbnop,	/* in/out: disk address (see above) */
	xfs_dabuf_t	**bpp,		/* out: dabuf; NULL for readahead */
	int		whichfork,	/* XFS_DATA_FORK or XFS_ATTR_FORK */
	int		caller,		/* operation selector (see above) */
	inst_t		*ra)		/* caller's return address, for debug */
{
	xfs_buf_t	*bp = NULL;	/* buffer for current mapping */
	xfs_buf_t	**bplist;	/* buffer list, multi-mapping case */
	int		error=0;
	int		i;
	xfs_bmbt_irec_t	map;		/* single-extent mapping */
	xfs_bmbt_irec_t	*mapp;		/* &map, or an allocated array */
	xfs_daddr_t	mappedbno;
	xfs_mount_t	*mp;
	int		nbplist=0;	/* buffers collected in bplist */
	int		nfsb;		/* fs blocks per dir/attr block */
	int		nmap;		/* valid entries in mapp */
	xfs_dabuf_t	*rbp;		/* result dabuf */

	mp = dp->i_mount;
	/* v2 directories use multi-fsb blocks; everything else is one fsb */
	if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
		nfsb = mp->m_dirblkfsbs;
	else
		nfsb = 1;
	mappedbno = *mappedbnop;
	/*
	 * Caller doesn't have a mapping.  -2 means don't complain
	 * if we land in a hole.
	 */
	if (mappedbno == -1 || mappedbno == -2) {
		/*
		 * Optimize the one-block case.
		 */
		if (nfsb == 1) {
			xfs_fsblock_t	fsb;

			if ((error =
			    xfs_bmapi_single(trans, dp, whichfork, &fsb,
				    (xfs_fileoff_t)bno))) {
				return error;
			}
			mapp = &map;
			if (fsb == NULLFSBLOCK) {
				/* landed in a hole: no mapping */
				nmap = 0;
			} else {
				map.br_startblock = fsb;
				map.br_startoff = (xfs_fileoff_t)bno;
				map.br_blockcount = 1;
				nmap = 1;
			}
		} else {
			/* multi-fsb block: may map to several extents */
			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
			nmap = nfsb;
			if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
					nfsb,
					XFS_BMAPI_METADATA |
						XFS_BMAPI_AFLAG(whichfork),
					NULL, 0, mapp, &nmap, NULL)))
				goto exit0;
		}
	} else {
		/* caller supplied the disk address: fake up one mapping */
		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
		map.br_startoff = (xfs_fileoff_t)bno;
		map.br_blockcount = nfsb;
		mapp = &map;
		nmap = 1;
	}
	/*
	 * The mapping must cover [bno, bno + nfsb) completely with real
	 * blocks; a hole is tolerated only for mappedbno == -2.
	 */
	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
		if (unlikely(error == EFSCORRUPTED)) {
			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
				int	i;
				cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
					(long long)bno);
				cmn_err(CE_ALERT, "dir: inode %lld\n",
					(long long)dp->i_ino);
				for (i = 0; i < nmap; i++) {
					cmn_err(CE_ALERT,
						"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
						i,
						(long long)mapp[i].br_startoff,
						(long long)mapp[i].br_startblock,
						(long long)mapp[i].br_blockcount,
						mapp[i].br_state);
				}
			}
			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
					 XFS_ERRLEVEL_LOW, mp);
		}
		goto exit0;
	}
	/*
	 * A separate buffer list is only needed when several buffers will
	 * be joined into one dabuf; readahead (caller 3) never needs it.
	 */
	if (caller != 3 && nmap > 1) {
		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
		nbplist = 0;
	} else
		bplist = NULL;
	/*
	 * Turn the mapping(s) into buffer(s).
	 */
	for (i = 0; i < nmap; i++) {
		int	nmapped;

		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
		if (i == 0)
			*mappedbnop = mappedbno;
		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
		switch (caller) {
		case 0:
			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0);
			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
			break;
		case 1:
#ifndef __KERNEL__
		case 2:
#endif
			bp = NULL;
			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0, &bp);
			break;
#ifdef __KERNEL__
		case 3:
			/* readahead: fire and forget, no buffer returned */
			xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
			error = 0;
			bp = NULL;
			break;
#endif
		}
		if (error) {
			if (bp)
				xfs_trans_brelse(trans, bp);
			goto exit1;
		}
		if (!bp)
			continue;
		if (caller == 1) {
			/* tag the buffer's vtype/reference for the cache */
			if (whichfork == XFS_ATTR_FORK) {
				XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
						XFS_ATTR_BTREE_REF);
			} else {
				XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
						XFS_DIR_BTREE_REF);
			}
		}
		if (bplist) {
			bplist[nbplist++] = bp;
		}
	}
	/*
	 * Build a dabuf structure.
	 */
	if (bplist) {
		rbp = xfs_da_buf_make(nbplist, bplist, ra);
	} else if (bp)
		rbp = xfs_da_buf_make(1, &bp, ra);
	else
		rbp = NULL;	/* no buffers acquired (e.g. readahead) */
	/*
	 * For read_buf, check the magic number.
	 */
	if (caller == 1) {
		xfs_dir2_data_t		*data;
		xfs_dir2_free_t		*free;
		xfs_da_blkinfo_t	*info;
		uint			magic, magic1;

		/*
		 * All three views alias the same block data; only the one
		 * matching the on-disk magic number is meaningful.
		 */
		info = rbp->data;
		data = rbp->data;
		free = rbp->data;
		magic = INT_GET(info->magic, ARCH_CONVERT);
		magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
		if (unlikely(
		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
				   (magic != XFS_DIR_LEAF_MAGIC) &&
				   (magic != XFS_ATTR_LEAF_MAGIC) &&
				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
				   (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
				mp, XFS_ERRTAG_DA_READ_BUF,
				XFS_RANDOM_DA_READ_BUF))) {
			xfs_buftrace("DA READ ERROR", rbp->bps[0]);
			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
					     XFS_ERRLEVEL_LOW, mp, info);
			error = XFS_ERROR(EFSCORRUPTED);
			xfs_da_brelse(trans, rbp);
			/* xfs_da_brelse already released the buffers */
			nbplist = 0;
			goto exit1;
		}
	}
	if (bplist) {
		kmem_free(bplist, sizeof(*bplist) * nmap);
	}
	if (mapp != &map) {
		kmem_free(mapp, sizeof(*mapp) * nfsb);
	}
	if (bpp)
		*bpp = rbp;
	return 0;
exit1:
	if (bplist) {
		for (i = 0; i < nbplist; i++)
			xfs_trans_brelse(trans, bplist[i]);
		kmem_free(bplist, sizeof(*bplist) * nmap);
	}
exit0:
	if (mapp != &map)
		kmem_free(mapp, sizeof(*mapp) * nfsb);
	if (bpp)
		*bpp = NULL;
	return error;
}
2300
2301 /*
2302  * Get a buffer for the dir/attr block.
2303  */
2304 int
2305 xfs_da_get_buf(
2306         xfs_trans_t     *trans,
2307         xfs_inode_t     *dp,
2308         xfs_dablk_t     bno,
2309         xfs_daddr_t             mappedbno,
2310         xfs_dabuf_t     **bpp,
2311         int             whichfork)
2312 {
2313         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2314                                                  (inst_t *)__return_address);
2315 }
2316
2317 /*
2318  * Get a buffer for the dir/attr block, fill in the contents.
2319  */
2320 int
2321 xfs_da_read_buf(
2322         xfs_trans_t     *trans,
2323         xfs_inode_t     *dp,
2324         xfs_dablk_t     bno,
2325         xfs_daddr_t             mappedbno,
2326         xfs_dabuf_t     **bpp,
2327         int             whichfork)
2328 {
2329         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2330                 (inst_t *)__return_address);
2331 }
2332
2333 /*
2334  * Readahead the dir/attr block.
2335  */
2336 xfs_daddr_t
2337 xfs_da_reada_buf(
2338         xfs_trans_t     *trans,
2339         xfs_inode_t     *dp,
2340         xfs_dablk_t     bno,
2341         int             whichfork)
2342 {
2343         xfs_daddr_t             rval;
2344
2345         rval = -1;
2346         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2347                         (inst_t *)__return_address))
2348                 return -1;
2349         else
2350                 return rval;
2351 }
2352
2353 /*
2354  * Calculate the number of bits needed to hold i different values.
2355  */
2356 uint
2357 xfs_da_log2_roundup(uint i)
2358 {
2359         uint rval;
2360
2361         for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2362                 if ((1 << rval) >= i)
2363                         break;
2364         }
2365         return(rval);
2366 }
2367
2368 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2369 kmem_zone_t *xfs_dabuf_zone;            /* dabuf zone */
2370
2371 /*
2372  * Allocate a dir-state structure.
2373  * We don't put them on the stack since they're large.
2374  */
2375 xfs_da_state_t *
2376 xfs_da_state_alloc(void)
2377 {
2378         return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
2379 }
2380
2381 /*
2382  * Kill the altpath contents of a da-state structure.
2383  */
2384 void
2385 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2386 {
2387         int     i;
2388
2389         for (i = 0; i < state->altpath.active; i++) {
2390                 if (state->altpath.blk[i].bp) {
2391                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2392                                 xfs_da_buf_done(state->altpath.blk[i].bp);
2393                         state->altpath.blk[i].bp = NULL;
2394                 }
2395         }
2396         state->altpath.active = 0;
2397 }
2398
2399 /*
2400  * Free a da-state structure.
2401  */
2402 void
2403 xfs_da_state_free(xfs_da_state_t *state)
2404 {
2405         int     i;
2406
2407         xfs_da_state_kill_altpath(state);
2408         for (i = 0; i < state->path.active; i++) {
2409                 if (state->path.blk[i].bp)
2410                         xfs_da_buf_done(state->path.blk[i].bp);
2411         }
2412         if (state->extravalid && state->extrablk.bp)
2413                 xfs_da_buf_done(state->extrablk.bp);
2414 #ifdef DEBUG
2415         memset((char *)state, 0, sizeof(*state));
2416 #endif /* DEBUG */
2417         kmem_zone_free(xfs_da_state_zone, state);
2418 }
2419
2420 #ifdef XFS_DABUF_DEBUG
2421 xfs_dabuf_t     *xfs_dabuf_global_list;
2422 lock_t          xfs_dabuf_global_lock;
2423 #endif
2424
/*
 * Create a dabuf wrapping nbuf underlying buffer(s).
 * Used for get_buf, read_buf, read_bufr, and reada_buf.
 *
 * For a single buffer the dabuf's data pointer aliases the buffer
 * memory directly; for multiple buffers a flat contiguous copy of all
 * buffer contents is allocated (it is written back to the buffers by
 * xfs_da_buf_clean once the dabuf has been dirtied).
 */
/* ARGSUSED */
STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
{
	xfs_buf_t	*bp;
	xfs_dabuf_t	*dabuf;
	int		i;
	int		off;

	/*
	 * Single-buffer dabufs come from a dedicated zone; multi-buffer
	 * ones are variable-sized and come from the general heap.
	 */
	if (nbuf == 1)
		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
	else
		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
	dabuf->dirty = 0;
#ifdef XFS_DABUF_DEBUG
	dabuf->ra = ra;		/* creator's return address, for tracing */
	dabuf->target = XFS_BUF_TARGET(bps[0]);
	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
#endif
	if (nbuf == 1) {
		dabuf->nbuf = 1;
		bp = bps[0];
		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
		dabuf->data = XFS_BUF_PTR(bp);	/* alias, no copy made */
		dabuf->bps[0] = bp;
	} else {
		dabuf->nbuf = nbuf;
		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
			dabuf->bps[i] = bp = bps[i];
			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
		}
		/* join the separate buffers into one flat data copy */
		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
		/*
		 * NOTE: 'off' is advanced by the size of the buffer handled
		 * in the iteration just completed (bp is set in the body),
		 * so the increment expression is safe despite reading bp.
		 */
		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
			bp = bps[i];
			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
				XFS_BUF_COUNT(bp));
		}
	}
#ifdef XFS_DABUF_DEBUG
	{
		SPLDECL(s);
		xfs_dabuf_t	*p;

		/*
		 * Link into the global debug list, asserting that no other
		 * live dabuf covers the same disk address on this target.
		 */
		s = mutex_spinlock(&xfs_dabuf_global_lock);
		for (p = xfs_dabuf_global_list; p; p = p->next) {
			ASSERT(p->blkno != dabuf->blkno ||
			       p->target != dabuf->target);
		}
		dabuf->prev = NULL;
		if (xfs_dabuf_global_list)
			xfs_dabuf_global_list->prev = dabuf;
		dabuf->next = xfs_dabuf_global_list;
		xfs_dabuf_global_list = dabuf;
		mutex_spinunlock(&xfs_dabuf_global_lock, s);
	}
#endif
	return dabuf;
}
2486
2487 /*
2488  * Un-dirty a dabuf.
2489  */
2490 STATIC void
2491 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2492 {
2493         xfs_buf_t       *bp;
2494         int             i;
2495         int             off;
2496
2497         if (dabuf->dirty) {
2498                 ASSERT(dabuf->nbuf > 1);
2499                 dabuf->dirty = 0;
2500                 for (i = off = 0; i < dabuf->nbuf;
2501                                 i++, off += XFS_BUF_COUNT(bp)) {
2502                         bp = dabuf->bps[i];
2503                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2504                                 XFS_BUF_COUNT(bp));
2505                 }
2506         }
2507 }
2508
2509 /*
2510  * Release a dabuf.
2511  */
2512 void
2513 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2514 {
2515         ASSERT(dabuf);
2516         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2517         if (dabuf->dirty)
2518                 xfs_da_buf_clean(dabuf);
2519         if (dabuf->nbuf > 1)
2520                 kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
2521 #ifdef XFS_DABUF_DEBUG
2522         {
2523                 SPLDECL(s);
2524
2525                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2526                 if (dabuf->prev)
2527                         dabuf->prev->next = dabuf->next;
2528                 else
2529                         xfs_dabuf_global_list = dabuf->next;
2530                 if (dabuf->next)
2531                         dabuf->next->prev = dabuf->prev;
2532                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2533         }
2534         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2535 #endif
2536         if (dabuf->nbuf == 1)
2537                 kmem_zone_free(xfs_dabuf_zone, dabuf);
2538         else
2539                 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
2540 }
2541
/*
 * Log a byte range [first, last] of a dabuf's data to a transaction.
 * For a single-buffer dabuf this passes straight through to
 * xfs_trans_log_buf.  Otherwise the dabuf is marked dirty (so its
 * flat data copy is written back on release) and the range is clipped
 * against each underlying buffer's slice of the flat data.
 */
void
xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
{
	xfs_buf_t	*bp;
	uint		f;	/* range start clipped to current buffer */
	int		i;
	uint		l;	/* range end clipped to current buffer */
	int		off;	/* byte offset of bp within dabuf->data */

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if (dabuf->nbuf == 1) {
		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
		return;
	}
	dabuf->dirty = 1;
	ASSERT(first <= last);
	/* 'off' advances by the size of the buffer just processed */
	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
		bp = dabuf->bps[i];
		f = off;
		l = f + XFS_BUF_COUNT(bp) - 1;
		if (f < first)
			f = first;
		if (l > last)
			l = last;
		if (f <= l)
			xfs_trans_log_buf(tp, bp, f - off, l - off);
		/*
		 * B_DONE is set by xfs_trans_log_buf.
		 * If we don't set it on a new buffer (get, not read)
		 * then if we don't put anything in the buffer it won't
		 * be set, and at commit it is released into the cache,
		 * and then a read will fail.
		 */
		else if (!(XFS_BUF_ISDONE(bp)))
		  XFS_BUF_DONE(bp);
	}
	/* the range must not extend past the end of the dabuf */
	ASSERT(last < off);
}
2584
2585 /*
2586  * Release dabuf from a transaction.
2587  * Have to free up the dabuf before the buffers are released,
2588  * since the synchronization on the dabuf is really the lock on the buffer.
2589  */
2590 void
2591 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2592 {
2593         xfs_buf_t       *bp;
2594         xfs_buf_t       **bplist;
2595         int             i;
2596         int             nbuf;
2597
2598         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2599         if ((nbuf = dabuf->nbuf) == 1) {
2600                 bplist = &bp;
2601                 bp = dabuf->bps[0];
2602         } else {
2603                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2604                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2605         }
2606         xfs_da_buf_done(dabuf);
2607         for (i = 0; i < nbuf; i++)
2608                 xfs_trans_brelse(tp, bplist[i]);
2609         if (bplist != &bp)
2610                 kmem_free(bplist, nbuf * sizeof(*bplist));
2611 }
2612
2613 /*
2614  * Invalidate dabuf from a transaction.
2615  */
2616 void
2617 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2618 {
2619         xfs_buf_t       *bp;
2620         xfs_buf_t       **bplist;
2621         int             i;
2622         int             nbuf;
2623
2624         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2625         if ((nbuf = dabuf->nbuf) == 1) {
2626                 bplist = &bp;
2627                 bp = dabuf->bps[0];
2628         } else {
2629                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2630                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2631         }
2632         xfs_da_buf_done(dabuf);
2633         for (i = 0; i < nbuf; i++)
2634                 xfs_trans_binval(tp, bplist[i]);
2635         if (bplist != &bp)
2636                 kmem_free(bplist, nbuf * sizeof(*bplist));
2637 }
2638
2639 /*
2640  * Get the first daddr from a dabuf.
2641  */
2642 xfs_daddr_t
2643 xfs_da_blkno(xfs_dabuf_t *dabuf)
2644 {
2645         ASSERT(dabuf->nbuf);
2646         ASSERT(dabuf->data);
2647         return XFS_BUF_ADDR(dabuf->bps[0]);
2648 }