473671fa5c13e1cff88cd4ad075ceecbf1a70ec6
[linux-2.6.git] / fs / xfs / xfs_da_btree.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_da_btree.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir_sf.h"
36 #include "xfs_dir2_sf.h"
37 #include "xfs_attr_sf.h"
38 #include "xfs_dinode.h"
39 #include "xfs_inode.h"
40 #include "xfs_inode_item.h"
41 #include "xfs_alloc.h"
42 #include "xfs_btree.h"
43 #include "xfs_bmap.h"
44 #include "xfs_attr.h"
45 #include "xfs_attr_leaf.h"
46 #include "xfs_dir_leaf.h"
47 #include "xfs_dir2_data.h"
48 #include "xfs_dir2_leaf.h"
49 #include "xfs_dir2_block.h"
50 #include "xfs_dir2_node.h"
51 #include "xfs_error.h"
52
53 /*
54  * xfs_da_btree.c
55  *
56  * Routines to implement directories as Btrees of hashed names.
57  */
58
59 /*========================================================================
60  * Function prototypes for the kernel.
61  *========================================================================*/
62
63 /*
64  * Routines used for growing the Btree.
65  */
66 STATIC int xfs_da_root_split(xfs_da_state_t *state,
67                                             xfs_da_state_blk_t *existing_root,
68                                             xfs_da_state_blk_t *new_child);
69 STATIC int xfs_da_node_split(xfs_da_state_t *state,
70                                             xfs_da_state_blk_t *existing_blk,
71                                             xfs_da_state_blk_t *split_blk,
72                                             xfs_da_state_blk_t *blk_to_add,
73                                             int treelevel,
74                                             int *result);
75 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
76                                          xfs_da_state_blk_t *node_blk_1,
77                                          xfs_da_state_blk_t *node_blk_2);
78 STATIC void xfs_da_node_add(xfs_da_state_t *state,
79                                    xfs_da_state_blk_t *old_node_blk,
80                                    xfs_da_state_blk_t *new_node_blk);
81
82 /*
83  * Routines used for shrinking the Btree.
84  */
85 STATIC int xfs_da_root_join(xfs_da_state_t *state,
86                                            xfs_da_state_blk_t *root_blk);
87 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
88 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
89                                               xfs_da_state_blk_t *drop_blk);
90 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
91                                          xfs_da_state_blk_t *src_node_blk,
92                                          xfs_da_state_blk_t *dst_node_blk);
93
94 /*
95  * Utility routines.
96  */
97 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
98 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
99 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
100 STATIC int      xfs_da_blk_unlink(xfs_da_state_t *state,
101                                   xfs_da_state_blk_t *drop_blk,
102                                   xfs_da_state_blk_t *save_blk);
103 STATIC void     xfs_da_state_kill_altpath(xfs_da_state_t *state);
104
105 /*========================================================================
106  * Routines used for growing the Btree.
107  *========================================================================*/
108
109 /*
110  * Create the initial contents of an intermediate node.
111  */
112 int
113 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
114                                  xfs_dabuf_t **bpp, int whichfork)
115 {
116         xfs_da_intnode_t *node;
117         xfs_dabuf_t *bp;
118         int error;
119         xfs_trans_t *tp;
120
121         tp = args->trans;
122         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
123         if (error)
124                 return(error);
125         ASSERT(bp != NULL);
126         node = bp->data;
127         node->hdr.info.forw = 0;
128         node->hdr.info.back = 0;
129         INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
130         node->hdr.info.pad = 0;
131         node->hdr.count = 0;
132         INT_SET(node->hdr.level, ARCH_CONVERT, level);
133
134         xfs_da_log_buf(tp, bp,
135                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
136
137         *bpp = bp;
138         return(0);
139 }
140
141 /*
142  * Split a leaf node, rebalance, then possibly split
143  * intermediate nodes, rebalance, etc.
144  */
145 int                                                     /* error */
146 xfs_da_split(xfs_da_state_t *state)
147 {
148         xfs_da_state_blk_t *oldblk, *newblk, *addblk;
149         xfs_da_intnode_t *node;
150         xfs_dabuf_t *bp;
151         int max, action, error, i;
152
153         /*
154          * Walk back up the tree splitting/inserting/adjusting as necessary.
155          * If we need to insert and there isn't room, split the node, then
156          * decide which fragment to insert the new block from below into.
157          * Note that we may split the root this way, but we need more fixup.
158          */
159         max = state->path.active - 1;
160         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
161         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
162                state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
163
164         addblk = &state->path.blk[max];         /* initial dummy value */
165         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
166                 oldblk = &state->path.blk[i];
167                 newblk = &state->altpath.blk[i];
168
169                 /*
170                  * If a leaf node then
171                  *     Allocate a new leaf node, then rebalance across them.
172                  * else if an intermediate node then
173                  *     We split on the last layer, must we split the node?
174                  */
175                 switch (oldblk->magic) {
176                 case XFS_ATTR_LEAF_MAGIC:
177                         error = xfs_attr_leaf_split(state, oldblk, newblk);
178                         if ((error != 0) && (error != ENOSPC)) {
179                                 return(error);  /* GROT: attr is inconsistent */
180                         }
181                         if (!error) {
182                                 addblk = newblk;
183                                 break;
184                         }
185                         /*
186                          * Entry wouldn't fit, split the leaf again.
187                          */
188                         state->extravalid = 1;
189                         if (state->inleaf) {
190                                 state->extraafter = 0;  /* before newblk */
191                                 error = xfs_attr_leaf_split(state, oldblk,
192                                                             &state->extrablk);
193                         } else {
194                                 state->extraafter = 1;  /* after newblk */
195                                 error = xfs_attr_leaf_split(state, newblk,
196                                                             &state->extrablk);
197                         }
198                         if (error)
199                                 return(error);  /* GROT: attr inconsistent */
200                         addblk = newblk;
201                         break;
202                 case XFS_DIR_LEAF_MAGIC:
203                         ASSERT(XFS_DIR_IS_V1(state->mp));
204                         error = xfs_dir_leaf_split(state, oldblk, newblk);
205                         if ((error != 0) && (error != ENOSPC)) {
206                                 return(error);  /* GROT: dir is inconsistent */
207                         }
208                         if (!error) {
209                                 addblk = newblk;
210                                 break;
211                         }
212                         /*
213                          * Entry wouldn't fit, split the leaf again.
214                          */
215                         state->extravalid = 1;
216                         if (state->inleaf) {
217                                 state->extraafter = 0;  /* before newblk */
218                                 error = xfs_dir_leaf_split(state, oldblk,
219                                                            &state->extrablk);
220                                 if (error)
221                                         return(error);  /* GROT: dir incon. */
222                                 addblk = newblk;
223                         } else {
224                                 state->extraafter = 1;  /* after newblk */
225                                 error = xfs_dir_leaf_split(state, newblk,
226                                                            &state->extrablk);
227                                 if (error)
228                                         return(error);  /* GROT: dir incon. */
229                                 addblk = newblk;
230                         }
231                         break;
232                 case XFS_DIR2_LEAFN_MAGIC:
233                         ASSERT(XFS_DIR_IS_V2(state->mp));
234                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
235                         if (error)
236                                 return error;
237                         addblk = newblk;
238                         break;
239                 case XFS_DA_NODE_MAGIC:
240                         error = xfs_da_node_split(state, oldblk, newblk, addblk,
241                                                          max - i, &action);
242                         xfs_da_buf_done(addblk->bp);
243                         addblk->bp = NULL;
244                         if (error)
245                                 return(error);  /* GROT: dir is inconsistent */
246                         /*
247                          * Record the newly split block for the next time thru?
248                          */
249                         if (action)
250                                 addblk = newblk;
251                         else
252                                 addblk = NULL;
253                         break;
254                 }
255
256                 /*
257                  * Update the btree to show the new hashval for this child.
258                  */
259                 xfs_da_fixhashpath(state, &state->path);
260                 /*
261                  * If we won't need this block again, it's getting dropped
262                  * from the active path by the loop control, so we need
263                  * to mark it done now.
264                  */
265                 if (i > 0 || !addblk)
266                         xfs_da_buf_done(oldblk->bp);
267         }
268         if (!addblk)
269                 return(0);
270
271         /*
272          * Split the root node.
273          */
274         ASSERT(state->path.active == 0);
275         oldblk = &state->path.blk[0];
276         error = xfs_da_root_split(state, oldblk, addblk);
277         if (error) {
278                 xfs_da_buf_done(oldblk->bp);
279                 xfs_da_buf_done(addblk->bp);
280                 addblk->bp = NULL;
281                 return(error);  /* GROT: dir is inconsistent */
282         }
283
284         /*
285          * Update pointers to the node which used to be block 0 and
286          * just got bumped because of the addition of a new root node.
287          * There might be three blocks involved if a double split occurred,
288          * and the original block 0 could be at any position in the list.
289          */
290
291         node = oldblk->bp->data;
292         if (node->hdr.info.forw) {
293                 if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
294                         bp = addblk->bp;
295                 } else {
296                         ASSERT(state->extravalid);
297                         bp = state->extrablk.bp;
298                 }
299                 node = bp->data;
300                 INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
301                 xfs_da_log_buf(state->args->trans, bp,
302                     XFS_DA_LOGRANGE(node, &node->hdr.info,
303                     sizeof(node->hdr.info)));
304         }
305         node = oldblk->bp->data;
306         if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
307                 if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
308                         bp = addblk->bp;
309                 } else {
310                         ASSERT(state->extravalid);
311                         bp = state->extrablk.bp;
312                 }
313                 node = bp->data;
314                 INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
315                 xfs_da_log_buf(state->args->trans, bp,
316                     XFS_DA_LOGRANGE(node, &node->hdr.info,
317                     sizeof(node->hdr.info)));
318         }
319         xfs_da_buf_done(oldblk->bp);
320         xfs_da_buf_done(addblk->bp);
321         addblk->bp = NULL;
322         return(0);
323 }
324
325 /*
326  * Split the root.  We have to create a new root and point to the two
327  * parts (the split old root) that we just created.  Copy block zero to
328  * the EOF, extending the inode in process.
329  */
330 STATIC int                                              /* error */
331 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
332                                  xfs_da_state_blk_t *blk2)
333 {
334         xfs_da_intnode_t *node, *oldroot;
335         xfs_da_args_t *args;
336         xfs_dablk_t blkno;
337         xfs_dabuf_t *bp;
338         int error, size;
339         xfs_inode_t *dp;
340         xfs_trans_t *tp;
341         xfs_mount_t *mp;
342         xfs_dir2_leaf_t *leaf;
343
344         /*
345          * Copy the existing (incorrect) block from the root node position
346          * to a free space somewhere.
347          */
348         args = state->args;
349         ASSERT(args != NULL);
350         error = xfs_da_grow_inode(args, &blkno);
351         if (error)
352                 return(error);
353         dp = args->dp;
354         tp = args->trans;
355         mp = state->mp;
356         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
357         if (error)
358                 return(error);
359         ASSERT(bp != NULL);
360         node = bp->data;
361         oldroot = blk1->bp->data;
362         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
363                 size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
364                              (char *)oldroot);
365         } else {
366                 ASSERT(XFS_DIR_IS_V2(mp));
367                 ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
368                 leaf = (xfs_dir2_leaf_t *)oldroot;
369                 size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
370                              (char *)leaf);
371         }
372         memcpy(node, oldroot, size);
373         xfs_da_log_buf(tp, bp, 0, size - 1);
374         xfs_da_buf_done(blk1->bp);
375         blk1->bp = bp;
376         blk1->blkno = blkno;
377
378         /*
379          * Set up the new root node.
380          */
381         error = xfs_da_node_create(args,
382                 args->whichfork == XFS_DATA_FORK &&
383                 XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
384                 INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
385         if (error)
386                 return(error);
387         node = bp->data;
388         INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
389         INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
390         INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
391         INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
392         INT_SET(node->hdr.count, ARCH_CONVERT, 2);
393
394 #ifdef DEBUG
395         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
396                 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
397                        blk1->blkno < mp->m_dirfreeblk);
398                 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
399                        blk2->blkno < mp->m_dirfreeblk);
400         }
401 #endif
402
403         /* Header is already logged by xfs_da_node_create */
404         xfs_da_log_buf(tp, bp,
405                 XFS_DA_LOGRANGE(node, node->btree,
406                         sizeof(xfs_da_node_entry_t) * 2));
407         xfs_da_buf_done(bp);
408
409         return(0);
410 }
411
412 /*
413  * Split the node, rebalance, then add the new entry.
414  */
415 STATIC int                                              /* error */
416 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
417                                  xfs_da_state_blk_t *newblk,
418                                  xfs_da_state_blk_t *addblk,
419                                  int treelevel, int *result)
420 {
421         xfs_da_intnode_t *node;
422         xfs_dablk_t blkno;
423         int newcount, error;
424         int useextra;
425
426         node = oldblk->bp->data;
427         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
428
429         /*
430          * With V2 the extra block is data or freespace.
431          */
432         useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) ||
433                         state->args->whichfork == XFS_ATTR_FORK);
434         newcount = 1 + useextra;
435         /*
436          * Do we have to split the node?
437          */
438         if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
439                 /*
440                  * Allocate a new node, add to the doubly linked chain of
441                  * nodes, then move some of our excess entries into it.
442                  */
443                 error = xfs_da_grow_inode(state->args, &blkno);
444                 if (error)
445                         return(error);  /* GROT: dir is inconsistent */
446
447                 error = xfs_da_node_create(state->args, blkno, treelevel,
448                                            &newblk->bp, state->args->whichfork);
449                 if (error)
450                         return(error);  /* GROT: dir is inconsistent */
451                 newblk->blkno = blkno;
452                 newblk->magic = XFS_DA_NODE_MAGIC;
453                 xfs_da_node_rebalance(state, oldblk, newblk);
454                 error = xfs_da_blk_link(state, oldblk, newblk);
455                 if (error)
456                         return(error);
457                 *result = 1;
458         } else {
459                 *result = 0;
460         }
461
462         /*
463          * Insert the new entry(s) into the correct block
464          * (updating last hashval in the process).
465          *
466          * xfs_da_node_add() inserts BEFORE the given index,
467          * and as a result of using node_lookup_int() we always
468          * point to a valid entry (not after one), but a split
469          * operation always results in a new block whose hashvals
470          * FOLLOW the current block.
471          *
472          * If we had double-split op below us, then add the extra block too.
473          */
474         node = oldblk->bp->data;
475         if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
476                 oldblk->index++;
477                 xfs_da_node_add(state, oldblk, addblk);
478                 if (useextra) {
479                         if (state->extraafter)
480                                 oldblk->index++;
481                         xfs_da_node_add(state, oldblk, &state->extrablk);
482                         state->extravalid = 0;
483                 }
484         } else {
485                 newblk->index++;
486                 xfs_da_node_add(state, newblk, addblk);
487                 if (useextra) {
488                         if (state->extraafter)
489                                 newblk->index++;
490                         xfs_da_node_add(state, newblk, &state->extrablk);
491                         state->extravalid = 0;
492                 }
493         }
494
495         return(0);
496 }
497
498 /*
499  * Balance the btree elements between two intermediate nodes,
500  * usually one full and one empty.
501  *
502  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
503  */
504 STATIC void
505 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
506                                      xfs_da_state_blk_t *blk2)
507 {
508         xfs_da_intnode_t *node1, *node2, *tmpnode;
509         xfs_da_node_entry_t *btree_s, *btree_d;
510         int count, tmp;
511         xfs_trans_t *tp;
512
513         node1 = blk1->bp->data;
514         node2 = blk2->bp->data;
515         /*
516          * Figure out how many entries need to move, and in which direction.
517          * Swap the nodes around if that makes it simpler.
518          */
519         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
520             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
521              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
522               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
523                 tmpnode = node1;
524                 node1 = node2;
525                 node2 = tmpnode;
526         }
527         ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
528         ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
529         count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
530         if (count == 0)
531                 return;
532         tp = state->args->trans;
533         /*
534          * Two cases: high-to-low and low-to-high.
535          */
536         if (count > 0) {
537                 /*
538                  * Move elements in node2 up to make a hole.
539                  */
540                 if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
541                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
542                         btree_s = &node2->btree[0];
543                         btree_d = &node2->btree[count];
544                         memmove(btree_d, btree_s, tmp);
545                 }
546
547                 /*
548                  * Move the req'd B-tree elements from high in node1 to
549                  * low in node2.
550                  */
551                 INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
552                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
553                 btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
554                 btree_d = &node2->btree[0];
555                 memcpy(btree_d, btree_s, tmp);
556                 INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
557
558         } else {
559                 /*
560                  * Move the req'd B-tree elements from low in node2 to
561                  * high in node1.
562                  */
563                 count = -count;
564                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
565                 btree_s = &node2->btree[0];
566                 btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
567                 memcpy(btree_d, btree_s, tmp);
568                 INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
569                 xfs_da_log_buf(tp, blk1->bp,
570                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
571
572                 /*
573                  * Move elements in node2 down to fill the hole.
574                  */
575                 tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
576                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
577                 btree_s = &node2->btree[count];
578                 btree_d = &node2->btree[0];
579                 memmove(btree_d, btree_s, tmp);
580                 INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
581         }
582
583         /*
584          * Log header of node 1 and all current bits of node 2.
585          */
586         xfs_da_log_buf(tp, blk1->bp,
587                 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
588         xfs_da_log_buf(tp, blk2->bp,
589                 XFS_DA_LOGRANGE(node2, &node2->hdr,
590                         sizeof(node2->hdr) +
591                         sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
592
593         /*
594          * Record the last hashval from each block for upward propagation.
595          * (note: don't use the swapped node pointers)
596          */
597         node1 = blk1->bp->data;
598         node2 = blk2->bp->data;
599         blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
600         blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
601
602         /*
603          * Adjust the expected index for insertion.
604          */
605         if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
606                 blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
607                 blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;      /* make it invalid */
608         }
609 }
610
611 /*
612  * Add a new entry to an intermediate node.
613  */
614 STATIC void
615 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
616                                xfs_da_state_blk_t *newblk)
617 {
618         xfs_da_intnode_t *node;
619         xfs_da_node_entry_t *btree;
620         int tmp;
621         xfs_mount_t *mp;
622
623         node = oldblk->bp->data;
624         mp = state->mp;
625         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
626         ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
627         ASSERT(newblk->blkno != 0);
628         if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
629                 ASSERT(newblk->blkno >= mp->m_dirleafblk &&
630                        newblk->blkno < mp->m_dirfreeblk);
631
632         /*
633          * We may need to make some room before we insert the new node.
634          */
635         tmp = 0;
636         btree = &node->btree[ oldblk->index ];
637         if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
638                 tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
639                 memmove(btree + 1, btree, tmp);
640         }
641         INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
642         INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
643         xfs_da_log_buf(state->args->trans, oldblk->bp,
644                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
645         INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
646         xfs_da_log_buf(state->args->trans, oldblk->bp,
647                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
648
649         /*
650          * Copy the last hash value from the oldblk to propagate upwards.
651          */
652         oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
653 }
654
655 /*========================================================================
656  * Routines used for shrinking the Btree.
657  *========================================================================*/
658
659 /*
660  * Deallocate an empty leaf node, remove it from its parent,
661  * possibly deallocating that block, etc...
662  */
663 int
664 xfs_da_join(xfs_da_state_t *state)
665 {
666         xfs_da_state_blk_t *drop_blk, *save_blk;
667         int action, error;
668
669         action = 0;
670         drop_blk = &state->path.blk[ state->path.active-1 ];
671         save_blk = &state->altpath.blk[ state->path.active-1 ];
672         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
673         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
674                drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
675
676         /*
677          * Walk back up the tree joining/deallocating as necessary.
678          * When we stop dropping blocks, break out.
679          */
680         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
681                  state->path.active--) {
682                 /*
683                  * See if we can combine the block with a neighbor.
684                  *   (action == 0) => no options, just leave
685                  *   (action == 1) => coalesce, then unlink
686                  *   (action == 2) => block empty, unlink it
687                  */
688                 switch (drop_blk->magic) {
689                 case XFS_ATTR_LEAF_MAGIC:
690                         error = xfs_attr_leaf_toosmall(state, &action);
691                         if (error)
692                                 return(error);
693                         if (action == 0)
694                                 return(0);
695                         xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
696                         break;
697                 case XFS_DIR_LEAF_MAGIC:
698                         ASSERT(XFS_DIR_IS_V1(state->mp));
699                         error = xfs_dir_leaf_toosmall(state, &action);
700                         if (error)
701                                 return(error);
702                         if (action == 0)
703                                 return(0);
704                         xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
705                         break;
706                 case XFS_DIR2_LEAFN_MAGIC:
707                         ASSERT(XFS_DIR_IS_V2(state->mp));
708                         error = xfs_dir2_leafn_toosmall(state, &action);
709                         if (error)
710                                 return error;
711                         if (action == 0)
712                                 return 0;
713                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
714                         break;
715                 case XFS_DA_NODE_MAGIC:
716                         /*
717                          * Remove the offending node, fixup hashvals,
718                          * check for a toosmall neighbor.
719                          */
720                         xfs_da_node_remove(state, drop_blk);
721                         xfs_da_fixhashpath(state, &state->path);
722                         error = xfs_da_node_toosmall(state, &action);
723                         if (error)
724                                 return(error);
725                         if (action == 0)
726                                 return 0;
727                         xfs_da_node_unbalance(state, drop_blk, save_blk);
728                         break;
729                 }
730                 xfs_da_fixhashpath(state, &state->altpath);
731                 error = xfs_da_blk_unlink(state, drop_blk, save_blk);
732                 xfs_da_state_kill_altpath(state);
733                 if (error)
734                         return(error);
735                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
736                                                          drop_blk->bp);
737                 drop_blk->bp = NULL;
738                 if (error)
739                         return(error);
740         }
741         /*
742          * We joined all the way to the top.  If it turns out that
743          * we only have one entry in the root, make the child block
744          * the new root.
745          */
746         xfs_da_node_remove(state, drop_blk);
747         xfs_da_fixhashpath(state, &state->path);
748         error = xfs_da_root_join(state, &state->path.blk[0]);
749         return(error);
750 }
751
752 /*
753  * We have only one entry in the root.  Copy the only remaining child of
754  * the old root to block 0 as the new root node.
755  */
756 STATIC int
757 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
758 {
759         xfs_da_intnode_t *oldroot;
760         /* REFERENCED */
761         xfs_da_blkinfo_t *blkinfo;
762         xfs_da_args_t *args;
763         xfs_dablk_t child;
764         xfs_dabuf_t *bp;
765         int error;
766
767         args = state->args;
768         ASSERT(args != NULL);
769         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
770         oldroot = root_blk->bp->data;
771         ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
772         ASSERT(!oldroot->hdr.info.forw);
773         ASSERT(!oldroot->hdr.info.back);
774
775         /*
776          * If the root has more than one child, then don't do anything.
777          */
778         if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
779                 return(0);
780
781         /*
782          * Read in the (only) child block, then copy those bytes into
783          * the root block's buffer and free the original child block.
784          */
785         child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
786         ASSERT(child != 0);
787         error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
788                                              args->whichfork);
789         if (error)
790                 return(error);
791         ASSERT(bp != NULL);
792         blkinfo = bp->data;
793         if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
794                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
795                        INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
796         } else {
797                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
798         }
799         ASSERT(!blkinfo->forw);
800         ASSERT(!blkinfo->back);
801         memcpy(root_blk->bp->data, bp->data, state->blocksize);
802         xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
803         error = xfs_da_shrink_inode(args, child, bp);
804         return(error);
805 }
806
807 /*
808  * Check a node block and its neighbors to see if the block should be
809  * collapsed into one or the other neighbor.  Always keep the block
810  * with the smaller block number.
811  * If the current block is over 50% full, don't try to join it, return 0.
812  * If the block is empty, fill in the state structure and return 2.
813  * If it can be collapsed, fill in the state structure and return 1.
814  * If nothing can be done, return 0.
815  */
816 STATIC int
817 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
818 {
819         xfs_da_intnode_t *node;
820         xfs_da_state_blk_t *blk;
821         xfs_da_blkinfo_t *info;
822         int count, forward, error, retval, i;
823         xfs_dablk_t blkno;
824         xfs_dabuf_t *bp;
825
826         /*
827          * Check for the degenerate case of the block being over 50% full.
828          * If so, it's not worth even looking to see if we might be able
829          * to coalesce with a sibling.
830          */
831         blk = &state->path.blk[ state->path.active-1 ];
832         info = blk->bp->data;
833         ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
834         node = (xfs_da_intnode_t *)info;
835         count = INT_GET(node->hdr.count, ARCH_CONVERT);
836         if (count > (state->node_ents >> 1)) {
837                 *action = 0;    /* blk over 50%, don't try to join */
838                 return(0);      /* blk over 50%, don't try to join */
839         }
840
841         /*
842          * Check for the degenerate case of the block being empty.
843          * If the block is empty, we'll simply delete it, no need to
844          * coalesce it with a sibling block.  We choose (aribtrarily)
845          * to merge with the forward block unless it is NULL.
846          */
847         if (count == 0) {
848                 /*
849                  * Make altpath point to the block we want to keep and
850                  * path point to the block we want to drop (this one).
851                  */
852                 forward = info->forw;
853                 memcpy(&state->altpath, &state->path, sizeof(state->path));
854                 error = xfs_da_path_shift(state, &state->altpath, forward,
855                                                  0, &retval);
856                 if (error)
857                         return(error);
858                 if (retval) {
859                         *action = 0;
860                 } else {
861                         *action = 2;
862                 }
863                 return(0);
864         }
865
866         /*
867          * Examine each sibling block to see if we can coalesce with
868          * at least 25% free space to spare.  We need to figure out
869          * whether to merge with the forward or the backward block.
870          * We prefer coalescing with the lower numbered sibling so as
871          * to shrink a directory over time.
872          */
873         /* start with smaller blk num */
874         forward = (INT_GET(info->forw, ARCH_CONVERT)
875                                 < INT_GET(info->back, ARCH_CONVERT));
876         for (i = 0; i < 2; forward = !forward, i++) {
877                 if (forward)
878                         blkno = INT_GET(info->forw, ARCH_CONVERT);
879                 else
880                         blkno = INT_GET(info->back, ARCH_CONVERT);
881                 if (blkno == 0)
882                         continue;
883                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
884                                         blkno, -1, &bp, state->args->whichfork);
885                 if (error)
886                         return(error);
887                 ASSERT(bp != NULL);
888
889                 node = (xfs_da_intnode_t *)info;
890                 count  = state->node_ents;
891                 count -= state->node_ents >> 2;
892                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
893                 node = bp->data;
894                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
895                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
896                 xfs_da_brelse(state->args->trans, bp);
897                 if (count >= 0)
898                         break;  /* fits with at least 25% to spare */
899         }
900         if (i >= 2) {
901                 *action = 0;
902                 return(0);
903         }
904
905         /*
906          * Make altpath point to the block we want to keep (the lower
907          * numbered block) and path point to the block we want to drop.
908          */
909         memcpy(&state->altpath, &state->path, sizeof(state->path));
910         if (blkno < blk->blkno) {
911                 error = xfs_da_path_shift(state, &state->altpath, forward,
912                                                  0, &retval);
913                 if (error) {
914                         return(error);
915                 }
916                 if (retval) {
917                         *action = 0;
918                         return(0);
919                 }
920         } else {
921                 error = xfs_da_path_shift(state, &state->path, forward,
922                                                  0, &retval);
923                 if (error) {
924                         return(error);
925                 }
926                 if (retval) {
927                         *action = 0;
928                         return(0);
929                 }
930         }
931         *action = 1;
932         return(0);
933 }
934
935 /*
936  * Walk back up the tree adjusting hash values as necessary,
937  * when we stop making changes, return.
938  */
939 void
940 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
941 {
942         xfs_da_state_blk_t *blk;
943         xfs_da_intnode_t *node;
944         xfs_da_node_entry_t *btree;
945         xfs_dahash_t lasthash=0;
946         int level, count;
947
948         level = path->active-1;
949         blk = &path->blk[ level ];
950         switch (blk->magic) {
951         case XFS_ATTR_LEAF_MAGIC:
952                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
953                 if (count == 0)
954                         return;
955                 break;
956         case XFS_DIR_LEAF_MAGIC:
957                 ASSERT(XFS_DIR_IS_V1(state->mp));
958                 lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
959                 if (count == 0)
960                         return;
961                 break;
962         case XFS_DIR2_LEAFN_MAGIC:
963                 ASSERT(XFS_DIR_IS_V2(state->mp));
964                 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
965                 if (count == 0)
966                         return;
967                 break;
968         case XFS_DA_NODE_MAGIC:
969                 lasthash = xfs_da_node_lasthash(blk->bp, &count);
970                 if (count == 0)
971                         return;
972                 break;
973         }
974         for (blk--, level--; level >= 0; blk--, level--) {
975                 node = blk->bp->data;
976                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
977                 btree = &node->btree[ blk->index ];
978                 if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
979                         break;
980                 blk->hashval = lasthash;
981                 INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
982                 xfs_da_log_buf(state->args->trans, blk->bp,
983                                   XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
984
985                 lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
986         }
987 }
988
989 /*
990  * Remove an entry from an intermediate node.
991  */
992 STATIC void
993 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
994 {
995         xfs_da_intnode_t *node;
996         xfs_da_node_entry_t *btree;
997         int tmp;
998
999         node = drop_blk->bp->data;
1000         ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
1001         ASSERT(drop_blk->index >= 0);
1002
1003         /*
1004          * Copy over the offending entry, or just zero it out.
1005          */
1006         btree = &node->btree[drop_blk->index];
1007         if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1008                 tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
1009                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1010                 memmove(btree, btree + 1, tmp);
1011                 xfs_da_log_buf(state->args->trans, drop_blk->bp,
1012                     XFS_DA_LOGRANGE(node, btree, tmp));
1013                 btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
1014         }
1015         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1016         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1017             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1018         INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
1019         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1020             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1021
1022         /*
1023          * Copy the last hash value from the block to propagate upwards.
1024          */
1025         btree--;
1026         drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
1027 }
1028
1029 /*
1030  * Unbalance the btree elements between two intermediate nodes,
1031  * move all Btree elements from one node into another.
1032  */
1033 STATIC void
1034 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1035                                      xfs_da_state_blk_t *save_blk)
1036 {
1037         xfs_da_intnode_t *drop_node, *save_node;
1038         xfs_da_node_entry_t *btree;
1039         int tmp;
1040         xfs_trans_t *tp;
1041
1042         drop_node = drop_blk->bp->data;
1043         save_node = save_blk->bp->data;
1044         ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1045         ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1046         tp = state->args->trans;
1047
1048         /*
1049          * If the dying block has lower hashvals, then move all the
1050          * elements in the remaining block up to make a hole.
1051          */
1052         if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1053             (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1054              INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
1055         {
1056                 btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
1057                 tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1058                 memmove(btree, &save_node->btree[0], tmp);
1059                 btree = &save_node->btree[0];
1060                 xfs_da_log_buf(tp, save_blk->bp,
1061                         XFS_DA_LOGRANGE(save_node, btree,
1062                                 (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
1063                                 sizeof(xfs_da_node_entry_t)));
1064         } else {
1065                 btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
1066                 xfs_da_log_buf(tp, save_blk->bp,
1067                         XFS_DA_LOGRANGE(save_node, btree,
1068                                 INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
1069                                 sizeof(xfs_da_node_entry_t)));
1070         }
1071
1072         /*
1073          * Move all the B-tree elements from drop_blk to save_blk.
1074          */
1075         tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1076         memcpy(btree, &drop_node->btree[0], tmp);
1077         INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
1078
1079         xfs_da_log_buf(tp, save_blk->bp,
1080                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1081                         sizeof(save_node->hdr)));
1082
1083         /*
1084          * Save the last hashval in the remaining block for upward propagation.
1085          */
1086         save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1087 }
1088
1089 /*========================================================================
1090  * Routines used for finding things in the Btree.
1091  *========================================================================*/
1092
1093 /*
1094  * Walk down the Btree looking for a particular filename, filling
1095  * in the state structure as we go.
1096  *
1097  * We will set the state structure to point to each of the elements
1098  * in each of the nodes where either the hashval is or should be.
1099  *
1100  * We support duplicate hashval's so for each entry in the current
1101  * node that could contain the desired hashval, descend.  This is a
1102  * pruned depth-first tree search.
1103  */
1104 int                                                     /* error */
1105 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1106 {
1107         xfs_da_state_blk_t *blk;
1108         xfs_da_blkinfo_t *curr;
1109         xfs_da_intnode_t *node;
1110         xfs_da_node_entry_t *btree;
1111         xfs_dablk_t blkno;
1112         int probe, span, max, error, retval;
1113         xfs_dahash_t hashval;
1114         xfs_da_args_t *args;
1115
1116         args = state->args;
1117
1118         /*
1119          * Descend thru the B-tree searching each level for the right
1120          * node to use, until the right hashval is found.
1121          */
1122         if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1123                 blkno = state->mp->m_dirleafblk;
1124         else
1125                 blkno = 0;
1126         for (blk = &state->path.blk[0], state->path.active = 1;
1127                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1128                          blk++, state->path.active++) {
1129                 /*
1130                  * Read the next node down in the tree.
1131                  */
1132                 blk->blkno = blkno;
1133                 error = xfs_da_read_buf(args->trans, args->dp, blkno,
1134                                         -1, &blk->bp, args->whichfork);
1135                 if (error) {
1136                         blk->blkno = 0;
1137                         state->path.active--;
1138                         return(error);
1139                 }
1140                 curr = blk->bp->data;
1141                 ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1142                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1143                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1144
1145                 /*
1146                  * Search an intermediate node for a match.
1147                  */
1148                 blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
1149                 if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1150                         node = blk->bp->data;
1151                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1152
1153                         /*
1154                          * Binary search.  (note: small blocks will skip loop)
1155                          */
1156                         max = INT_GET(node->hdr.count, ARCH_CONVERT);
1157                         probe = span = max / 2;
1158                         hashval = args->hashval;
1159                         for (btree = &node->btree[probe]; span > 4;
1160                                    btree = &node->btree[probe]) {
1161                                 span /= 2;
1162                                 if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
1163                                         probe += span;
1164                                 else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
1165                                         probe -= span;
1166                                 else
1167                                         break;
1168                         }
1169                         ASSERT((probe >= 0) && (probe < max));
1170                         ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
1171
1172                         /*
1173                          * Since we may have duplicate hashval's, find the first
1174                          * matching hashval in the node.
1175                          */
1176                         while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
1177                                 btree--;
1178                                 probe--;
1179                         }
1180                         while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
1181                                 btree++;
1182                                 probe++;
1183                         }
1184
1185                         /*
1186                          * Pick the right block to descend on.
1187                          */
1188                         if (probe == max) {
1189                                 blk->index = max-1;
1190                                 blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
1191                         } else {
1192                                 blk->index = probe;
1193                                 blkno = INT_GET(btree->before, ARCH_CONVERT);
1194                         }
1195                 }
1196                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
1197                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1198                         break;
1199                 }
1200                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1201                         blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1202                         break;
1203                 }
1204                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1205                         blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1206                         break;
1207                 }
1208         }
1209
1210         /*
1211          * A leaf block that ends in the hashval that we are interested in
1212          * (final hashval == search hashval) means that the next block may
1213          * contain more entries with the same hashval, shift upward to the
1214          * next leaf and keep searching.
1215          */
1216         for (;;) {
1217                 if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1218                         ASSERT(XFS_DIR_IS_V1(state->mp));
1219                         retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1220                                                                   &blk->index);
1221                 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1222                         ASSERT(XFS_DIR_IS_V2(state->mp));
1223                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1224                                                         &blk->index, state);
1225                 }
1226                 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1227                         retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1228                         blk->index = args->index;
1229                         args->blkno = blk->blkno;
1230                 }
1231                 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1232                     (blk->hashval == args->hashval)) {
1233                         error = xfs_da_path_shift(state, &state->path, 1, 1,
1234                                                          &retval);
1235                         if (error)
1236                                 return(error);
1237                         if (retval == 0) {
1238                                 continue;
1239                         }
1240                         else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1241                                 /* path_shift() gives ENOENT */
1242                                 retval = XFS_ERROR(ENOATTR);
1243                         }
1244                 }
1245                 break;
1246         }
1247         *result = retval;
1248         return(0);
1249 }
1250
1251 /*========================================================================
1252  * Utility routines.
1253  *========================================================================*/
1254
1255 /*
1256  * Link a new block into a doubly linked list of blocks (of whatever type).
1257  */
1258 int                                                     /* error */
1259 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1260                                xfs_da_state_blk_t *new_blk)
1261 {
1262         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1263         xfs_da_args_t *args;
1264         int before=0, error;
1265         xfs_dabuf_t *bp;
1266
1267         /*
1268          * Set up environment.
1269          */
1270         args = state->args;
1271         ASSERT(args != NULL);
1272         old_info = old_blk->bp->data;
1273         new_info = new_blk->bp->data;
1274         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1275                old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1276                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1277         ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
1278         ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
1279         ASSERT(old_blk->magic == new_blk->magic);
1280
1281         switch (old_blk->magic) {
1282         case XFS_ATTR_LEAF_MAGIC:
1283                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1284                 break;
1285         case XFS_DIR_LEAF_MAGIC:
1286                 ASSERT(XFS_DIR_IS_V1(state->mp));
1287                 before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1288                 break;
1289         case XFS_DIR2_LEAFN_MAGIC:
1290                 ASSERT(XFS_DIR_IS_V2(state->mp));
1291                 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1292                 break;
1293         case XFS_DA_NODE_MAGIC:
1294                 before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1295                 break;
1296         }
1297
1298         /*
1299          * Link blocks in appropriate order.
1300          */
1301         if (before) {
1302                 /*
1303                  * Link new block in before existing block.
1304                  */
1305                 INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
1306                 new_info->back = old_info->back; /* INT_: direct copy */
1307                 if (INT_GET(old_info->back, ARCH_CONVERT)) {
1308                         error = xfs_da_read_buf(args->trans, args->dp,
1309                                                 INT_GET(old_info->back,
1310                                                         ARCH_CONVERT), -1, &bp,
1311                                                 args->whichfork);
1312                         if (error)
1313                                 return(error);
1314                         ASSERT(bp != NULL);
1315                         tmp_info = bp->data;
1316                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
1317                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
1318                         INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
1319                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1320                         xfs_da_buf_done(bp);
1321                 }
1322                 INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
1323         } else {
1324                 /*
1325                  * Link new block in after existing block.
1326                  */
1327                 new_info->forw = old_info->forw; /* INT_: direct copy */
1328                 INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
1329                 if (INT_GET(old_info->forw, ARCH_CONVERT)) {
1330                         error = xfs_da_read_buf(args->trans, args->dp,
1331                                                 INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
1332                                                 args->whichfork);
1333                         if (error)
1334                                 return(error);
1335                         ASSERT(bp != NULL);
1336                         tmp_info = bp->data;
1337                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1338                                     == INT_GET(old_info->magic, ARCH_CONVERT));
1339                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1340                                     == old_blk->blkno);
1341                         INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
1342                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1343                         xfs_da_buf_done(bp);
1344                 }
1345                 INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
1346         }
1347
1348         xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1349         xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1350         return(0);
1351 }
1352
1353 /*
1354  * Compare two intermediate nodes for "order".
1355  */
1356 STATIC int
1357 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1358 {
1359         xfs_da_intnode_t *node1, *node2;
1360
1361         node1 = node1_bp->data;
1362         node2 = node2_bp->data;
1363         ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
1364                (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
1365         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
1366             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
1367               INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1368              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1369               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1370                 return(1);
1371         }
1372         return(0);
1373 }
1374
1375 /*
1376  * Pick up the last hashvalue from an intermediate node.
1377  */
1378 STATIC uint
1379 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1380 {
1381         xfs_da_intnode_t *node;
1382
1383         node = bp->data;
1384         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1385         if (count)
1386                 *count = INT_GET(node->hdr.count, ARCH_CONVERT);
1387         if (!node->hdr.count)
1388                 return(0);
1389         return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1390 }
1391
1392 /*
1393  * Unlink a block from a doubly linked list of blocks.
1394  */
1395 STATIC int                                              /* error */
1396 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1397                                  xfs_da_state_blk_t *save_blk)
1398 {
1399         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1400         xfs_da_args_t *args;
1401         xfs_dabuf_t *bp;
1402         int error;
1403
1404         /*
1405          * Set up environment.
1406          */
1407         args = state->args;
1408         ASSERT(args != NULL);
1409         save_info = save_blk->bp->data;
1410         drop_info = drop_blk->bp->data;
1411         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1412                save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1413                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1414         ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
1415         ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
1416         ASSERT(save_blk->magic == drop_blk->magic);
1417         ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
1418                (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
1419         ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
1420                (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
1421
1422         /*
1423          * Unlink the leaf block from the doubly linked chain of leaves.
1424          */
1425         if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
1426                 save_info->back = drop_info->back; /* INT_: direct copy */
1427                 if (INT_GET(drop_info->back, ARCH_CONVERT)) {
1428                         error = xfs_da_read_buf(args->trans, args->dp,
1429                                                 INT_GET(drop_info->back,
1430                                                         ARCH_CONVERT), -1, &bp,
1431                                                 args->whichfork);
1432                         if (error)
1433                                 return(error);
1434                         ASSERT(bp != NULL);
1435                         tmp_info = bp->data;
1436                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
1437                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
1438                         INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
1439                         xfs_da_log_buf(args->trans, bp, 0,
1440                                                     sizeof(*tmp_info) - 1);
1441                         xfs_da_buf_done(bp);
1442                 }
1443         } else {
1444                 save_info->forw = drop_info->forw; /* INT_: direct copy */
1445                 if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
1446                         error = xfs_da_read_buf(args->trans, args->dp,
1447                                                 INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
1448                                                 args->whichfork);
1449                         if (error)
1450                                 return(error);
1451                         ASSERT(bp != NULL);
1452                         tmp_info = bp->data;
1453                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1454                                     == INT_GET(save_info->magic, ARCH_CONVERT));
1455                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1456                                     == drop_blk->blkno);
1457                         INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
1458                         xfs_da_log_buf(args->trans, bp, 0,
1459                                                     sizeof(*tmp_info) - 1);
1460                         xfs_da_buf_done(bp);
1461                 }
1462         }
1463
1464         xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1465         return(0);
1466 }
1467
1468 /*
1469  * Move a path "forward" or "!forward" one block at the current level.
1470  *
1471  * This routine will adjust a "path" to point to the next block
1472  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1473  * Btree, including updating pointers to the intermediate nodes between
1474  * the new bottom and the root.
1475  */
1476 int                                                     /* error */
1477 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1478                                  int forward, int release, int *result)
1479 {
1480         xfs_da_state_blk_t *blk;
1481         xfs_da_blkinfo_t *info;
1482         xfs_da_intnode_t *node;
1483         xfs_da_args_t *args;
1484         xfs_dablk_t blkno=0;
1485         int level, error;
1486
1487         /*
1488          * Roll up the Btree looking for the first block where our
1489          * current index is not at the edge of the block.  Note that
1490          * we skip the bottom layer because we want the sibling block.
1491          */
1492         args = state->args;
1493         ASSERT(args != NULL);
1494         ASSERT(path != NULL);
1495         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1496         level = (path->active-1) - 1;   /* skip bottom layer in path */
1497         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1498                 ASSERT(blk->bp != NULL);
1499                 node = blk->bp->data;
1500                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1501                 if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1502                         blk->index++;
1503                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1504                         break;
1505                 } else if (!forward && (blk->index > 0)) {
1506                         blk->index--;
1507                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1508                         break;
1509                 }
1510         }
1511         if (level < 0) {
1512                 *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
1513                 ASSERT(args->oknoent);
1514                 return(0);
1515         }
1516
1517         /*
1518          * Roll down the edge of the subtree until we reach the
1519          * same depth we were at originally.
1520          */
1521         for (blk++, level++; level < path->active; blk++, level++) {
1522                 /*
1523                  * Release the old block.
1524                  * (if it's dirty, trans won't actually let go)
1525                  */
1526                 if (release)
1527                         xfs_da_brelse(args->trans, blk->bp);
1528
1529                 /*
1530                  * Read the next child block.
1531                  */
1532                 blk->blkno = blkno;
1533                 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1534                                                      &blk->bp, args->whichfork);
1535                 if (error)
1536                         return(error);
1537                 ASSERT(blk->bp != NULL);
1538                 info = blk->bp->data;
1539                 ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1540                        INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1541                        INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1542                 blk->magic = INT_GET(info->magic, ARCH_CONVERT);
1543                 if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1544                         node = (xfs_da_intnode_t *)info;
1545                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1546                         if (forward)
1547                                 blk->index = 0;
1548                         else
1549                                 blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
1550                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1551                 } else {
1552                         ASSERT(level == path->active-1);
1553                         blk->index = 0;
1554                         switch(blk->magic) {
1555                         case XFS_ATTR_LEAF_MAGIC:
1556                                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1557                                                                       NULL);
1558                                 break;
1559                         case XFS_DIR_LEAF_MAGIC:
1560                                 ASSERT(XFS_DIR_IS_V1(state->mp));
1561                                 blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1562                                                                      NULL);
1563                                 break;
1564                         case XFS_DIR2_LEAFN_MAGIC:
1565                                 ASSERT(XFS_DIR_IS_V2(state->mp));
1566                                 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1567                                                                        NULL);
1568                                 break;
1569                         default:
1570                                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1571                                        blk->magic ==
1572                                        XFS_DIRX_LEAF_MAGIC(state->mp));
1573                                 break;
1574                         }
1575                 }
1576         }
1577         *result = 0;
1578         return(0);
1579 }
1580
1581
1582 /*========================================================================
1583  * Utility routines.
1584  *========================================================================*/
1585
1586 /*
1587  * Implement a simple hash on a character string.
1588  * Rotate the hash value by 7 bits, then XOR each character in.
1589  * This is implemented with some source-level loop unrolling.
1590  */
1591 xfs_dahash_t
1592 xfs_da_hashname(const uchar_t *name, int namelen)
1593 {
1594         xfs_dahash_t hash;
1595
1596         /*
1597          * Do four characters at a time as long as we can.
1598          */
1599         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1600                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1601                        (name[3] << 0) ^ rol32(hash, 7 * 4);
1602
1603         /*
1604          * Now do the rest of the characters.
1605          */
1606         switch (namelen) {
1607         case 3:
1608                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1609                        rol32(hash, 7 * 3);
1610         case 2:
1611                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1612         case 1:
1613                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1614         default: /* case 0: */
1615                 return hash;
1616         }
1617 }
1618
1619 /*
1620  * Add a block to the btree ahead of the file.
1621  * Return the new block number to the caller.
1622  */
1623 int
1624 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1625 {
1626         xfs_fileoff_t bno, b;
1627         xfs_bmbt_irec_t map;
1628         xfs_bmbt_irec_t *mapp;
1629         xfs_inode_t *dp;
1630         int nmap, error, w, count, c, got, i, mapi;
1631         xfs_fsize_t size;
1632         xfs_trans_t *tp;
1633         xfs_mount_t *mp;
1634
1635         dp = args->dp;
1636         mp = dp->i_mount;
1637         w = args->whichfork;
1638         tp = args->trans;
1639         /*
1640          * For new directories adjust the file offset and block count.
1641          */
1642         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
1643                 bno = mp->m_dirleafblk;
1644                 count = mp->m_dirblkfsbs;
1645         } else {
1646                 bno = 0;
1647                 count = 1;
1648         }
1649         /*
1650          * Find a spot in the file space to put the new block.
1651          */
1652         if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
1653                 return error;
1654         }
1655         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1656                 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1657         /*
1658          * Try mapping it in one filesystem block.
1659          */
1660         nmap = 1;
1661         ASSERT(args->firstblock != NULL);
1662         if ((error = xfs_bmapi(tp, dp, bno, count,
1663                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1664                         XFS_BMAPI_CONTIG,
1665                         args->firstblock, args->total, &map, &nmap,
1666                         args->flist))) {
1667                 return error;
1668         }
1669         ASSERT(nmap <= 1);
1670         if (nmap == 1) {
1671                 mapp = &map;
1672                 mapi = 1;
1673         }
1674         /*
1675          * If we didn't get it and the block might work if fragmented,
1676          * try without the CONTIG flag.  Loop until we get it all.
1677          */
1678         else if (nmap == 0 && count > 1) {
1679                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1680                 for (b = bno, mapi = 0; b < bno + count; ) {
1681                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1682                         c = (int)(bno + count - b);
1683                         if ((error = xfs_bmapi(tp, dp, b, c,
1684                                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
1685                                         XFS_BMAPI_METADATA,
1686                                         args->firstblock, args->total,
1687                                         &mapp[mapi], &nmap, args->flist))) {
1688                                 kmem_free(mapp, sizeof(*mapp) * count);
1689                                 return error;
1690                         }
1691                         if (nmap < 1)
1692                                 break;
1693                         mapi += nmap;
1694                         b = mapp[mapi - 1].br_startoff +
1695                             mapp[mapi - 1].br_blockcount;
1696                 }
1697         } else {
1698                 mapi = 0;
1699                 mapp = NULL;
1700         }
1701         /*
1702          * Count the blocks we got, make sure it matches the total.
1703          */
1704         for (i = 0, got = 0; i < mapi; i++)
1705                 got += mapp[i].br_blockcount;
1706         if (got != count || mapp[0].br_startoff != bno ||
1707             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1708             bno + count) {
1709                 if (mapp != &map)
1710                         kmem_free(mapp, sizeof(*mapp) * count);
1711                 return XFS_ERROR(ENOSPC);
1712         }
1713         if (mapp != &map)
1714                 kmem_free(mapp, sizeof(*mapp) * count);
1715         *new_blkno = (xfs_dablk_t)bno;
1716         /*
1717          * For version 1 directories, adjust the file size if it changed.
1718          */
1719         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1720                 ASSERT(mapi == 1);
1721                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1722                         return error;
1723                 size = XFS_FSB_TO_B(mp, bno);
1724                 if (size != dp->i_d.di_size) {
1725                         dp->i_d.di_size = size;
1726                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1727                 }
1728         }
1729         return 0;
1730 }
1731
1732 /*
1733  * Ick.  We need to always be able to remove a btree block, even
1734  * if there's no space reservation because the filesystem is full.
1735  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1736  * It swaps the target block with the last block in the file.  The
1737  * last block in the file can always be removed since it can't cause
1738  * a bmap btree split to do that.
1739  */
1740 STATIC int
1741 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1742                       xfs_dabuf_t **dead_bufp)
1743 {
1744         xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1745         xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1746         xfs_fileoff_t lastoff;
1747         xfs_inode_t *ip;
1748         xfs_trans_t *tp;
1749         xfs_mount_t *mp;
1750         int error, w, entno, level, dead_level;
1751         xfs_da_blkinfo_t *dead_info, *sib_info;
1752         xfs_da_intnode_t *par_node, *dead_node;
1753         xfs_dir_leafblock_t *dead_leaf;
1754         xfs_dir2_leaf_t *dead_leaf2;
1755         xfs_dahash_t dead_hash;
1756
1757         dead_buf = *dead_bufp;
1758         dead_blkno = *dead_blknop;
1759         tp = args->trans;
1760         ip = args->dp;
1761         w = args->whichfork;
1762         ASSERT(w == XFS_DATA_FORK);
1763         mp = ip->i_mount;
1764         if (XFS_DIR_IS_V2(mp)) {
1765                 lastoff = mp->m_dirfreeblk;
1766                 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1767         } else
1768                 error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
1769         if (error)
1770                 return error;
1771         if (unlikely(lastoff == 0)) {
1772                 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1773                                  mp);
1774                 return XFS_ERROR(EFSCORRUPTED);
1775         }
1776         /*
1777          * Read the last block in the btree space.
1778          */
1779         last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1780         if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1781                 return error;
1782         /*
1783          * Copy the last block into the dead buffer and log it.
1784          */
1785         memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1786         xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1787         dead_info = dead_buf->data;
1788         /*
1789          * Get values from the moved block.
1790          */
1791         if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1792                 ASSERT(XFS_DIR_IS_V1(mp));
1793                 dead_leaf = (xfs_dir_leafblock_t *)dead_info;
1794                 dead_level = 0;
1795                 dead_hash =
1796                         INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1797         } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1798                 ASSERT(XFS_DIR_IS_V2(mp));
1799                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1800                 dead_level = 0;
1801                 dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1802         } else {
1803                 ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1804                 dead_node = (xfs_da_intnode_t *)dead_info;
1805                 dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
1806                 dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1807         }
1808         sib_buf = par_buf = NULL;
1809         /*
1810          * If the moved block has a left sibling, fix up the pointers.
1811          */
1812         if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
1813                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1814                         goto done;
1815                 sib_info = sib_buf->data;
1816                 if (unlikely(
1817                     INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
1818                     INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1819                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1820                                          XFS_ERRLEVEL_LOW, mp);
1821                         error = XFS_ERROR(EFSCORRUPTED);
1822                         goto done;
1823                 }
1824                 INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
1825                 xfs_da_log_buf(tp, sib_buf,
1826                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1827                                         sizeof(sib_info->forw)));
1828                 xfs_da_buf_done(sib_buf);
1829                 sib_buf = NULL;
1830         }
1831         /*
1832          * If the moved block has a right sibling, fix up the pointers.
1833          */
1834         if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
1835                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1836                         goto done;
1837                 sib_info = sib_buf->data;
1838                 if (unlikely(
1839                        INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
1840                     || INT_GET(sib_info->magic, ARCH_CONVERT)
1841                                 != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1842                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1843                                          XFS_ERRLEVEL_LOW, mp);
1844                         error = XFS_ERROR(EFSCORRUPTED);
1845                         goto done;
1846                 }
1847                 INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
1848                 xfs_da_log_buf(tp, sib_buf,
1849                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1850                                         sizeof(sib_info->back)));
1851                 xfs_da_buf_done(sib_buf);
1852                 sib_buf = NULL;
1853         }
1854         par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
1855         level = -1;
1856         /*
1857          * Walk down the tree looking for the parent of the moved block.
1858          */
1859         for (;;) {
1860                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1861                         goto done;
1862                 par_node = par_buf->data;
1863                 if (unlikely(
1864                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
1865                     (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
1866                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1867                                          XFS_ERRLEVEL_LOW, mp);
1868                         error = XFS_ERROR(EFSCORRUPTED);
1869                         goto done;
1870                 }
1871                 level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
1872                 for (entno = 0;
1873                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1874                      INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
1875                      entno++)
1876                         continue;
1877                 if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
1878                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1879                                          XFS_ERRLEVEL_LOW, mp);
1880                         error = XFS_ERROR(EFSCORRUPTED);
1881                         goto done;
1882                 }
1883                 par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
1884                 if (level == dead_level + 1)
1885                         break;
1886                 xfs_da_brelse(tp, par_buf);
1887                 par_buf = NULL;
1888         }
1889         /*
1890          * We're in the right parent block.
1891          * Look for the right entry.
1892          */
1893         for (;;) {
1894                 for (;
1895                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1896                      INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
1897                      entno++)
1898                         continue;
1899                 if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
1900                         break;
1901                 par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
1902                 xfs_da_brelse(tp, par_buf);
1903                 par_buf = NULL;
1904                 if (unlikely(par_blkno == 0)) {
1905                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1906                                          XFS_ERRLEVEL_LOW, mp);
1907                         error = XFS_ERROR(EFSCORRUPTED);
1908                         goto done;
1909                 }
1910                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1911                         goto done;
1912                 par_node = par_buf->data;
1913                 if (unlikely(
1914                     INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
1915                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
1916                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1917                                          XFS_ERRLEVEL_LOW, mp);
1918                         error = XFS_ERROR(EFSCORRUPTED);
1919                         goto done;
1920                 }
1921                 entno = 0;
1922         }
1923         /*
1924          * Update the parent entry pointing to the moved block.
1925          */
1926         INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
1927         xfs_da_log_buf(tp, par_buf,
1928                 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1929                                 sizeof(par_node->btree[entno].before)));
1930         xfs_da_buf_done(par_buf);
1931         xfs_da_buf_done(dead_buf);
1932         *dead_blknop = last_blkno;
1933         *dead_bufp = last_buf;
1934         return 0;
1935 done:
1936         if (par_buf)
1937                 xfs_da_brelse(tp, par_buf);
1938         if (sib_buf)
1939                 xfs_da_brelse(tp, sib_buf);
1940         xfs_da_brelse(tp, last_buf);
1941         return error;
1942 }
1943
1944 /*
1945  * Remove a btree block from a directory or attribute.
1946  */
1947 int
1948 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1949                     xfs_dabuf_t *dead_buf)
1950 {
1951         xfs_inode_t *dp;
1952         int done, error, w, count;
1953         xfs_fileoff_t bno;
1954         xfs_fsize_t size;
1955         xfs_trans_t *tp;
1956         xfs_mount_t *mp;
1957
1958         dp = args->dp;
1959         w = args->whichfork;
1960         tp = args->trans;
1961         mp = dp->i_mount;
1962         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1963                 count = mp->m_dirblkfsbs;
1964         else
1965                 count = 1;
1966         for (;;) {
1967                 /*
1968                  * Remove extents.  If we get ENOSPC for a dir we have to move
1969                  * the last block to the place we want to kill.
1970                  */
1971                 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1972                                 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
1973                                 0, args->firstblock, args->flist,
1974                                 &done)) == ENOSPC) {
1975                         if (w != XFS_DATA_FORK)
1976                                 goto done;
1977                         if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1978                                         &dead_buf)))
1979                                 goto done;
1980                 } else if (error)
1981                         goto done;
1982                 else
1983                         break;
1984         }
1985         ASSERT(done);
1986         xfs_da_binval(tp, dead_buf);
1987         /*
1988          * Adjust the directory size for version 1.
1989          */
1990         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1991                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1992                         return error;
1993                 size = XFS_FSB_TO_B(dp->i_mount, bno);
1994                 if (size != dp->i_d.di_size) {
1995                         dp->i_d.di_size = size;
1996                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1997                 }
1998         }
1999         return 0;
2000 done:
2001         xfs_da_binval(tp, dead_buf);
2002         return error;
2003 }
2004
2005 /*
2006  * See if the mapping(s) for this btree block are valid, i.e.
2007  * don't contain holes, are logically contiguous, and cover the whole range.
2008  */
2009 STATIC int
2010 xfs_da_map_covers_blocks(
2011         int             nmap,
2012         xfs_bmbt_irec_t *mapp,
2013         xfs_dablk_t     bno,
2014         int             count)
2015 {
2016         int             i;
2017         xfs_fileoff_t   off;
2018
2019         for (i = 0, off = bno; i < nmap; i++) {
2020                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2021                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
2022                         return 0;
2023                 }
2024                 if (off != mapp[i].br_startoff) {
2025                         return 0;
2026                 }
2027                 off += mapp[i].br_blockcount;
2028         }
2029         return off == bno + count;
2030 }
2031
2032 /*
2033  * Make a dabuf.
2034  * Used for get_buf, read_buf, read_bufr, and reada_buf.
2035  */
2036 STATIC int
2037 xfs_da_do_buf(
2038         xfs_trans_t     *trans,
2039         xfs_inode_t     *dp,
2040         xfs_dablk_t     bno,
2041         xfs_daddr_t     *mappedbnop,
2042         xfs_dabuf_t     **bpp,
2043         int             whichfork,
2044         int             caller,
2045         inst_t          *ra)
2046 {
2047         xfs_buf_t       *bp = NULL;
2048         xfs_buf_t       **bplist;
2049         int             error=0;
2050         int             i;
2051         xfs_bmbt_irec_t map;
2052         xfs_bmbt_irec_t *mapp;
2053         xfs_daddr_t     mappedbno;
2054         xfs_mount_t     *mp;
2055         int             nbplist=0;
2056         int             nfsb;
2057         int             nmap;
2058         xfs_dabuf_t     *rbp;
2059
2060         mp = dp->i_mount;
2061         if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
2062                 nfsb = mp->m_dirblkfsbs;
2063         else
2064                 nfsb = 1;
2065         mappedbno = *mappedbnop;
2066         /*
2067          * Caller doesn't have a mapping.  -2 means don't complain
2068          * if we land in a hole.
2069          */
2070         if (mappedbno == -1 || mappedbno == -2) {
2071                 /*
2072                  * Optimize the one-block case.
2073                  */
2074                 if (nfsb == 1) {
2075                         xfs_fsblock_t   fsb;
2076
2077                         if ((error =
2078                             xfs_bmapi_single(trans, dp, whichfork, &fsb,
2079                                     (xfs_fileoff_t)bno))) {
2080                                 return error;
2081                         }
2082                         mapp = &map;
2083                         if (fsb == NULLFSBLOCK) {
2084                                 nmap = 0;
2085                         } else {
2086                                 map.br_startblock = fsb;
2087                                 map.br_startoff = (xfs_fileoff_t)bno;
2088                                 map.br_blockcount = 1;
2089                                 nmap = 1;
2090                         }
2091                 } else {
2092                         mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
2093                         nmap = nfsb;
2094                         if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
2095                                         nfsb,
2096                                         XFS_BMAPI_METADATA |
2097                                                 XFS_BMAPI_AFLAG(whichfork),
2098                                         NULL, 0, mapp, &nmap, NULL)))
2099                                 goto exit0;
2100                 }
2101         } else {
2102                 map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2103                 map.br_startoff = (xfs_fileoff_t)bno;
2104                 map.br_blockcount = nfsb;
2105                 mapp = &map;
2106                 nmap = 1;
2107         }
2108         if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
2109                 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
2110                 if (unlikely(error == EFSCORRUPTED)) {
2111                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2112                                 int     i;
2113                                 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
2114                                         (long long)bno);
2115                                 cmn_err(CE_ALERT, "dir: inode %lld\n",
2116                                         (long long)dp->i_ino);
2117                                 for (i = 0; i < nmap; i++) {
2118                                         cmn_err(CE_ALERT,
2119                                                 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
2120                                                 i,
2121                                                 (long long)mapp[i].br_startoff,
2122                                                 (long long)mapp[i].br_startblock,
2123                                                 (long long)mapp[i].br_blockcount,
2124                                                 mapp[i].br_state);
2125                                 }
2126                         }
2127                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2128                                          XFS_ERRLEVEL_LOW, mp);
2129                 }
2130                 goto exit0;
2131         }
2132         if (caller != 3 && nmap > 1) {
2133                 bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2134                 nbplist = 0;
2135         } else
2136                 bplist = NULL;
2137         /*
2138          * Turn the mapping(s) into buffer(s).
2139          */
2140         for (i = 0; i < nmap; i++) {
2141                 int     nmapped;
2142
2143                 mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2144                 if (i == 0)
2145                         *mappedbnop = mappedbno;
2146                 nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2147                 switch (caller) {
2148                 case 0:
2149                         bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2150                                 mappedbno, nmapped, 0);
2151                         error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
2152                         break;
2153                 case 1:
2154                 case 2:
2155                         bp = NULL;
2156                         error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2157                                 mappedbno, nmapped, 0, &bp);
2158                         break;
2159                 case 3:
2160                         xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
2161                         error = 0;
2162                         bp = NULL;
2163                         break;
2164                 }
2165                 if (error) {
2166                         if (bp)
2167                                 xfs_trans_brelse(trans, bp);
2168                         goto exit1;
2169                 }
2170                 if (!bp)
2171                         continue;
2172                 if (caller == 1) {
2173                         if (whichfork == XFS_ATTR_FORK) {
2174                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
2175                                                 XFS_ATTR_BTREE_REF);
2176                         } else {
2177                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
2178                                                 XFS_DIR_BTREE_REF);
2179                         }
2180                 }
2181                 if (bplist) {
2182                         bplist[nbplist++] = bp;
2183                 }
2184         }
2185         /*
2186          * Build a dabuf structure.
2187          */
2188         if (bplist) {
2189                 rbp = xfs_da_buf_make(nbplist, bplist, ra);
2190         } else if (bp)
2191                 rbp = xfs_da_buf_make(1, &bp, ra);
2192         else
2193                 rbp = NULL;
2194         /*
2195          * For read_buf, check the magic number.
2196          */
2197         if (caller == 1) {
2198                 xfs_dir2_data_t         *data;
2199                 xfs_dir2_free_t         *free;
2200                 xfs_da_blkinfo_t        *info;
2201                 uint                    magic, magic1;
2202
2203                 info = rbp->data;
2204                 data = rbp->data;
2205                 free = rbp->data;
2206                 magic = INT_GET(info->magic, ARCH_CONVERT);
2207                 magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
2208                 if (unlikely(
2209                     XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2210                                    (magic != XFS_DIR_LEAF_MAGIC) &&
2211                                    (magic != XFS_ATTR_LEAF_MAGIC) &&
2212                                    (magic != XFS_DIR2_LEAF1_MAGIC) &&
2213                                    (magic != XFS_DIR2_LEAFN_MAGIC) &&
2214                                    (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2215                                    (magic1 != XFS_DIR2_DATA_MAGIC) &&
2216                                    (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
2217                                 mp, XFS_ERRTAG_DA_READ_BUF,
2218                                 XFS_RANDOM_DA_READ_BUF))) {
2219                         xfs_buftrace("DA READ ERROR", rbp->bps[0]);
2220                         XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2221                                              XFS_ERRLEVEL_LOW, mp, info);
2222                         error = XFS_ERROR(EFSCORRUPTED);
2223                         xfs_da_brelse(trans, rbp);
2224                         nbplist = 0;
2225                         goto exit1;
2226                 }
2227         }
2228         if (bplist) {
2229                 kmem_free(bplist, sizeof(*bplist) * nmap);
2230         }
2231         if (mapp != &map) {
2232                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2233         }
2234         if (bpp)
2235                 *bpp = rbp;
2236         return 0;
2237 exit1:
2238         if (bplist) {
2239                 for (i = 0; i < nbplist; i++)
2240                         xfs_trans_brelse(trans, bplist[i]);
2241                 kmem_free(bplist, sizeof(*bplist) * nmap);
2242         }
2243 exit0:
2244         if (mapp != &map)
2245                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2246         if (bpp)
2247                 *bpp = NULL;
2248         return error;
2249 }
2250
2251 /*
2252  * Get a buffer for the dir/attr block.
2253  */
2254 int
2255 xfs_da_get_buf(
2256         xfs_trans_t     *trans,
2257         xfs_inode_t     *dp,
2258         xfs_dablk_t     bno,
2259         xfs_daddr_t             mappedbno,
2260         xfs_dabuf_t     **bpp,
2261         int             whichfork)
2262 {
2263         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2264                                                  (inst_t *)__return_address);
2265 }
2266
2267 /*
2268  * Get a buffer for the dir/attr block, fill in the contents.
2269  */
2270 int
2271 xfs_da_read_buf(
2272         xfs_trans_t     *trans,
2273         xfs_inode_t     *dp,
2274         xfs_dablk_t     bno,
2275         xfs_daddr_t             mappedbno,
2276         xfs_dabuf_t     **bpp,
2277         int             whichfork)
2278 {
2279         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2280                 (inst_t *)__return_address);
2281 }
2282
2283 /*
2284  * Readahead the dir/attr block.
2285  */
2286 xfs_daddr_t
2287 xfs_da_reada_buf(
2288         xfs_trans_t     *trans,
2289         xfs_inode_t     *dp,
2290         xfs_dablk_t     bno,
2291         int             whichfork)
2292 {
2293         xfs_daddr_t             rval;
2294
2295         rval = -1;
2296         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2297                         (inst_t *)__return_address))
2298                 return -1;
2299         else
2300                 return rval;
2301 }
2302
2303 /*
2304  * Calculate the number of bits needed to hold i different values.
2305  */
2306 uint
2307 xfs_da_log2_roundup(uint i)
2308 {
2309         uint rval;
2310
2311         for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2312                 if ((1 << rval) >= i)
2313                         break;
2314         }
2315         return(rval);
2316 }
2317
2318 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2319 kmem_zone_t *xfs_dabuf_zone;            /* dabuf zone */
2320
2321 /*
2322  * Allocate a dir-state structure.
2323  * We don't put them on the stack since they're large.
2324  */
2325 xfs_da_state_t *
2326 xfs_da_state_alloc(void)
2327 {
2328         return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
2329 }
2330
2331 /*
2332  * Kill the altpath contents of a da-state structure.
2333  */
2334 STATIC void
2335 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2336 {
2337         int     i;
2338
2339         for (i = 0; i < state->altpath.active; i++) {
2340                 if (state->altpath.blk[i].bp) {
2341                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2342                                 xfs_da_buf_done(state->altpath.blk[i].bp);
2343                         state->altpath.blk[i].bp = NULL;
2344                 }
2345         }
2346         state->altpath.active = 0;
2347 }
2348
2349 /*
2350  * Free a da-state structure.
2351  */
2352 void
2353 xfs_da_state_free(xfs_da_state_t *state)
2354 {
2355         int     i;
2356
2357         xfs_da_state_kill_altpath(state);
2358         for (i = 0; i < state->path.active; i++) {
2359                 if (state->path.blk[i].bp)
2360                         xfs_da_buf_done(state->path.blk[i].bp);
2361         }
2362         if (state->extravalid && state->extrablk.bp)
2363                 xfs_da_buf_done(state->extrablk.bp);
2364 #ifdef DEBUG
2365         memset((char *)state, 0, sizeof(*state));
2366 #endif /* DEBUG */
2367         kmem_zone_free(xfs_da_state_zone, state);
2368 }
2369
2370 #ifdef XFS_DABUF_DEBUG
2371 xfs_dabuf_t     *xfs_dabuf_global_list;
2372 lock_t          xfs_dabuf_global_lock;
2373 #endif
2374
2375 /*
2376  * Create a dabuf.
2377  */
2378 /* ARGSUSED */
2379 STATIC xfs_dabuf_t *
2380 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2381 {
2382         xfs_buf_t       *bp;
2383         xfs_dabuf_t     *dabuf;
2384         int             i;
2385         int             off;
2386
2387         if (nbuf == 1)
2388                 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
2389         else
2390                 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
2391         dabuf->dirty = 0;
2392 #ifdef XFS_DABUF_DEBUG
2393         dabuf->ra = ra;
2394         dabuf->target = XFS_BUF_TARGET(bps[0]);
2395         dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2396 #endif
2397         if (nbuf == 1) {
2398                 dabuf->nbuf = 1;
2399                 bp = bps[0];
2400                 dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2401                 dabuf->data = XFS_BUF_PTR(bp);
2402                 dabuf->bps[0] = bp;
2403         } else {
2404                 dabuf->nbuf = nbuf;
2405                 for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2406                         dabuf->bps[i] = bp = bps[i];
2407                         dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2408                 }
2409                 dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2410                 for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2411                         bp = bps[i];
2412                         memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
2413                                 XFS_BUF_COUNT(bp));
2414                 }
2415         }
2416 #ifdef XFS_DABUF_DEBUG
2417         {
2418                 SPLDECL(s);
2419                 xfs_dabuf_t     *p;
2420
2421                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2422                 for (p = xfs_dabuf_global_list; p; p = p->next) {
2423                         ASSERT(p->blkno != dabuf->blkno ||
2424                                p->target != dabuf->target);
2425                 }
2426                 dabuf->prev = NULL;
2427                 if (xfs_dabuf_global_list)
2428                         xfs_dabuf_global_list->prev = dabuf;
2429                 dabuf->next = xfs_dabuf_global_list;
2430                 xfs_dabuf_global_list = dabuf;
2431                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2432         }
2433 #endif
2434         return dabuf;
2435 }
2436
2437 /*
2438  * Un-dirty a dabuf.
2439  */
2440 STATIC void
2441 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2442 {
2443         xfs_buf_t       *bp;
2444         int             i;
2445         int             off;
2446
2447         if (dabuf->dirty) {
2448                 ASSERT(dabuf->nbuf > 1);
2449                 dabuf->dirty = 0;
2450                 for (i = off = 0; i < dabuf->nbuf;
2451                                 i++, off += XFS_BUF_COUNT(bp)) {
2452                         bp = dabuf->bps[i];
2453                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2454                                 XFS_BUF_COUNT(bp));
2455                 }
2456         }
2457 }
2458
2459 /*
2460  * Release a dabuf.
2461  */
2462 void
2463 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2464 {
2465         ASSERT(dabuf);
2466         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2467         if (dabuf->dirty)
2468                 xfs_da_buf_clean(dabuf);
2469         if (dabuf->nbuf > 1)
2470                 kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
2471 #ifdef XFS_DABUF_DEBUG
2472         {
2473                 SPLDECL(s);
2474
2475                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2476                 if (dabuf->prev)
2477                         dabuf->prev->next = dabuf->next;
2478                 else
2479                         xfs_dabuf_global_list = dabuf->next;
2480                 if (dabuf->next)
2481                         dabuf->next->prev = dabuf->prev;
2482                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2483         }
2484         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2485 #endif
2486         if (dabuf->nbuf == 1)
2487                 kmem_zone_free(xfs_dabuf_zone, dabuf);
2488         else
2489                 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
2490 }
2491
2492 /*
2493  * Log transaction from a dabuf.
2494  */
2495 void
2496 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2497 {
2498         xfs_buf_t       *bp;
2499         uint            f;
2500         int             i;
2501         uint            l;
2502         int             off;
2503
2504         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2505         if (dabuf->nbuf == 1) {
2506                 ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
2507                 xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2508                 return;
2509         }
2510         dabuf->dirty = 1;
2511         ASSERT(first <= last);
2512         for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2513                 bp = dabuf->bps[i];
2514                 f = off;
2515                 l = f + XFS_BUF_COUNT(bp) - 1;
2516                 if (f < first)
2517                         f = first;
2518                 if (l > last)
2519                         l = last;
2520                 if (f <= l)
2521                         xfs_trans_log_buf(tp, bp, f - off, l - off);
2522                 /*
2523                  * B_DONE is set by xfs_trans_log buf.
2524                  * If we don't set it on a new buffer (get not read)
2525                  * then if we don't put anything in the buffer it won't
2526                  * be set, and at commit it it released into the cache,
2527                  * and then a read will fail.
2528                  */
2529                 else if (!(XFS_BUF_ISDONE(bp)))
2530                   XFS_BUF_DONE(bp);
2531         }
2532         ASSERT(last < off);
2533 }
2534
2535 /*
2536  * Release dabuf from a transaction.
2537  * Have to free up the dabuf before the buffers are released,
2538  * since the synchronization on the dabuf is really the lock on the buffer.
2539  */
2540 void
2541 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2542 {
2543         xfs_buf_t       *bp;
2544         xfs_buf_t       **bplist;
2545         int             i;
2546         int             nbuf;
2547
2548         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2549         if ((nbuf = dabuf->nbuf) == 1) {
2550                 bplist = &bp;
2551                 bp = dabuf->bps[0];
2552         } else {
2553                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2554                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2555         }
2556         xfs_da_buf_done(dabuf);
2557         for (i = 0; i < nbuf; i++)
2558                 xfs_trans_brelse(tp, bplist[i]);
2559         if (bplist != &bp)
2560                 kmem_free(bplist, nbuf * sizeof(*bplist));
2561 }
2562
2563 /*
2564  * Invalidate dabuf from a transaction.
2565  */
2566 void
2567 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2568 {
2569         xfs_buf_t       *bp;
2570         xfs_buf_t       **bplist;
2571         int             i;
2572         int             nbuf;
2573
2574         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2575         if ((nbuf = dabuf->nbuf) == 1) {
2576                 bplist = &bp;
2577                 bp = dabuf->bps[0];
2578         } else {
2579                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2580                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2581         }
2582         xfs_da_buf_done(dabuf);
2583         for (i = 0; i < nbuf; i++)
2584                 xfs_trans_binval(tp, bplist[i]);
2585         if (bplist != &bp)
2586                 kmem_free(bplist, nbuf * sizeof(*bplist));
2587 }
2588
2589 /*
2590  * Get the first daddr from a dabuf.
2591  */
2592 xfs_daddr_t
2593 xfs_da_blkno(xfs_dabuf_t *dabuf)
2594 {
2595         ASSERT(dabuf->nbuf);
2596         ASSERT(dabuf->data);
2597         return XFS_BUF_ADDR(dabuf->bps[0]);
2598 }