Merge to Fedora kernel-2.6.18-1.2224_FC5 patched with stable patch-2.6.18.1-vs2.0...
[linux-2.6.git] / fs / xfs / xfs_alloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_alloc.h"
40 #include "xfs_error.h"
41
42 /*
43  * Prototypes for internal functions.
44  */
45
46 STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
47 STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
48 STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
49 STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
50 STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
51 STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
52 STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
53 STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
54                 xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
55 STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
56
57 /*
58  * Internal functions.
59  */
60
61 /*
62  * Single level of the xfs_alloc_delete record deletion routine.
63  * Delete record pointed to by cur/level.
64  * Remove the record from its block then rebalance the tree.
65  * Return 0 for error, 1 for done, 2 to go on to the next level.
66  */
67 STATIC int                              /* error */
68 xfs_alloc_delrec(
69         xfs_btree_cur_t         *cur,   /* btree cursor */
70         int                     level,  /* level removing record from */
71         int                     *stat)  /* fail/done/go-on */
72 {
73         xfs_agf_t               *agf;   /* allocation group freelist header */
74         xfs_alloc_block_t       *block; /* btree block record/key lives in */
75         xfs_agblock_t           bno;    /* btree block number */
76         xfs_buf_t               *bp;    /* buffer for block */
77         int                     error;  /* error return value */
78         int                     i;      /* loop index */
79         xfs_alloc_key_t         key;    /* kp points here if block is level 0 */
80         xfs_agblock_t           lbno;   /* left block's block number */
81         xfs_buf_t               *lbp;   /* left block's buffer pointer */
82         xfs_alloc_block_t       *left;  /* left btree block */
83         xfs_alloc_key_t         *lkp=NULL;      /* left block key pointer */
84         xfs_alloc_ptr_t         *lpp=NULL;      /* left block address pointer */
85         int                     lrecs=0;        /* number of records in left block */
86         xfs_alloc_rec_t         *lrp;   /* left block record pointer */
87         xfs_mount_t             *mp;    /* mount structure */
88         int                     ptr;    /* index in btree block for this rec */
89         xfs_agblock_t           rbno;   /* right block's block number */
90         xfs_buf_t               *rbp;   /* right block's buffer pointer */
91         xfs_alloc_block_t       *right; /* right btree block */
92         xfs_alloc_key_t         *rkp;   /* right block key pointer */
93         xfs_alloc_ptr_t         *rpp;   /* right block address pointer */
94         int                     rrecs=0;        /* number of records in right block */
95         xfs_alloc_rec_t         *rrp;   /* right block record pointer */
96         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
97
98         /*
99          * Get the index of the entry being deleted, check for nothing there.
100          */
101         ptr = cur->bc_ptrs[level];
102         if (ptr == 0) {
103                 *stat = 0;
104                 return 0;
105         }
106         /*
107          * Get the buffer & block containing the record or key/ptr.
108          */
109         bp = cur->bc_bufs[level];
110         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
111 #ifdef DEBUG
112         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
113                 return error;
114 #endif
115         /*
116          * Fail if we're off the end of the block.
117          */
118         if (ptr > be16_to_cpu(block->bb_numrecs)) {
119                 *stat = 0;
120                 return 0;
121         }
122         XFS_STATS_INC(xs_abt_delrec);
123         /*
124          * It's a nonleaf.  Excise the key and ptr being deleted, by
125          * sliding the entries past them down one.
126          * Log the changed areas of the block.
127          */
128         if (level > 0) {
129                 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
130                 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
131 #ifdef DEBUG
132                 for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) {
133                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
134                                 return error;
135                 }
136 #endif
137                 if (ptr < be16_to_cpu(block->bb_numrecs)) {
138                         memmove(&lkp[ptr - 1], &lkp[ptr],
139                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp));
140                         memmove(&lpp[ptr - 1], &lpp[ptr],
141                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp));
142                         xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
143                         xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
144                 }
145         }
146         /*
147          * It's a leaf.  Excise the record being deleted, by sliding the
148          * entries past it down one.  Log the changed areas of the block.
149          */
150         else {
151                 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
152                 if (ptr < be16_to_cpu(block->bb_numrecs)) {
153                         memmove(&lrp[ptr - 1], &lrp[ptr],
154                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp));
155                         xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
156                 }
157                 /*
158                  * If it's the first record in the block, we'll need a key
159                  * structure to pass up to the next level (updkey).
160                  */
161                 if (ptr == 1) {
162                         key.ar_startblock = lrp->ar_startblock;
163                         key.ar_blockcount = lrp->ar_blockcount;
164                         lkp = &key;
165                 }
166         }
167         /*
168          * Decrement and log the number of entries in the block.
169          */
170         be16_add(&block->bb_numrecs, -1);
171         xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
172         /*
173          * See if the longest free extent in the allocation group was
174          * changed by this operation.  True if it's the by-size btree, and
175          * this is the leaf level, and there is no right sibling block,
176          * and this was the last record.
177          */
178         agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
179         mp = cur->bc_mp;
180
181         if (level == 0 &&
182             cur->bc_btnum == XFS_BTNUM_CNT &&
183             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
184             ptr > be16_to_cpu(block->bb_numrecs)) {
185                 ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1);
186                 /*
187                  * There are still records in the block.  Grab the size
188                  * from the last one.
189                  */
190                 if (be16_to_cpu(block->bb_numrecs)) {
191                         rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur);
192                         agf->agf_longest = rrp->ar_blockcount;
193                 }
194                 /*
195                  * No free extents left.
196                  */
197                 else
198                         agf->agf_longest = 0;
199                 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
200                         be32_to_cpu(agf->agf_longest);
201                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
202                         XFS_AGF_LONGEST);
203         }
204         /*
205          * Is this the root level?  If so, we're almost done.
206          */
207         if (level == cur->bc_nlevels - 1) {
208                 /*
209                  * If this is the root level,
210                  * and there's only one entry left,
211                  * and it's NOT the leaf level,
212                  * then we can get rid of this level.
213                  */
214                 if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) {
215                         /*
216                          * lpp is still set to the first pointer in the block.
217                          * Make it the new root of the btree.
218                          */
219                         bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
220                         agf->agf_roots[cur->bc_btnum] = *lpp;
221                         be32_add(&agf->agf_levels[cur->bc_btnum], -1);
222                         mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
223                         /*
224                          * Put this buffer/block on the ag's freelist.
225                          */
226                         if ((error = xfs_alloc_put_freelist(cur->bc_tp,
227                                         cur->bc_private.a.agbp, NULL, bno)))
228                                 return error;
229                         /*
230                          * Since blocks move to the free list without the
231                          * coordination used in xfs_bmap_finish, we can't allow
232                          * block to be available for reallocation and
233                          * non-transaction writing (user data) until we know
234                          * that the transaction that moved it to the free list
235                          * is permanently on disk. We track the blocks by
236                          * declaring these blocks as "busy"; the busy list is
237                          * maintained on a per-ag basis and each transaction
238                          * records which entries should be removed when the
239                          * iclog commits to disk. If a busy block is
240                          * allocated, the iclog is pushed up to the LSN
241                          * that freed the block.
242                          */
243                         xfs_alloc_mark_busy(cur->bc_tp,
244                                 be32_to_cpu(agf->agf_seqno), bno, 1);
245
246                         xfs_trans_agbtree_delta(cur->bc_tp, -1);
247                         xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
248                                 XFS_AGF_ROOTS | XFS_AGF_LEVELS);
249                         /*
250                          * Update the cursor so there's one fewer level.
251                          */
252                         xfs_btree_setbuf(cur, level, NULL);
253                         cur->bc_nlevels--;
254                 } else if (level > 0 &&
255                            (error = xfs_alloc_decrement(cur, level, &i)))
256                         return error;
257                 *stat = 1;
258                 return 0;
259         }
260         /*
261          * If we deleted the leftmost entry in the block, update the
262          * key values above us in the tree.
263          */
264         if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
265                 return error;
266         /*
267          * If the number of records remaining in the block is at least
268          * the minimum, we're done.
269          */
270         if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
271                 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
272                         return error;
273                 *stat = 1;
274                 return 0;
275         }
276         /*
277          * Otherwise, we have to move some records around to keep the
278          * tree balanced.  Look at the left and right sibling blocks to
279          * see if we can re-balance by moving only one record.
280          */
281         rbno = be32_to_cpu(block->bb_rightsib);
282         lbno = be32_to_cpu(block->bb_leftsib);
283         bno = NULLAGBLOCK;
284         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
285         /*
286          * Duplicate the cursor so our btree manipulations here won't
287          * disrupt the next level up.
288          */
289         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
290                 return error;
291         /*
292          * If there's a right sibling, see if it's ok to shift an entry
293          * out of it.
294          */
295         if (rbno != NULLAGBLOCK) {
296                 /*
297                  * Move the temp cursor to the last entry in the next block.
298                  * Actually any entry but the first would suffice.
299                  */
300                 i = xfs_btree_lastrec(tcur, level);
301                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
302                 if ((error = xfs_alloc_increment(tcur, level, &i)))
303                         goto error0;
304                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
305                 i = xfs_btree_lastrec(tcur, level);
306                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
307                 /*
308                  * Grab a pointer to the block.
309                  */
310                 rbp = tcur->bc_bufs[level];
311                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
312 #ifdef DEBUG
313                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
314                         goto error0;
315 #endif
316                 /*
317                  * Grab the current block number, for future use.
318                  */
319                 bno = be32_to_cpu(right->bb_leftsib);
320                 /*
321                  * If right block is full enough so that removing one entry
322                  * won't make it too empty, and left-shifting an entry out
323                  * of right to us works, we're done.
324                  */
325                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
326                      XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
327                         if ((error = xfs_alloc_lshift(tcur, level, &i)))
328                                 goto error0;
329                         if (i) {
330                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
331                                        XFS_ALLOC_BLOCK_MINRECS(level, cur));
332                                 xfs_btree_del_cursor(tcur,
333                                                      XFS_BTREE_NOERROR);
334                                 if (level > 0 &&
335                                     (error = xfs_alloc_decrement(cur, level,
336                                             &i)))
337                                         return error;
338                                 *stat = 1;
339                                 return 0;
340                         }
341                 }
342                 /*
343                  * Otherwise, grab the number of records in right for
344                  * future reference, and fix up the temp cursor to point
345                  * to our block again (last record).
346                  */
347                 rrecs = be16_to_cpu(right->bb_numrecs);
348                 if (lbno != NULLAGBLOCK) {
349                         i = xfs_btree_firstrec(tcur, level);
350                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
351                         if ((error = xfs_alloc_decrement(tcur, level, &i)))
352                                 goto error0;
353                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
354                 }
355         }
356         /*
357          * If there's a left sibling, see if it's ok to shift an entry
358          * out of it.
359          */
360         if (lbno != NULLAGBLOCK) {
361                 /*
362                  * Move the temp cursor to the first entry in the
363                  * previous block.
364                  */
365                 i = xfs_btree_firstrec(tcur, level);
366                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
367                 if ((error = xfs_alloc_decrement(tcur, level, &i)))
368                         goto error0;
369                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
370                 xfs_btree_firstrec(tcur, level);
371                 /*
372                  * Grab a pointer to the block.
373                  */
374                 lbp = tcur->bc_bufs[level];
375                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
376 #ifdef DEBUG
377                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
378                         goto error0;
379 #endif
380                 /*
381                  * Grab the current block number, for future use.
382                  */
383                 bno = be32_to_cpu(left->bb_rightsib);
384                 /*
385                  * If left block is full enough so that removing one entry
386                  * won't make it too empty, and right-shifting an entry out
387                  * of left to us works, we're done.
388                  */
389                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
390                      XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
391                         if ((error = xfs_alloc_rshift(tcur, level, &i)))
392                                 goto error0;
393                         if (i) {
394                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
395                                        XFS_ALLOC_BLOCK_MINRECS(level, cur));
396                                 xfs_btree_del_cursor(tcur,
397                                                      XFS_BTREE_NOERROR);
398                                 if (level == 0)
399                                         cur->bc_ptrs[0]++;
400                                 *stat = 1;
401                                 return 0;
402                         }
403                 }
404                 /*
405                  * Otherwise, grab the number of records in right for
406                  * future reference.
407                  */
408                 lrecs = be16_to_cpu(left->bb_numrecs);
409         }
410         /*
411          * Delete the temp cursor, we're done with it.
412          */
413         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
414         /*
415          * If here, we need to do a join to keep the tree balanced.
416          */
417         ASSERT(bno != NULLAGBLOCK);
418         /*
419          * See if we can join with the left neighbor block.
420          */
421         if (lbno != NULLAGBLOCK &&
422             lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
423                 /*
424                  * Set "right" to be the starting block,
425                  * "left" to be the left neighbor.
426                  */
427                 rbno = bno;
428                 right = block;
429                 rbp = bp;
430                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
431                                 cur->bc_private.a.agno, lbno, 0, &lbp,
432                                 XFS_ALLOC_BTREE_REF)))
433                         return error;
434                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
435                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
436                         return error;
437         }
438         /*
439          * If that won't work, see if we can join with the right neighbor block.
440          */
441         else if (rbno != NULLAGBLOCK &&
442                  rrecs + be16_to_cpu(block->bb_numrecs) <=
443                   XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
444                 /*
445                  * Set "left" to be the starting block,
446                  * "right" to be the right neighbor.
447                  */
448                 lbno = bno;
449                 left = block;
450                 lbp = bp;
451                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
452                                 cur->bc_private.a.agno, rbno, 0, &rbp,
453                                 XFS_ALLOC_BTREE_REF)))
454                         return error;
455                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
456                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
457                         return error;
458         }
459         /*
460          * Otherwise, we can't fix the imbalance.
461          * Just return.  This is probably a logic error, but it's not fatal.
462          */
463         else {
464                 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
465                         return error;
466                 *stat = 1;
467                 return 0;
468         }
469         /*
470          * We're now going to join "left" and "right" by moving all the stuff
471          * in "right" to "left" and deleting "right".
472          */
473         if (level > 0) {
474                 /*
475                  * It's a non-leaf.  Move keys and pointers.
476                  */
477                 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
478                 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
479                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
480                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
481 #ifdef DEBUG
482                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
483                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
484                                 return error;
485                 }
486 #endif
487                 memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp));
488                 memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp));
489                 xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
490                                    be16_to_cpu(left->bb_numrecs) +
491                                    be16_to_cpu(right->bb_numrecs));
492                 xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
493                                    be16_to_cpu(left->bb_numrecs) +
494                                    be16_to_cpu(right->bb_numrecs));
495         } else {
496                 /*
497                  * It's a leaf.  Move records.
498                  */
499                 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
500                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
501                 memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp));
502                 xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
503                                    be16_to_cpu(left->bb_numrecs) +
504                                    be16_to_cpu(right->bb_numrecs));
505         }
506         /*
507          * If we joined with the left neighbor, set the buffer in the
508          * cursor to the left block, and fix up the index.
509          */
510         if (bp != lbp) {
511                 xfs_btree_setbuf(cur, level, lbp);
512                 cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs);
513         }
514         /*
515          * If we joined with the right neighbor and there's a level above
516          * us, increment the cursor at that level.
517          */
518         else if (level + 1 < cur->bc_nlevels &&
519                  (error = xfs_alloc_increment(cur, level + 1, &i)))
520                 return error;
521         /*
522          * Fix up the number of records in the surviving block.
523          */
524         be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs));
525         /*
526          * Fix up the right block pointer in the surviving block, and log it.
527          */
528         left->bb_rightsib = right->bb_rightsib;
529         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
530         /*
531          * If there is a right sibling now, make it point to the
532          * remaining block.
533          */
534         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
535                 xfs_alloc_block_t       *rrblock;
536                 xfs_buf_t               *rrbp;
537
538                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
539                                 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
540                                 &rrbp, XFS_ALLOC_BTREE_REF)))
541                         return error;
542                 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
543                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
544                         return error;
545                 rrblock->bb_leftsib = cpu_to_be32(lbno);
546                 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
547         }
548         /*
549          * Free the deleting block by putting it on the freelist.
550          */
551         if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
552                         NULL, rbno)))
553                 return error;
554         /*
555          * Since blocks move to the free list without the coordination
556          * used in xfs_bmap_finish, we can't allow block to be available
557          * for reallocation and non-transaction writing (user data)
558          * until we know that the transaction that moved it to the free
559          * list is permanently on disk. We track the blocks by declaring
560          * these blocks as "busy"; the busy list is maintained on a
561          * per-ag basis and each transaction records which entries
562          * should be removed when the iclog commits to disk. If a
563          * busy block is allocated, the iclog is pushed up to the
564          * LSN that freed the block.
565          */
566         xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
567         xfs_trans_agbtree_delta(cur->bc_tp, -1);
568
569         /*
570          * Adjust the current level's cursor so that we're left referring
571          * to the right node, after we're done.
572          * If this leaves the ptr value 0 our caller will fix it up.
573          */
574         if (level > 0)
575                 cur->bc_ptrs[level]--;
576         /*
577          * Return value means the next level up has something to do.
578          */
579         *stat = 2;
580         return 0;
581
582 error0:
583         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
584         return error;
585 }
586
587 /*
588  * Insert one record/level.  Return information to the caller
589  * allowing the next level up to proceed if necessary.
590  */
591 STATIC int                              /* error */
592 xfs_alloc_insrec(
593         xfs_btree_cur_t         *cur,   /* btree cursor */
594         int                     level,  /* level to insert record at */
595         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
596         xfs_alloc_rec_t         *recp,  /* i/o: record data inserted */
597         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
598         int                     *stat)  /* output: success/failure */
599 {
600         xfs_agf_t               *agf;   /* allocation group freelist header */
601         xfs_alloc_block_t       *block; /* btree block record/key lives in */
602         xfs_buf_t               *bp;    /* buffer for block */
603         int                     error;  /* error return value */
604         int                     i;      /* loop index */
605         xfs_alloc_key_t         key;    /* key value being inserted */
606         xfs_alloc_key_t         *kp;    /* pointer to btree keys */
607         xfs_agblock_t           nbno;   /* block number of allocated block */
608         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
609         xfs_alloc_key_t         nkey;   /* new key value, from split */
610         xfs_alloc_rec_t         nrec;   /* new record value, for caller */
611         int                     optr;   /* old ptr value */
612         xfs_alloc_ptr_t         *pp;    /* pointer to btree addresses */
613         int                     ptr;    /* index in btree block for this rec */
614         xfs_alloc_rec_t         *rp;    /* pointer to btree records */
615
616         ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
617
618         /*
619          * GCC doesn't understand the (arguably complex) control flow in
620          * this function and complains about uninitialized structure fields
621          * without this.
622          */
623         memset(&nrec, 0, sizeof(nrec));
624
625         /*
626          * If we made it to the root level, allocate a new root block
627          * and we're done.
628          */
629         if (level >= cur->bc_nlevels) {
630                 XFS_STATS_INC(xs_abt_insrec);
631                 if ((error = xfs_alloc_newroot(cur, &i)))
632                         return error;
633                 *bnop = NULLAGBLOCK;
634                 *stat = i;
635                 return 0;
636         }
637         /*
638          * Make a key out of the record data to be inserted, and save it.
639          */
640         key.ar_startblock = recp->ar_startblock;
641         key.ar_blockcount = recp->ar_blockcount;
642         optr = ptr = cur->bc_ptrs[level];
643         /*
644          * If we're off the left edge, return failure.
645          */
646         if (ptr == 0) {
647                 *stat = 0;
648                 return 0;
649         }
650         XFS_STATS_INC(xs_abt_insrec);
651         /*
652          * Get pointers to the btree buffer and block.
653          */
654         bp = cur->bc_bufs[level];
655         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
656 #ifdef DEBUG
657         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
658                 return error;
659         /*
660          * Check that the new entry is being inserted in the right place.
661          */
662         if (ptr <= be16_to_cpu(block->bb_numrecs)) {
663                 if (level == 0) {
664                         rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
665                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
666                 } else {
667                         kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
668                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
669                 }
670         }
671 #endif
672         nbno = NULLAGBLOCK;
673         ncur = (xfs_btree_cur_t *)0;
674         /*
675          * If the block is full, we can't insert the new entry until we
676          * make the block un-full.
677          */
678         if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
679                 /*
680                  * First, try shifting an entry to the right neighbor.
681                  */
682                 if ((error = xfs_alloc_rshift(cur, level, &i)))
683                         return error;
684                 if (i) {
685                         /* nothing */
686                 }
687                 /*
688                  * Next, try shifting an entry to the left neighbor.
689                  */
690                 else {
691                         if ((error = xfs_alloc_lshift(cur, level, &i)))
692                                 return error;
693                         if (i)
694                                 optr = ptr = cur->bc_ptrs[level];
695                         else {
696                                 /*
697                                  * Next, try splitting the current block in
698                                  * half. If this works we have to re-set our
699                                  * variables because we could be in a
700                                  * different block now.
701                                  */
702                                 if ((error = xfs_alloc_split(cur, level, &nbno,
703                                                 &nkey, &ncur, &i)))
704                                         return error;
705                                 if (i) {
706                                         bp = cur->bc_bufs[level];
707                                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
708 #ifdef DEBUG
709                                         if ((error =
710                                                 xfs_btree_check_sblock(cur,
711                                                         block, level, bp)))
712                                                 return error;
713 #endif
714                                         ptr = cur->bc_ptrs[level];
715                                         nrec.ar_startblock = nkey.ar_startblock;
716                                         nrec.ar_blockcount = nkey.ar_blockcount;
717                                 }
718                                 /*
719                                  * Otherwise the insert fails.
720                                  */
721                                 else {
722                                         *stat = 0;
723                                         return 0;
724                                 }
725                         }
726                 }
727         }
728         /*
729          * At this point we know there's room for our new entry in the block
730          * we're pointing at.
731          */
732         if (level > 0) {
733                 /*
734                  * It's a non-leaf entry.  Make a hole for the new data
735                  * in the key and ptr regions of the block.
736                  */
737                 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
738                 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
739 #ifdef DEBUG
740                 for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) {
741                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
742                                 return error;
743                 }
744 #endif
745                 memmove(&kp[ptr], &kp[ptr - 1],
746                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp));
747                 memmove(&pp[ptr], &pp[ptr - 1],
748                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp));
749 #ifdef DEBUG
750                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
751                         return error;
752 #endif
753                 /*
754                  * Now stuff the new data in, bump numrecs and log the new data.
755                  */
756                 kp[ptr - 1] = key;
757                 pp[ptr - 1] = cpu_to_be32(*bnop);
758                 be16_add(&block->bb_numrecs, 1);
759                 xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
760                 xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
761 #ifdef DEBUG
762                 if (ptr < be16_to_cpu(block->bb_numrecs))
763                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
764                                 kp + ptr);
765 #endif
766         } else {
767                 /*
768                  * It's a leaf entry.  Make a hole for the new record.
769                  */
770                 rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
771                 memmove(&rp[ptr], &rp[ptr - 1],
772                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp));
773                 /*
774                  * Now stuff the new record in, bump numrecs
775                  * and log the new data.
776                  */
777                 rp[ptr - 1] = *recp; /* INT_: struct copy */
778                 be16_add(&block->bb_numrecs, 1);
779                 xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
780 #ifdef DEBUG
781                 if (ptr < be16_to_cpu(block->bb_numrecs))
782                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
783                                 rp + ptr);
784 #endif
785         }
786         /*
787          * Log the new number of records in the btree header.
788          */
789         xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
790         /*
791          * If we inserted at the start of a block, update the parents' keys.
792          */
793         if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
794                 return error;
795         /*
796          * Look to see if the longest extent in the allocation group
797          * needs to be updated.
798          */
799
800         agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
801         if (level == 0 &&
802             cur->bc_btnum == XFS_BTNUM_CNT &&
803             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
804             be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
805                 /*
806                  * If this is a leaf in the by-size btree and there
807                  * is no right sibling block and this block is bigger
808                  * than the previous longest block, update it.
809                  */
810                 agf->agf_longest = recp->ar_blockcount;
811                 cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
812                         = be32_to_cpu(recp->ar_blockcount);
813                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
814                         XFS_AGF_LONGEST);
815         }
816         /*
817          * Return the new block number, if any.
818          * If there is one, give back a record value and a cursor too.
819          */
820         *bnop = nbno;
821         if (nbno != NULLAGBLOCK) {
822                 *recp = nrec; /* INT_: struct copy */
823                 *curp = ncur; /* INT_: struct copy */
824         }
825         *stat = 1;
826         return 0;
827 }
828
829 /*
830  * Log header fields from a btree block.
831  */
832 STATIC void
833 xfs_alloc_log_block(
834         xfs_trans_t             *tp,    /* transaction pointer */
835         xfs_buf_t               *bp,    /* buffer containing btree block */
836         int                     fields) /* mask of fields: XFS_BB_... */
837 {
838         int                     first;  /* first byte offset logged */
839         int                     last;   /* last byte offset logged */
840         static const short      offsets[] = {   /* table of offsets */
841                 offsetof(xfs_alloc_block_t, bb_magic),
842                 offsetof(xfs_alloc_block_t, bb_level),
843                 offsetof(xfs_alloc_block_t, bb_numrecs),
844                 offsetof(xfs_alloc_block_t, bb_leftsib),
845                 offsetof(xfs_alloc_block_t, bb_rightsib),
846                 sizeof(xfs_alloc_block_t)
847         };
848
849         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
850         xfs_trans_log_buf(tp, bp, first, last);
851 }
852
853 /*
854  * Log keys from a btree block (nonleaf).
855  */
856 STATIC void
857 xfs_alloc_log_keys(
858         xfs_btree_cur_t         *cur,   /* btree cursor */
859         xfs_buf_t               *bp,    /* buffer containing btree block */
860         int                     kfirst, /* index of first key to log */
861         int                     klast)  /* index of last key to log */
862 {
863         xfs_alloc_block_t       *block; /* btree block to log from */
864         int                     first;  /* first byte offset logged */
865         xfs_alloc_key_t         *kp;    /* key pointer in btree block */
866         int                     last;   /* last byte offset logged */
867
868         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
869         kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
870         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
871         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
872         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
873 }
874
875 /*
876  * Log block pointer fields from a btree block (nonleaf).
877  */
878 STATIC void
879 xfs_alloc_log_ptrs(
880         xfs_btree_cur_t         *cur,   /* btree cursor */
881         xfs_buf_t               *bp,    /* buffer containing btree block */
882         int                     pfirst, /* index of first pointer to log */
883         int                     plast)  /* index of last pointer to log */
884 {
885         xfs_alloc_block_t       *block; /* btree block to log from */
886         int                     first;  /* first byte offset logged */
887         int                     last;   /* last byte offset logged */
888         xfs_alloc_ptr_t         *pp;    /* block-pointer pointer in btree blk */
889
890         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
891         pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
892         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
893         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
894         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
895 }
896
897 /*
898  * Log records from a btree block (leaf).
899  */
900 STATIC void
901 xfs_alloc_log_recs(
902         xfs_btree_cur_t         *cur,   /* btree cursor */
903         xfs_buf_t               *bp,    /* buffer containing btree block */
904         int                     rfirst, /* index of first record to log */
905         int                     rlast)  /* index of last record to log */
906 {
907         xfs_alloc_block_t       *block; /* btree block to log from */
908         int                     first;  /* first byte offset logged */
909         int                     last;   /* last byte offset logged */
910         xfs_alloc_rec_t         *rp;    /* record pointer for btree block */
911
912
913         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
914         rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
915 #ifdef DEBUG
916         {
917                 xfs_agf_t       *agf;
918                 xfs_alloc_rec_t *p;
919
920                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
921                 for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
922                         ASSERT(be32_to_cpu(p->ar_startblock) +
923                                be32_to_cpu(p->ar_blockcount) <=
924                                be32_to_cpu(agf->agf_length));
925         }
926 #endif
927         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
928         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
929         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
930 }
931
932 /*
933  * Lookup the record.  The cursor is made to point to it, based on dir.
934  * Return 0 if can't find any such record, 1 for success.
935  */
936 STATIC int                              /* error */
937 xfs_alloc_lookup(
938         xfs_btree_cur_t         *cur,   /* btree cursor */
939         xfs_lookup_t            dir,    /* <=, ==, or >= */
940         int                     *stat)  /* success/failure */
941 {
942         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
943         xfs_agnumber_t          agno;   /* allocation group number */
944         xfs_alloc_block_t       *block=NULL;    /* current btree block */
945         int                     diff;   /* difference for the current key */
946         int                     error;  /* error return value */
947         int                     keyno=0;        /* current key number */
948         int                     level;  /* level in the btree */
949         xfs_mount_t             *mp;    /* file system mount point */
950
951         XFS_STATS_INC(xs_abt_lookup);
952         /*
953          * Get the allocation group header, and the root block number.
954          */
955         mp = cur->bc_mp;
956
957         {
958                 xfs_agf_t       *agf;   /* a.g. freespace header */
959
960                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
961                 agno = be32_to_cpu(agf->agf_seqno);
962                 agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
963         }
964         /*
965          * Iterate over each level in the btree, starting at the root.
966          * For each level above the leaves, find the key we need, based
967          * on the lookup record, then follow the corresponding block
968          * pointer down to the next level.
969          */
970         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
971                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
972                 xfs_daddr_t     d;      /* disk address of btree block */
973
974                 /*
975                  * Get the disk address we're looking for.
976                  */
977                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
978                 /*
979                  * If the old buffer at this level is for a different block,
980                  * throw it away, otherwise just use it.
981                  */
982                 bp = cur->bc_bufs[level];
983                 if (bp && XFS_BUF_ADDR(bp) != d)
984                         bp = (xfs_buf_t *)0;
985                 if (!bp) {
986                         /*
987                          * Need to get a new buffer.  Read it, then
988                          * set it in the cursor, releasing the old one.
989                          */
990                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
991                                         agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
992                                 return error;
993                         xfs_btree_setbuf(cur, level, bp);
994                         /*
995                          * Point to the btree block, now that we have the buffer
996                          */
997                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
998                         if ((error = xfs_btree_check_sblock(cur, block, level,
999                                         bp)))
1000                                 return error;
1001                 } else
1002                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1003                 /*
1004                  * If we already had a key match at a higher level, we know
1005                  * we need to use the first entry in this block.
1006                  */
1007                 if (diff == 0)
1008                         keyno = 1;
1009                 /*
1010                  * Otherwise we need to search this block.  Do a binary search.
1011                  */
1012                 else {
1013                         int             high;   /* high entry number */
1014                         xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
1015                         xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
1016                         int             low;    /* low entry number */
1017
1018                         /*
1019                          * Get a pointer to keys or records.
1020                          */
1021                         if (level > 0)
1022                                 kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
1023                         else
1024                                 krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
1025                         /*
1026                          * Set low and high entry numbers, 1-based.
1027                          */
1028                         low = 1;
1029                         if (!(high = be16_to_cpu(block->bb_numrecs))) {
1030                                 /*
1031                                  * If the block is empty, the tree must
1032                                  * be an empty leaf.
1033                                  */
1034                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1035                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1036                                 *stat = 0;
1037                                 return 0;
1038                         }
1039                         /*
1040                          * Binary search the block.
1041                          */
1042                         while (low <= high) {
1043                                 xfs_extlen_t    blockcount;     /* key value */
1044                                 xfs_agblock_t   startblock;     /* key value */
1045
1046                                 XFS_STATS_INC(xs_abt_compare);
1047                                 /*
1048                                  * keyno is average of low and high.
1049                                  */
1050                                 keyno = (low + high) >> 1;
1051                                 /*
1052                                  * Get startblock & blockcount.
1053                                  */
1054                                 if (level > 0) {
1055                                         xfs_alloc_key_t *kkp;
1056
1057                                         kkp = kkbase + keyno - 1;
1058                                         startblock = be32_to_cpu(kkp->ar_startblock);
1059                                         blockcount = be32_to_cpu(kkp->ar_blockcount);
1060                                 } else {
1061                                         xfs_alloc_rec_t *krp;
1062
1063                                         krp = krbase + keyno - 1;
1064                                         startblock = be32_to_cpu(krp->ar_startblock);
1065                                         blockcount = be32_to_cpu(krp->ar_blockcount);
1066                                 }
1067                                 /*
1068                                  * Compute difference to get next direction.
1069                                  */
1070                                 if (cur->bc_btnum == XFS_BTNUM_BNO)
1071                                         diff = (int)startblock -
1072                                                (int)cur->bc_rec.a.ar_startblock;
1073                                 else if (!(diff = (int)blockcount -
1074                                             (int)cur->bc_rec.a.ar_blockcount))
1075                                         diff = (int)startblock -
1076                                             (int)cur->bc_rec.a.ar_startblock;
1077                                 /*
1078                                  * Less than, move right.
1079                                  */
1080                                 if (diff < 0)
1081                                         low = keyno + 1;
1082                                 /*
1083                                  * Greater than, move left.
1084                                  */
1085                                 else if (diff > 0)
1086                                         high = keyno - 1;
1087                                 /*
1088                                  * Equal, we're done.
1089                                  */
1090                                 else
1091                                         break;
1092                         }
1093                 }
1094                 /*
1095                  * If there are more levels, set up for the next level
1096                  * by getting the block number and filling in the cursor.
1097                  */
1098                 if (level > 0) {
1099                         /*
1100                          * If we moved left, need the previous key number,
1101                          * unless there isn't one.
1102                          */
1103                         if (diff > 0 && --keyno < 1)
1104                                 keyno = 1;
1105                         agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
1106 #ifdef DEBUG
1107                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1108                                 return error;
1109 #endif
1110                         cur->bc_ptrs[level] = keyno;
1111                 }
1112         }
1113         /*
1114          * Done with the search.
1115          * See if we need to adjust the results.
1116          */
1117         if (dir != XFS_LOOKUP_LE && diff < 0) {
1118                 keyno++;
1119                 /*
1120                  * If ge search and we went off the end of the block, but it's
1121                  * not the last block, we're in the wrong block.
1122                  */
1123                 if (dir == XFS_LOOKUP_GE &&
1124                     keyno > be16_to_cpu(block->bb_numrecs) &&
1125                     be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1126                         int     i;
1127
1128                         cur->bc_ptrs[0] = keyno;
1129                         if ((error = xfs_alloc_increment(cur, 0, &i)))
1130                                 return error;
1131                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1132                         *stat = 1;
1133                         return 0;
1134                 }
1135         }
1136         else if (dir == XFS_LOOKUP_LE && diff > 0)
1137                 keyno--;
1138         cur->bc_ptrs[0] = keyno;
1139         /*
1140          * Return if we succeeded or not.
1141          */
1142         if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1143                 *stat = 0;
1144         else
1145                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1146         return 0;
1147 }
1148
1149 /*
1150  * Move 1 record left from cur/level if possible.
1151  * Update cur to reflect the new path.
1152  */
1153 STATIC int                              /* error */
1154 xfs_alloc_lshift(
1155         xfs_btree_cur_t         *cur,   /* btree cursor */
1156         int                     level,  /* level to shift record on */
1157         int                     *stat)  /* success/failure */
1158 {
1159         int                     error;  /* error return value */
1160 #ifdef DEBUG
1161         int                     i;      /* loop index */
1162 #endif
1163         xfs_alloc_key_t         key;    /* key value for leaf level upward */
1164         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1165         xfs_alloc_block_t       *left;  /* left neighbor btree block */
1166         int                     nrec;   /* new number of left block entries */
1167         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1168         xfs_alloc_block_t       *right; /* right (current) btree block */
1169         xfs_alloc_key_t         *rkp=NULL;      /* key pointer for right block */
1170         xfs_alloc_ptr_t         *rpp=NULL;      /* address pointer for right block */
1171         xfs_alloc_rec_t         *rrp=NULL;      /* record pointer for right block */
1172
1173         /*
1174          * Set up variables for this block as "right".
1175          */
1176         rbp = cur->bc_bufs[level];
1177         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1178 #ifdef DEBUG
1179         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1180                 return error;
1181 #endif
1182         /*
1183          * If we've got no left sibling then we can't shift an entry left.
1184          */
1185         if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1186                 *stat = 0;
1187                 return 0;
1188         }
1189         /*
1190          * If the cursor entry is the one that would be moved, don't
1191          * do it... it's too complicated.
1192          */
1193         if (cur->bc_ptrs[level] <= 1) {
1194                 *stat = 0;
1195                 return 0;
1196         }
1197         /*
1198          * Set up the left neighbor as "left".
1199          */
1200         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1201                         cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1202                         0, &lbp, XFS_ALLOC_BTREE_REF)))
1203                 return error;
1204         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1205         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1206                 return error;
1207         /*
1208          * If it's full, it can't take another entry.
1209          */
1210         if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1211                 *stat = 0;
1212                 return 0;
1213         }
1214         nrec = be16_to_cpu(left->bb_numrecs) + 1;
1215         /*
1216          * If non-leaf, copy a key and a ptr to the left block.
1217          */
1218         if (level > 0) {
1219                 xfs_alloc_key_t *lkp;   /* key pointer for left block */
1220                 xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1221
1222                 lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
1223                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1224                 *lkp = *rkp;
1225                 xfs_alloc_log_keys(cur, lbp, nrec, nrec);
1226                 lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
1227                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1228 #ifdef DEBUG
1229                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1230                         return error;
1231 #endif
1232                 *lpp = *rpp; /* INT_: copy */
1233                 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1234                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1235         }
1236         /*
1237          * If leaf, copy a record to the left block.
1238          */
1239         else {
1240                 xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1241
1242                 lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
1243                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1244                 *lrp = *rrp;
1245                 xfs_alloc_log_recs(cur, lbp, nrec, nrec);
1246                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1247         }
1248         /*
1249          * Bump and log left's numrecs, decrement and log right's numrecs.
1250          */
1251         be16_add(&left->bb_numrecs, 1);
1252         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1253         be16_add(&right->bb_numrecs, -1);
1254         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1255         /*
1256          * Slide the contents of right down one entry.
1257          */
1258         if (level > 0) {
1259 #ifdef DEBUG
1260                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1261                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1262                                         level)))
1263                                 return error;
1264                 }
1265 #endif
1266                 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1267                 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1268                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1269                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1270         } else {
1271                 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1272                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1273                 key.ar_startblock = rrp->ar_startblock;
1274                 key.ar_blockcount = rrp->ar_blockcount;
1275                 rkp = &key;
1276         }
1277         /*
1278          * Update the parent key values of right.
1279          */
1280         if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
1281                 return error;
1282         /*
1283          * Slide the cursor value left one.
1284          */
1285         cur->bc_ptrs[level]--;
1286         *stat = 1;
1287         return 0;
1288 }
1289
1290 /*
1291  * Allocate a new root block, fill it in.
1292  */
1293 STATIC int                              /* error */
1294 xfs_alloc_newroot(
1295         xfs_btree_cur_t         *cur,   /* btree cursor */
1296         int                     *stat)  /* success/failure */
1297 {
1298         int                     error;  /* error return value */
1299         xfs_agblock_t           lbno;   /* left block number */
1300         xfs_buf_t               *lbp;   /* left btree buffer */
1301         xfs_alloc_block_t       *left;  /* left btree block */
1302         xfs_mount_t             *mp;    /* mount structure */
1303         xfs_agblock_t           nbno;   /* new block number */
1304         xfs_buf_t               *nbp;   /* new (root) buffer */
1305         xfs_alloc_block_t       *new;   /* new (root) btree block */
1306         int                     nptr;   /* new value for key index, 1 or 2 */
1307         xfs_agblock_t           rbno;   /* right block number */
1308         xfs_buf_t               *rbp;   /* right btree buffer */
1309         xfs_alloc_block_t       *right; /* right btree block */
1310
1311         mp = cur->bc_mp;
1312
1313         ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
1314         /*
1315          * Get a buffer from the freelist blocks, for the new root.
1316          */
1317         if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
1318                         &nbno)))
1319                 return error;
1320         /*
1321          * None available, we fail.
1322          */
1323         if (nbno == NULLAGBLOCK) {
1324                 *stat = 0;
1325                 return 0;
1326         }
1327         xfs_trans_agbtree_delta(cur->bc_tp, 1);
1328         nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
1329                 0);
1330         new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
1331         /*
1332          * Set the root data in the a.g. freespace structure.
1333          */
1334         {
1335                 xfs_agf_t       *agf;   /* a.g. freespace header */
1336                 xfs_agnumber_t  seqno;
1337
1338                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
1339                 agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
1340                 be32_add(&agf->agf_levels[cur->bc_btnum], 1);
1341                 seqno = be32_to_cpu(agf->agf_seqno);
1342                 mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
1343                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
1344                         XFS_AGF_ROOTS | XFS_AGF_LEVELS);
1345         }
1346         /*
1347          * At the previous root level there are now two blocks: the old
1348          * root, and the new block generated when it was split.
1349          * We don't know which one the cursor is pointing at, so we
1350          * set up variables "left" and "right" for each case.
1351          */
1352         lbp = cur->bc_bufs[cur->bc_nlevels - 1];
1353         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1354 #ifdef DEBUG
1355         if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
1356                 return error;
1357 #endif
1358         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
1359                 /*
1360                  * Our block is left, pick up the right block.
1361                  */
1362                 lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
1363                 rbno = be32_to_cpu(left->bb_rightsib);
1364                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1365                                 cur->bc_private.a.agno, rbno, 0, &rbp,
1366                                 XFS_ALLOC_BTREE_REF)))
1367                         return error;
1368                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1369                 if ((error = xfs_btree_check_sblock(cur, right,
1370                                 cur->bc_nlevels - 1, rbp)))
1371                         return error;
1372                 nptr = 1;
1373         } else {
1374                 /*
1375                  * Our block is right, pick up the left block.
1376                  */
1377                 rbp = lbp;
1378                 right = left;
1379                 rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
1380                 lbno = be32_to_cpu(right->bb_leftsib);
1381                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1382                                 cur->bc_private.a.agno, lbno, 0, &lbp,
1383                                 XFS_ALLOC_BTREE_REF)))
1384                         return error;
1385                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1386                 if ((error = xfs_btree_check_sblock(cur, left,
1387                                 cur->bc_nlevels - 1, lbp)))
1388                         return error;
1389                 nptr = 2;
1390         }
1391         /*
1392          * Fill in the new block's btree header and log it.
1393          */
1394         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1395         new->bb_level = cpu_to_be16(cur->bc_nlevels);
1396         new->bb_numrecs = cpu_to_be16(2);
1397         new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1398         new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1399         xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
1400         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1401         /*
1402          * Fill in the key data in the new root.
1403          */
1404         {
1405                 xfs_alloc_key_t         *kp;    /* btree key pointer */
1406
1407                 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
1408                 if (be16_to_cpu(left->bb_level) > 0) {
1409                         kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
1410                         kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
1411                 } else {
1412                         xfs_alloc_rec_t *rp;    /* btree record pointer */
1413
1414                         rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
1415                         kp[0].ar_startblock = rp->ar_startblock;
1416                         kp[0].ar_blockcount = rp->ar_blockcount;
1417                         rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1418                         kp[1].ar_startblock = rp->ar_startblock;
1419                         kp[1].ar_blockcount = rp->ar_blockcount;
1420                 }
1421         }
1422         xfs_alloc_log_keys(cur, nbp, 1, 2);
1423         /*
1424          * Fill in the pointer data in the new root.
1425          */
1426         {
1427                 xfs_alloc_ptr_t         *pp;    /* btree address pointer */
1428
1429                 pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
1430                 pp[0] = cpu_to_be32(lbno);
1431                 pp[1] = cpu_to_be32(rbno);
1432         }
1433         xfs_alloc_log_ptrs(cur, nbp, 1, 2);
1434         /*
1435          * Fix up the cursor.
1436          */
1437         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1438         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1439         cur->bc_nlevels++;
1440         *stat = 1;
1441         return 0;
1442 }
1443
1444 /*
1445  * Move 1 record right from cur/level if possible.
1446  * Update cur to reflect the new path.
1447  */
1448 STATIC int                              /* error */
1449 xfs_alloc_rshift(
1450         xfs_btree_cur_t         *cur,   /* btree cursor */
1451         int                     level,  /* level to shift record on */
1452         int                     *stat)  /* success/failure */
1453 {
1454         int                     error;  /* error return value */
1455         int                     i;      /* loop index */
1456         xfs_alloc_key_t         key;    /* key value for leaf level upward */
1457         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1458         xfs_alloc_block_t       *left;  /* left (current) btree block */
1459         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1460         xfs_alloc_block_t       *right; /* right neighbor btree block */
1461         xfs_alloc_key_t         *rkp;   /* key pointer for right block */
1462         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1463
1464         /*
1465          * Set up variables for this block as "left".
1466          */
1467         lbp = cur->bc_bufs[level];
1468         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1469 #ifdef DEBUG
1470         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1471                 return error;
1472 #endif
1473         /*
1474          * If we've got no right sibling then we can't shift an entry right.
1475          */
1476         if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1477                 *stat = 0;
1478                 return 0;
1479         }
1480         /*
1481          * If the cursor entry is the one that would be moved, don't
1482          * do it... it's too complicated.
1483          */
1484         if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1485                 *stat = 0;
1486                 return 0;
1487         }
1488         /*
1489          * Set up the right neighbor as "right".
1490          */
1491         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1492                         cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1493                         0, &rbp, XFS_ALLOC_BTREE_REF)))
1494                 return error;
1495         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1496         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1497                 return error;
1498         /*
1499          * If it's full, it can't take another entry.
1500          */
1501         if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1502                 *stat = 0;
1503                 return 0;
1504         }
1505         /*
1506          * Make a hole at the start of the right neighbor block, then
1507          * copy the last left block entry to the hole.
1508          */
1509         if (level > 0) {
1510                 xfs_alloc_key_t *lkp;   /* key pointer for left block */
1511                 xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1512                 xfs_alloc_ptr_t *rpp;   /* address pointer for right block */
1513
1514                 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1515                 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1516                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1517                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1518 #ifdef DEBUG
1519                 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1520                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1521                                 return error;
1522                 }
1523 #endif
1524                 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1525                 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1526 #ifdef DEBUG
1527                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1528                         return error;
1529 #endif
1530                 *rkp = *lkp; /* INT_: copy */
1531                 *rpp = *lpp; /* INT_: copy */
1532                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1533                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1534                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1535         } else {
1536                 xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1537                 xfs_alloc_rec_t *rrp;   /* record pointer for right block */
1538
1539                 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1540                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1541                 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1542                 *rrp = *lrp;
1543                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1544                 key.ar_startblock = rrp->ar_startblock;
1545                 key.ar_blockcount = rrp->ar_blockcount;
1546                 rkp = &key;
1547                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1548         }
1549         /*
1550          * Decrement and log left's numrecs, bump and log right's numrecs.
1551          */
1552         be16_add(&left->bb_numrecs, -1);
1553         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1554         be16_add(&right->bb_numrecs, 1);
1555         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1556         /*
1557          * Using a temporary cursor, update the parent key values of the
1558          * block on the right.
1559          */
1560         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1561                 return error;
1562         i = xfs_btree_lastrec(tcur, level);
1563         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1564         if ((error = xfs_alloc_increment(tcur, level, &i)) ||
1565             (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
1566                 goto error0;
1567         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1568         *stat = 1;
1569         return 0;
1570 error0:
1571         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1572         return error;
1573 }
1574
1575 /*
1576  * Split cur/level block in half.
1577  * Return new block number and its first record (to be inserted into parent).
1578  */
1579 STATIC int                              /* error */
1580 xfs_alloc_split(
1581         xfs_btree_cur_t         *cur,   /* btree cursor */
1582         int                     level,  /* level to split */
1583         xfs_agblock_t           *bnop,  /* output: block number allocated */
1584         xfs_alloc_key_t         *keyp,  /* output: first key of new block */
1585         xfs_btree_cur_t         **curp, /* output: new cursor */
1586         int                     *stat)  /* success/failure */
1587 {
1588         int                     error;  /* error return value */
1589         int                     i;      /* loop index/record number */
1590         xfs_agblock_t           lbno;   /* left (current) block number */
1591         xfs_buf_t               *lbp;   /* buffer for left block */
1592         xfs_alloc_block_t       *left;  /* left (current) btree block */
1593         xfs_agblock_t           rbno;   /* right (new) block number */
1594         xfs_buf_t               *rbp;   /* buffer for right block */
1595         xfs_alloc_block_t       *right; /* right (new) btree block */
1596
1597         /*
1598          * Allocate the new block from the freelist.
1599          * If we can't do it, we're toast.  Give up.
1600          */
1601         if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
1602                         &rbno)))
1603                 return error;
1604         if (rbno == NULLAGBLOCK) {
1605                 *stat = 0;
1606                 return 0;
1607         }
1608         xfs_trans_agbtree_delta(cur->bc_tp, 1);
1609         rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
1610                 rbno, 0);
1611         /*
1612          * Set up the new block as "right".
1613          */
1614         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1615         /*
1616          * "Left" is the current (according to the cursor) block.
1617          */
1618         lbp = cur->bc_bufs[level];
1619         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1620 #ifdef DEBUG
1621         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1622                 return error;
1623 #endif
1624         /*
1625          * Fill in the btree header for the new block.
1626          */
1627         right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1628         right->bb_level = left->bb_level;
1629         right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1630         /*
1631          * Make sure that if there's an odd number of entries now, that
1632          * each new block will have the same number of entries.
1633          */
1634         if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1635             cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1636                 be16_add(&right->bb_numrecs, 1);
1637         i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1638         /*
1639          * For non-leaf blocks, copy keys and addresses over to the new block.
1640          */
1641         if (level > 0) {
1642                 xfs_alloc_key_t *lkp;   /* left btree key pointer */
1643                 xfs_alloc_ptr_t *lpp;   /* left btree address pointer */
1644                 xfs_alloc_key_t *rkp;   /* right btree key pointer */
1645                 xfs_alloc_ptr_t *rpp;   /* right btree address pointer */
1646
1647                 lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
1648                 lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
1649                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1650                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1651 #ifdef DEBUG
1652                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1653                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1654                                 return error;
1655                 }
1656 #endif
1657                 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1658                 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1659                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1660                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1661                 *keyp = *rkp;
1662         }
1663         /*
1664          * For leaf blocks, copy records over to the new block.
1665          */
1666         else {
1667                 xfs_alloc_rec_t *lrp;   /* left btree record pointer */
1668                 xfs_alloc_rec_t *rrp;   /* right btree record pointer */
1669
1670                 lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
1671                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1672                 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1673                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1674                 keyp->ar_startblock = rrp->ar_startblock;
1675                 keyp->ar_blockcount = rrp->ar_blockcount;
1676         }
1677         /*
1678          * Find the left block number by looking in the buffer.
1679          * Adjust numrecs, sibling pointers.
1680          */
1681         lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
1682         be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1683         right->bb_rightsib = left->bb_rightsib;
1684         left->bb_rightsib = cpu_to_be32(rbno);
1685         right->bb_leftsib = cpu_to_be32(lbno);
1686         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
1687         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1688         /*
1689          * If there's a block to the new block's right, make that block
1690          * point back to right instead of to left.
1691          */
1692         if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1693                 xfs_alloc_block_t       *rrblock;       /* rr btree block */
1694                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1695
1696                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1697                                 cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
1698                                 &rrbp, XFS_ALLOC_BTREE_REF)))
1699                         return error;
1700                 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
1701                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1702                         return error;
1703                 rrblock->bb_leftsib = cpu_to_be32(rbno);
1704                 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
1705         }
1706         /*
1707          * If the cursor is really in the right block, move it there.
1708          * If it's just pointing past the last entry in left, then we'll
1709          * insert there, so don't change anything in that case.
1710          */
1711         if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1712                 xfs_btree_setbuf(cur, level, rbp);
1713                 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1714         }
1715         /*
1716          * If there are more levels, we'll need another cursor which refers to
1717          * the right block, no matter where this cursor was.
1718          */
1719         if (level + 1 < cur->bc_nlevels) {
1720                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1721                         return error;
1722                 (*curp)->bc_ptrs[level + 1]++;
1723         }
1724         *bnop = rbno;
1725         *stat = 1;
1726         return 0;
1727 }
1728
1729 /*
1730  * Update keys at all levels from here to the root along the cursor's path.
1731  */
1732 STATIC int                              /* error */
1733 xfs_alloc_updkey(
1734         xfs_btree_cur_t         *cur,   /* btree cursor */
1735         xfs_alloc_key_t         *keyp,  /* new key value to update to */
1736         int                     level)  /* starting level for update */
1737 {
1738         int                     ptr;    /* index of key in block */
1739
1740         /*
1741          * Go up the tree from this level toward the root.
1742          * At each level, update the key value to the value input.
1743          * Stop when we reach a level where the cursor isn't pointing
1744          * at the first entry in the block.
1745          */
1746         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1747                 xfs_alloc_block_t       *block; /* btree block */
1748                 xfs_buf_t               *bp;    /* buffer for block */
1749 #ifdef DEBUG
1750                 int                     error;  /* error return value */
1751 #endif
1752                 xfs_alloc_key_t         *kp;    /* ptr to btree block keys */
1753
1754                 bp = cur->bc_bufs[level];
1755                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1756 #ifdef DEBUG
1757                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1758                         return error;
1759 #endif
1760                 ptr = cur->bc_ptrs[level];
1761                 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
1762                 *kp = *keyp;
1763                 xfs_alloc_log_keys(cur, bp, ptr, ptr);
1764         }
1765         return 0;
1766 }
1767
1768 /*
1769  * Externally visible routines.
1770  */
1771
1772 /*
1773  * Decrement cursor by one record at the level.
1774  * For nonzero levels the leaf-ward information is untouched.
1775  */
1776 int                                     /* error */
1777 xfs_alloc_decrement(
1778         xfs_btree_cur_t         *cur,   /* btree cursor */
1779         int                     level,  /* level in btree, 0 is leaf */
1780         int                     *stat)  /* success/failure */
1781 {
1782         xfs_alloc_block_t       *block; /* btree block */
1783         int                     error;  /* error return value */
1784         int                     lev;    /* btree level */
1785
1786         ASSERT(level < cur->bc_nlevels);
1787         /*
1788          * Read-ahead to the left at this level.
1789          */
1790         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1791         /*
1792          * Decrement the ptr at this level.  If we're still in the block
1793          * then we're done.
1794          */
1795         if (--cur->bc_ptrs[level] > 0) {
1796                 *stat = 1;
1797                 return 0;
1798         }
1799         /*
1800          * Get a pointer to the btree block.
1801          */
1802         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
1803 #ifdef DEBUG
1804         if ((error = xfs_btree_check_sblock(cur, block, level,
1805                         cur->bc_bufs[level])))
1806                 return error;
1807 #endif
1808         /*
1809          * If we just went off the left edge of the tree, return failure.
1810          */
1811         if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1812                 *stat = 0;
1813                 return 0;
1814         }
1815         /*
1816          * March up the tree decrementing pointers.
1817          * Stop when we don't go off the left edge of a block.
1818          */
1819         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1820                 if (--cur->bc_ptrs[lev] > 0)
1821                         break;
1822                 /*
1823                  * Read-ahead the left block, we're going to read it
1824                  * in the next loop.
1825                  */
1826                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1827         }
1828         /*
1829          * If we went off the root then we are seriously confused.
1830          */
1831         ASSERT(lev < cur->bc_nlevels);
1832         /*
1833          * Now walk back down the tree, fixing up the cursor's buffer
1834          * pointers and key numbers.
1835          */
1836         for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1837                 xfs_agblock_t   agbno;  /* block number of btree block */
1838                 xfs_buf_t       *bp;    /* buffer pointer for block */
1839
1840                 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1841                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1842                                 cur->bc_private.a.agno, agbno, 0, &bp,
1843                                 XFS_ALLOC_BTREE_REF)))
1844                         return error;
1845                 lev--;
1846                 xfs_btree_setbuf(cur, lev, bp);
1847                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1848                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1849                         return error;
1850                 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1851         }
1852         *stat = 1;
1853         return 0;
1854 }
1855
1856 /*
1857  * Delete the record pointed to by cur.
1858  * The cursor refers to the place where the record was (could be inserted)
1859  * when the operation returns.
1860  */
1861 int                                     /* error */
1862 xfs_alloc_delete(
1863         xfs_btree_cur_t *cur,           /* btree cursor */
1864         int             *stat)          /* success/failure */
1865 {
1866         int             error;          /* error return value */
1867         int             i;              /* result code */
1868         int             level;          /* btree level */
1869
1870         /*
1871          * Go up the tree, starting at leaf level.
1872          * If 2 is returned then a join was done; go to the next level.
1873          * Otherwise we are done.
1874          */
1875         for (level = 0, i = 2; i == 2; level++) {
1876                 if ((error = xfs_alloc_delrec(cur, level, &i)))
1877                         return error;
1878         }
1879         if (i == 0) {
1880                 for (level = 1; level < cur->bc_nlevels; level++) {
1881                         if (cur->bc_ptrs[level] == 0) {
1882                                 if ((error = xfs_alloc_decrement(cur, level, &i)))
1883                                         return error;
1884                                 break;
1885                         }
1886                 }
1887         }
1888         *stat = i;
1889         return 0;
1890 }
1891
1892 /*
1893  * Get the data from the pointed-to record.
1894  */
1895 int                                     /* error */
1896 xfs_alloc_get_rec(
1897         xfs_btree_cur_t         *cur,   /* btree cursor */
1898         xfs_agblock_t           *bno,   /* output: starting block of extent */
1899         xfs_extlen_t            *len,   /* output: length of extent */
1900         int                     *stat)  /* output: success/failure */
1901 {
1902         xfs_alloc_block_t       *block; /* btree block */
1903 #ifdef DEBUG
1904         int                     error;  /* error return value */
1905 #endif
1906         int                     ptr;    /* record number */
1907
1908         ptr = cur->bc_ptrs[0];
1909         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
1910 #ifdef DEBUG
1911         if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
1912                 return error;
1913 #endif
1914         /*
1915          * Off the right end or left end, return failure.
1916          */
1917         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1918                 *stat = 0;
1919                 return 0;
1920         }
1921         /*
1922          * Point to the record and extract its data.
1923          */
1924         {
1925                 xfs_alloc_rec_t         *rec;   /* record data */
1926
1927                 rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
1928                 *bno = be32_to_cpu(rec->ar_startblock);
1929                 *len = be32_to_cpu(rec->ar_blockcount);
1930         }
1931         *stat = 1;
1932         return 0;
1933 }
1934
1935 /*
1936  * Increment cursor by one record at the level.
1937  * For nonzero levels the leaf-ward information is untouched.
1938  */
1939 int                                     /* error */
1940 xfs_alloc_increment(
1941         xfs_btree_cur_t         *cur,   /* btree cursor */
1942         int                     level,  /* level in btree, 0 is leaf */
1943         int                     *stat)  /* success/failure */
1944 {
1945         xfs_alloc_block_t       *block; /* btree block */
1946         xfs_buf_t               *bp;    /* tree block buffer */
1947         int                     error;  /* error return value */
1948         int                     lev;    /* btree level */
1949
1950         ASSERT(level < cur->bc_nlevels);
1951         /*
1952          * Read-ahead to the right at this level.
1953          */
1954         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1955         /*
1956          * Get a pointer to the btree block.
1957          */
1958         bp = cur->bc_bufs[level];
1959         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1960 #ifdef DEBUG
1961         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1962                 return error;
1963 #endif
1964         /*
1965          * Increment the ptr at this level.  If we're still in the block
1966          * then we're done.
1967          */
1968         if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1969                 *stat = 1;
1970                 return 0;
1971         }
1972         /*
1973          * If we just went off the right edge of the tree, return failure.
1974          */
1975         if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1976                 *stat = 0;
1977                 return 0;
1978         }
1979         /*
1980          * March up the tree incrementing pointers.
1981          * Stop when we don't go off the right edge of a block.
1982          */
1983         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1984                 bp = cur->bc_bufs[lev];
1985                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1986 #ifdef DEBUG
1987                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1988                         return error;
1989 #endif
1990                 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1991                         break;
1992                 /*
1993                  * Read-ahead the right block, we're going to read it
1994                  * in the next loop.
1995                  */
1996                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1997         }
1998         /*
1999          * If we went off the root then we are seriously confused.
2000          */
2001         ASSERT(lev < cur->bc_nlevels);
2002         /*
2003          * Now walk back down the tree, fixing up the cursor's buffer
2004          * pointers and key numbers.
2005          */
2006         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2007              lev > level; ) {
2008                 xfs_agblock_t   agbno;  /* block number of btree block */
2009
2010                 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
2011                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
2012                                 cur->bc_private.a.agno, agbno, 0, &bp,
2013                                 XFS_ALLOC_BTREE_REF)))
2014                         return error;
2015                 lev--;
2016                 xfs_btree_setbuf(cur, lev, bp);
2017                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2018                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
2019                         return error;
2020                 cur->bc_ptrs[lev] = 1;
2021         }
2022         *stat = 1;
2023         return 0;
2024 }
2025
2026 /*
2027  * Insert the current record at the point referenced by cur.
2028  * The cursor may be inconsistent on return if splits have been done.
2029  */
2030 int                                     /* error */
2031 xfs_alloc_insert(
2032         xfs_btree_cur_t *cur,           /* btree cursor */
2033         int             *stat)          /* success/failure */
2034 {
2035         int             error;          /* error return value */
2036         int             i;              /* result value, 0 for failure */
2037         int             level;          /* current level number in btree */
2038         xfs_agblock_t   nbno;           /* new block number (split result) */
2039         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
2040         xfs_alloc_rec_t nrec;           /* record being inserted this level */
2041         xfs_btree_cur_t *pcur;          /* previous level's cursor */
2042
2043         level = 0;
2044         nbno = NULLAGBLOCK;
2045         nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
2046         nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
2047         ncur = (xfs_btree_cur_t *)0;
2048         pcur = cur;
2049         /*
2050          * Loop going up the tree, starting at the leaf level.
2051          * Stop when we don't get a split block, that must mean that
2052          * the insert is finished with this level.
2053          */
2054         do {
2055                 /*
2056                  * Insert nrec/nbno into this level of the tree.
2057                  * Note if we fail, nbno will be null.
2058                  */
2059                 if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
2060                                 &i))) {
2061                         if (pcur != cur)
2062                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2063                         return error;
2064                 }
2065                 /*
2066                  * See if the cursor we just used is trash.
2067                  * Can't trash the caller's cursor, but otherwise we should
2068                  * if ncur is a new cursor or we're about to be done.
2069                  */
2070                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
2071                         cur->bc_nlevels = pcur->bc_nlevels;
2072                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2073                 }
2074                 /*
2075                  * If we got a new cursor, switch to it.
2076                  */
2077                 if (ncur) {
2078                         pcur = ncur;
2079                         ncur = (xfs_btree_cur_t *)0;
2080                 }
2081         } while (nbno != NULLAGBLOCK);
2082         *stat = i;
2083         return 0;
2084 }
2085
2086 /*
2087  * Lookup the record equal to [bno, len] in the btree given by cur.
2088  */
2089 int                                     /* error */
2090 xfs_alloc_lookup_eq(
2091         xfs_btree_cur_t *cur,           /* btree cursor */
2092         xfs_agblock_t   bno,            /* starting block of extent */
2093         xfs_extlen_t    len,            /* length of extent */
2094         int             *stat)          /* success/failure */
2095 {
2096         cur->bc_rec.a.ar_startblock = bno;
2097         cur->bc_rec.a.ar_blockcount = len;
2098         return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
2099 }
2100
2101 /*
2102  * Lookup the first record greater than or equal to [bno, len]
2103  * in the btree given by cur.
2104  */
2105 int                                     /* error */
2106 xfs_alloc_lookup_ge(
2107         xfs_btree_cur_t *cur,           /* btree cursor */
2108         xfs_agblock_t   bno,            /* starting block of extent */
2109         xfs_extlen_t    len,            /* length of extent */
2110         int             *stat)          /* success/failure */
2111 {
2112         cur->bc_rec.a.ar_startblock = bno;
2113         cur->bc_rec.a.ar_blockcount = len;
2114         return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
2115 }
2116
2117 /*
2118  * Lookup the first record less than or equal to [bno, len]
2119  * in the btree given by cur.
2120  */
2121 int                                     /* error */
2122 xfs_alloc_lookup_le(
2123         xfs_btree_cur_t *cur,           /* btree cursor */
2124         xfs_agblock_t   bno,            /* starting block of extent */
2125         xfs_extlen_t    len,            /* length of extent */
2126         int             *stat)          /* success/failure */
2127 {
2128         cur->bc_rec.a.ar_startblock = bno;
2129         cur->bc_rec.a.ar_blockcount = len;
2130         return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
2131 }
2132
2133 /*
2134  * Update the record referred to by cur, to the value given by [bno, len].
2135  * This either works (return 0) or gets an EFSCORRUPTED error.
2136  */
2137 int                                     /* error */
2138 xfs_alloc_update(
2139         xfs_btree_cur_t         *cur,   /* btree cursor */
2140         xfs_agblock_t           bno,    /* starting block of extent */
2141         xfs_extlen_t            len)    /* length of extent */
2142 {
2143         xfs_alloc_block_t       *block; /* btree block to update */
2144         int                     error;  /* error return value */
2145         int                     ptr;    /* current record number (updating) */
2146
2147         ASSERT(len > 0);
2148         /*
2149          * Pick up the a.g. freelist struct and the current block.
2150          */
2151         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2152 #ifdef DEBUG
2153         if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2154                 return error;
2155 #endif
2156         /*
2157          * Get the address of the rec to be updated.
2158          */
2159         ptr = cur->bc_ptrs[0];
2160         {
2161                 xfs_alloc_rec_t         *rp;    /* pointer to updated record */
2162
2163                 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2164                 /*
2165                  * Fill in the new contents and log them.
2166                  */
2167                 rp->ar_startblock = cpu_to_be32(bno);
2168                 rp->ar_blockcount = cpu_to_be32(len);
2169                 xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2170         }
2171         /*
2172          * If it's the by-size btree and it's the last leaf block and
2173          * it's the last record... then update the size of the longest
2174          * extent in the a.g., which we cache in the a.g. freelist header.
2175          */
2176         if (cur->bc_btnum == XFS_BTNUM_CNT &&
2177             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2178             ptr == be16_to_cpu(block->bb_numrecs)) {
2179                 xfs_agf_t       *agf;   /* a.g. freespace header */
2180                 xfs_agnumber_t  seqno;
2181
2182                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2183                 seqno = be32_to_cpu(agf->agf_seqno);
2184                 cur->bc_mp->m_perag[seqno].pagf_longest = len;
2185                 agf->agf_longest = cpu_to_be32(len);
2186                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2187                         XFS_AGF_LONGEST);
2188         }
2189         /*
2190          * Updating first record in leaf. Pass new key value up to our parent.
2191          */
2192         if (ptr == 1) {
2193                 xfs_alloc_key_t key;    /* key containing [bno, len] */
2194
2195                 key.ar_startblock = cpu_to_be32(bno);
2196                 key.ar_blockcount = cpu_to_be32(len);
2197                 if ((error = xfs_alloc_updkey(cur, &key, 1)))
2198                         return error;
2199         }
2200         return 0;
2201 }