fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / fs / xfs / xfs_attr.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 #include <linux/capability.h>
20
21 #include "xfs.h"
22 #include "xfs_fs.h"
23 #include "xfs_types.h"
24 #include "xfs_bit.h"
25 #include "xfs_log.h"
26 #include "xfs_inum.h"
27 #include "xfs_trans.h"
28 #include "xfs_sb.h"
29 #include "xfs_ag.h"
30 #include "xfs_dir2.h"
31 #include "xfs_dmapi.h"
32 #include "xfs_mount.h"
33 #include "xfs_da_btree.h"
34 #include "xfs_bmap_btree.h"
35 #include "xfs_alloc_btree.h"
36 #include "xfs_ialloc_btree.h"
37 #include "xfs_dir2_sf.h"
38 #include "xfs_attr_sf.h"
39 #include "xfs_dinode.h"
40 #include "xfs_inode.h"
41 #include "xfs_alloc.h"
42 #include "xfs_btree.h"
43 #include "xfs_inode_item.h"
44 #include "xfs_bmap.h"
45 #include "xfs_attr.h"
46 #include "xfs_attr_leaf.h"
47 #include "xfs_error.h"
48 #include "xfs_quota.h"
49 #include "xfs_trans_space.h"
50 #include "xfs_acl.h"
51 #include "xfs_rw.h"
52
53 /*
54  * xfs_attr.c
55  *
56  * Provide the external interfaces to manage attribute lists.
57  */
58
/*
 * ATTR_SYSCOUNT is the number of slots in attr_system_names[].
 * posix_acl_access and posix_acl_default are only declared here; their
 * initializers and the code that fills attr_system_names[] are not in
 * this part of the file (presumably the two POSIX ACL names -- confirm).
 */
59 #define ATTR_SYSCOUNT   2
60 STATIC struct attrnames posix_acl_access;
61 STATIC struct attrnames posix_acl_default;
62 STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT];
63
64 /*========================================================================
65  * Function prototypes for the kernel.
66  *========================================================================*/
67
/*
 * Forward declarations of the file-local (STATIC) work routines, grouped
 * by attribute list format: shortform (in the inode), single leaf block,
 * and multi-block (node).  The external entry points below pick one of
 * these groups according to the attribute fork format of the inode.
 */
68 /*
69  * Internal routines when attribute list fits inside the inode.
70  */
71 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
72
73 /*
74  * Internal routines when attribute list is one block.
75  */
76 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
77 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
78 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
79 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
80
81 /*
82  * Internal routines when attribute list is more than one block.
83  */
84 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
85 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
86 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
87 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
88 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
89 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
90
91 /*
92  * Routines to manipulate out-of-line attribute values.
93  */
94 STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
95 STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
96
97 #define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
98
/* The trace buffer only exists in builds that define XFS_ATTR_TRACE. */
99 #if defined(XFS_ATTR_TRACE)
100 ktrace_t *xfs_attr_trace_buf;
101 #endif
102
103
104 /*========================================================================
105  * Overall external interface routines.
106  *========================================================================*/
107
108 int
109 xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
110                char *value, int *valuelenp, int flags, struct cred *cred)
111 {
112         xfs_da_args_t   args;
113         int             error;
114
115         if ((XFS_IFORK_Q(ip) == 0) ||
116             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
117              ip->i_d.di_anextents == 0))
118                 return(ENOATTR);
119
120         /*
121          * Fill in the arg structure for this request.
122          */
123         memset((char *)&args, 0, sizeof(args));
124         args.name = name;
125         args.namelen = namelen;
126         args.value = value;
127         args.valuelen = *valuelenp;
128         args.flags = flags;
129         args.hashval = xfs_da_hashname(args.name, args.namelen);
130         args.dp = ip;
131         args.whichfork = XFS_ATTR_FORK;
132
133         /*
134          * Decide on what work routines to call based on the inode size.
135          */
136         if (XFS_IFORK_Q(ip) == 0 ||
137             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
138              ip->i_d.di_anextents == 0)) {
139                 error = XFS_ERROR(ENOATTR);
140         } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
141                 error = xfs_attr_shortform_getvalue(&args);
142         } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
143                 error = xfs_attr_leaf_get(&args);
144         } else {
145                 error = xfs_attr_node_get(&args);
146         }
147
148         /*
149          * Return the number of bytes in the value to the caller.
150          */
151         *valuelenp = args.valuelen;
152
153         if (error == EEXIST)
154                 error = 0;
155         return(error);
156 }
157
158 int
159 xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
160              int flags, struct cred *cred)
161 {
162         xfs_inode_t     *ip = XFS_BHVTOI(bdp);
163         int             error, namelen;
164
165         XFS_STATS_INC(xs_attr_get);
166
167         if (!name)
168                 return(EINVAL);
169         namelen = strlen(name);
170         if (namelen >= MAXNAMELEN)
171                 return(EFAULT);         /* match IRIX behaviour */
172
173         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
174                 return(EIO);
175
176         xfs_ilock(ip, XFS_ILOCK_SHARED);
177         error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
178         xfs_iunlock(ip, XFS_ILOCK_SHARED);
179         return(error);
180 }
181
/*
 * Set the attribute "name" (namelen bytes) to "value" (valuelen bytes) on
 * inode dp.
 *
 * Steps, in order:
 *   - attach dquots and, if the inode has no attribute fork, add one
 *     (both done before the inode is locked);
 *   - size the block reservation from the new entry's size and whether the
 *     value would be stored "local" or "remote";
 *   - allocate and reserve a transaction, lock the inode exclusively,
 *     reserve quota, and join/hold the inode in the transaction;
 *   - if the fork is shortform (or empty), try a shortform add; anything
 *     other than ENOSPC ends the operation there (commit, unlock, return);
 *   - on ENOSPC convert the shortform list to a leaf block, roll the
 *     transaction, and fall through;
 *   - add the name through the leaf or node routine, commit, unlock.
 *
 * The inode change time is updated on success unless ATTR_KERNOTIME is set
 * in flags.  ATTR_ROOT in flags lets the transaction and quota reservation
 * dip into reserved blocks.
 *
 * Returns 0 on success or the error from whichever step failed.  The inode
 * is unlocked again on every return path after it has been locked.
 */
182 int
183 xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
184                  char *value, int valuelen, int flags)
185 {
186         xfs_da_args_t   args;
187         xfs_fsblock_t   firstblock;
188         xfs_bmap_free_t flist;
189         int             error, err2, committed;
190         int             local, size;
191         uint            nblks;
192         xfs_mount_t     *mp = dp->i_mount;
193         int             rsvd = (flags & ATTR_ROOT) != 0;
194
195         /*
196          * Attach the dquots to the inode.
197          */
198         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
199                 return (error);
200
201         /*
202          * If the inode doesn't have an attribute fork, add one.
203          * (inode must not be locked when we call this routine)
204          */
205         if (XFS_IFORK_Q(dp) == 0) {
206                 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
207                               XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
208
209                 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
210                         return(error);
211         }
212
213         /*
214          * Fill in the arg structure for this request.
215          */
216         memset((char *)&args, 0, sizeof(args));
217         args.name = name;
218         args.namelen = namelen;
219         args.value = value;
220         args.valuelen = valuelen;
221         args.flags = flags;
222         args.hashval = xfs_da_hashname(args.name, args.namelen);
223         args.dp = dp;
224         args.firstblock = &firstblock;
225         args.flist = &flist;
226         args.whichfork = XFS_ATTR_FORK;
227         args.addname = 1;
228         args.oknoent = 1;
229
230         /*
231          * Determine space new attribute will use, and if it would be
232          * "local" or "remote" (note: local != inline).
233          */
234         size = xfs_attr_leaf_newentsize(namelen, valuelen,
235                                         mp->m_sb.sb_blocksize, &local);
236
237         nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
238         if (local) {
239                 if (size > (mp->m_sb.sb_blocksize >> 1)) {
240                         /* Double split possible */
241                         nblks <<= 1;
242                 }
243         } else {
244                 uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
245                 /* Out of line attribute, cannot double split, but make
246                  * room for the attribute value itself.
247                  */
248                 nblks += dblocks;
249                 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
250         }
251
252         /* Size is now blocks for attribute data */
253         args.total = nblks;
254
255         /*
256          * Start our first transaction of the day.
257          *
258          * All future transactions during this code must be "chained" off
259          * this one via the trans_dup() call.  All transactions will contain
260          * the inode, and the inode will always be marked with trans_ihold().
261          * Since the inode will be locked in all transactions, we must log
262          * the inode in every transaction to let it float upward through
263          * the log.
264          */
265         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
266
267         /*
268          * Root fork attributes can use reserved data blocks for this
269          * operation if necessary
270          */
271
272         if (rsvd)
273                 args.trans->t_flags |= XFS_TRANS_RESERVE;
274
275         if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
276                                       XFS_ATTRSET_LOG_RES(mp, nblks),
277                                       0, XFS_TRANS_PERM_LOG_RES,
278                                       XFS_ATTRSET_LOG_COUNT))) {
279                 xfs_trans_cancel(args.trans, 0);
280                 return(error);
281         }
282         xfs_ilock(dp, XFS_ILOCK_EXCL);
283
284         error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
285                          rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
286                                 XFS_QMOPT_RES_REGBLKS);
287         if (error) {
288                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
289                 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
290                 return (error);
291         }
292
293         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
294         xfs_trans_ihold(args.trans, dp);
295
296         /*
297          * If the attribute list is non-existent or a shortform list,
298          * upgrade it to a single-leaf-block attribute list.
299          */
300         if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
301             ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
302              (dp->i_d.di_anextents == 0))) {
303
304                 /*
305                  * Build initial attribute list (if required).
306                  */
307                 if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
308                         xfs_attr_shortform_create(&args);
309
310                 /*
311                  * Try to add the attr to the attribute list in
312                  * the inode.
313                  */
314                 error = xfs_attr_shortform_addname(&args);
315                 if (error != ENOSPC) {
316                         /*
317                          * Commit the shortform mods, and we're done.
318                          * NOTE: this is also the error path (EEXIST, etc).
319                          */
320                         ASSERT(args.trans != NULL);
321
322                         /*
323                          * If this is a synchronous mount, make sure that
324                          * the transaction goes to disk before returning
325                          * to the user.
326                          */
327                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
328                                 xfs_trans_set_sync(args.trans);
329                         }
330                         err2 = xfs_trans_commit(args.trans,
331                                                  XFS_TRANS_RELEASE_LOG_RES,
332                                                  NULL);
333                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
334
335                         /*
336                          * Hit the inode change time.
337                          */
338                         if (!error && (flags & ATTR_KERNOTIME) == 0) {
339                                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
340                         }
                        /* The shortform error wins over a commit error. */
341                         return(error == 0 ? err2 : error);
342                 }
343
344                 /*
345                  * It won't fit in the shortform, transform to a leaf block.
346                  * GROT: another possible req'mt for a double-split btree op.
347                  */
348                 XFS_BMAP_INIT(args.flist, args.firstblock);
349                 error = xfs_attr_shortform_to_leaf(&args);
350                 if (!error) {
351                         error = xfs_bmap_finish(&args.trans, args.flist,
352                                                 *args.firstblock, &committed);
353                 }
354                 if (error) {
                        /*
                         * NOTE(review): "committed" has no initializer and
                         * is only handed to xfs_bmap_finish() above.  When
                         * xfs_attr_shortform_to_leaf() itself fails, that
                         * call is skipped and this ASSERT reads an
                         * uninitialized value (in builds where ASSERT
                         * evaluates its argument).  Clearing args.trans
                         * also makes the "out" path skip the cancel --
                         * confirm both are intended for that case.
                         */
355                         ASSERT(committed);
356                         args.trans = NULL;
357                         xfs_bmap_cancel(&flist);
358                         goto out;
359                 }
360
361                 /*
362                  * bmap_finish() may have committed the last trans and started
363                  * a new one.  We need the inode to be in all transactions.
364                  */
365                 if (committed) {
366                         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
367                         xfs_trans_ihold(args.trans, dp);
368                 }
369
370                 /*
371                  * Commit the leaf transformation.  We'll need another (linked)
372                  * transaction to add the new attribute to the leaf.
373                  */
374                 if ((error = xfs_attr_rolltrans(&args.trans, dp)))
375                         goto out;
376
377         }
378
        /* Leaf or node format from here on: add through the matching routine. */
379         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
380                 error = xfs_attr_leaf_addname(&args);
381         } else {
382                 error = xfs_attr_node_addname(&args);
383         }
384         if (error) {
385                 goto out;
386         }
387
388         /*
389          * If this is a synchronous mount, make sure that the
390          * transaction goes to disk before returning to the user.
391          */
392         if (mp->m_flags & XFS_MOUNT_WSYNC) {
393                 xfs_trans_set_sync(args.trans);
394         }
395
396         /*
397          * Commit the last in the sequence of transactions.
398          */
399         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
400         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES,
401                                  NULL);
402         xfs_iunlock(dp, XFS_ILOCK_EXCL);
403
404         /*
405          * Hit the inode change time.
406          */
407         if (!error && (flags & ATTR_KERNOTIME) == 0) {
408                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
409         }
410
411         return(error);
412
        /* Error exit: cancel the transaction if we still own one, then unlock. */
413 out:
414         if (args.trans)
415                 xfs_trans_cancel(args.trans,
416                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
417         xfs_iunlock(dp, XFS_ILOCK_EXCL);
418         return(error);
419 }
420
421 int
422 xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int flags,
423              struct cred *cred)
424 {
425         xfs_inode_t     *dp;
426         int             namelen;
427
428         namelen = strlen(name);
429         if (namelen >= MAXNAMELEN)
430                 return EFAULT;          /* match IRIX behaviour */
431
432         XFS_STATS_INC(xs_attr_set);
433
434         dp = XFS_BHVTOI(bdp);
435         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
436                 return (EIO);
437
438         return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
439 }
440
441 /*
442  * Generic handler routine to remove a name from an attribute list.
443  * Transitions attribute list from Btree to shortform as necessary.
444  */
445 int
446 xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
447 {
448         xfs_da_args_t   args;
449         xfs_fsblock_t   firstblock;
450         xfs_bmap_free_t flist;
451         int             error;
452         xfs_mount_t     *mp = dp->i_mount;
453
454         /*
455          * Fill in the arg structure for this request.
456          */
457         memset((char *)&args, 0, sizeof(args));
458         args.name = name;
459         args.namelen = namelen;
460         args.flags = flags;
461         args.hashval = xfs_da_hashname(args.name, args.namelen);
462         args.dp = dp;
463         args.firstblock = &firstblock;
464         args.flist = &flist;
465         args.total = 0;
466         args.whichfork = XFS_ATTR_FORK;
467
468         /*
469          * Attach the dquots to the inode.
470          */
471         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
472                 return (error);
473
474         /*
475          * Start our first transaction of the day.
476          *
477          * All future transactions during this code must be "chained" off
478          * this one via the trans_dup() call.  All transactions will contain
479          * the inode, and the inode will always be marked with trans_ihold().
480          * Since the inode will be locked in all transactions, we must log
481          * the inode in every transaction to let it float upward through
482          * the log.
483          */
484         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
485
486         /*
487          * Root fork attributes can use reserved data blocks for this
488          * operation if necessary
489          */
490
491         if (flags & ATTR_ROOT)
492                 args.trans->t_flags |= XFS_TRANS_RESERVE;
493
494         if ((error = xfs_trans_reserve(args.trans,
495                                       XFS_ATTRRM_SPACE_RES(mp),
496                                       XFS_ATTRRM_LOG_RES(mp),
497                                       0, XFS_TRANS_PERM_LOG_RES,
498                                       XFS_ATTRRM_LOG_COUNT))) {
499                 xfs_trans_cancel(args.trans, 0);
500                 return(error);
501         }
502
503         xfs_ilock(dp, XFS_ILOCK_EXCL);
504         /*
505          * No need to make quota reservations here. We expect to release some
506          * blocks not allocate in the common case.
507          */
508         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
509         xfs_trans_ihold(args.trans, dp);
510
511         /*
512          * Decide on what work routines to call based on the inode size.
513          */
514         if (XFS_IFORK_Q(dp) == 0 ||
515             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
516              dp->i_d.di_anextents == 0)) {
517                 error = XFS_ERROR(ENOATTR);
518                 goto out;
519         }
520         if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
521                 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
522                 error = xfs_attr_shortform_remove(&args);
523                 if (error) {
524                         goto out;
525                 }
526         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
527                 error = xfs_attr_leaf_removename(&args);
528         } else {
529                 error = xfs_attr_node_removename(&args);
530         }
531         if (error) {
532                 goto out;
533         }
534
535         /*
536          * If this is a synchronous mount, make sure that the
537          * transaction goes to disk before returning to the user.
538          */
539         if (mp->m_flags & XFS_MOUNT_WSYNC) {
540                 xfs_trans_set_sync(args.trans);
541         }
542
543         /*
544          * Commit the last in the sequence of transactions.
545          */
546         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
547         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES,
548                                  NULL);
549         xfs_iunlock(dp, XFS_ILOCK_EXCL);
550
551         /*
552          * Hit the inode change time.
553          */
554         if (!error && (flags & ATTR_KERNOTIME) == 0) {
555                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
556         }
557
558         return(error);
559
560 out:
561         if (args.trans)
562                 xfs_trans_cancel(args.trans,
563                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
564         xfs_iunlock(dp, XFS_ILOCK_EXCL);
565         return(error);
566 }
567
568 int
569 xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
570 {
571         xfs_inode_t         *dp;
572         int                 namelen;
573
574         namelen = strlen(name);
575         if (namelen >= MAXNAMELEN)
576                 return EFAULT;          /* match IRIX behaviour */
577
578         XFS_STATS_INC(xs_attr_remove);
579
580         dp = XFS_BHVTOI(bdp);
581         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
582                 return (EIO);
583
584         xfs_ilock(dp, XFS_ILOCK_SHARED);
585         if (XFS_IFORK_Q(dp) == 0 ||
586                    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
587                     dp->i_d.di_anextents == 0)) {
588                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
589                 return(XFS_ERROR(ENOATTR));
590         }
591         xfs_iunlock(dp, XFS_ILOCK_SHARED);
592
593         return xfs_attr_remove_int(dp, name, namelen, flags);
594 }
595
596 int                                                             /* error */
597 xfs_attr_list_int(xfs_attr_list_context_t *context)
598 {
599         int error;
600         xfs_inode_t *dp = context->dp;
601
602         /*
603          * Decide on what work routines to call based on the inode size.
604          */
605         if (XFS_IFORK_Q(dp) == 0 ||
606             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
607              dp->i_d.di_anextents == 0)) {
608                 error = 0;
609         } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
610                 error = xfs_attr_shortform_list(context);
611         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
612                 error = xfs_attr_leaf_list(context);
613         } else {
614                 error = xfs_attr_node_list(context);
615         }
616         return error;
617 }
618
/*
 * Sizing helpers for the attrlist_ent records that xfs_attr_put_listent()
 * packs into the caller's buffer.
 *
 * ATTR_ENTBASESIZE is the byte offset of a_name inside struct attrlist_ent,
 * obtained by pointer arithmetic on a null struct pointer (the same value
 * offsetof(struct attrlist_ent, a_name) would give).
 *
 * ATTR_ENTSIZE(namelen) is that base plus namelen bytes of name plus one
 * byte for the terminating NUL, rounded up to a multiple of
 * sizeof(u_int32_t).
 */
619 #define ATTR_ENTBASESIZE                /* minimum bytes used by an attr */ \
620         (((struct attrlist_ent *) 0)->a_name - (char *) 0)
621 #define ATTR_ENTSIZE(namelen)           /* actual bytes used by an attr */ \
622         ((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
623          & ~(sizeof(u_int32_t)-1))
624
625 /*
626  * Format an attribute and copy it out to the user's buffer.
627  * Take care to check values and protect against them changing later,
628  * we may be reading them directly out of a user buffer.
629  */
630 /*ARGSUSED*/
631 STATIC int
632 xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
633                      char *name, int namelen,
634                      int valuelen, char *value)
635 {
636         attrlist_ent_t *aep;
637         int arraytop;
638
639         ASSERT(!(context->flags & ATTR_KERNOVAL));
640         ASSERT(context->count >= 0);
641         ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
642         ASSERT(context->firstu >= sizeof(*context->alist));
643         ASSERT(context->firstu <= context->bufsize);
644
645         arraytop = sizeof(*context->alist) +
646                         context->count * sizeof(context->alist->al_offset[0]);
647         context->firstu -= ATTR_ENTSIZE(namelen);
648         if (context->firstu < arraytop) {
649                 xfs_attr_trace_l_c("buffer full", context);
650                 context->alist->al_more = 1;
651                 context->seen_enough = 1;
652                 return 1;
653         }
654
655         aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
656         aep->a_valuelen = valuelen;
657         memcpy(aep->a_name, name, namelen);
658         aep->a_name[ namelen ] = 0;
659         context->alist->al_offset[ context->count++ ] = context->firstu;
660         context->alist->al_count = context->count;
661         xfs_attr_trace_l_c("add", context);
662         return 0;
663 }
664
665 STATIC int
666 xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
667                      char *name, int namelen,
668                      int valuelen, char *value)
669 {
670         char *offset;
671         int arraytop;
672
673         ASSERT(context->count >= 0);
674
675         arraytop = context->count + namesp->attr_namelen + namelen + 1;
676         if (arraytop > context->firstu) {
677                 context->count = -1;    /* insufficient space */
678                 return 1;
679         }
680         offset = (char *)context->alist + context->count;
681         strncpy(offset, namesp->attr_name, namesp->attr_namelen);
682         offset += namesp->attr_namelen;
683         strncpy(offset, name, namelen);                 /* real name */
684         offset += namelen;
685         *offset = '\0';
686         context->count += namesp->attr_namelen + namelen + 1;
687         return 0;
688 }
689
690 /*ARGSUSED*/
691 STATIC int
692 xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
693                      char *name, int namelen,
694                      int valuelen, char *value)
695 {
696         context->count += namesp->attr_namelen + namelen + 1;
697         return 0;
698 }
699
700 /*
701  * Generate a list of extended attribute names and optionally
702  * also value lengths.  Positive return value follows the XFS
703  * convention of being an error, zero or negative return code
704  * is the length of the buffer returned (negated), indicating
705  * success.
706  */
707 int
708 xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
709                       attrlist_cursor_kern_t *cursor, struct cred *cred)
710 {
711         xfs_attr_list_context_t context;
712         xfs_inode_t *dp;
713         int error;
714
715         XFS_STATS_INC(xs_attr_list);
716
717         /*
718          * Validate the cursor.
719          */
720         if (cursor->pad1 || cursor->pad2)
721                 return(XFS_ERROR(EINVAL));
722         if ((cursor->initted == 0) &&
723             (cursor->hashval || cursor->blkno || cursor->offset))
724                 return XFS_ERROR(EINVAL);
725
726         /*
727          * Check for a properly aligned buffer.
728          */
729         if (((long)buffer) & (sizeof(int)-1))
730                 return XFS_ERROR(EFAULT);
731         if (flags & ATTR_KERNOVAL)
732                 bufsize = 0;
733
734         /*
735          * Initialize the output buffer.
736          */
737         context.dp = dp = XFS_BHVTOI(bdp);
738         context.cursor = cursor;
739         context.count = 0;
740         context.dupcnt = 0;
741         context.resynch = 1;
742         context.flags = flags;
743         context.seen_enough = 0;
744         context.alist = (attrlist_t *)buffer;
745         context.put_value = 0;
746
747         if (flags & ATTR_KERNAMELS) {
748                 context.bufsize = bufsize;
749                 context.firstu = context.bufsize;
750                 if (flags & ATTR_KERNOVAL)
751                         context.put_listent = xfs_attr_kern_list_sizes;
752                 else
753                         context.put_listent = xfs_attr_kern_list;
754         } else {
755                 context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
756                 context.firstu = context.bufsize;
757                 context.alist->al_count = 0;
758                 context.alist->al_more = 0;
759                 context.alist->al_offset[0] = context.bufsize;
760                 context.put_listent = xfs_attr_put_listent;
761         }
762
763         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
764                 return EIO;
765
766         xfs_ilock(dp, XFS_ILOCK_SHARED);
767         xfs_attr_trace_l_c("syscall start", &context);
768
769         error = xfs_attr_list_int(&context);
770
771         xfs_iunlock(dp, XFS_ILOCK_SHARED);
772         xfs_attr_trace_l_c("syscall end", &context);
773
774         if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
775                 /* must return negated buffer size or the error */
776                 if (context.count < 0)
777                         error = XFS_ERROR(ERANGE);
778                 else
779                         error = -context.count;
780         } else
781                 ASSERT(error >= 0);
782
783         return error;
784 }
785
786 int                                                             /* error */
787 xfs_attr_inactive(xfs_inode_t *dp)
788 {
789         xfs_trans_t *trans;
790         xfs_mount_t *mp;
791         int error;
792
793         mp = dp->i_mount;
794         ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
795
796         xfs_ilock(dp, XFS_ILOCK_SHARED);
797         if ((XFS_IFORK_Q(dp) == 0) ||
798             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
799             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
800              dp->i_d.di_anextents == 0)) {
801                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
802                 return(0);
803         }
804         xfs_iunlock(dp, XFS_ILOCK_SHARED);
805
806         /*
807          * Start our first transaction of the day.
808          *
809          * All future transactions during this code must be "chained" off
810          * this one via the trans_dup() call.  All transactions will contain
811          * the inode, and the inode will always be marked with trans_ihold().
812          * Since the inode will be locked in all transactions, we must log
813          * the inode in every transaction to let it float upward through
814          * the log.
815          */
816         trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
817         if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
818                                       XFS_TRANS_PERM_LOG_RES,
819                                       XFS_ATTRINVAL_LOG_COUNT))) {
820                 xfs_trans_cancel(trans, 0);
821                 return(error);
822         }
823         xfs_ilock(dp, XFS_ILOCK_EXCL);
824
825         /*
826          * No need to make quota reservations here. We expect to release some
827          * blocks, not allocate, in the common case.
828          */
829         xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
830         xfs_trans_ihold(trans, dp);
831
832         /*
833          * Decide on what work routines to call based on the inode size.
834          */
835         if ((XFS_IFORK_Q(dp) == 0) ||
836             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
837             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
838              dp->i_d.di_anextents == 0)) {
839                 error = 0;
840                 goto out;
841         }
842         error = xfs_attr_root_inactive(&trans, dp);
843         if (error)
844                 goto out;
845         /*
846          * signal synchronous inactive transactions unless this
847          * is a synchronous mount filesystem in which case we
848          * know that we're here because we've been called out of
849          * xfs_inactive which means that the last reference is gone
850          * and the unlink transaction has already hit the disk so
851          * async inactive transactions are safe.
852          */
853         if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
854                                 (!(mp->m_flags & XFS_MOUNT_WSYNC)
855                                  ? 1 : 0))))
856                 goto out;
857
858         /*
859          * Commit the last in the sequence of transactions.
860          */
861         xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
862         error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES,
863                                  NULL);
864         xfs_iunlock(dp, XFS_ILOCK_EXCL);
865
866         return(error);
867
868 out:
869         xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
870         xfs_iunlock(dp, XFS_ILOCK_EXCL);
871         return(error);
872 }
873
874
875
876 /*========================================================================
877  * External routines when attribute list is inside the inode
878  *========================================================================*/
879
880 /*
881  * Add a name to the shortform attribute list structure
882  * This is the external routine.
883  */
884 STATIC int
885 xfs_attr_shortform_addname(xfs_da_args_t *args)
886 {
887         int newsize, forkoff, retval;
888
889         retval = xfs_attr_shortform_lookup(args);
890         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
891                 return(retval);
892         } else if (retval == EEXIST) {
893                 if (args->flags & ATTR_CREATE)
894                         return(retval);
895                 retval = xfs_attr_shortform_remove(args);
896                 ASSERT(retval == 0);
897         }
898
899         if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
900             args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
901                 return(XFS_ERROR(ENOSPC));
902
903         newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
904         newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
905
906         forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
907         if (!forkoff)
908                 return(XFS_ERROR(ENOSPC));
909
910         xfs_attr_shortform_add(args, forkoff);
911         return(0);
912 }
913
914
915 /*========================================================================
916  * External routines when attribute list is one block
917  *========================================================================*/
918
919 /*
920  * Add a name to the leaf attribute list structure
921  *
922  * This leaf block cannot have a "remote" value, we only call this routine
923  * if bmap_one_block() says there is only one block (ie: no remote blks).
924  */
925 int
926 xfs_attr_leaf_addname(xfs_da_args_t *args)
927 {
928         xfs_inode_t *dp;
929         xfs_dabuf_t *bp;
930         int retval, error, committed, forkoff;
931
932         /*
933          * Read the (only) block in the attribute list in.
934          */
935         dp = args->dp;
936         args->blkno = 0;
937         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
938                                              XFS_ATTR_FORK);
939         if (error)
940                 return(error);
941         ASSERT(bp != NULL);
942
943         /*
944          * Look up the given attribute in the leaf block.  Figure out if
945          * the given flags produce an error or call for an atomic rename.
946          */
947         retval = xfs_attr_leaf_lookup_int(bp, args);
948         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
949                 xfs_da_brelse(args->trans, bp);
950                 return(retval);
951         } else if (retval == EEXIST) {
952                 if (args->flags & ATTR_CREATE) {        /* pure create op */
953                         xfs_da_brelse(args->trans, bp);
954                         return(retval);
955                 }
956                 args->rename = 1;                       /* an atomic rename */
957                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
958                 args->index2 = args->index;
959                 args->rmtblkno2 = args->rmtblkno;
960                 args->rmtblkcnt2 = args->rmtblkcnt;
961         }
962
963         /*
964          * Add the attribute to the leaf block, transitioning to a Btree
965          * if required.
966          */
967         retval = xfs_attr_leaf_add(bp, args);
968         xfs_da_buf_done(bp);
969         if (retval == ENOSPC) {
970                 /*
971                  * Promote the attribute list to the Btree format, then
972                  * Commit that transaction so that the node_addname() call
973                  * can manage its own transactions.
974                  */
975                 XFS_BMAP_INIT(args->flist, args->firstblock);
976                 error = xfs_attr_leaf_to_node(args);
977                 if (!error) {
978                         error = xfs_bmap_finish(&args->trans, args->flist,
979                                                 *args->firstblock, &committed);
980                 }
981                 if (error) {
982                         ASSERT(committed);
983                         args->trans = NULL;
984                         xfs_bmap_cancel(args->flist);
985                         return(error);
986                 }
987
988                 /*
989                  * bmap_finish() may have committed the last trans and started
990                  * a new one.  We need the inode to be in all transactions.
991                  */
992                 if (committed) {
993                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
994                         xfs_trans_ihold(args->trans, dp);
995                 }
996
997                 /*
998                  * Commit the current trans (including the inode) and start
999                  * a new one.
1000                  */
1001                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1002                         return (error);
1003
1004                 /*
1005                  * Fob the whole rest of the problem off on the Btree code.
1006                  */
1007                 error = xfs_attr_node_addname(args);
1008                 return(error);
1009         }
1010
1011         /*
1012          * Commit the transaction that added the attr name so that
1013          * later routines can manage their own transactions.
1014          */
1015         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1016                 return (error);
1017
1018         /*
1019          * If there was an out-of-line value, allocate the blocks we
1020          * identified for its storage and copy the value.  This is done
1021          * after we create the attribute so that we don't overflow the
1022          * maximum size of a transaction and/or hit a deadlock.
1023          */
1024         if (args->rmtblkno > 0) {
1025                 error = xfs_attr_rmtval_set(args);
1026                 if (error)
1027                         return(error);
1028         }
1029
1030         /*
1031          * If this is an atomic rename operation, we must "flip" the
1032          * incomplete flags on the "new" and "old" attribute/value pairs
1033          * so that one disappears and one appears atomically.  Then we
1034          * must remove the "old" attribute/value pair.
1035          */
1036         if (args->rename) {
1037                 /*
1038                  * In a separate transaction, set the incomplete flag on the
1039                  * "old" attr and clear the incomplete flag on the "new" attr.
1040                  */
1041                 error = xfs_attr_leaf_flipflags(args);
1042                 if (error)
1043                         return(error);
1044
1045                 /*
1046                  * Dismantle the "old" attribute/value pair by removing
1047                  * a "remote" value (if it exists).
1048                  */
1049                 args->index = args->index2;
1050                 args->blkno = args->blkno2;
1051                 args->rmtblkno = args->rmtblkno2;
1052                 args->rmtblkcnt = args->rmtblkcnt2;
1053                 if (args->rmtblkno) {
1054                         error = xfs_attr_rmtval_remove(args);
1055                         if (error)
1056                                 return(error);
1057                 }
1058
1059                 /*
1060                  * Read in the block containing the "old" attr, then
1061                  * remove the "old" attr from that block (neat, huh!)
1062                  */
1063                 error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
1064                                                      &bp, XFS_ATTR_FORK);
1065                 if (error)
1066                         return(error);
1067                 ASSERT(bp != NULL);
1068                 (void)xfs_attr_leaf_remove(bp, args);
1069
1070                 /*
1071                  * If the result is small enough, shrink it all into the inode.
1072                  */
1073                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1074                         XFS_BMAP_INIT(args->flist, args->firstblock);
1075                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1076                         /* bp is gone due to xfs_da_shrink_inode */
1077                         if (!error) {
1078                                 error = xfs_bmap_finish(&args->trans,
1079                                                         args->flist,
1080                                                         *args->firstblock,
1081                                                         &committed);
1082                         }
1083                         if (error) {
1084                                 ASSERT(committed);
1085                                 args->trans = NULL;
1086                                 xfs_bmap_cancel(args->flist);
1087                                 return(error);
1088                         }
1089
1090                         /*
1091                          * bmap_finish() may have committed the last trans
1092                          * and started a new one.  We need the inode to be
1093                          * in all transactions.
1094                          */
1095                         if (committed) {
1096                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1097                                 xfs_trans_ihold(args->trans, dp);
1098                         }
1099                 } else
1100                         xfs_da_buf_done(bp);
1101
1102                 /*
1103                  * Commit the remove and start the next trans in series.
1104                  */
1105                 error = xfs_attr_rolltrans(&args->trans, dp);
1106
1107         } else if (args->rmtblkno > 0) {
1108                 /*
1109                  * Added a "remote" value, just clear the incomplete flag.
1110                  */
1111                 error = xfs_attr_leaf_clearflag(args);
1112         }
1113         return(error);
1114 }
1115
1116 /*
1117  * Remove a name from the leaf attribute list structure
1118  *
1119  * This leaf block cannot have a "remote" value, we only call this routine
1120  * if bmap_one_block() says there is only one block (ie: no remote blks).
1121  */
1122 STATIC int
1123 xfs_attr_leaf_removename(xfs_da_args_t *args)
1124 {
1125         xfs_inode_t *dp;
1126         xfs_dabuf_t *bp;
1127         int error, committed, forkoff;
1128
1129         /*
1130          * Remove the attribute.
1131          */
1132         dp = args->dp;
1133         args->blkno = 0;
1134         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1135                                              XFS_ATTR_FORK);
1136         if (error) {
1137                 return(error);
1138         }
1139
1140         ASSERT(bp != NULL);
1141         error = xfs_attr_leaf_lookup_int(bp, args);
1142         if (error == ENOATTR) {
1143                 xfs_da_brelse(args->trans, bp);
1144                 return(error);
1145         }
1146
1147         (void)xfs_attr_leaf_remove(bp, args);
1148
1149         /*
1150          * If the result is small enough, shrink it all into the inode.
1151          */
1152         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1153                 XFS_BMAP_INIT(args->flist, args->firstblock);
1154                 error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1155                 /* bp is gone due to xfs_da_shrink_inode */
1156                 if (!error) {
1157                         error = xfs_bmap_finish(&args->trans, args->flist,
1158                                                 *args->firstblock, &committed);
1159                 }
1160                 if (error) {
1161                         ASSERT(committed);
1162                         args->trans = NULL;
1163                         xfs_bmap_cancel(args->flist);
1164                         return(error);
1165                 }
1166
1167                 /*
1168                  * bmap_finish() may have committed the last trans and started
1169                  * a new one.  We need the inode to be in all transactions.
1170                  */
1171                 if (committed) {
1172                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1173                         xfs_trans_ihold(args->trans, dp);
1174                 }
1175         } else
1176                 xfs_da_buf_done(bp);
1177         return(0);
1178 }
1179
1180 /*
1181  * Look up a name in a leaf attribute list structure.
1182  *
1183  * This leaf block cannot have a "remote" value, we only call this routine
1184  * if bmap_one_block() says there is only one block (ie: no remote blks).
1185  */
1186 STATIC int
1187 xfs_attr_leaf_get(xfs_da_args_t *args)
1188 {
1189         xfs_dabuf_t *bp;
1190         int error;
1191
1192         args->blkno = 0;
1193         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1194                                              XFS_ATTR_FORK);
1195         if (error)
1196                 return(error);
1197         ASSERT(bp != NULL);
1198
1199         error = xfs_attr_leaf_lookup_int(bp, args);
1200         if (error != EEXIST)  {
1201                 xfs_da_brelse(args->trans, bp);
1202                 return(error);
1203         }
1204         error = xfs_attr_leaf_getvalue(bp, args);
1205         xfs_da_brelse(args->trans, bp);
1206         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1207                 error = xfs_attr_rmtval_get(args);
1208         }
1209         return(error);
1210 }
1211
1212 /*
1213  * Copy out attribute entries for attr_list(), for leaf attribute lists.
1214  */
1215 STATIC int
1216 xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1217 {
1218         xfs_attr_leafblock_t *leaf;
1219         int error;
1220         xfs_dabuf_t *bp;
1221
1222         context->cursor->blkno = 0;
1223         error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1224         if (error)
1225                 return XFS_ERROR(error);
1226         ASSERT(bp != NULL);
1227         leaf = bp->data;
1228         if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1229                 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1230                                      context->dp->i_mount, leaf);
1231                 xfs_da_brelse(NULL, bp);
1232                 return XFS_ERROR(EFSCORRUPTED);
1233         }
1234
1235         error = xfs_attr_leaf_list_int(bp, context);
1236         xfs_da_brelse(NULL, bp);
1237         return XFS_ERROR(error);
1238 }
1239
1240
1241 /*========================================================================
1242  * External routines when attribute list size > XFS_LBSIZE(mp).
1243  *========================================================================*/
1244
1245 /*
1246  * Add a name to a Btree-format attribute list.
1247  *
1248  * This will involve walking down the Btree, and may involve splitting
1249  * leaf nodes and even splitting intermediate nodes up to and including
1250  * the root node (a special case of an intermediate node).
1251  *
1252  * "Remote" attribute values confuse the issue and atomic rename operations
1253  * add a whole extra layer of confusion on top of that.
1254  */
1255 STATIC int
1256 xfs_attr_node_addname(xfs_da_args_t *args)
1257 {
1258         xfs_da_state_t *state;
1259         xfs_da_state_blk_t *blk;
1260         xfs_inode_t *dp;
1261         xfs_mount_t *mp;
1262         int committed, retval, error;
1263
1264         /*
1265          * Fill in bucket of arguments/results/context to carry around.
1266          */
1267         dp = args->dp;
1268         mp = dp->i_mount;
1269 restart:
1270         state = xfs_da_state_alloc();
1271         state->args = args;
1272         state->mp = mp;
1273         state->blocksize = state->mp->m_sb.sb_blocksize;
1274         state->node_ents = state->mp->m_attr_node_ents;
1275
1276         /*
1277          * Search to see if name already exists, and get back a pointer
1278          * to where it should go.
1279          */
1280         error = xfs_da_node_lookup_int(state, &retval);
1281         if (error)
1282                 goto out;
1283         blk = &state->path.blk[ state->path.active-1 ];
1284         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1285         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
1286                 goto out;
1287         } else if (retval == EEXIST) {
1288                 if (args->flags & ATTR_CREATE)
1289                         goto out;
1290                 args->rename = 1;                       /* atomic rename op */
1291                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
1292                 args->index2 = args->index;
1293                 args->rmtblkno2 = args->rmtblkno;
1294                 args->rmtblkcnt2 = args->rmtblkcnt;
1295                 args->rmtblkno = 0;
1296                 args->rmtblkcnt = 0;
1297         }
1298
1299         retval = xfs_attr_leaf_add(blk->bp, state->args);
1300         if (retval == ENOSPC) {
1301                 if (state->path.active == 1) {
1302                         /*
1303                          * Its really a single leaf node, but it had
1304                          * out-of-line values so it looked like it *might*
1305                          * have been a b-tree.
1306                          */
1307                         xfs_da_state_free(state);
1308                         XFS_BMAP_INIT(args->flist, args->firstblock);
1309                         error = xfs_attr_leaf_to_node(args);
1310                         if (!error) {
1311                                 error = xfs_bmap_finish(&args->trans,
1312                                                         args->flist,
1313                                                         *args->firstblock,
1314                                                         &committed);
1315                         }
1316                         if (error) {
1317                                 ASSERT(committed);
1318                                 args->trans = NULL;
1319                                 xfs_bmap_cancel(args->flist);
1320                                 goto out;
1321                         }
1322
1323                         /*
1324                          * bmap_finish() may have committed the last trans
1325                          * and started a new one.  We need the inode to be
1326                          * in all transactions.
1327                          */
1328                         if (committed) {
1329                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1330                                 xfs_trans_ihold(args->trans, dp);
1331                         }
1332
1333                         /*
1334                          * Commit the node conversion and start the next
1335                          * trans in the chain.
1336                          */
1337                         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1338                                 goto out;
1339
1340                         goto restart;
1341                 }
1342
1343                 /*
1344                  * Split as many Btree elements as required.
1345                  * This code tracks the new and old attr's location
1346                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
1347                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1348                  */
1349                 XFS_BMAP_INIT(args->flist, args->firstblock);
1350                 error = xfs_da_split(state);
1351                 if (!error) {
1352                         error = xfs_bmap_finish(&args->trans, args->flist,
1353                                                 *args->firstblock, &committed);
1354                 }
1355                 if (error) {
1356                         ASSERT(committed);
1357                         args->trans = NULL;
1358                         xfs_bmap_cancel(args->flist);
1359                         goto out;
1360                 }
1361
1362                 /*
1363                  * bmap_finish() may have committed the last trans and started
1364                  * a new one.  We need the inode to be in all transactions.
1365                  */
1366                 if (committed) {
1367                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1368                         xfs_trans_ihold(args->trans, dp);
1369                 }
1370         } else {
1371                 /*
1372                  * Addition succeeded, update Btree hashvals.
1373                  */
1374                 xfs_da_fixhashpath(state, &state->path);
1375         }
1376
1377         /*
1378          * Kill the state structure, we're done with it and need to
1379          * allow the buffers to come back later.
1380          */
1381         xfs_da_state_free(state);
1382         state = NULL;
1383
1384         /*
1385          * Commit the leaf addition or btree split and start the next
1386          * trans in the chain.
1387          */
1388         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1389                 goto out;
1390
1391         /*
1392          * If there was an out-of-line value, allocate the blocks we
1393          * identified for its storage and copy the value.  This is done
1394          * after we create the attribute so that we don't overflow the
1395          * maximum size of a transaction and/or hit a deadlock.
1396          */
1397         if (args->rmtblkno > 0) {
1398                 error = xfs_attr_rmtval_set(args);
1399                 if (error)
1400                         return(error);
1401         }
1402
1403         /*
1404          * If this is an atomic rename operation, we must "flip" the
1405          * incomplete flags on the "new" and "old" attribute/value pairs
1406          * so that one disappears and one appears atomically.  Then we
1407          * must remove the "old" attribute/value pair.
1408          */
1409         if (args->rename) {
1410                 /*
1411                  * In a separate transaction, set the incomplete flag on the
1412                  * "old" attr and clear the incomplete flag on the "new" attr.
1413                  */
1414                 error = xfs_attr_leaf_flipflags(args);
1415                 if (error)
1416                         goto out;
1417
1418                 /*
1419                  * Dismantle the "old" attribute/value pair by removing
1420                  * a "remote" value (if it exists).
1421                  */
1422                 args->index = args->index2;
1423                 args->blkno = args->blkno2;
1424                 args->rmtblkno = args->rmtblkno2;
1425                 args->rmtblkcnt = args->rmtblkcnt2;
1426                 if (args->rmtblkno) {
1427                         error = xfs_attr_rmtval_remove(args);
1428                         if (error)
1429                                 return(error);
1430                 }
1431
1432                 /*
1433                  * Re-find the "old" attribute entry after any split ops.
1434                  * The INCOMPLETE flag means that we will find the "old"
1435                  * attr, not the "new" one.
1436                  */
1437                 args->flags |= XFS_ATTR_INCOMPLETE;
1438                 state = xfs_da_state_alloc();
1439                 state->args = args;
1440                 state->mp = mp;
1441                 state->blocksize = state->mp->m_sb.sb_blocksize;
1442                 state->node_ents = state->mp->m_attr_node_ents;
1443                 state->inleaf = 0;
1444                 error = xfs_da_node_lookup_int(state, &retval);
1445                 if (error)
1446                         goto out;
1447
1448                 /*
1449                  * Remove the name and update the hashvals in the tree.
1450                  */
1451                 blk = &state->path.blk[ state->path.active-1 ];
1452                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1453                 error = xfs_attr_leaf_remove(blk->bp, args);
1454                 xfs_da_fixhashpath(state, &state->path);
1455
1456                 /*
1457                  * Check to see if the tree needs to be collapsed.
1458                  */
1459                 if (retval && (state->path.active > 1)) {
1460                         XFS_BMAP_INIT(args->flist, args->firstblock);
1461                         error = xfs_da_join(state);
1462                         if (!error) {
1463                                 error = xfs_bmap_finish(&args->trans,
1464                                                         args->flist,
1465                                                         *args->firstblock,
1466                                                         &committed);
1467                         }
1468                         if (error) {
1469                                 ASSERT(committed);
1470                                 args->trans = NULL;
1471                                 xfs_bmap_cancel(args->flist);
1472                                 goto out;
1473                         }
1474
1475                         /*
1476                          * bmap_finish() may have committed the last trans
1477                          * and started a new one.  We need the inode to be
1478                          * in all transactions.
1479                          */
1480                         if (committed) {
1481                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1482                                 xfs_trans_ihold(args->trans, dp);
1483                         }
1484                 }
1485
1486                 /*
1487                  * Commit and start the next trans in the chain.
1488                  */
1489                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1490                         goto out;
1491
1492         } else if (args->rmtblkno > 0) {
1493                 /*
1494                  * Added a "remote" value, just clear the incomplete flag.
1495                  */
1496                 error = xfs_attr_leaf_clearflag(args);
1497                 if (error)
1498                         goto out;
1499         }
1500         retval = error = 0;
1501
1502 out:
1503         if (state)
1504                 xfs_da_state_free(state);
1505         if (error)
1506                 return(error);
1507         return(retval);
1508 }
1509
1510 /*
1511  * Remove a name from a B-tree attribute list.
1512  *
1513  * This will involve walking down the Btree, and may involve joining
1514  * leaf nodes and even joining intermediate nodes up to and including
1515  * the root node (a special case of an intermediate node).
1516  */
1517 STATIC int
1518 xfs_attr_node_removename(xfs_da_args_t *args)
1519 {
1520         xfs_da_state_t *state;
1521         xfs_da_state_blk_t *blk;
1522         xfs_inode_t *dp;
1523         xfs_dabuf_t *bp;
1524         int retval, error, committed, forkoff;
1525
1526         /*
1527          * Tie a string around our finger to remind us where we are.
1528          */
1529         dp = args->dp;
1530         state = xfs_da_state_alloc();
1531         state->args = args;
1532         state->mp = dp->i_mount;
1533         state->blocksize = state->mp->m_sb.sb_blocksize;
1534         state->node_ents = state->mp->m_attr_node_ents;
1535
1536         /*
1537          * Search to see if name exists, and get back a pointer to it.
1538          */
1539         error = xfs_da_node_lookup_int(state, &retval);
1540         if (error || (retval != EEXIST)) {
1541                 if (error == 0)
1542                         error = retval;
1543                 goto out;
1544         }
1545
1546         /*
1547          * If there is an out-of-line value, de-allocate the blocks.
1548          * This is done before we remove the attribute so that we don't
1549          * overflow the maximum size of a transaction and/or hit a deadlock.
1550          */
1551         blk = &state->path.blk[ state->path.active-1 ];
1552         ASSERT(blk->bp != NULL);
1553         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1554         if (args->rmtblkno > 0) {
1555                 /*
1556                  * Fill in disk block numbers in the state structure
1557                  * so that we can get the buffers back after we commit
1558                  * several transactions in the following calls.
1559                  */
1560                 error = xfs_attr_fillstate(state);
1561                 if (error)
1562                         goto out;
1563
1564                 /*
1565                  * Mark the attribute as INCOMPLETE, then bunmapi() the
1566                  * remote value.
1567                  */
1568                 error = xfs_attr_leaf_setflag(args);
1569                 if (error)
1570                         goto out;
1571                 error = xfs_attr_rmtval_remove(args);
1572                 if (error)
1573                         goto out;
1574
1575                 /*
1576                  * Refill the state structure with buffers, the prior calls
1577                  * released our buffers.
1578                  */
1579                 error = xfs_attr_refillstate(state);
1580                 if (error)
1581                         goto out;
1582         }
1583
1584         /*
1585          * Remove the name and update the hashvals in the tree.
1586          */
1587         blk = &state->path.blk[ state->path.active-1 ];
1588         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1589         retval = xfs_attr_leaf_remove(blk->bp, args);
1590         xfs_da_fixhashpath(state, &state->path);
1591
1592         /*
1593          * Check to see if the tree needs to be collapsed.
1594          */
1595         if (retval && (state->path.active > 1)) {
1596                 XFS_BMAP_INIT(args->flist, args->firstblock);
1597                 error = xfs_da_join(state);
1598                 if (!error) {
1599                         error = xfs_bmap_finish(&args->trans, args->flist,
1600                                                 *args->firstblock, &committed);
1601                 }
1602                 if (error) {
1603                         ASSERT(committed);
1604                         args->trans = NULL;
1605                         xfs_bmap_cancel(args->flist);
1606                         goto out;
1607                 }
1608
1609                 /*
1610                  * bmap_finish() may have committed the last trans and started
1611                  * a new one.  We need the inode to be in all transactions.
1612                  */
1613                 if (committed) {
1614                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1615                         xfs_trans_ihold(args->trans, dp);
1616                 }
1617
1618                 /*
1619                  * Commit the Btree join operation and start a new trans.
1620                  */
1621                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1622                         goto out;
1623         }
1624
1625         /*
1626          * If the result is small enough, push it all into the inode.
1627          */
1628         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1629                 /*
1630                  * Have to get rid of the copy of this dabuf in the state.
1631                  */
1632                 ASSERT(state->path.active == 1);
1633                 ASSERT(state->path.blk[0].bp);
1634                 xfs_da_buf_done(state->path.blk[0].bp);
1635                 state->path.blk[0].bp = NULL;
1636
1637                 error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
1638                                                      XFS_ATTR_FORK);
1639                 if (error)
1640                         goto out;
1641                 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
1642                                       bp->data)->hdr.info.magic)
1643                                                        == XFS_ATTR_LEAF_MAGIC);
1644
1645                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1646                         XFS_BMAP_INIT(args->flist, args->firstblock);
1647                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1648                         /* bp is gone due to xfs_da_shrink_inode */
1649                         if (!error) {
1650                                 error = xfs_bmap_finish(&args->trans,
1651                                                         args->flist,
1652                                                         *args->firstblock,
1653                                                         &committed);
1654                         }
1655                         if (error) {
1656                                 ASSERT(committed);
1657                                 args->trans = NULL;
1658                                 xfs_bmap_cancel(args->flist);
1659                                 goto out;
1660                         }
1661
1662                         /*
1663                          * bmap_finish() may have committed the last trans
1664                          * and started a new one.  We need the inode to be
1665                          * in all transactions.
1666                          */
1667                         if (committed) {
1668                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1669                                 xfs_trans_ihold(args->trans, dp);
1670                         }
1671                 } else
1672                         xfs_da_brelse(args->trans, bp);
1673         }
1674         error = 0;
1675
1676 out:
1677         xfs_da_state_free(state);
1678         return(error);
1679 }
1680
1681 /*
1682  * Fill in the disk block numbers in the state structure for the buffers
1683  * that are attached to the state structure.
1684  * This is done so that we can quickly reattach ourselves to those buffers
1685  * after some set of transaction commits have released these buffers.
1686  */
1687 STATIC int
1688 xfs_attr_fillstate(xfs_da_state_t *state)
1689 {
1690         xfs_da_state_path_t *path;
1691         xfs_da_state_blk_t *blk;
1692         int level;
1693
1694         /*
1695          * Roll down the "path" in the state structure, storing the on-disk
1696          * block number for those buffers in the "path".
1697          */
1698         path = &state->path;
1699         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1700         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1701                 if (blk->bp) {
1702                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1703                         xfs_da_buf_done(blk->bp);
1704                         blk->bp = NULL;
1705                 } else {
1706                         blk->disk_blkno = 0;
1707                 }
1708         }
1709
1710         /*
1711          * Roll down the "altpath" in the state structure, storing the on-disk
1712          * block number for those buffers in the "altpath".
1713          */
1714         path = &state->altpath;
1715         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1716         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1717                 if (blk->bp) {
1718                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1719                         xfs_da_buf_done(blk->bp);
1720                         blk->bp = NULL;
1721                 } else {
1722                         blk->disk_blkno = 0;
1723                 }
1724         }
1725
1726         return(0);
1727 }
1728
1729 /*
1730  * Reattach the buffers to the state structure based on the disk block
1731  * numbers stored in the state structure.
1732  * This is done after some set of transaction commits have released those
1733  * buffers from our grip.
1734  */
1735 STATIC int
1736 xfs_attr_refillstate(xfs_da_state_t *state)
1737 {
1738         xfs_da_state_path_t *path;
1739         xfs_da_state_blk_t *blk;
1740         int level, error;
1741
1742         /*
1743          * Roll down the "path" in the state structure, storing the on-disk
1744          * block number for those buffers in the "path".
1745          */
1746         path = &state->path;
1747         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1748         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1749                 if (blk->disk_blkno) {
1750                         error = xfs_da_read_buf(state->args->trans,
1751                                                 state->args->dp,
1752                                                 blk->blkno, blk->disk_blkno,
1753                                                 &blk->bp, XFS_ATTR_FORK);
1754                         if (error)
1755                                 return(error);
1756                 } else {
1757                         blk->bp = NULL;
1758                 }
1759         }
1760
1761         /*
1762          * Roll down the "altpath" in the state structure, storing the on-disk
1763          * block number for those buffers in the "altpath".
1764          */
1765         path = &state->altpath;
1766         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1767         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1768                 if (blk->disk_blkno) {
1769                         error = xfs_da_read_buf(state->args->trans,
1770                                                 state->args->dp,
1771                                                 blk->blkno, blk->disk_blkno,
1772                                                 &blk->bp, XFS_ATTR_FORK);
1773                         if (error)
1774                                 return(error);
1775                 } else {
1776                         blk->bp = NULL;
1777                 }
1778         }
1779
1780         return(0);
1781 }
1782
1783 /*
1784  * Look up a filename in a node attribute list.
1785  *
1786  * This routine gets called for any attribute fork that has more than one
1787  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1788  * "remote" values taking up more blocks.
1789  */
1790 STATIC int
1791 xfs_attr_node_get(xfs_da_args_t *args)
1792 {
1793         xfs_da_state_t *state;
1794         xfs_da_state_blk_t *blk;
1795         int error, retval;
1796         int i;
1797
1798         state = xfs_da_state_alloc();
1799         state->args = args;
1800         state->mp = args->dp->i_mount;
1801         state->blocksize = state->mp->m_sb.sb_blocksize;
1802         state->node_ents = state->mp->m_attr_node_ents;
1803
1804         /*
1805          * Search to see if name exists, and get back a pointer to it.
1806          */
1807         error = xfs_da_node_lookup_int(state, &retval);
1808         if (error) {
1809                 retval = error;
1810         } else if (retval == EEXIST) {
1811                 blk = &state->path.blk[ state->path.active-1 ];
1812                 ASSERT(blk->bp != NULL);
1813                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1814
1815                 /*
1816                  * Get the value, local or "remote"
1817                  */
1818                 retval = xfs_attr_leaf_getvalue(blk->bp, args);
1819                 if (!retval && (args->rmtblkno > 0)
1820                     && !(args->flags & ATTR_KERNOVAL)) {
1821                         retval = xfs_attr_rmtval_get(args);
1822                 }
1823         }
1824
1825         /*
1826          * If not in a transaction, we have to release all the buffers.
1827          */
1828         for (i = 0; i < state->path.active; i++) {
1829                 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1830                 state->path.blk[i].bp = NULL;
1831         }
1832
1833         xfs_da_state_free(state);
1834         return(retval);
1835 }
1836
1837 STATIC int                                                      /* error */
1838 xfs_attr_node_list(xfs_attr_list_context_t *context)
1839 {
1840         attrlist_cursor_kern_t *cursor;
1841         xfs_attr_leafblock_t *leaf;
1842         xfs_da_intnode_t *node;
1843         xfs_da_node_entry_t *btree;
1844         int error, i;
1845         xfs_dabuf_t *bp;
1846
1847         cursor = context->cursor;
1848         cursor->initted = 1;
1849
1850         /*
1851          * Do all sorts of validation on the passed-in cursor structure.
1852          * If anything is amiss, ignore the cursor and look up the hashval
1853          * starting from the btree root.
1854          */
1855         bp = NULL;
1856         if (cursor->blkno > 0) {
1857                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1858                                               &bp, XFS_ATTR_FORK);
1859                 if ((error != 0) && (error != EFSCORRUPTED))
1860                         return(error);
1861                 if (bp) {
1862                         node = bp->data;
1863                         switch (be16_to_cpu(node->hdr.info.magic)) {
1864                         case XFS_DA_NODE_MAGIC:
1865                                 xfs_attr_trace_l_cn("wrong blk", context, node);
1866                                 xfs_da_brelse(NULL, bp);
1867                                 bp = NULL;
1868                                 break;
1869                         case XFS_ATTR_LEAF_MAGIC:
1870                                 leaf = bp->data;
1871                                 if (cursor->hashval > be32_to_cpu(leaf->entries[
1872                                     be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1873                                         xfs_attr_trace_l_cl("wrong blk",
1874                                                            context, leaf);
1875                                         xfs_da_brelse(NULL, bp);
1876                                         bp = NULL;
1877                                 } else if (cursor->hashval <=
1878                                              be32_to_cpu(leaf->entries[0].hashval)) {
1879                                         xfs_attr_trace_l_cl("maybe wrong blk",
1880                                                            context, leaf);
1881                                         xfs_da_brelse(NULL, bp);
1882                                         bp = NULL;
1883                                 }
1884                                 break;
1885                         default:
1886                                 xfs_attr_trace_l_c("wrong blk - ??", context);
1887                                 xfs_da_brelse(NULL, bp);
1888                                 bp = NULL;
1889                         }
1890                 }
1891         }
1892
1893         /*
1894          * We did not find what we expected given the cursor's contents,
1895          * so we start from the top and work down based on the hash value.
1896          * Note that start of node block is same as start of leaf block.
1897          */
1898         if (bp == NULL) {
1899                 cursor->blkno = 0;
1900                 for (;;) {
1901                         error = xfs_da_read_buf(NULL, context->dp,
1902                                                       cursor->blkno, -1, &bp,
1903                                                       XFS_ATTR_FORK);
1904                         if (error)
1905                                 return(error);
1906                         if (unlikely(bp == NULL)) {
1907                                 XFS_ERROR_REPORT("xfs_attr_node_list(2)",
1908                                                  XFS_ERRLEVEL_LOW,
1909                                                  context->dp->i_mount);
1910                                 return(XFS_ERROR(EFSCORRUPTED));
1911                         }
1912                         node = bp->data;
1913                         if (be16_to_cpu(node->hdr.info.magic)
1914                                                         == XFS_ATTR_LEAF_MAGIC)
1915                                 break;
1916                         if (unlikely(be16_to_cpu(node->hdr.info.magic)
1917                                                         != XFS_DA_NODE_MAGIC)) {
1918                                 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1919                                                      XFS_ERRLEVEL_LOW,
1920                                                      context->dp->i_mount,
1921                                                      node);
1922                                 xfs_da_brelse(NULL, bp);
1923                                 return(XFS_ERROR(EFSCORRUPTED));
1924                         }
1925                         btree = node->btree;
1926                         for (i = 0; i < be16_to_cpu(node->hdr.count);
1927                                                                 btree++, i++) {
1928                                 if (cursor->hashval
1929                                                 <= be32_to_cpu(btree->hashval)) {
1930                                         cursor->blkno = be32_to_cpu(btree->before);
1931                                         xfs_attr_trace_l_cb("descending",
1932                                                             context, btree);
1933                                         break;
1934                                 }
1935                         }
1936                         if (i == be16_to_cpu(node->hdr.count)) {
1937                                 xfs_da_brelse(NULL, bp);
1938                                 return(0);
1939                         }
1940                         xfs_da_brelse(NULL, bp);
1941                 }
1942         }
1943         ASSERT(bp != NULL);
1944
1945         /*
1946          * Roll upward through the blocks, processing each leaf block in
1947          * order.  As long as there is space in the result buffer, keep
1948          * adding the information.
1949          */
1950         for (;;) {
1951                 leaf = bp->data;
1952                 if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
1953                                                 != XFS_ATTR_LEAF_MAGIC)) {
1954                         XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1955                                              XFS_ERRLEVEL_LOW,
1956                                              context->dp->i_mount, leaf);
1957                         xfs_da_brelse(NULL, bp);
1958                         return(XFS_ERROR(EFSCORRUPTED));
1959                 }
1960                 error = xfs_attr_leaf_list_int(bp, context);
1961                 if (error) {
1962                         xfs_da_brelse(NULL, bp);
1963                         return error;
1964                 }
1965                 if (context->seen_enough || leaf->hdr.info.forw == 0)
1966                         break;
1967                 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1968                 xfs_da_brelse(NULL, bp);
1969                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1970                                               &bp, XFS_ATTR_FORK);
1971                 if (error)
1972                         return(error);
1973                 if (unlikely((bp == NULL))) {
1974                         XFS_ERROR_REPORT("xfs_attr_node_list(5)",
1975                                          XFS_ERRLEVEL_LOW,
1976                                          context->dp->i_mount);
1977                         return(XFS_ERROR(EFSCORRUPTED));
1978                 }
1979         }
1980         xfs_da_brelse(NULL, bp);
1981         return(0);
1982 }
1983
1984
1985 /*========================================================================
1986  * External routines for manipulating out-of-line attribute values.
1987  *========================================================================*/
1988
1989 /*
1990  * Read the value associated with an attribute from the out-of-line buffer
1991  * that we stored it in.
1992  */
1993 int
1994 xfs_attr_rmtval_get(xfs_da_args_t *args)
1995 {
1996         xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1997         xfs_mount_t *mp;
1998         xfs_daddr_t dblkno;
1999         xfs_caddr_t dst;
2000         xfs_buf_t *bp;
2001         int nmap, error, tmp, valuelen, blkcnt, i;
2002         xfs_dablk_t lblkno;
2003
2004         ASSERT(!(args->flags & ATTR_KERNOVAL));
2005
2006         mp = args->dp->i_mount;
2007         dst = args->value;
2008         valuelen = args->valuelen;
2009         lblkno = args->rmtblkno;
2010         while (valuelen > 0) {
2011                 nmap = ATTR_RMTVALUE_MAPSIZE;
2012                 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2013                                   args->rmtblkcnt,
2014                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2015                                   NULL, 0, map, &nmap, NULL, NULL);
2016                 if (error)
2017                         return(error);
2018                 ASSERT(nmap >= 1);
2019
2020                 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
2021                         ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
2022                                (map[i].br_startblock != HOLESTARTBLOCK));
2023                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2024                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2025                         error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2026                                              blkcnt, XFS_BUF_LOCK, &bp);
2027                         if (error)
2028                                 return(error);
2029
2030                         tmp = (valuelen < XFS_BUF_SIZE(bp))
2031                                 ? valuelen : XFS_BUF_SIZE(bp);
2032                         xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
2033                         xfs_buf_relse(bp);
2034                         dst += tmp;
2035                         valuelen -= tmp;
2036
2037                         lblkno += map[i].br_blockcount;
2038                 }
2039         }
2040         ASSERT(valuelen == 0);
2041         return(0);
2042 }
2043
2044 /*
2045  * Write the value associated with an attribute into the out-of-line buffer
2046  * that we have defined for it.
2047  */
2048 STATIC int
2049 xfs_attr_rmtval_set(xfs_da_args_t *args)
2050 {
2051         xfs_mount_t *mp;
2052         xfs_fileoff_t lfileoff;
2053         xfs_inode_t *dp;
2054         xfs_bmbt_irec_t map;
2055         xfs_daddr_t dblkno;
2056         xfs_caddr_t src;
2057         xfs_buf_t *bp;
2058         xfs_dablk_t lblkno;
2059         int blkcnt, valuelen, nmap, error, tmp, committed;
2060
2061         dp = args->dp;
2062         mp = dp->i_mount;
2063         src = args->value;
2064
2065         /*
2066          * Find a "hole" in the attribute address space large enough for
2067          * us to drop the new attribute's value into.
2068          */
2069         blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2070         lfileoff = 0;
2071         error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2072                                                    XFS_ATTR_FORK);
2073         if (error) {
2074                 return(error);
2075         }
2076         args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2077         args->rmtblkcnt = blkcnt;
2078
2079         /*
2080          * Roll through the "value", allocating blocks on disk as required.
2081          */
2082         while (blkcnt > 0) {
2083                 /*
2084                  * Allocate a single extent, up to the size of the value.
2085                  */
2086                 XFS_BMAP_INIT(args->flist, args->firstblock);
2087                 nmap = 1;
2088                 error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
2089                                   blkcnt,
2090                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2091                                                         XFS_BMAPI_WRITE,
2092                                   args->firstblock, args->total, &map, &nmap,
2093                                   args->flist, NULL);
2094                 if (!error) {
2095                         error = xfs_bmap_finish(&args->trans, args->flist,
2096                                                 *args->firstblock, &committed);
2097                 }
2098                 if (error) {
2099                         ASSERT(committed);
2100                         args->trans = NULL;
2101                         xfs_bmap_cancel(args->flist);
2102                         return(error);
2103                 }
2104
2105                 /*
2106                  * bmap_finish() may have committed the last trans and started
2107                  * a new one.  We need the inode to be in all transactions.
2108                  */
2109                 if (committed) {
2110                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
2111                         xfs_trans_ihold(args->trans, dp);
2112                 }
2113
2114                 ASSERT(nmap == 1);
2115                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2116                        (map.br_startblock != HOLESTARTBLOCK));
2117                 lblkno += map.br_blockcount;
2118                 blkcnt -= map.br_blockcount;
2119
2120                 /*
2121                  * Start the next trans in the chain.
2122                  */
2123                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
2124                         return (error);
2125         }
2126
2127         /*
2128          * Roll through the "value", copying the attribute value to the
2129          * already-allocated blocks.  Blocks are written synchronously
2130          * so that we can know they are all on disk before we turn off
2131          * the INCOMPLETE flag.
2132          */
2133         lblkno = args->rmtblkno;
2134         valuelen = args->valuelen;
2135         while (valuelen > 0) {
2136                 /*
2137                  * Try to remember where we decided to put the value.
2138                  */
2139                 XFS_BMAP_INIT(args->flist, args->firstblock);
2140                 nmap = 1;
2141                 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2142                                   args->rmtblkcnt,
2143                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2144                                   args->firstblock, 0, &map, &nmap,
2145                                   NULL, NULL);
2146                 if (error) {
2147                         return(error);
2148                 }
2149                 ASSERT(nmap == 1);
2150                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2151                        (map.br_startblock != HOLESTARTBLOCK));
2152
2153                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2154                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2155
2156                 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
2157                                                         blkcnt, XFS_BUF_LOCK);
2158                 ASSERT(bp);
2159                 ASSERT(!XFS_BUF_GETERROR(bp));
2160
2161                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2162                                                         XFS_BUF_SIZE(bp);
2163                 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
2164                 if (tmp < XFS_BUF_SIZE(bp))
2165                         xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2166                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2167                         return (error);
2168                 }
2169                 src += tmp;
2170                 valuelen -= tmp;
2171
2172                 lblkno += map.br_blockcount;
2173         }
2174         ASSERT(valuelen == 0);
2175         return(0);
2176 }
2177
2178 /*
2179  * Remove the value associated with an attribute by deleting the
2180  * out-of-line buffer that it is stored on.
2181  */
2182 STATIC int
2183 xfs_attr_rmtval_remove(xfs_da_args_t *args)
2184 {
2185         xfs_mount_t *mp;
2186         xfs_bmbt_irec_t map;
2187         xfs_buf_t *bp;
2188         xfs_daddr_t dblkno;
2189         xfs_dablk_t lblkno;
2190         int valuelen, blkcnt, nmap, error, done, committed;
2191
2192         mp = args->dp->i_mount;
2193
2194         /*
2195          * Roll through the "value", invalidating the attribute value's
2196          * blocks.
2197          */
2198         lblkno = args->rmtblkno;
2199         valuelen = args->rmtblkcnt;
2200         while (valuelen > 0) {
2201                 /*
2202                  * Try to remember where we decided to put the value.
2203                  */
2204                 XFS_BMAP_INIT(args->flist, args->firstblock);
2205                 nmap = 1;
2206                 error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
2207                                         args->rmtblkcnt,
2208                                         XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2209                                         args->firstblock, 0, &map, &nmap,
2210                                         args->flist, NULL);
2211                 if (error) {
2212                         return(error);
2213                 }
2214                 ASSERT(nmap == 1);
2215                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2216                        (map.br_startblock != HOLESTARTBLOCK));
2217
2218                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2219                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2220
2221                 /*
2222                  * If the "remote" value is in the cache, remove it.
2223                  */
2224                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
2225                                 XFS_INCORE_TRYLOCK);
2226                 if (bp) {
2227                         XFS_BUF_STALE(bp);
2228                         XFS_BUF_UNDELAYWRITE(bp);
2229                         xfs_buf_relse(bp);
2230                         bp = NULL;
2231                 }
2232
2233                 valuelen -= map.br_blockcount;
2234
2235                 lblkno += map.br_blockcount;
2236         }
2237
2238         /*
2239          * Keep de-allocating extents until the remote-value region is gone.
2240          */
2241         lblkno = args->rmtblkno;
2242         blkcnt = args->rmtblkcnt;
2243         done = 0;
2244         while (!done) {
2245                 XFS_BMAP_INIT(args->flist, args->firstblock);
2246                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2247                                     XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2248                                     1, args->firstblock, args->flist,
2249                                     NULL, &done);
2250                 if (!error) {
2251                         error = xfs_bmap_finish(&args->trans, args->flist,
2252                                                 *args->firstblock, &committed);
2253                 }
2254                 if (error) {
2255                         ASSERT(committed);
2256                         args->trans = NULL;
2257                         xfs_bmap_cancel(args->flist);
2258                         return(error);
2259                 }
2260
2261                 /*
2262                  * bmap_finish() may have committed the last trans and started
2263                  * a new one.  We need the inode to be in all transactions.
2264                  */
2265                 if (committed) {
2266                         xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
2267                         xfs_trans_ihold(args->trans, args->dp);
2268                 }
2269
2270                 /*
2271                  * Close out trans and start the next one in the chain.
2272                  */
2273                 if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
2274                         return (error);
2275         }
2276         return(0);
2277 }
2278
2279 #if defined(XFS_ATTR_TRACE)
2280 /*
2281  * Add a trace buffer entry for an attr_list context structure.
2282  */
2283 void
2284 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2285 {
2286         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
2287                 (__psunsigned_t)context->dp,
2288                 (__psunsigned_t)context->cursor->hashval,
2289                 (__psunsigned_t)context->cursor->blkno,
2290                 (__psunsigned_t)context->cursor->offset,
2291                 (__psunsigned_t)context->alist,
2292                 (__psunsigned_t)context->bufsize,
2293                 (__psunsigned_t)context->count,
2294                 (__psunsigned_t)context->firstu,
2295                 (__psunsigned_t)
2296                         ((context->count > 0) &&
2297                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2298                                 ? (ATTR_ENTRY(context->alist,
2299                                               context->count-1)->a_valuelen)
2300                                 : 0,
2301                 (__psunsigned_t)context->dupcnt,
2302                 (__psunsigned_t)context->flags,
2303                 (__psunsigned_t)NULL,
2304                 (__psunsigned_t)NULL,
2305                 (__psunsigned_t)NULL);
2306 }
2307
2308 /*
2309  * Add a trace buffer entry for a context structure and a Btree node.
2310  */
2311 void
2312 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2313                          struct xfs_da_intnode *node)
2314 {
2315         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
2316                 (__psunsigned_t)context->dp,
2317                 (__psunsigned_t)context->cursor->hashval,
2318                 (__psunsigned_t)context->cursor->blkno,
2319                 (__psunsigned_t)context->cursor->offset,
2320                 (__psunsigned_t)context->alist,
2321                 (__psunsigned_t)context->bufsize,
2322                 (__psunsigned_t)context->count,
2323                 (__psunsigned_t)context->firstu,
2324                 (__psunsigned_t)
2325                         ((context->count > 0) &&
2326                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2327                                 ? (ATTR_ENTRY(context->alist,
2328                                               context->count-1)->a_valuelen)
2329                                 : 0,
2330                 (__psunsigned_t)context->dupcnt,
2331                 (__psunsigned_t)context->flags,
2332                 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2333                 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2334                 (__psunsigned_t)be32_to_cpu(node->btree[
2335                                     be16_to_cpu(node->hdr.count)-1].hashval));
2336 }
2337
2338 /*
2339  * Add a trace buffer entry for a context structure and a Btree element.
2340  */
2341 void
2342 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2343                           struct xfs_da_node_entry *btree)
2344 {
2345         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
2346                 (__psunsigned_t)context->dp,
2347                 (__psunsigned_t)context->cursor->hashval,
2348                 (__psunsigned_t)context->cursor->blkno,
2349                 (__psunsigned_t)context->cursor->offset,
2350                 (__psunsigned_t)context->alist,
2351                 (__psunsigned_t)context->bufsize,
2352                 (__psunsigned_t)context->count,
2353                 (__psunsigned_t)context->firstu,
2354                 (__psunsigned_t)
2355                         ((context->count > 0) &&
2356                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2357                                 ? (ATTR_ENTRY(context->alist,
2358                                               context->count-1)->a_valuelen)
2359                                 : 0,
2360                 (__psunsigned_t)context->dupcnt,
2361                 (__psunsigned_t)context->flags,
2362                 (__psunsigned_t)be32_to_cpu(btree->hashval),
2363                 (__psunsigned_t)be32_to_cpu(btree->before),
2364                 (__psunsigned_t)NULL);
2365 }
2366
2367 /*
2368  * Add a trace buffer entry for a context structure and a leaf block.
2369  */
2370 void
2371 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2372                               struct xfs_attr_leafblock *leaf)
2373 {
2374         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
2375                 (__psunsigned_t)context->dp,
2376                 (__psunsigned_t)context->cursor->hashval,
2377                 (__psunsigned_t)context->cursor->blkno,
2378                 (__psunsigned_t)context->cursor->offset,
2379                 (__psunsigned_t)context->alist,
2380                 (__psunsigned_t)context->bufsize,
2381                 (__psunsigned_t)context->count,
2382                 (__psunsigned_t)context->firstu,
2383                 (__psunsigned_t)
2384                         ((context->count > 0) &&
2385                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2386                                 ? (ATTR_ENTRY(context->alist,
2387                                               context->count-1)->a_valuelen)
2388                                 : 0,
2389                 (__psunsigned_t)context->dupcnt,
2390                 (__psunsigned_t)context->flags,
2391                 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2392                 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2393                 (__psunsigned_t)be32_to_cpu(leaf->entries[
2394                                 be16_to_cpu(leaf->hdr.count)-1].hashval));
2395 }
2396
2397 /*
2398  * Add a trace buffer entry for the arguments given to the routine,
2399  * generic form.
2400  */
2401 void
2402 xfs_attr_trace_enter(int type, char *where,
2403                          __psunsigned_t a2, __psunsigned_t a3,
2404                          __psunsigned_t a4, __psunsigned_t a5,
2405                          __psunsigned_t a6, __psunsigned_t a7,
2406                          __psunsigned_t a8, __psunsigned_t a9,
2407                          __psunsigned_t a10, __psunsigned_t a11,
2408                          __psunsigned_t a12, __psunsigned_t a13,
2409                          __psunsigned_t a14, __psunsigned_t a15)
2410 {
2411         ASSERT(xfs_attr_trace_buf);
2412         ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2413                                          (void *)where,
2414                                          (void *)a2,  (void *)a3,  (void *)a4,
2415                                          (void *)a5,  (void *)a6,  (void *)a7,
2416                                          (void *)a8,  (void *)a9,  (void *)a10,
2417                                          (void *)a11, (void *)a12, (void *)a13,
2418                                          (void *)a14, (void *)a15);
2419 }
2420 #endif  /* XFS_ATTR_TRACE */
2421
2422
2423 /*========================================================================
2424  * System (pseudo) namespace attribute interface routines.
2425  *========================================================================*/
2426
2427 STATIC int
2428 posix_acl_access_set(
2429         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2430 {
2431         return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2432 }
2433
2434 STATIC int
2435 posix_acl_access_remove(
2436         bhv_vnode_t *vp, char *name, int xflags)
2437 {
2438         return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2439 }
2440
2441 STATIC int
2442 posix_acl_access_get(
2443         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2444 {
2445         return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2446 }
2447
2448 STATIC int
2449 posix_acl_access_exists(
2450         bhv_vnode_t *vp)
2451 {
2452         return xfs_acl_vhasacl_access(vp);
2453 }
2454
2455 STATIC int
2456 posix_acl_default_set(
2457         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2458 {
2459         return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2460 }
2461
2462 STATIC int
2463 posix_acl_default_get(
2464         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2465 {
2466         return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2467 }
2468
2469 STATIC int
2470 posix_acl_default_remove(
2471         bhv_vnode_t *vp, char *name, int xflags)
2472 {
2473         return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2474 }
2475
2476 STATIC int
2477 posix_acl_default_exists(
2478         bhv_vnode_t *vp)
2479 {
2480         return xfs_acl_vhasacl_default(vp);
2481 }
2482
2483 STATIC struct attrnames posix_acl_access = {
2484         .attr_name      = "posix_acl_access",
2485         .attr_namelen   = sizeof("posix_acl_access") - 1,
2486         .attr_get       = posix_acl_access_get,
2487         .attr_set       = posix_acl_access_set,
2488         .attr_remove    = posix_acl_access_remove,
2489         .attr_exists    = posix_acl_access_exists,
2490 };
2491
2492 STATIC struct attrnames posix_acl_default = {
2493         .attr_name      = "posix_acl_default",
2494         .attr_namelen   = sizeof("posix_acl_default") - 1,
2495         .attr_get       = posix_acl_default_get,
2496         .attr_set       = posix_acl_default_set,
2497         .attr_remove    = posix_acl_default_remove,
2498         .attr_exists    = posix_acl_default_exists,
2499 };
2500
2501 STATIC struct attrnames *attr_system_names[] =
2502         { &posix_acl_access, &posix_acl_default };
2503
2504
2505 /*========================================================================
2506  * Namespace-prefix-style attribute name interface routines.
2507  *========================================================================*/
2508
2509 STATIC int
2510 attr_generic_set(
2511         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2512 {
2513         return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
2514 }
2515
2516 STATIC int
2517 attr_generic_get(
2518         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2519 {
2520         int     error, asize = size;
2521
2522         error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
2523         if (!error)
2524                 return asize;
2525         return -error;
2526 }
2527
2528 STATIC int
2529 attr_generic_remove(
2530         bhv_vnode_t *vp, char *name, int xflags)
2531 {
2532         return -bhv_vop_attr_remove(vp, name, xflags, NULL);
2533 }
2534
2535 STATIC int
2536 attr_generic_listadd(
2537         attrnames_t             *prefix,
2538         attrnames_t             *namesp,
2539         void                    *data,
2540         size_t                  size,
2541         ssize_t                 *result)
2542 {
2543         char                    *p = data + *result;
2544
2545         *result += prefix->attr_namelen;
2546         *result += namesp->attr_namelen + 1;
2547         if (!size)
2548                 return 0;
2549         if (*result > size)
2550                 return -ERANGE;
2551         strcpy(p, prefix->attr_name);
2552         p += prefix->attr_namelen;
2553         strcpy(p, namesp->attr_name);
2554         p += namesp->attr_namelen + 1;
2555         return 0;
2556 }
2557
2558 STATIC int
2559 attr_system_list(
2560         bhv_vnode_t             *vp,
2561         void                    *data,
2562         size_t                  size,
2563         ssize_t                 *result)
2564 {
2565         attrnames_t             *namesp;
2566         int                     i, error = 0;
2567
2568         for (i = 0; i < ATTR_SYSCOUNT; i++) {
2569                 namesp = attr_system_names[i];
2570                 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2571                         continue;
2572                 error = attr_generic_listadd(&attr_system, namesp,
2573                                                 data, size, result);
2574                 if (error)
2575                         break;
2576         }
2577         return error;
2578 }
2579
2580 int
2581 attr_generic_list(
2582         bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2583 {
2584         attrlist_cursor_kern_t  cursor = { 0 };
2585         int                     error;
2586
2587         error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
2588         if (error > 0)
2589                 return -error;
2590         *result = -error;
2591         return attr_system_list(vp, data, size, result);
2592 }
2593
2594 attrnames_t *
2595 attr_lookup_namespace(
2596         char                    *name,
2597         struct attrnames        **names,
2598         int                     nnames)
2599 {
2600         int                     i;
2601
2602         for (i = 0; i < nnames; i++)
2603                 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2604                         return names[i];
2605         return NULL;
2606 }
2607
2608 /*
2609  * Some checks to prevent people abusing EAs to get over quota:
2610  * - Don't allow modifying user EAs on devices/symlinks;
2611  * - Don't allow modifying user EAs if sticky bit set;
2612  */
2613 STATIC int
2614 attr_user_capable(
2615         bhv_vnode_t     *vp,
2616         cred_t          *cred)
2617 {
2618         struct inode    *inode = vn_to_inode(vp);
2619
2620         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2621                 return -EPERM;
2622         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2623             !capable(CAP_SYS_ADMIN))
2624                 return -EPERM;
2625         if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2626             (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2627                 return -EPERM;
2628         return 0;
2629 }
2630
2631 STATIC int
2632 attr_trusted_capable(
2633         bhv_vnode_t     *vp,
2634         cred_t          *cred)
2635 {
2636         struct inode    *inode = vn_to_inode(vp);
2637
2638         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2639                 return -EPERM;
2640         if (!capable(CAP_SYS_ADMIN))
2641                 return -EPERM;
2642         return 0;
2643 }
2644
2645 STATIC int
2646 attr_secure_capable(
2647         bhv_vnode_t     *vp,
2648         cred_t          *cred)
2649 {
2650         return -ENOSECURITY;
2651 }
2652
2653 STATIC int
2654 attr_system_set(
2655         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2656 {
2657         attrnames_t     *namesp;
2658         int             error;
2659
2660         if (xflags & ATTR_CREATE)
2661                 return -EINVAL;
2662
2663         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2664         if (!namesp)
2665                 return -EOPNOTSUPP;
2666         error = namesp->attr_set(vp, name, data, size, xflags);
2667         if (!error)
2668                 error = vn_revalidate(vp);
2669         return error;
2670 }
2671
2672 STATIC int
2673 attr_system_get(
2674         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2675 {
2676         attrnames_t     *namesp;
2677
2678         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2679         if (!namesp)
2680                 return -EOPNOTSUPP;
2681         return namesp->attr_get(vp, name, data, size, xflags);
2682 }
2683
2684 STATIC int
2685 attr_system_remove(
2686         bhv_vnode_t *vp, char *name, int xflags)
2687 {
2688         attrnames_t     *namesp;
2689
2690         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2691         if (!namesp)
2692                 return -EOPNOTSUPP;
2693         return namesp->attr_remove(vp, name, xflags);
2694 }
2695
2696 struct attrnames attr_system = {
2697         .attr_name      = "system.",
2698         .attr_namelen   = sizeof("system.") - 1,
2699         .attr_flag      = ATTR_SYSTEM,
2700         .attr_get       = attr_system_get,
2701         .attr_set       = attr_system_set,
2702         .attr_remove    = attr_system_remove,
2703         .attr_capable   = (attrcapable_t)fs_noerr,
2704 };
2705
2706 struct attrnames attr_trusted = {
2707         .attr_name      = "trusted.",
2708         .attr_namelen   = sizeof("trusted.") - 1,
2709         .attr_flag      = ATTR_ROOT,
2710         .attr_get       = attr_generic_get,
2711         .attr_set       = attr_generic_set,
2712         .attr_remove    = attr_generic_remove,
2713         .attr_capable   = attr_trusted_capable,
2714 };
2715
2716 struct attrnames attr_secure = {
2717         .attr_name      = "security.",
2718         .attr_namelen   = sizeof("security.") - 1,
2719         .attr_flag      = ATTR_SECURE,
2720         .attr_get       = attr_generic_get,
2721         .attr_set       = attr_generic_set,
2722         .attr_remove    = attr_generic_remove,
2723         .attr_capable   = attr_secure_capable,
2724 };
2725
2726 struct attrnames attr_user = {
2727         .attr_name      = "user.",
2728         .attr_namelen   = sizeof("user.") - 1,
2729         .attr_get       = attr_generic_get,
2730         .attr_set       = attr_generic_set,
2731         .attr_remove    = attr_generic_remove,
2732         .attr_capable   = attr_user_capable,
2733 };
2734
2735 struct attrnames *attr_namespaces[] =
2736         { &attr_system, &attr_trusted, &attr_secure, &attr_user };