upgrade to linux 2.6.9-1.11_FC2
[linux-2.6.git] / fs / ext3 / xattr.c
1 /*
2  * linux/fs/ext3/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8  * Extended attributes for symlinks and special files added per
9  *  suggestion of Luka Renko <luka.renko@hermes.si>.
10  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11  *  Red Hat Inc.
12  */
13
14 /*
15  * Extended attributes are stored on disk blocks allocated outside of
16  * any inode. The i_file_acl field is then made to point to this allocated
17  * block. If the extended attributes of several inodes are identical, these
18  * inodes may share the same extended attribute block. Such situations
19  * are automatically detected by keeping a cache of recent attribute block
20  * numbers and hashes over the block's contents in memory.
21  *
22  *
23  * Extended attribute block layout:
24  *
25  *   +------------------+
26  *   | header           |
27  *   | entry 1          | |
28  *   | entry 2          | | growing downwards
29  *   | entry 3          | v
30  *   | four null bytes  |
31  *   | . . .            |
32  *   | value 1          | ^
33  *   | value 3          | | growing upwards
34  *   | value 2          | |
35  *   +------------------+
36  *
37  * The block header is followed by multiple entry descriptors. These entry
38  * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
39  * byte boundaries. The entry descriptors are sorted by attribute name,
40  * so that two extended attribute blocks can be compared efficiently.
41  *
42  * Attribute values are aligned to the end of the block, stored in
43  * no specific order. They are also padded to EXT3_XATTR_PAD byte
44  * boundaries. No additional gaps are left between them.
45  *
46  * Locking strategy
47  * ----------------
48  * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
49  * EA blocks are only changed if they are exclusive to an inode, so
50  * holding xattr_sem also means that nothing but the EA block's reference
51  * count will change. Multiple writers to an EA block are synchronized
52  * by the bh lock. No more than a single bh lock is held at any time
53  * to avoid deadlocks.
54  */
55
56 #include <linux/init.h>
57 #include <linux/fs.h>
58 #include <linux/slab.h>
59 #include <linux/ext3_jbd.h>
60 #include <linux/ext3_fs.h>
61 #include <linux/mbcache.h>
62 #include <linux/quotaops.h>
63 #include <linux/rwsem.h>
64 #include <linux/vs_dlimit.h>
65 #include "xattr.h"
66 #include "acl.h"
67
/*
 * Accessors for an extended attribute block held in a buffer_head.
 *
 * HDR(bh)              - bh->b_data viewed as the block header.
 * ENTRY(ptr)           - an arbitrary pointer viewed as an entry descriptor.
 * FIRST_ENTRY(bh)      - the entry descriptor located directly after the
 *                        header.
 * IS_LAST_ENTRY(entry) - true when the first 32 bits at entry are zero,
 *                        i.e. entry is the "four null bytes" terminator
 *                        of the entry table.
 */
68 #define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
69 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
70 #define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
71 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
72
/*
 * Debug tracing helpers.
 *
 * With EXT3_XATTR_DEBUG defined, ea_idebug() prints a KERN_DEBUG message
 * prefixed with the inode's superblock id and inode number, and ea_bdebug()
 * prints one prefixed with the buffer's device name and block number; each
 * message is followed by a newline.
 *
 * Without EXT3_XATTR_DEBUG both macros expand to nothing, so their
 * arguments are never evaluated and a statement such as
 * "if (cond) ea_idebug(...);" reduces to "if (cond) ;".
 */
73 #ifdef EXT3_XATTR_DEBUG
74 # define ea_idebug(inode, f...) do { \
75                 printk(KERN_DEBUG "inode %s:%ld: ", \
76                         inode->i_sb->s_id, inode->i_ino); \
77                 printk(f); \
78                 printk("\n"); \
79         } while (0)
80 # define ea_bdebug(bh, f...) do { \
81                 char b[BDEVNAME_SIZE]; \
82                 printk(KERN_DEBUG "block %s:%lu: ", \
83                         bdevname(bh->b_bdev, b), \
84                         (unsigned long) bh->b_blocknr); \
85                 printk(f); \
86                 printk("\n"); \
87         } while (0)
88 #else
89 # define ea_idebug(f...)
90 # define ea_bdebug(f...)
91 #endif
92
/* Forward declarations of static helpers that are used before they are defined. */
93 static int ext3_xattr_set_handle2(handle_t *, struct inode *,
94                                   struct buffer_head *,
95                                   struct ext3_xattr_header *);
96
97 static int ext3_xattr_cache_insert(struct buffer_head *);
98 static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
99                                                  struct ext3_xattr_header *,
100                                                  int *);
101 static void ext3_xattr_rehash(struct ext3_xattr_header *,
102                               struct ext3_xattr_entry *);
103
/*
 * The mbcache instance for extended attribute blocks; it is queried with
 * mb_cache_entry_get() using a block's device and block number.
 */
104 static struct mb_cache *ext3_xattr_cache;
105
/*
 * Handler lookup table indexed by attribute name index (the e_name_index
 * field of an entry descriptor); consulted through ext3_xattr_handler().
 * The table has EXT3_XATTR_INDEX_MAX slots. Slots without an initializer,
 * including those compiled out by the #ifdefs below, are NULL.
 */
106 static struct xattr_handler *ext3_xattr_handler_map[EXT3_XATTR_INDEX_MAX] = {
107         [EXT3_XATTR_INDEX_USER]              = &ext3_xattr_user_handler,
108 #ifdef CONFIG_EXT3_FS_POSIX_ACL
109         [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext3_xattr_acl_access_handler,
110         [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
111 #endif
112         [EXT3_XATTR_INDEX_TRUSTED]           = &ext3_xattr_trusted_handler,
113 #ifdef CONFIG_EXT3_FS_SECURITY
114         [EXT3_XATTR_INDEX_SECURITY]          = &ext3_xattr_security_handler,
115 #endif
116 };
117
/*
 * NULL-terminated list of every extended attribute handler compiled into
 * ext3 (ACL and security handlers only when configured).
 * NOTE(review): the consumer of this list is outside this part of the
 * file; presumably it is installed as the filesystem's handler list --
 * confirm against the superblock setup code.
 */
118 struct xattr_handler *ext3_xattr_handlers[] = {
119         &ext3_xattr_user_handler,
120         &ext3_xattr_trusted_handler,
121 #ifdef CONFIG_EXT3_FS_POSIX_ACL
122         &ext3_xattr_acl_access_handler,
123         &ext3_xattr_acl_default_handler,
124 #endif
125 #ifdef CONFIG_EXT3_FS_SECURITY
126         &ext3_xattr_security_handler,
127 #endif
128         NULL
129 };
130
131 static inline struct xattr_handler *
132 ext3_xattr_handler(int name_index)
133 {
134         struct xattr_handler *handler = NULL;
135
136         if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX)
137                 handler = ext3_xattr_handler_map[name_index];
138         return handler;
139 }
140
141 /*
142  * Inode operation listxattr()
143  *
144  * dentry->d_inode->i_sem: don't care
145  */
146 ssize_t
147 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
148 {
149         return ext3_xattr_list(dentry->d_inode, buffer, size);
150 }
151
152 /*
153  * ext3_xattr_get()
154  *
155  * Copy an extended attribute into the buffer
156  * provided, or compute the buffer size required.
157  * Buffer is NULL to compute the size of the buffer required.
158  *
159  * Returns a negative error number on failure, or the number of bytes
160  * used / required on success.
161  */
162 int
163 ext3_xattr_get(struct inode *inode, int name_index, const char *name,
164                void *buffer, size_t buffer_size)
165 {
166         struct buffer_head *bh = NULL;
167         struct ext3_xattr_entry *entry;
168         size_t name_len, size;
169         char *end;
170         int error;
171
172         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
173                   name_index, name, buffer, (long)buffer_size);
174
175         if (name == NULL)
176                 return -EINVAL;
177         down_read(&EXT3_I(inode)->xattr_sem);
178         error = -ENODATA;
179         if (!EXT3_I(inode)->i_file_acl)
180                 goto cleanup;
181         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
182         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
183         error = -EIO;
184         if (!bh)
185                 goto cleanup;
186         ea_bdebug(bh, "b_count=%d, refcount=%d",
187                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
188         end = bh->b_data + bh->b_size;
189         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
190             HDR(bh)->h_blocks != cpu_to_le32(1)) {
191 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_get",
192                         "inode %ld: bad block %d", inode->i_ino,
193                         EXT3_I(inode)->i_file_acl);
194                 error = -EIO;
195                 goto cleanup;
196         }
197         /* find named attribute */
198         name_len = strlen(name);
199
200         error = -ERANGE;
201         if (name_len > 255)
202                 goto cleanup;
203         entry = FIRST_ENTRY(bh);
204         while (!IS_LAST_ENTRY(entry)) {
205                 struct ext3_xattr_entry *next =
206                         EXT3_XATTR_NEXT(entry);
207                 if ((char *)next >= end)
208                         goto bad_block;
209                 if (name_index == entry->e_name_index &&
210                     name_len == entry->e_name_len &&
211                     memcmp(name, entry->e_name, name_len) == 0)
212                         goto found;
213                 entry = next;
214         }
215         /* Check the remaining name entries */
216         while (!IS_LAST_ENTRY(entry)) {
217                 struct ext3_xattr_entry *next =
218                         EXT3_XATTR_NEXT(entry);
219                 if ((char *)next >= end)
220                         goto bad_block;
221                 entry = next;
222         }
223         if (ext3_xattr_cache_insert(bh))
224                 ea_idebug(inode, "cache insert failed");
225         error = -ENODATA;
226         goto cleanup;
227 found:
228         /* check the buffer size */
229         if (entry->e_value_block != 0)
230                 goto bad_block;
231         size = le32_to_cpu(entry->e_value_size);
232         if (size > inode->i_sb->s_blocksize ||
233             le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
234                 goto bad_block;
235
236         if (ext3_xattr_cache_insert(bh))
237                 ea_idebug(inode, "cache insert failed");
238         if (buffer) {
239                 error = -ERANGE;
240                 if (size > buffer_size)
241                         goto cleanup;
242                 /* return value of attribute */
243                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
244                         size);
245         }
246         error = size;
247
248 cleanup:
249         brelse(bh);
250         up_read(&EXT3_I(inode)->xattr_sem);
251
252         return error;
253 }
254
255 /*
256  * ext3_xattr_list()
257  *
258  * Copy a list of attribute names into the buffer
259  * provided, or compute the buffer size required.
260  * Buffer is NULL to compute the size of the buffer required.
261  *
262  * Returns a negative error number on failure, or the number of bytes
263  * used / required on success.
264  */
265 int
266 ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
267 {
268         struct buffer_head *bh = NULL;
269         struct ext3_xattr_entry *entry;
270         char *end;
271         size_t rest = buffer_size;
272         int error;
273
274         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
275                   buffer, (long)buffer_size);
276
277         down_read(&EXT3_I(inode)->xattr_sem);
278         error = 0;
279         if (!EXT3_I(inode)->i_file_acl)
280                 goto cleanup;
281         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
282         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
283         error = -EIO;
284         if (!bh)
285                 goto cleanup;
286         ea_bdebug(bh, "b_count=%d, refcount=%d",
287                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
288         end = bh->b_data + bh->b_size;
289         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
290             HDR(bh)->h_blocks != cpu_to_le32(1)) {
291 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_list",
292                         "inode %ld: bad block %d", inode->i_ino,
293                         EXT3_I(inode)->i_file_acl);
294                 error = -EIO;
295                 goto cleanup;
296         }
297
298         /* check the on-disk data structure */
299         entry = FIRST_ENTRY(bh);
300         while (!IS_LAST_ENTRY(entry)) {
301                 struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
302
303                 if ((char *)next >= end)
304                         goto bad_block;
305                 entry = next;
306         }
307         if (ext3_xattr_cache_insert(bh))
308                 ea_idebug(inode, "cache insert failed");
309
310         /* list the attribute names */
311         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
312              entry = EXT3_XATTR_NEXT(entry)) {
313                 struct xattr_handler *handler =
314                         ext3_xattr_handler(entry->e_name_index);
315
316                 if (handler) {
317                         size_t size = handler->list(inode, buffer, rest,
318                                                     entry->e_name,
319                                                     entry->e_name_len);
320                         if (buffer) {
321                                 if (size > rest) {
322                                         error = -ERANGE;
323                                         goto cleanup;
324                                 }
325                                 buffer += size;
326                         }
327                         rest -= size;
328                 }
329         }
330         error = buffer_size - rest;  /* total size */
331
332 cleanup:
333         brelse(bh);
334         up_read(&EXT3_I(inode)->xattr_sem);
335
336         return error;
337 }
338
339 /*
340  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
341  * not set, set it.
342  */
343 static void ext3_xattr_update_super_block(handle_t *handle,
344                                           struct super_block *sb)
345 {
346         if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
347                 return;
348
349         lock_super(sb);
350         if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
351                 EXT3_SB(sb)->s_es->s_feature_compat |=
352                         cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
353                 sb->s_dirt = 1;
354                 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
355         }
356         unlock_super(sb);
357 }
358
359 /*
360  * ext3_xattr_set_handle()
361  *
362  * Create, replace or remove an extended attribute for this inode. Buffer
363  * is NULL to remove an existing extended attribute, and non-NULL to
364  * either replace an existing extended attribute, or create a new extended
365  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
366  * specify that an extended attribute must exist and must not exist
367  * previous to the call, respectively.
368  *
369  * Returns 0, or a negative error number on failure.
370  */
/*
 * Implementation notes for ext3_xattr_set_handle():
 *
 * The function holds EXT3_I(inode)->xattr_sem for writing from after the
 * argument checks until it returns. It builds the new contents of the
 * attribute block either directly in the existing buffer (only when the
 * block's h_refcount is 1) or in a kmalloc()ed copy / fresh block image,
 * and then hands the result to ext3_xattr_set_handle2().
 *
 * Error returns produced directly by this function:
 *   -EROFS   IS_RDONLY(inode)
 *   -EPERM   IS_IMMUTABLE(inode) or IS_APPEND(inode)
 *   -EINVAL  name is NULL
 *   -ERANGE  name longer than 255 bytes, or value_len > block size
 *   -EIO     the existing attribute block cannot be read or is malformed
 *   -ENODATA XATTR_REPLACE given but the attribute does not exist
 *   -EEXIST  XATTR_CREATE given but the attribute already exists
 *   -ENOSPC  the new entry and value do not fit into one block
 *   -ENOMEM  kmalloc() of the working copy failed
 * Errors from ext3_journal_get_write_access_credits() and
 * ext3_xattr_set_handle2() are passed through. Removing an attribute that
 * does not exist (value == NULL, no XATTR_REPLACE) returns 0 without
 * changing anything.
 */
371 int
372 ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
373                       const char *name, const void *value, size_t value_len,
374                       int flags)
375 {
376         struct super_block *sb = inode->i_sb;
377         struct buffer_head *bh = NULL;
378         struct ext3_xattr_header *header = NULL;
379         struct ext3_xattr_entry *here, *last;
380         size_t name_len, free, min_offs = sb->s_blocksize;
381         int not_found = 1, error;
382         char *end;
383
384         /*
385          * header -- Points either into bh, or to a temporarily
386          *           allocated buffer.
387          * here -- The named entry found, or the place for inserting, within
388          *         the block pointed to by header.
389          * last -- Points right after the last named entry within the block
390          *         pointed to by header.
391          * min_offs -- The offset of the first value (values are aligned
392          *             towards the end of the block).
393          * end -- Points right after the block pointed to by header.
394          */
395
396         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
397                   name_index, name, value, (long)value_len);
398
399         if (IS_RDONLY(inode))
400                 return -EROFS;
401         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
402                 return -EPERM;
            /* A NULL value requests removal; any length passed with it is ignored. */
403         if (value == NULL)
404                 value_len = 0;
405         if (name == NULL)
406                 return -EINVAL;
407         name_len = strlen(name);
408         if (name_len > 255 || value_len > sb->s_blocksize)
409                 return -ERANGE;
410         down_write(&EXT3_I(inode)->xattr_sem);
411         if (EXT3_I(inode)->i_file_acl) {
412                 /* The inode already has an extended attribute block. */
413                 bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
414                 error = -EIO;
415                 if (!bh)
416                         goto cleanup;
417                 ea_bdebug(bh, "b_count=%d, refcount=%d",
418                         atomic_read(&(bh->b_count)),
419                         le32_to_cpu(HDR(bh)->h_refcount));
420                 header = HDR(bh);
421                 end = bh->b_data + bh->b_size;
422                 if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
423                     header->h_blocks != cpu_to_le32(1)) {
424 bad_block:              ext3_error(sb, "ext3_xattr_set",
425                                 "inode %ld: bad block %d", inode->i_ino,
426                                 EXT3_I(inode)->i_file_acl);
427                         error = -EIO;
428                         goto cleanup;
429                 }
430                 /* Find the named attribute. */
431                 here = FIRST_ENTRY(bh);
432                 while (!IS_LAST_ENTRY(here)) {
433                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
434                         if ((char *)next >= end)
435                                 goto bad_block;
436                         if (!here->e_value_block && here->e_value_size) {
437                                 size_t offs = le16_to_cpu(here->e_value_offs);
438                                 if (offs < min_offs)
439                                         min_offs = offs;
440                         }
                            /*
                             * Compare by name index, then name length, then
                             * name bytes. Stop at the first entry for which
                             * the result is <= 0: zero means an exact match,
                             * negative means "here" is the insertion point.
                             */
441                         not_found = name_index - here->e_name_index;
442                         if (!not_found)
443                                 not_found = name_len - here->e_name_len;
444                         if (!not_found)
445                                 not_found = memcmp(name, here->e_name,name_len);
446                         if (not_found <= 0)
447                                 break;
448                         here = next;
449                 }
450                 last = here;
451                 /* We still need to compute min_offs and last. */
452                 while (!IS_LAST_ENTRY(last)) {
453                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
454                         if ((char *)next >= end)
455                                 goto bad_block;
456                         if (!last->e_value_block && last->e_value_size) {
457                                 size_t offs = le16_to_cpu(last->e_value_offs);
458                                 if (offs < min_offs)
459                                         min_offs = offs;
460                         }
461                         last = next;
462                 }
463
464                 /* Check whether we have enough space left. */
                    /*
                     * free = gap between the end of the entry table (plus its
                     * 32-bit terminator) and the lowest value offset.
                     */
465                 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
466         } else {
467                 /* We will use a new extended attribute block. */
468                 free = sb->s_blocksize -
469                         sizeof(struct ext3_xattr_header) - sizeof(__u32);
470                 here = last = NULL;  /* avoid gcc uninitialized warning. */
471         }
472
473         if (not_found) {
474                 /* Request to remove a nonexistent attribute? */
475                 error = -ENODATA;
476                 if (flags & XATTR_REPLACE)
477                         goto cleanup;
                    /* Removing an attribute that does not exist is a successful no-op. */
478                 error = 0;
479                 if (value == NULL)
480                         goto cleanup;
481         } else {
482                 /* Request to create an existing attribute? */
483                 error = -EEXIST;
484                 if (flags & XATTR_CREATE)
485                         goto cleanup;
                    /*
                     * The space taken by the existing value and by the existing
                     * entry descriptor will be released, so count it as free.
                     */
486                 if (!here->e_value_block && here->e_value_size) {
487                         size_t size = le32_to_cpu(here->e_value_size);
488
489                         if (le16_to_cpu(here->e_value_offs) + size > 
490                             sb->s_blocksize || size > sb->s_blocksize)
491                                 goto bad_block;
492                         free += EXT3_XATTR_SIZE(size);
493                 }
494                 free += EXT3_XATTR_LEN(name_len);
495         }
496         error = -ENOSPC;
497         if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
498                 goto cleanup;
499
500         /* Here we know that we can set the new attribute. */
501
502         if (header) {
503                 struct mb_cache_entry *ce;
504                 int credits = 0;
505
506                 /* assert(header == HDR(bh)); */
                    /* A block with refcount != 1 is never modified in place; clone it. */
507                 if (header->h_refcount != cpu_to_le32(1))
508                         goto skip_get_write_access;
509                 /* ext3_journal_get_write_access() requires an unlocked bh,
510                    which complicates things here. */
511                 error = ext3_journal_get_write_access_credits(handle, bh,
512                                                               &credits);
513                 if (error)
514                         goto cleanup;
515                 ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
516                                         bh->b_blocknr);
517                 lock_buffer(bh);
                    /* Re-check the refcount now that the buffer is locked. */
518                 if (header->h_refcount == cpu_to_le32(1)) {
519                         ea_bdebug(bh, "modifying in-place");
520                         if (ce)
521                                 mb_cache_entry_free(ce);
522                         /* keep the buffer locked while modifying it. */
523                 } else {
524                         int offset;
525
526                         if (ce)
527                                 mb_cache_entry_release(ce);
528                         unlock_buffer(bh);
529                         journal_release_buffer(handle, bh, credits);
530                 skip_get_write_access:
531                         ea_bdebug(bh, "cloning");
532                         header = kmalloc(bh->b_size, GFP_KERNEL);
533                         error = -ENOMEM;
534                         if (header == NULL)
535                                 goto cleanup;
536                         memcpy(header, HDR(bh), bh->b_size);
537                         header->h_refcount = cpu_to_le32(1);
                            /* Rebase here and last from the buffer onto the copy. */
538                         offset = (char *)here - bh->b_data;
539                         here = ENTRY((char *)header + offset);
540                         offset = (char *)last - bh->b_data;
541                         last = ENTRY((char *)header + offset);
542                 }
543         } else {
544                 /* Allocate a buffer where we construct the new block. */
545                 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
546                 error = -ENOMEM;
547                 if (header == NULL)
548                         goto cleanup;
549                 memset(header, 0, sb->s_blocksize);
550                 end = (char *)header + sb->s_blocksize;
551                 header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
552                 header->h_blocks = header->h_refcount = cpu_to_le32(1);
553                 last = here = ENTRY(header+1);
554         }
555
556         /* Iff we are modifying the block in-place, bh is locked here. */
557
558         if (not_found) {
559                 /* Insert the new name. */
                    /* Shift the entries from here up to last to open a slot at here. */
560                 size_t size = EXT3_XATTR_LEN(name_len);
561                 size_t rest = (char *)last - (char *)here;
562                 memmove((char *)here + size, here, rest);
563                 memset(here, 0, size);
564                 here->e_name_index = name_index;
565                 here->e_name_len = name_len;
566                 memcpy(here->e_name, name, name_len);
567         } else {
568                 if (!here->e_value_block && here->e_value_size) {
569                         char *first_val = (char *)header + min_offs;
570                         size_t offs = le16_to_cpu(here->e_value_offs);
571                         char *val = (char *)header + offs;
572                         size_t size = EXT3_XATTR_SIZE(
573                                 le32_to_cpu(here->e_value_size));
574
575                         if (size == EXT3_XATTR_SIZE(value_len)) {
576                                 /* The old and the new value have the same
577                                    size. Just replace. */
578                                 here->e_value_size = cpu_to_le32(value_len);
579                                 memset(val + size - EXT3_XATTR_PAD, 0,
580                                        EXT3_XATTR_PAD); /* Clear pad bytes. */
581                                 memcpy(val, value, value_len);
582                                 goto skip_replace;
583                         }
584
585                         /* Remove the old value. */
                            /*
                             * Move the values stored below the old one up by
                             * its padded size and zero the space freed at the
                             * low end of the value area.
                             */
586                         memmove(first_val + size, first_val, val - first_val);
587                         memset(first_val, 0, size);
588                         here->e_value_offs = 0;
589                         min_offs += size;
590
591                         /* Adjust all value offsets. */
592                         last = ENTRY(header+1);
593                         while (!IS_LAST_ENTRY(last)) {
594                                 size_t o = le16_to_cpu(last->e_value_offs);
595                                 if (!last->e_value_block && o < offs)
596                                         last->e_value_offs =
597                                                 cpu_to_le16(o + size);
598                                 last = EXT3_XATTR_NEXT(last);
599                         }
600                 }
601                 if (value == NULL) {
602                         /* Remove the old name. */
                            /* Close the gap left by the descriptor and zero the vacated tail. */
603                         size_t size = EXT3_XATTR_LEN(name_len);
604                         last = ENTRY((char *)last - size);
605                         memmove(here, (char*)here + size,
606                                 (char*)last - (char*)here);
607                         memset(last, 0, size);
608                 }
609         }
610
611         if (value != NULL) {
612                 /* Insert the new value. */
613                 here->e_value_size = cpu_to_le32(value_len);
614                 if (value_len) {
                            /* The new value is placed directly below the current lowest value. */
615                         size_t size = EXT3_XATTR_SIZE(value_len);
616                         char *val = (char *)header + min_offs - size;
617                         here->e_value_offs =
618                                 cpu_to_le16((char *)val - (char *)header);
619                         memset(val + size - EXT3_XATTR_PAD, 0,
620                                EXT3_XATTR_PAD); /* Clear the pad bytes. */
621                         memcpy(val, value, value_len);
622                 }
623         }
624
625 skip_replace:
626         if (IS_LAST_ENTRY(ENTRY(header+1))) {
627                 /* This block is now empty. */
                    /* Passing a NULL header leaves the inode without an attribute block. */
628                 if (bh && header == HDR(bh))
629                         unlock_buffer(bh);  /* we were modifying in-place. */
630                 error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
631         } else {
632                 ext3_xattr_rehash(header, here);
633                 if (bh && header == HDR(bh))
634                         unlock_buffer(bh);  /* we were modifying in-place. */
635                 error = ext3_xattr_set_handle2(handle, inode, bh, header);
636         }
637
638 cleanup:
639         brelse(bh);
            /* Free header only when it is not pointing into bh's data. */
640         if (!(bh && header == HDR(bh)))
641                 kfree(header);
642         up_write(&EXT3_I(inode)->xattr_sem);
643
644         return error;
645 }
646
647 /*
648  * Second half of ext3_xattr_set_handle(): Update the file system.
649  */
650 static int
651 ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
652                        struct buffer_head *old_bh,
653                        struct ext3_xattr_header *header)
654 {
655         struct super_block *sb = inode->i_sb;
656         struct buffer_head *new_bh = NULL;
657         int credits = 0, error;
658
659         if (header) {
660                 new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
661                 if (new_bh) {
662                         /* We found an identical block in the cache. */
663                         if (new_bh == old_bh)
664                                 ea_bdebug(new_bh, "keeping this block");
665                         else {
666                                 /* The old block is released after updating
667                                    the inode. */
668                                 ea_bdebug(new_bh, "reusing block");
669
670                                 error = -ENOSPC;
671                                 if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
672                                         goto cleanup;
673                                 error = -EDQUOT;
674                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
675                                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
676                                         unlock_buffer(new_bh);
677                                         journal_release_buffer(handle, new_bh,
678                                                                credits);
679                                         goto cleanup;
680                                 }
681                                 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
682                                         le32_to_cpu(HDR(new_bh)->h_refcount));
683                                 ea_bdebug(new_bh, "refcount now=%d",
684                                         le32_to_cpu(HDR(new_bh)->h_refcount));
685                         }
686                         unlock_buffer(new_bh);
687                 } else if (old_bh && header == HDR(old_bh)) {
688                         /* Keep this block. No need to lock the block as we
689                          * don't need to change the reference count. */
690                         new_bh = old_bh;
691                         get_bh(new_bh);
692                         ext3_xattr_cache_insert(new_bh);
693                 } else {
694                         /* We need to allocate a new block */
695                         int goal = le32_to_cpu(
696                                         EXT3_SB(sb)->s_es->s_first_data_block) +
697                                 EXT3_I(inode)->i_block_group *
698                                 EXT3_BLOCKS_PER_GROUP(sb);
699                         int block = ext3_new_block(handle, inode, goal, &error);
700                         if (error)
701                                 goto cleanup;
702                         ea_idebug(inode, "creating block %d", block);
703
704                         new_bh = sb_getblk(sb, block);
705                         if (!new_bh) {
706 getblk_failed:
707                                 ext3_free_blocks(handle, inode, block, 1);
708                                 error = -EIO;
709                                 goto cleanup;
710                         }
711                         lock_buffer(new_bh);
712                         error = ext3_journal_get_create_access(handle, new_bh);
713                         if (error) {
714                                 unlock_buffer(new_bh);
715                                 goto getblk_failed;
716                         }
717                         memcpy(new_bh->b_data, header, new_bh->b_size);
718                         set_buffer_uptodate(new_bh);
719                         unlock_buffer(new_bh);
720                         ext3_xattr_cache_insert(new_bh);
721
722                         ext3_xattr_update_super_block(handle, sb);
723                 }
724                 error = ext3_journal_dirty_metadata(handle, new_bh);
725                 if (error)
726                         goto cleanup;
727         }
728
729         /* Update the inode. */
730         EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
731         inode->i_ctime = CURRENT_TIME;
732         ext3_mark_inode_dirty(handle, inode);
733         if (IS_SYNC(inode))
734                 handle->h_sync = 1;
735
736         error = 0;
737         if (old_bh && old_bh != new_bh) {
738                 struct mb_cache_entry *ce;
739                 /*
740                  * If there was an old block, and we are no longer using it,
741                  * release the old block.
742                 */
743                 error = ext3_journal_get_write_access(handle, old_bh);
744                 if (error)
745                         goto cleanup;
746                 ce = mb_cache_entry_get(ext3_xattr_cache, old_bh->b_bdev,
747                                         old_bh->b_blocknr);
748                 lock_buffer(old_bh);
749                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
750                         /* Free the old block. */
751                         if (ce)
752                                 mb_cache_entry_free(ce);
753                         ea_bdebug(old_bh, "freeing");
754                         ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
755
756                         /* ext3_forget() calls bforget() for us, but we
757                            let our caller release old_bh, so we need to
758                            duplicate the handle before. */
759                         get_bh(old_bh);
760                         ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
761                 } else {
762                         /* Decrement the refcount only. */
763                         if (ce)
764                                 mb_cache_entry_release(ce);
765                         HDR(old_bh)->h_refcount = cpu_to_le32(
766                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
767                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
768                         DQUOT_FREE_BLOCK(inode, 1);
769                         ext3_journal_dirty_metadata(handle, old_bh);
770                         ea_bdebug(old_bh, "refcount now=%d",
771                                 le32_to_cpu(HDR(old_bh)->h_refcount));
772                 }
773                 unlock_buffer(old_bh);
774         }
775
776 cleanup:
777         brelse(new_bh);
778
779         return error;
780 }
781
782 /*
783  * ext3_xattr_set()
784  *
785  * Like ext3_xattr_set_handle, but start from an inode. This extended
786  * attribute modification is a filesystem transaction by itself.
787  *
788  * Returns 0, or a negative error number on failure.
789  */
790 int
791 ext3_xattr_set(struct inode *inode, int name_index, const char *name,
792                const void *value, size_t value_len, int flags)
793 {
794         handle_t *handle;
795         int error, retries = 0;
796
797 retry:
798         handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
799         if (IS_ERR(handle)) {
800                 error = PTR_ERR(handle);
801         } else {
802                 int error2;
803
804                 error = ext3_xattr_set_handle(handle, inode, name_index, name,
805                                               value, value_len, flags);
806                 error2 = ext3_journal_stop(handle);
807                 if (error == -ENOSPC &&
808                     ext3_should_retry_alloc(inode->i_sb, &retries))
809                         goto retry;
810                 if (error == 0)
811                         error = error2;
812         }
813
814         return error;
815 }
816
817 /*
818  * ext3_xattr_delete_inode()
819  *
820  * Free extended attribute resources associated with this inode. This
821  * is called immediately before an inode is freed.
822  */
823 void
824 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
825 {
826         struct buffer_head *bh = NULL;
827         struct mb_cache_entry *ce;
828
829         down_write(&EXT3_I(inode)->xattr_sem);
830         if (!EXT3_I(inode)->i_file_acl)
831                 goto cleanup;
832         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
833         if (!bh) {
834                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
835                         "inode %ld: block %d read error", inode->i_ino,
836                         EXT3_I(inode)->i_file_acl);
837                 goto cleanup;
838         }
839         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
840             HDR(bh)->h_blocks != cpu_to_le32(1)) {
841                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
842                         "inode %ld: bad block %d", inode->i_ino,
843                         EXT3_I(inode)->i_file_acl);
844                 goto cleanup;
845         }
846         if (ext3_journal_get_write_access(handle, bh) != 0)
847                 goto cleanup;
848         ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
849                                 bh->b_blocknr);
850         lock_buffer(bh);
851         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
852                 if (ce)
853                         mb_cache_entry_free(ce);
854                 ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
855                 get_bh(bh);
856                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
857         } else {
858                 if (ce)
859                         mb_cache_entry_release(ce);
860                 HDR(bh)->h_refcount = cpu_to_le32(
861                         le32_to_cpu(HDR(bh)->h_refcount) - 1);
862                 ext3_journal_dirty_metadata(handle, bh);
863                 if (IS_SYNC(inode))
864                         handle->h_sync = 1;
865                 DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1);
866                 DQUOT_FREE_BLOCK(inode, 1);
867         }
868         ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
869         unlock_buffer(bh);
870         EXT3_I(inode)->i_file_acl = 0;
871
872 cleanup:
873         brelse(bh);
874         up_write(&EXT3_I(inode)->xattr_sem);
875 }
876
877 /*
878  * ext3_xattr_put_super()
879  *
880  * This is called when a file system is unmounted.
881  */
882 void
883 ext3_xattr_put_super(struct super_block *sb)
884 {
885         mb_cache_shrink(ext3_xattr_cache, sb->s_bdev);
886 }
887
888 /*
889  * ext3_xattr_cache_insert()
890  *
891  * Create a new entry in the extended attribute cache, and insert
892  * it unless such an entry is already in the cache.
893  *
894  * Returns 0, or a negative error number on failure.
895  */
896 static int
897 ext3_xattr_cache_insert(struct buffer_head *bh)
898 {
899         __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
900         struct mb_cache_entry *ce;
901         int error;
902
903         ce = mb_cache_entry_alloc(ext3_xattr_cache);
904         if (!ce)
905                 return -ENOMEM;
906         error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
907         if (error) {
908                 mb_cache_entry_free(ce);
909                 if (error == -EBUSY) {
910                         ea_bdebug(bh, "already in cache (%d cache entries)",
911                                 atomic_read(&ext3_xattr_cache->c_entry_count));
912                         error = 0;
913                 }
914         } else {
915                 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
916                           atomic_read(&ext3_xattr_cache->c_entry_count));
917                 mb_cache_entry_release(ce);
918         }
919         return error;
920 }
921
922 /*
923  * ext3_xattr_cmp()
924  *
925  * Compare two extended attribute blocks for equality.
926  *
927  * Returns 0 if the blocks are equal, 1 if they differ, and
928  * a negative error number on errors.
929  */
930 static int
931 ext3_xattr_cmp(struct ext3_xattr_header *header1,
932                struct ext3_xattr_header *header2)
933 {
934         struct ext3_xattr_entry *entry1, *entry2;
935
936         entry1 = ENTRY(header1+1);
937         entry2 = ENTRY(header2+1);
938         while (!IS_LAST_ENTRY(entry1)) {
939                 if (IS_LAST_ENTRY(entry2))
940                         return 1;
941                 if (entry1->e_hash != entry2->e_hash ||
942                     entry1->e_name_len != entry2->e_name_len ||
943                     entry1->e_value_size != entry2->e_value_size ||
944                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
945                         return 1;
946                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
947                         return -EIO;
948                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
949                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
950                            le32_to_cpu(entry1->e_value_size)))
951                         return 1;
952
953                 entry1 = EXT3_XATTR_NEXT(entry1);
954                 entry2 = EXT3_XATTR_NEXT(entry2);
955         }
956         if (!IS_LAST_ENTRY(entry2))
957                 return 1;
958         return 0;
959 }
960
961 /*
962  * ext3_xattr_cache_find()
963  *
964  * Find an identical extended attribute block.
965  *
966  * Returns a pointer to the block found, or NULL if such a block was
967  * not found or an error occurred.
968  */
969 static struct buffer_head *
970 ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
971                       struct ext3_xattr_header *header, int *credits)
972 {
973         __u32 hash = le32_to_cpu(header->h_hash);
974         struct mb_cache_entry *ce;
975
976         if (!header->h_hash)
977                 return NULL;  /* never share */
978         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
979 again:
980         ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
981                                        inode->i_sb->s_bdev, hash);
982         while (ce) {
983                 struct buffer_head *bh;
984
985                 if (IS_ERR(ce)) {
986                         if (PTR_ERR(ce) == -EAGAIN)
987                                 goto again;
988                         break;
989                 }
990
991                 bh = sb_bread(inode->i_sb, ce->e_block);
992                 if (!bh) {
993                         ext3_error(inode->i_sb, "ext3_xattr_cache_find",
994                                 "inode %ld: block %ld read error",
995                                 inode->i_ino, (unsigned long) ce->e_block);
996                 } else if (ext3_journal_get_write_access_credits(
997                                 handle, bh, credits) == 0) {
998                         /* ext3_journal_get_write_access() requires an unlocked
999                          * bh, which complicates things here. */
1000                         lock_buffer(bh);
1001                         if (le32_to_cpu(HDR(bh)->h_refcount) >
1002                                    EXT3_XATTR_REFCOUNT_MAX) {
1003                                 ea_idebug(inode, "block %ld refcount %d>%d",
1004                                           (unsigned long) ce->e_block,
1005                                           le32_to_cpu(HDR(bh)->h_refcount),
1006                                           EXT3_XATTR_REFCOUNT_MAX);
1007                         } else if (!ext3_xattr_cmp(header, HDR(bh))) {
1008                                 mb_cache_entry_release(ce);
1009                                 /* buffer will be unlocked by caller */
1010                                 return bh;
1011                         }
1012                         unlock_buffer(bh);
1013                         journal_release_buffer(handle, bh, *credits);
1014                         *credits = 0;
1015                         brelse(bh);
1016                 }
1017                 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
1018         }
1019         return NULL;
1020 }
1021
1022 #define NAME_HASH_SHIFT 5
1023 #define VALUE_HASH_SHIFT 16
1024
1025 /*
1026  * ext3_xattr_hash_entry()
1027  *
1028  * Compute the hash of an extended attribute.
1029  */
1030 static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
1031                                          struct ext3_xattr_entry *entry)
1032 {
1033         __u32 hash = 0;
1034         char *name = entry->e_name;
1035         int n;
1036
1037         for (n=0; n < entry->e_name_len; n++) {
1038                 hash = (hash << NAME_HASH_SHIFT) ^
1039                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1040                        *name++;
1041         }
1042
1043         if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1044                 __le32 *value = (__le32 *)((char *)header +
1045                         le16_to_cpu(entry->e_value_offs));
1046                 for (n = (le32_to_cpu(entry->e_value_size) +
1047                      EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
1048                         hash = (hash << VALUE_HASH_SHIFT) ^
1049                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1050                                le32_to_cpu(*value++);
1051                 }
1052         }
1053         entry->e_hash = cpu_to_le32(hash);
1054 }
1055
1056 #undef NAME_HASH_SHIFT
1057 #undef VALUE_HASH_SHIFT
1058
1059 #define BLOCK_HASH_SHIFT 16
1060
1061 /*
1062  * ext3_xattr_rehash()
1063  *
1064  * Re-compute the extended attribute hash value after an entry has changed.
1065  */
1066 static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1067                               struct ext3_xattr_entry *entry)
1068 {
1069         struct ext3_xattr_entry *here;
1070         __u32 hash = 0;
1071
1072         ext3_xattr_hash_entry(header, entry);
1073         here = ENTRY(header+1);
1074         while (!IS_LAST_ENTRY(here)) {
1075                 if (!here->e_hash) {
1076                         /* Block is not shared if an entry's hash value == 0 */
1077                         hash = 0;
1078                         break;
1079                 }
1080                 hash = (hash << BLOCK_HASH_SHIFT) ^
1081                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1082                        le32_to_cpu(here->e_hash);
1083                 here = EXT3_XATTR_NEXT(here);
1084         }
1085         header->h_hash = cpu_to_le32(hash);
1086 }
1087
1088 #undef BLOCK_HASH_SHIFT
1089
1090 int __init
1091 init_ext3_xattr(void)
1092 {
1093         ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
1094                 sizeof(struct mb_cache_entry) +
1095                 sizeof(struct mb_cache_entry_index), 1, 6);
1096         if (!ext3_xattr_cache)
1097                 return -ENOMEM;
1098         return 0;
1099 }
1100
1101 void
1102 exit_ext3_xattr(void)
1103 {
1104         if (ext3_xattr_cache)
1105                 mb_cache_destroy(ext3_xattr_cache);
1106         ext3_xattr_cache = NULL;
1107 }