ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / fs / ext3 / xattr.c
1 /*
2  * linux/fs/ext3/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8  * Extended attributes for symlinks and special files added per
9  *  suggestion of Luka Renko <luka.renko@hermes.si>.
10  */
11
12 /*
13  * Extended attributes are stored on disk blocks allocated outside of
14  * any inode. The i_file_acl field is then made to point to this allocated
15  * block. If all extended attributes of an inode are identical, these
16  * inodes may share the same extended attribute block. Such situations
17  * are automatically detected by keeping a cache of recent attribute block
18  * numbers and hashes over the block's contents in memory.
19  *
20  *
21  * Extended attribute block layout:
22  *
23  *   +------------------+
24  *   | header           |
25  *   | entry 1          | |
26  *   | entry 2          | | growing downwards
27  *   | entry 3          | v
28  *   | four null bytes  |
29  *   | . . .            |
30  *   | value 1          | ^
31  *   | value 3          | | growing upwards
32  *   | value 2          | |
33  *   +------------------+
34  *
35  * The block header is followed by multiple entry descriptors. These entry
36  * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
37  * byte boundaries. The entry descriptors are sorted by attribute name,
38  * so that two extended attribute blocks can be compared efficiently.
39  *
40  * Attribute values are aligned to the end of the block, stored in
41  * no specific order. They are also padded to EXT3_XATTR_PAD byte
42  * boundaries. No additional gaps are left between them.
43  *
44  * Locking strategy
45  * ----------------
46  * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
47  * EA blocks are only changed if they are exclusive to an inode, so
48  * holding xattr_sem also means that nothing but the EA block's reference
49  * count will change. Multiple writers to an EA block are synchronized
50  * by the bh lock. No more than a single bh lock is held at any time
51  * to avoid deadlocks.
52  */
53
54 #include <linux/init.h>
55 #include <linux/fs.h>
56 #include <linux/slab.h>
57 #include <linux/ext3_jbd.h>
58 #include <linux/ext3_fs.h>
59 #include <linux/mbcache.h>
60 #include <linux/quotaops.h>
61 #include <linux/rwsem.h>
62 #include "xattr.h"
63 #include "acl.h"
64
/* Interpret the start of a buffer_head's data as an xattr block header. */
65 #define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
/* Cast an arbitrary pointer to an xattr entry descriptor pointer. */
66 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
/* First entry descriptor: it starts immediately after the block header. */
67 #define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
/* True when the 32-bit word at 'entry' is zero, i.e. the list terminator. */
68 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
69
/*
 * Debug tracing helpers. They only produce output when EXT3_XATTR_DEBUG
 * is defined; otherwise both macros expand to nothing.
 */
70 #ifdef EXT3_XATTR_DEBUG
/*
 * ea_idebug(inode, fmt, ...): print a KERN_DEBUG message prefixed with
 * "inode <s_id>:<i_ino>: " and terminated by a newline.
 */
71 # define ea_idebug(inode, f...) do { \
72                 printk(KERN_DEBUG "inode %s:%ld: ", \
73                         inode->i_sb->s_id, inode->i_ino); \
74                 printk(f); \
75                 printk("\n"); \
76         } while (0)
/*
 * ea_bdebug(bh, fmt, ...): print a KERN_DEBUG message prefixed with
 * "block <device name>:<block number>: " and terminated by a newline.
 */
77 # define ea_bdebug(bh, f...) do { \
78                 char b[BDEVNAME_SIZE]; \
79                 printk(KERN_DEBUG "block %s:%lu: ", \
80                         bdevname(bh->b_bdev, b), \
81                         (unsigned long) bh->b_blocknr); \
82                 printk(f); \
83                 printk("\n"); \
84         } while (0)
85 #else
/* Non-debug build: the arguments are discarded without being evaluated. */
86 # define ea_idebug(f...)
87 # define ea_bdebug(f...)
88 #endif
89
/* Second half of ext3_xattr_set_handle(); defined further down. */
90 static int ext3_xattr_set_handle2(handle_t *, struct inode *,
91                                   struct buffer_head *,
92                                   struct ext3_xattr_header *);
93
/*
 * Helpers for the cache of attribute blocks and for entry hashing.
 * NOTE(review): their definitions are not in this part of the file;
 * the descriptions are inferred from the names and the call sites.
 */
94 static int ext3_xattr_cache_insert(struct buffer_head *);
95 static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
96                                                  struct ext3_xattr_header *,
97                                                  int *);
98 static void ext3_xattr_cache_remove(struct buffer_head *);
99 static void ext3_xattr_rehash(struct ext3_xattr_header *,
100                               struct ext3_xattr_entry *);
101
/* Cache of attribute blocks used to detect blocks that can be shared. */
102 static struct mb_cache *ext3_xattr_cache;
/*
 * Registered handlers; the handler for name index N lives in slot N-1.
 * All accesses are made under ext3_handler_lock.
 */
103 static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
104 static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
105
106 int
107 ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
108 {
109         int error = -EINVAL;
110
111         if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
112                 write_lock(&ext3_handler_lock);
113                 if (!ext3_xattr_handlers[name_index-1]) {
114                         ext3_xattr_handlers[name_index-1] = handler;
115                         error = 0;
116                 }
117                 write_unlock(&ext3_handler_lock);
118         }
119         return error;
120 }
121
122 void
123 ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
124 {
125         if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) {
126                 write_lock(&ext3_handler_lock);
127                 ext3_xattr_handlers[name_index-1] = NULL;
128                 write_unlock(&ext3_handler_lock);
129         }
130 }
131
/*
 * Test whether string @a begins with @a_prefix.
 *
 * Returns a pointer to the first character of @a after the prefix when
 * it matches (this is @a itself for an empty prefix), or NULL when @a
 * does not start with @a_prefix.
 */
static inline const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
        for (; *a_prefix != '\0'; a++, a_prefix++) {
                /* Also catches @a ending before the prefix does. */
                if (*a != *a_prefix)
                        return NULL;
        }
        return a;
}
141
142 /*
143  * Decode the extended attribute name, and translate it into
144  * the name_index and name suffix.
145  */
146 static inline struct ext3_xattr_handler *
147 ext3_xattr_resolve_name(const char **name)
148 {
149         struct ext3_xattr_handler *handler = NULL;
150         int i;
151
152         if (!*name)
153                 return NULL;
154         read_lock(&ext3_handler_lock);
155         for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
156                 if (ext3_xattr_handlers[i]) {
157                         const char *n = strcmp_prefix(*name,
158                                 ext3_xattr_handlers[i]->prefix);
159                         if (n) {
160                                 handler = ext3_xattr_handlers[i];
161                                 *name = n;
162                                 break;
163                         }
164                 }
165         }
166         read_unlock(&ext3_handler_lock);
167         return handler;
168 }
169
170 static inline struct ext3_xattr_handler *
171 ext3_xattr_handler(int name_index)
172 {
173         struct ext3_xattr_handler *handler = NULL;
174         if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
175                 read_lock(&ext3_handler_lock);
176                 handler = ext3_xattr_handlers[name_index-1];
177                 read_unlock(&ext3_handler_lock);
178         }
179         return handler;
180 }
181
182 /*
183  * Inode operation getxattr()
184  *
185  * dentry->d_inode->i_sem: don't care
186  */
187 ssize_t
188 ext3_getxattr(struct dentry *dentry, const char *name,
189               void *buffer, size_t size)
190 {
191         struct ext3_xattr_handler *handler;
192         struct inode *inode = dentry->d_inode;
193
194         handler = ext3_xattr_resolve_name(&name);
195         if (!handler)
196                 return -EOPNOTSUPP;
197         return handler->get(inode, name, buffer, size);
198 }
199
200 /*
201  * Inode operation listxattr()
202  *
203  * dentry->d_inode->i_sem: don't care
204  */
205 ssize_t
206 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
207 {
208         return ext3_xattr_list(dentry->d_inode, buffer, size);
209 }
210
211 /*
212  * Inode operation setxattr()
213  *
214  * dentry->d_inode->i_sem: down
215  */
216 int
217 ext3_setxattr(struct dentry *dentry, const char *name,
218               const void *value, size_t size, int flags)
219 {
220         struct ext3_xattr_handler *handler;
221         struct inode *inode = dentry->d_inode;
222
223         if (size == 0)
224                 value = "";  /* empty EA, do not remove */
225         handler = ext3_xattr_resolve_name(&name);
226         if (!handler)
227                 return -EOPNOTSUPP;
228         return handler->set(inode, name, value, size, flags);
229 }
230
231 /*
232  * Inode operation removexattr()
233  *
234  * dentry->d_inode->i_sem: down
235  */
236 int
237 ext3_removexattr(struct dentry *dentry, const char *name)
238 {
239         struct ext3_xattr_handler *handler;
240         struct inode *inode = dentry->d_inode;
241
242         handler = ext3_xattr_resolve_name(&name);
243         if (!handler)
244                 return -EOPNOTSUPP;
245         return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
246 }
247
248 /*
249  * ext3_xattr_get()
250  *
251  * Copy an extended attribute into the buffer
252  * provided, or compute the buffer size required.
253  * Buffer is NULL to compute the size of the buffer required.
254  *
255  * Returns a negative error number on failure, or the number of bytes
256  * used / required on success.
257  */
258 int
259 ext3_xattr_get(struct inode *inode, int name_index, const char *name,
260                void *buffer, size_t buffer_size)
261 {
262         struct buffer_head *bh = NULL;
263         struct ext3_xattr_entry *entry;
264         size_t name_len, size;
265         char *end;
266         int error;
267
268         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
269                   name_index, name, buffer, (long)buffer_size);
270
271         if (name == NULL)
272                 return -EINVAL;
273         down_read(&EXT3_I(inode)->xattr_sem);
274         error = -ENODATA;
275         if (!EXT3_I(inode)->i_file_acl)
276                 goto cleanup;
277         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
278         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
279         error = -EIO;
280         if (!bh)
281                 goto cleanup;
282         ea_bdebug(bh, "b_count=%d, refcount=%d",
283                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
284         end = bh->b_data + bh->b_size;
285         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
286             HDR(bh)->h_blocks != cpu_to_le32(1)) {
287 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_get",
288                         "inode %ld: bad block %d", inode->i_ino,
289                         EXT3_I(inode)->i_file_acl);
290                 error = -EIO;
291                 goto cleanup;
292         }
293         /* find named attribute */
294         name_len = strlen(name);
295
296         error = -ERANGE;
297         if (name_len > 255)
298                 goto cleanup;
299         entry = FIRST_ENTRY(bh);
300         while (!IS_LAST_ENTRY(entry)) {
301                 struct ext3_xattr_entry *next =
302                         EXT3_XATTR_NEXT(entry);
303                 if ((char *)next >= end)
304                         goto bad_block;
305                 if (name_index == entry->e_name_index &&
306                     name_len == entry->e_name_len &&
307                     memcmp(name, entry->e_name, name_len) == 0)
308                         goto found;
309                 entry = next;
310         }
311         /* Check the remaining name entries */
312         while (!IS_LAST_ENTRY(entry)) {
313                 struct ext3_xattr_entry *next =
314                         EXT3_XATTR_NEXT(entry);
315                 if ((char *)next >= end)
316                         goto bad_block;
317                 entry = next;
318         }
319         if (ext3_xattr_cache_insert(bh))
320                 ea_idebug(inode, "cache insert failed");
321         error = -ENODATA;
322         goto cleanup;
323 found:
324         /* check the buffer size */
325         if (entry->e_value_block != 0)
326                 goto bad_block;
327         size = le32_to_cpu(entry->e_value_size);
328         if (size > inode->i_sb->s_blocksize ||
329             le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
330                 goto bad_block;
331
332         if (ext3_xattr_cache_insert(bh))
333                 ea_idebug(inode, "cache insert failed");
334         if (buffer) {
335                 error = -ERANGE;
336                 if (size > buffer_size)
337                         goto cleanup;
338                 /* return value of attribute */
339                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
340                         size);
341         }
342         error = size;
343
344 cleanup:
345         brelse(bh);
346         up_read(&EXT3_I(inode)->xattr_sem);
347
348         return error;
349 }
350
351 /*
352  * ext3_xattr_list()
353  *
354  * Copy a list of attribute names into the buffer
355  * provided, or compute the buffer size required.
356  * Buffer is NULL to compute the size of the buffer required.
357  *
358  * Returns a negative error number on failure, or the number of bytes
359  * used / required on success.
360  */
361 int
362 ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
363 {
364         struct buffer_head *bh = NULL;
365         struct ext3_xattr_entry *entry;
366         size_t size = 0;
367         char *buf, *end;
368         int error;
369
370         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
371                   buffer, (long)buffer_size);
372
373         down_read(&EXT3_I(inode)->xattr_sem);
374         error = 0;
375         if (!EXT3_I(inode)->i_file_acl)
376                 goto cleanup;
377         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
378         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
379         error = -EIO;
380         if (!bh)
381                 goto cleanup;
382         ea_bdebug(bh, "b_count=%d, refcount=%d",
383                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
384         end = bh->b_data + bh->b_size;
385         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
386             HDR(bh)->h_blocks != cpu_to_le32(1)) {
387 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_list",
388                         "inode %ld: bad block %d", inode->i_ino,
389                         EXT3_I(inode)->i_file_acl);
390                 error = -EIO;
391                 goto cleanup;
392         }
393         /* compute the size required for the list of attribute names */
394         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
395              entry = EXT3_XATTR_NEXT(entry)) {
396                 struct ext3_xattr_handler *handler;
397                 struct ext3_xattr_entry *next =
398                         EXT3_XATTR_NEXT(entry);
399                 if ((char *)next >= end)
400                         goto bad_block;
401
402                 handler = ext3_xattr_handler(entry->e_name_index);
403                 if (handler)
404                         size += handler->list(NULL, inode, entry->e_name,
405                                               entry->e_name_len);
406         }
407
408         if (ext3_xattr_cache_insert(bh))
409                 ea_idebug(inode, "cache insert failed");
410         if (!buffer) {
411                 error = size;
412                 goto cleanup;
413         } else {
414                 error = -ERANGE;
415                 if (size > buffer_size)
416                         goto cleanup;
417         }
418
419         /* list the attribute names */
420         buf = buffer;
421         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
422              entry = EXT3_XATTR_NEXT(entry)) {
423                 struct ext3_xattr_handler *handler;
424
425                 handler = ext3_xattr_handler(entry->e_name_index);
426                 if (handler)
427                         buf += handler->list(buf, inode, entry->e_name,
428                                              entry->e_name_len);
429         }
430         error = size;
431
432 cleanup:
433         brelse(bh);
434         up_read(&EXT3_I(inode)->xattr_sem);
435
436         return error;
437 }
438
439 /*
440  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
441  * not set, set it.
442  */
443 static void ext3_xattr_update_super_block(handle_t *handle,
444                                           struct super_block *sb)
445 {
446         if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
447                 return;
448
449         lock_super(sb);
450         if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
451                 EXT3_SB(sb)->s_es->s_feature_compat |=
452                         cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
453                 sb->s_dirt = 1;
454                 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
455         }
456         unlock_super(sb);
457 }
458
459 /*
460  * ext3_xattr_set_handle()
461  *
462  * Create, replace or remove an extended attribute for this inode. Buffer
463  * is NULL to remove an existing extended attribute, and non-NULL to
464  * either replace an existing extended attribute, or create a new extended
465  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
466  * specify that an extended attribute must exist and must not exist
467  * previous to the call, respectively.
468  *
469  * Returns 0, or a negative error number on failure.
470  */
471 int
472 ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
473                       const char *name, const void *value, size_t value_len,
474                       int flags)
475 {
476         struct super_block *sb = inode->i_sb;
477         struct buffer_head *bh = NULL;
478         struct ext3_xattr_header *header = NULL;
479         struct ext3_xattr_entry *here, *last;
480         size_t name_len, free, min_offs = sb->s_blocksize;
481         int not_found = 1, error;
482         char *end;
483
484         /*
485          * header -- Points either into bh, or to a temporarily
486          *           allocated buffer.
487          * here -- The named entry found, or the place for inserting, within
488          *         the block pointed to by header.
489          * last -- Points right after the last named entry within the block
490          *         pointed to by header.
491          * min_offs -- The offset of the first value (values are aligned
492          *             towards the end of the block).
493          * end -- Points right after the block pointed to by header.
494          */
495
496         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
497                   name_index, name, value, (long)value_len);
498
            /* Argument and permission checks; nothing is locked yet, so
               these paths return directly. */
499         if (IS_RDONLY(inode))
500                 return -EROFS;
501         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
502                 return -EPERM;
            /* A NULL value requests removal; its length is meaningless. */
503         if (value == NULL)
504                 value_len = 0;
505         if (name == NULL)
506                 return -EINVAL;
507         name_len = strlen(name);
508         if (name_len > 255 || value_len > sb->s_blocksize)
509                 return -ERANGE;
            /* xattr_sem is held for writing until the cleanup label. */
510         down_write(&EXT3_I(inode)->xattr_sem);
511         if (EXT3_I(inode)->i_file_acl) {
512                 /* The inode already has an extended attribute block. */
513                 bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
514                 error = -EIO;
515                 if (!bh)
516                         goto cleanup;
517                 ea_bdebug(bh, "b_count=%d, refcount=%d",
518                         atomic_read(&(bh->b_count)),
519                         le32_to_cpu(HDR(bh)->h_refcount));
520                 header = HDR(bh);
521                 end = bh->b_data + bh->b_size;
522                 if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
523                     header->h_blocks != cpu_to_le32(1)) {
                            /* Also the target of the later consistency
                               checks that jump to bad_block. */
524 bad_block:              ext3_error(sb, "ext3_xattr_set",
525                                 "inode %ld: bad block %d", inode->i_ino,
526                                 EXT3_I(inode)->i_file_acl);
527                         error = -EIO;
528                         goto cleanup;
529                 }
530                 /* Find the named attribute. */
                    /*
                     * Entries are compared by name index, then name length,
                     * then name bytes. The walk stops at the first entry
                     * that equals the requested name (not_found == 0) or
                     * that the requested name sorts before (not_found < 0),
                     * which is then the insertion point.
                     */
531                 here = FIRST_ENTRY(bh);
532                 while (!IS_LAST_ENTRY(here)) {
533                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
534                         if ((char *)next >= end)
535                                 goto bad_block;
                            /* Track the lowest offset of any in-block value. */
536                         if (!here->e_value_block && here->e_value_size) {
537                                 size_t offs = le16_to_cpu(here->e_value_offs);
538                                 if (offs < min_offs)
539                                         min_offs = offs;
540                         }
541                         not_found = name_index - here->e_name_index;
542                         if (!not_found)
543                                 not_found = name_len - here->e_name_len;
544                         if (!not_found)
545                                 not_found = memcmp(name, here->e_name,name_len);
546                         if (not_found <= 0)
547                                 break;
548                         here = next;
549                 }
550                 last = here;
551                 /* We still need to compute min_offs and last. */
552                 while (!IS_LAST_ENTRY(last)) {
553                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
554                         if ((char *)next >= end)
555                                 goto bad_block;
556                         if (!last->e_value_block && last->e_value_size) {
557                                 size_t offs = le16_to_cpu(last->e_value_offs);
558                                 if (offs < min_offs)
559                                         min_offs = offs;
560                         }
561                         last = next;
562                 }
563
564                 /* Check whether we have enough space left. */
                    /* Gap between the end of the entry table (including its
                       four-byte terminator) and the lowest value. */
565                 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
566         } else {
567                 /* We will use a new extended attribute block. */
568                 free = sb->s_blocksize -
569                         sizeof(struct ext3_xattr_header) - sizeof(__u32);
570                 here = last = NULL;  /* avoid gcc uninitialized warning. */
571         }
572
573         if (not_found) {
574                 /* Request to remove a nonexistent attribute? */
575                 error = -ENODATA;
576                 if (flags & XATTR_REPLACE)
577                         goto cleanup;
                    /* Removing an attribute that does not exist, without
                       XATTR_REPLACE, succeeds without doing anything. */
578                 error = 0;
579                 if (value == NULL)
580                         goto cleanup;
581         } else {
582                 /* Request to create an existing attribute? */
583                 error = -EEXIST;
584                 if (flags & XATTR_CREATE)
585                         goto cleanup;
                    /* The space used by the existing value and entry will be
                       released, so count it as free after validating it. */
586                 if (!here->e_value_block && here->e_value_size) {
587                         size_t size = le32_to_cpu(here->e_value_size);
588
589                         if (le16_to_cpu(here->e_value_offs) + size > 
590                             sb->s_blocksize || size > sb->s_blocksize)
591                                 goto bad_block;
592                         free += EXT3_XATTR_SIZE(size);
593                 }
594                 free += EXT3_XATTR_LEN(name_len);
595         }
596         error = -ENOSPC;
597         if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
598                 goto cleanup;
599
600         /* Here we know that we can set the new attribute. */
601
602         if (header) {
603                 int credits = 0;
604
                    /*
                     * A block whose reference count is 1 is modified in
                     * place; otherwise a private copy is built. The count
                     * is re-checked after the buffer has been locked.
                     */
605                 /* assert(header == HDR(bh)); */
606                 if (header->h_refcount != cpu_to_le32(1))
607                         goto skip_get_write_access;
608                 /* ext3_journal_get_write_access() requires an unlocked bh,
609                    which complicates things here. */
610                 error = ext3_journal_get_write_access_credits(handle, bh,
611                                                               &credits);
612                 if (error)
613                         goto cleanup;
614                 lock_buffer(bh);
615                 if (header->h_refcount == cpu_to_le32(1)) {
616                         ea_bdebug(bh, "modifying in-place");
617                         ext3_xattr_cache_remove(bh);
618                         /* keep the buffer locked while modifying it. */
619                 } else {
620                         int offset;
621
                            /* The count changed before the lock was taken:
                               give the journal access back and clone. */
622                         unlock_buffer(bh);
623                         journal_release_buffer(handle, bh, credits);
624                 skip_get_write_access:
625                         ea_bdebug(bh, "cloning");
626                         header = kmalloc(bh->b_size, GFP_KERNEL);
627                         error = -ENOMEM;
628                         if (header == NULL)
629                                 goto cleanup;
630                         memcpy(header, HDR(bh), bh->b_size);
631                         header->h_refcount = cpu_to_le32(1);
                            /* Re-point here and last into the copy, keeping
                               their offsets within the block. */
632                         offset = (char *)here - bh->b_data;
633                         here = ENTRY((char *)header + offset);
634                         offset = (char *)last - bh->b_data;
635                         last = ENTRY((char *)header + offset);
636                 }
637         } else {
638                 /* Allocate a buffer where we construct the new block. */
639                 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
640                 error = -ENOMEM;
641                 if (header == NULL)
642                         goto cleanup;
643                 memset(header, 0, sb->s_blocksize);
644                 end = (char *)header + sb->s_blocksize;
645                 header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
646                 header->h_blocks = header->h_refcount = cpu_to_le32(1);
647                 last = here = ENTRY(header+1);
648         }
649
650         /* Iff we are modifying the block in-place, bh is locked here. */
651
652         if (not_found) {
653                 /* Insert the new name. */
                    /* Shift the entries from 'here' onwards to open a slot
                       of 'size' bytes, then fill in the new entry. */
654                 size_t size = EXT3_XATTR_LEN(name_len);
655                 size_t rest = (char *)last - (char *)here;
656                 memmove((char *)here + size, here, rest);
657                 memset(here, 0, size);
658                 here->e_name_index = name_index;
659                 here->e_name_len = name_len;
660                 memcpy(here->e_name, name, name_len);
661         } else {
662                 if (!here->e_value_block && here->e_value_size) {
663                         char *first_val = (char *)header + min_offs;
664                         size_t offs = le16_to_cpu(here->e_value_offs);
665                         char *val = (char *)header + offs;
666                         size_t size = EXT3_XATTR_SIZE(
667                                 le32_to_cpu(here->e_value_size));
668
669                         if (size == EXT3_XATTR_SIZE(value_len)) {
670                                 /* The old and the new value have the same
671                                    size. Just replace. */
672                                 here->e_value_size = cpu_to_le32(value_len);
673                                 memset(val + size - EXT3_XATTR_PAD, 0,
674                                        EXT3_XATTR_PAD); /* Clear pad bytes. */
675                                 memcpy(val, value, value_len);
676                                 goto skip_replace;
677                         }
678
679                         /* Remove the old value. */
                            /* The values stored below the old one move up by
                               'size' bytes, closing the gap it leaves. */
680                         memmove(first_val + size, first_val, val - first_val);
681                         memset(first_val, 0, size);
682                         here->e_value_offs = 0;
683                         min_offs += size;
684
685                         /* Adjust all value offsets. */
                            /* Only values that were stored below the removed
                               one (o < offs) have moved. */
686                         last = ENTRY(header+1);
687                         while (!IS_LAST_ENTRY(last)) {
688                                 size_t o = le16_to_cpu(last->e_value_offs);
689                                 if (!last->e_value_block && o < offs)
690                                         last->e_value_offs =
691                                                 cpu_to_le16(o + size);
692                                 last = EXT3_XATTR_NEXT(last);
693                         }
694                 }
695                 if (value == NULL) {
696                         /* Remove the old name. */
697                         size_t size = EXT3_XATTR_LEN(name_len);
698                         last = ENTRY((char *)last - size);
699                         memmove(here, (char*)here + size,
700                                 (char*)last - (char*)here);
701                         memset(last, 0, size);
702                 }
703         }
704
705         if (value != NULL) {
706                 /* Insert the new value. */
                    /* A non-empty value is placed directly below the lowest
                       existing value, i.e. it ends at min_offs. */
707                 here->e_value_size = cpu_to_le32(value_len);
708                 if (value_len) {
709                         size_t size = EXT3_XATTR_SIZE(value_len);
710                         char *val = (char *)header + min_offs - size;
711                         here->e_value_offs =
712                                 cpu_to_le16((char *)val - (char *)header);
713                         memset(val + size - EXT3_XATTR_PAD, 0,
714                                EXT3_XATTR_PAD); /* Clear the pad bytes. */
715                         memcpy(val, value, value_len);
716                 }
717         }
718
719 skip_replace:
            /* Hand the result to ext3_xattr_set_handle2(): a NULL header when
               no entries remain, the (rehashed) header otherwise. */
720         if (IS_LAST_ENTRY(ENTRY(header+1))) {
721                 /* This block is now empty. */
722                 if (bh && header == HDR(bh))
723                         unlock_buffer(bh);  /* we were modifying in-place. */
724                 error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
725         } else {
726                 ext3_xattr_rehash(header, here);
727                 if (bh && header == HDR(bh))
728                         unlock_buffer(bh);  /* we were modifying in-place. */
729                 error = ext3_xattr_set_handle2(handle, inode, bh, header);
730         }
731
732 cleanup:
733         brelse(bh);
            /* header is only freed when it is a temporary buffer, not when
               it points into bh. */
734         if (!(bh && header == HDR(bh)))
735                 kfree(header);
736         up_write(&EXT3_I(inode)->xattr_sem);
737
738         return error;
739 }
740
741 /*
742  * Second half of ext3_xattr_set_handle(): Update the file system.
743  */
    /*
     * Makes the inode's i_file_acl refer to a block holding the attribute
     * image in @header, then drops the inode's claim on @old_bh if that
     * block is no longer the one in use.
     *
     * @handle: journal handle passed to every journalling/allocation call.
     * @inode:  inode whose i_file_acl and i_ctime are updated.
     * @old_bh: buffer of the block the inode used before the call, or NULL.
     *          This function does not drop the caller's reference to it.
     * @header: new attribute block image, or NULL. With NULL the block
     *          selection step is skipped and i_file_acl is set to 0.
     *
     * Returns 0, or a negative error number on failure.
     */
744 static int
745 ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
746                        struct buffer_head *old_bh,
747                        struct ext3_xattr_header *header)
748 {
749         struct super_block *sb = inode->i_sb;
750         struct buffer_head *new_bh = NULL;
751         int credits = 0, error;
752
753         if (header) {
                    /* Look for an existing block with identical contents.
                     * A block returned here is still locked; every path
                     * below unlocks it. */
754                 new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
755                 if (new_bh) {
756                         /* We found an identical block in the cache. */
757                         if (new_bh == old_bh)
758                                 ea_bdebug(new_bh, "keeping this block");
759                         else {
760                                 /* The old block is released after updating
761                                    the inode. */
762                                 ea_bdebug(new_bh, "reusing block");
763
                                    /* Preset the result for a failed quota
                                     * charge; sharing a block still costs
                                     * this inode one block of quota. */
764                                 error = -EDQUOT;
765                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
766                                         unlock_buffer(new_bh);
767                                         journal_release_buffer(handle, new_bh,
768                                                                credits);
769                                         goto cleanup;
770                                 }
771                                 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
772                                         le32_to_cpu(HDR(new_bh)->h_refcount));
773                                 ea_bdebug(new_bh, "refcount now=%d",
774                                         le32_to_cpu(HDR(new_bh)->h_refcount));
775                         }
776                         unlock_buffer(new_bh);
777                 } else if (old_bh && header == HDR(old_bh)) {
778                         /* Keep this block. No need to lock the block as we
779                          * don't need to change the reference count. */
780                         new_bh = old_bh;
                            /* Extra reference, balanced by the brelse(new_bh)
                             * at cleanup. */
781                         get_bh(new_bh);
782                         ext3_xattr_cache_insert(new_bh);
783                 } else {
784                         /* We need to allocate a new block */
                            /* The goal is the first block of the inode's own
                             * block group. */
785                         int goal = le32_to_cpu(
786                                         EXT3_SB(sb)->s_es->s_first_data_block) +
787                                 EXT3_I(inode)->i_block_group *
788                                 EXT3_BLOCKS_PER_GROUP(sb);
789                         int block = ext3_new_block(handle,
790                                 inode, goal, 0, 0, &error);
791                         if (error)
792                                 goto cleanup;
793                         ea_idebug(inode, "creating block %d", block);
794
795                         new_bh = sb_getblk(sb, block);
796                         if (!new_bh) {
                                    /* Also entered by goto when journal create
                                     * access fails below; the -EIO assignment
                                     * then replaces that call's error code. */
797 getblk_failed:
798                                 ext3_free_blocks(handle, inode, block, 1);
799                                 error = -EIO;
800                                 goto cleanup;
801                         }
802                         lock_buffer(new_bh);
803                         error = ext3_journal_get_create_access(handle, new_bh);
804                         if (error) {
805                                 unlock_buffer(new_bh);
806                                 goto getblk_failed;
807                         }
                            /* Copies a whole block (b_size bytes) starting
                             * at header. */
808                         memcpy(new_bh->b_data, header, new_bh->b_size);
809                         set_buffer_uptodate(new_bh);
810                         unlock_buffer(new_bh);
811                         ext3_xattr_cache_insert(new_bh);
812
813                         ext3_xattr_update_super_block(handle, sb);
814                 }
                    /* Reached for all three cases above. */
815                 error = ext3_journal_dirty_metadata(handle, new_bh);
816                 if (error)
817                         goto cleanup;
818         }
819
820         /* Update the inode. */
821         EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
822         inode->i_ctime = CURRENT_TIME;
823         ext3_mark_inode_dirty(handle, inode);
824         if (IS_SYNC(inode))
825                 handle->h_sync = 1;
826
827         error = 0;
828         if (old_bh && old_bh != new_bh) {
829                 /*
830                  * If there was an old block, and we are no longer using it,
831                  * release the old block.
832                 */
833                 error = ext3_journal_get_write_access(handle, old_bh);
834                 if (error)
835                         goto cleanup;
836                 lock_buffer(old_bh);
837                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
838                         /* Free the old block. */
839                         ea_bdebug(old_bh, "freeing");
840                         ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
841
842                         /* ext3_forget() calls bforget() for us, but we
843                            let our caller release old_bh, so we need to
844                            duplicate the handle before. */
845                         get_bh(old_bh);
846                         ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
847                 } else {
848                         /* Decrement the refcount only. */
849                         HDR(old_bh)->h_refcount = cpu_to_le32(
850                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
851                         DQUOT_FREE_BLOCK(inode, 1);
                            /* NOTE(review): the return value of this call is
                             * not checked. */
852                         ext3_journal_dirty_metadata(handle, old_bh);
853                         ea_bdebug(old_bh, "refcount now=%d",
854                                 le32_to_cpu(HDR(old_bh)->h_refcount));
855                 }
856                 unlock_buffer(old_bh);
857         }
858
859 cleanup:
            /* Only new_bh is released here; old_bh stays with the caller. */
860         brelse(new_bh);
861
862         return error;
863 }
864
865 /*
866  * ext3_xattr_set()
867  *
868  * Like ext3_xattr_set_handle, but start from an inode. This extended
869  * attribute modification is a filesystem transaction by itself.
870  *
871  * Returns 0, or a negative error number on failure.
872  */
873 int
874 ext3_xattr_set(struct inode *inode, int name_index, const char *name,
875                const void *value, size_t value_len, int flags)
876 {
877         handle_t *handle;
878         int error;
879
880         handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
881         if (IS_ERR(handle)) {
882                 error = PTR_ERR(handle);
883         } else {
884                 int error2;
885
886                 error = ext3_xattr_set_handle(handle, inode, name_index, name,
887                                               value, value_len, flags);
888                 error2 = ext3_journal_stop(handle);
889                 if (error == 0)
890                         error = error2;
891         }
892
893         return error;
894 }
895
896 /*
897  * ext3_xattr_delete_inode()
898  *
899  * Free extended attribute resources associated with this inode. This
900  * is called immediately before an inode is freed.
901  */
902 void
903 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
904 {
905         struct buffer_head *bh = NULL;
906
907         down_write(&EXT3_I(inode)->xattr_sem);
908         if (!EXT3_I(inode)->i_file_acl)
909                 goto cleanup;
910         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
911         if (!bh) {
912                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
913                         "inode %ld: block %d read error", inode->i_ino,
914                         EXT3_I(inode)->i_file_acl);
915                 goto cleanup;
916         }
917         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
918             HDR(bh)->h_blocks != cpu_to_le32(1)) {
919                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
920                         "inode %ld: bad block %d", inode->i_ino,
921                         EXT3_I(inode)->i_file_acl);
922                 goto cleanup;
923         }
924         if (ext3_journal_get_write_access(handle, bh) != 0)
925                 goto cleanup;
926         lock_buffer(bh);
927         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
928                 ext3_xattr_cache_remove(bh);
929                 ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
930                 get_bh(bh);
931                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
932         } else {
933                 HDR(bh)->h_refcount = cpu_to_le32(
934                         le32_to_cpu(HDR(bh)->h_refcount) - 1);
935                 ext3_journal_dirty_metadata(handle, bh);
936                 if (IS_SYNC(inode))
937                         handle->h_sync = 1;
938                 DQUOT_FREE_BLOCK(inode, 1);
939         }
940         ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
941         unlock_buffer(bh);
942         EXT3_I(inode)->i_file_acl = 0;
943
944 cleanup:
945         brelse(bh);
946         up_write(&EXT3_I(inode)->xattr_sem);
947 }
948
949 /*
950  * ext3_xattr_put_super()
951  *
952  * This is called when a file system is unmounted.
953  */
954 void
955 ext3_xattr_put_super(struct super_block *sb)
956 {
957         mb_cache_shrink(ext3_xattr_cache, sb->s_bdev);
958 }
959
960 /*
961  * ext3_xattr_cache_insert()
962  *
963  * Create a new entry in the extended attribute cache, and insert
964  * it unless such an entry is already in the cache.
965  *
966  * Returns 0, or a negative error number on failure.
967  */
968 static int
969 ext3_xattr_cache_insert(struct buffer_head *bh)
970 {
971         __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
972         struct mb_cache_entry *ce;
973         int error;
974
975         ce = mb_cache_entry_alloc(ext3_xattr_cache);
976         if (!ce)
977                 return -ENOMEM;
978         error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
979         if (error) {
980                 mb_cache_entry_free(ce);
981                 if (error == -EBUSY) {
982                         ea_bdebug(bh, "already in cache (%d cache entries)",
983                                 atomic_read(&ext3_xattr_cache->c_entry_count));
984                         error = 0;
985                 }
986         } else {
987                 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
988                           atomic_read(&ext3_xattr_cache->c_entry_count));
989                 mb_cache_entry_release(ce);
990         }
991         return error;
992 }
993
994 /*
995  * ext3_xattr_cmp()
996  *
997  * Compare two extended attribute blocks for equality.
998  *
999  * Returns 0 if the blocks are equal, 1 if they differ, and
1000  * a negative error number on errors.
1001  */
1002 static int
1003 ext3_xattr_cmp(struct ext3_xattr_header *header1,
1004                struct ext3_xattr_header *header2)
1005 {
1006         struct ext3_xattr_entry *entry1, *entry2;
1007
1008         entry1 = ENTRY(header1+1);
1009         entry2 = ENTRY(header2+1);
1010         while (!IS_LAST_ENTRY(entry1)) {
1011                 if (IS_LAST_ENTRY(entry2))
1012                         return 1;
1013                 if (entry1->e_hash != entry2->e_hash ||
1014                     entry1->e_name_len != entry2->e_name_len ||
1015                     entry1->e_value_size != entry2->e_value_size ||
1016                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1017                         return 1;
1018                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1019                         return -EIO;
1020                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1021                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1022                            le32_to_cpu(entry1->e_value_size)))
1023                         return 1;
1024
1025                 entry1 = EXT3_XATTR_NEXT(entry1);
1026                 entry2 = EXT3_XATTR_NEXT(entry2);
1027         }
1028         if (!IS_LAST_ENTRY(entry2))
1029                 return 1;
1030         return 0;
1031 }
1032
1033 /*
1034  * ext3_xattr_cache_find()
1035  *
1036  * Find an identical extended attribute block.
1037  *
1038  * Returns a pointer to the block found, or NULL if such a block was
1039  * not found or an error occurred.
1040  */
1041 static struct buffer_head *
1042 ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
1043                       struct ext3_xattr_header *header, int *credits)
1044 {
1045         __u32 hash = le32_to_cpu(header->h_hash);
1046         struct mb_cache_entry *ce;
1047
1048         if (!header->h_hash)
1049                 return NULL;  /* never share */
1050         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1051         ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
1052                                        inode->i_sb->s_bdev, hash);
1053         while (ce) {
1054                 struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
1055
1056                 if (!bh) {
1057                         ext3_error(inode->i_sb, "ext3_xattr_cache_find",
1058                                 "inode %ld: block %ld read error",
1059                                 inode->i_ino, (unsigned long) ce->e_block);
1060                 } else if (ext3_journal_get_write_access_credits(
1061                                 handle, bh, credits) == 0) {
1062                         /* ext3_journal_get_write_access() requires an unlocked
1063                          * bh, which complicates things here. */
1064                         lock_buffer(bh);
1065                         if (le32_to_cpu(HDR(bh)->h_refcount) >
1066                                    EXT3_XATTR_REFCOUNT_MAX) {
1067                                 ea_idebug(inode, "block %ld refcount %d>%d",
1068                                           (unsigned long) ce->e_block,
1069                                           le32_to_cpu(HDR(bh)->h_refcount),
1070                                           EXT3_XATTR_REFCOUNT_MAX);
1071                         } else if (!ext3_xattr_cmp(header, HDR(bh))) {
1072                                 mb_cache_entry_release(ce);
1073                                 /* buffer will be unlocked by caller */
1074                                 return bh;
1075                         }
1076                         unlock_buffer(bh);
1077                         journal_release_buffer(handle, bh, *credits);
1078                         *credits = 0;
1079                         brelse(bh);
1080                 }
1081                 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
1082         }
1083         return NULL;
1084 }
1085
1086 /*
1087  * ext3_xattr_cache_remove()
1088  *
1089  * Remove the cache entry of a block from the cache. Called when a
1090  * block becomes invalid.
1091  */
1092 static void
1093 ext3_xattr_cache_remove(struct buffer_head *bh)
1094 {
1095         struct mb_cache_entry *ce;
1096
1097         ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
1098                                 bh->b_blocknr);
1099         if (ce) {
1100                 ea_bdebug(bh, "removing (%d cache entries remaining)",
1101                           atomic_read(&ext3_xattr_cache->c_entry_count)-1);
1102                 mb_cache_entry_free(ce);
1103         } else 
1104                 ea_bdebug(bh, "no cache entry");
1105 }
1106
1107 #define NAME_HASH_SHIFT 5
1108 #define VALUE_HASH_SHIFT 16
1109
1110 /*
1111  * ext3_xattr_hash_entry()
1112  *
1113  * Compute the hash of an extended attribute.
1114  */
1115 static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
1116                                          struct ext3_xattr_entry *entry)
1117 {
1118         __u32 hash = 0;
1119         char *name = entry->e_name;
1120         int n;
1121
1122         for (n=0; n < entry->e_name_len; n++) {
1123                 hash = (hash << NAME_HASH_SHIFT) ^
1124                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1125                        *name++;
1126         }
1127
1128         if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1129                 __u32 *value = (__u32 *)((char *)header +
1130                         le16_to_cpu(entry->e_value_offs));
1131                 for (n = (le32_to_cpu(entry->e_value_size) +
1132                      EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
1133                         hash = (hash << VALUE_HASH_SHIFT) ^
1134                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1135                                le32_to_cpu(*value++);
1136                 }
1137         }
1138         entry->e_hash = cpu_to_le32(hash);
1139 }
1140
1141 #undef NAME_HASH_SHIFT
1142 #undef VALUE_HASH_SHIFT
1143
1144 #define BLOCK_HASH_SHIFT 16
1145
1146 /*
1147  * ext3_xattr_rehash()
1148  *
1149  * Re-compute the extended attribute hash value after an entry has changed.
1150  */
1151 static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1152                               struct ext3_xattr_entry *entry)
1153 {
1154         struct ext3_xattr_entry *here;
1155         __u32 hash = 0;
1156
1157         ext3_xattr_hash_entry(header, entry);
1158         here = ENTRY(header+1);
1159         while (!IS_LAST_ENTRY(here)) {
1160                 if (!here->e_hash) {
1161                         /* Block is not shared if an entry's hash value == 0 */
1162                         hash = 0;
1163                         break;
1164                 }
1165                 hash = (hash << BLOCK_HASH_SHIFT) ^
1166                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1167                        le32_to_cpu(here->e_hash);
1168                 here = EXT3_XATTR_NEXT(here);
1169         }
1170         header->h_hash = cpu_to_le32(hash);
1171 }
1172
1173 #undef BLOCK_HASH_SHIFT
1174
1175 int __init
1176 init_ext3_xattr(void)
1177 {
1178         int     err;
1179
1180         err = ext3_xattr_register(EXT3_XATTR_INDEX_USER,
1181                                   &ext3_xattr_user_handler);
1182         if (err)
1183                 return err;
1184         err = ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
1185                                   &ext3_xattr_trusted_handler);
1186         if (err)
1187                 goto out;
1188 #ifdef CONFIG_EXT3_FS_SECURITY
1189         err = ext3_xattr_register(EXT3_XATTR_INDEX_SECURITY,
1190                                   &ext3_xattr_security_handler);
1191         if (err)
1192                 goto out1;
1193 #endif
1194 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1195         err = init_ext3_acl();
1196         if (err)
1197                 goto out2;
1198 #endif
1199         ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
1200                 sizeof(struct mb_cache_entry) +
1201                 sizeof(struct mb_cache_entry_index), 1, 6);
1202         if (!ext3_xattr_cache) {
1203                 err = -ENOMEM;
1204                 goto out3;
1205         }
1206         return 0;
1207 out3:
1208 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1209         exit_ext3_acl();
1210 out2:
1211 #endif
1212 #ifdef CONFIG_EXT3_FS_SECURITY
1213         ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
1214                               &ext3_xattr_security_handler);
1215 out1:
1216 #endif
1217         ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
1218                               &ext3_xattr_trusted_handler);
1219 out:
1220         ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
1221                               &ext3_xattr_user_handler);
1222         return err;
1223 }
1224
1225 void
1226 exit_ext3_xattr(void)
1227 {
1228         if (ext3_xattr_cache)
1229                 mb_cache_destroy(ext3_xattr_cache);
1230         ext3_xattr_cache = NULL;
1231 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1232         exit_ext3_acl();
1233 #endif
1234 #ifdef CONFIG_EXT3_FS_SECURITY
1235         ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
1236                               &ext3_xattr_security_handler);
1237 #endif
1238         ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
1239                               &ext3_xattr_trusted_handler);
1240         ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
1241                               &ext3_xattr_user_handler);
1242 }