ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / fs / ext2 / xattr.c
1 /*
2  * linux/fs/ext2/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Extended attributes for symlinks and special files added per
8  *  suggestion of Luka Renko <luka.renko@hermes.si>.
9  */
10
11 /*
12  * Extended attributes are stored on disk blocks allocated outside of
13  * any inode. The i_file_acl field is then made to point to this allocated
14  * block. If all extended attributes of an inode are identical, these
15  * inodes may share the same extended attribute block. Such situations
16  * are automatically detected by keeping a cache of recent attribute block
17  * numbers and hashes over the block's contents in memory.
18  *
19  *
20  * Extended attribute block layout:
21  *
22  *   +------------------+
23  *   | header           |
24  *   | entry 1          | |
25  *   | entry 2          | | growing downwards
26  *   | entry 3          | v
27  *   | four null bytes  |
28  *   | . . .            |
29  *   | value 1          | ^
30  *   | value 3          | | growing upwards
31  *   | value 2          | |
32  *   +------------------+
33  *
34  * The block header is followed by multiple entry descriptors. These entry
35  * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
36  * byte boundaries. The entry descriptors are sorted by attribute name,
37  * so that two extended attribute blocks can be compared efficiently.
38  *
39  * Attribute values are aligned to the end of the block, stored in
40  * no specific order. They are also padded to EXT2_XATTR_PAD byte
41  * boundaries. No additional gaps are left between them.
42  *
43  * Locking strategy
44  * ----------------
45  * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
46  * EA blocks are only changed if they are exclusive to an inode, so
47  * holding xattr_sem also means that nothing but the EA block's reference
48  * count will change. Multiple writers to an EA block are synchronized
49  * by the bh lock. No more than a single bh lock is held at any time
50  * to avoid deadlocks.
51  */
52
53 #include <linux/buffer_head.h>
54 #include <linux/module.h>
55 #include <linux/init.h>
56 #include <linux/slab.h>
57 #include <linux/mbcache.h>
58 #include <linux/quotaops.h>
59 #include <linux/rwsem.h>
60 #include "ext2.h"
61 #include "xattr.h"
62 #include "acl.h"
63
64 /* These symbols may be needed by a module. */
65 EXPORT_SYMBOL(ext2_xattr_register);
66 EXPORT_SYMBOL(ext2_xattr_unregister);
67 EXPORT_SYMBOL(ext2_xattr_get);
68 EXPORT_SYMBOL(ext2_xattr_list);
69 EXPORT_SYMBOL(ext2_xattr_set);
70
/*
 * Accessors for an extended attribute block held in a buffer_head.
 *
 * HDR(bh)            - view bh->b_data as the block header.
 * ENTRY(ptr)         - view an arbitrary pointer as an entry descriptor.
 * FIRST_ENTRY(bh)    - the entry descriptor that directly follows the header.
 * IS_LAST_ENTRY(e)   - true when the first 32 bits at e are zero, i.e. e is
 *                      the "four null bytes" terminator of the entry table.
 */
71 #define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
72 #define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
73 #define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
74 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
75
/*
 * Debug tracing helpers.
 *
 * With EXT2_XATTR_DEBUG defined, ea_idebug() prints a KERN_DEBUG line
 * prefixed with the inode's superblock id and inode number, and ea_bdebug()
 * prints one prefixed with the buffer's block device name and block number.
 * Both take a printk-style format and arguments after the first parameter.
 * Without EXT2_XATTR_DEBUG both macros expand to nothing.
 */
76 #ifdef EXT2_XATTR_DEBUG
77 # define ea_idebug(inode, f...) do { \
78                 printk(KERN_DEBUG "inode %s:%ld: ", \
79                         inode->i_sb->s_id, inode->i_ino); \
80                 printk(f); \
81                 printk("\n"); \
82         } while (0)
83 # define ea_bdebug(bh, f...) do { \
84                 char b[BDEVNAME_SIZE]; \
85                 printk(KERN_DEBUG "block %s:%lu: ", \
86                         bdevname(bh->b_bdev, b), \
87                         (unsigned long) bh->b_blocknr); \
88                 printk(f); \
89                 printk("\n"); \
90         } while (0)
91 #else
92 # define ea_idebug(f...)
93 # define ea_bdebug(f...)
94 #endif
95
/* Forward declarations of file-local helpers used before their definition. */
96 static int ext2_xattr_set2(struct inode *, struct buffer_head *,
97                            struct ext2_xattr_header *);
98
99 static int ext2_xattr_cache_insert(struct buffer_head *);
100 static struct buffer_head *ext2_xattr_cache_find(struct inode *,
101                                                  struct ext2_xattr_header *);
102 static void ext2_xattr_cache_remove(struct buffer_head *);
103 static void ext2_xattr_rehash(struct ext2_xattr_header *,
104                               struct ext2_xattr_entry *);
105
/*
 * ext2_xattr_cache    - mbcache handle; presumably the cache of attribute
 *                       blocks mentioned in the file header (initialised
 *                       outside this part of the file -- confirm there).
 * ext2_xattr_handlers - handler table; the handler for name index N lives
 *                       in slot N-1, and an empty slot is NULL.
 * ext2_handler_lock   - guards ext2_xattr_handlers: taken for writing by
 *                       register/unregister and for reading by lookups.
 */
106 static struct mb_cache *ext2_xattr_cache;
107 static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX];
108 static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED;
109
110 int
111 ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler)
112 {
113         int error = -EINVAL;
114
115         if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
116                 write_lock(&ext2_handler_lock);
117                 if (!ext2_xattr_handlers[name_index-1]) {
118                         ext2_xattr_handlers[name_index-1] = handler;
119                         error = 0;
120                 }
121                 write_unlock(&ext2_handler_lock);
122         }
123         return error;
124 }
125
126 void
127 ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler)
128 {
129         if (name_index > 0 || name_index <= EXT2_XATTR_INDEX_MAX) {
130                 write_lock(&ext2_handler_lock);
131                 ext2_xattr_handlers[name_index-1] = NULL;
132                 write_unlock(&ext2_handler_lock);
133         }
134 }
135
/*
 * strcmp_prefix()
 *
 * Test whether string a begins with a_prefix.
 *
 * Returns a pointer to the first character of a after the prefix when it
 * matches (a itself for an empty prefix), or NULL when it does not.
 */
static inline const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
        for (; *a_prefix != '\0'; a++, a_prefix++) {
                if (*a != *a_prefix)
                        return NULL;
        }
        return a;
}
145
146 /*
147  * Decode the extended attribute name, and translate it into
148  * the name_index and name suffix.
149  */
150 static struct ext2_xattr_handler *
151 ext2_xattr_resolve_name(const char **name)
152 {
153         struct ext2_xattr_handler *handler = NULL;
154         int i;
155
156         if (!*name)
157                 return NULL;
158         read_lock(&ext2_handler_lock);
159         for (i=0; i<EXT2_XATTR_INDEX_MAX; i++) {
160                 if (ext2_xattr_handlers[i]) {
161                         const char *n = strcmp_prefix(*name,
162                                 ext2_xattr_handlers[i]->prefix);
163                         if (n) {
164                                 handler = ext2_xattr_handlers[i];
165                                 *name = n;
166                                 break;
167                         }
168                 }
169         }
170         read_unlock(&ext2_handler_lock);
171         return handler;
172 }
173
174 static inline struct ext2_xattr_handler *
175 ext2_xattr_handler(int name_index)
176 {
177         struct ext2_xattr_handler *handler = NULL;
178         if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
179                 read_lock(&ext2_handler_lock);
180                 handler = ext2_xattr_handlers[name_index-1];
181                 read_unlock(&ext2_handler_lock);
182         }
183         return handler;
184 }
185
186 /*
187  * Inode operation getxattr()
188  *
189  * dentry->d_inode->i_sem: don't care
190  */
191 ssize_t
192 ext2_getxattr(struct dentry *dentry, const char *name,
193               void *buffer, size_t size)
194 {
195         struct ext2_xattr_handler *handler;
196         struct inode *inode = dentry->d_inode;
197
198         handler = ext2_xattr_resolve_name(&name);
199         if (!handler)
200                 return -EOPNOTSUPP;
201         return handler->get(inode, name, buffer, size);
202 }
203
204 /*
205  * Inode operation listxattr()
206  *
207  * dentry->d_inode->i_sem: don't care
208  */
209 ssize_t
210 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
211 {
212         return ext2_xattr_list(dentry->d_inode, buffer, size);
213 }
214
215 /*
216  * Inode operation setxattr()
217  *
218  * dentry->d_inode->i_sem: down
219  */
220 int
221 ext2_setxattr(struct dentry *dentry, const char *name,
222               const void *value, size_t size, int flags)
223 {
224         struct ext2_xattr_handler *handler;
225         struct inode *inode = dentry->d_inode;
226
227         if (size == 0)
228                 value = "";  /* empty EA, do not remove */
229         handler = ext2_xattr_resolve_name(&name);
230         if (!handler)
231                 return -EOPNOTSUPP;
232         return handler->set(inode, name, value, size, flags);
233 }
234
235 /*
236  * Inode operation removexattr()
237  *
238  * dentry->d_inode->i_sem: down
239  */
240 int
241 ext2_removexattr(struct dentry *dentry, const char *name)
242 {
243         struct ext2_xattr_handler *handler;
244         struct inode *inode = dentry->d_inode;
245
246         handler = ext2_xattr_resolve_name(&name);
247         if (!handler)
248                 return -EOPNOTSUPP;
249         return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
250 }
251
252 /*
253  * ext2_xattr_get()
254  *
255  * Copy an extended attribute into the buffer
256  * provided, or compute the buffer size required.
257  * Buffer is NULL to compute the size of the buffer required.
258  *
259  * Returns a negative error number on failure, or the number of bytes
260  * used / required on success.
261  */
262 int
263 ext2_xattr_get(struct inode *inode, int name_index, const char *name,
264                void *buffer, size_t buffer_size)
265 {
266         struct buffer_head *bh = NULL;
267         struct ext2_xattr_entry *entry;
268         size_t name_len, size;
269         char *end;
270         int error;
271
272         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
273                   name_index, name, buffer, (long)buffer_size);
274
275         if (name == NULL)
276                 return -EINVAL;
277         down_read(&EXT2_I(inode)->xattr_sem);
278         error = -ENODATA;
279         if (!EXT2_I(inode)->i_file_acl)
280                 goto cleanup;
281         ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
282         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
283         error = -EIO;
284         if (!bh)
285                 goto cleanup;
286         ea_bdebug(bh, "b_count=%d, refcount=%d",
287                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
288         end = bh->b_data + bh->b_size;
289         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
290             HDR(bh)->h_blocks != cpu_to_le32(1)) {
291 bad_block:      ext2_error(inode->i_sb, "ext2_xattr_get",
292                         "inode %ld: bad block %d", inode->i_ino,
293                         EXT2_I(inode)->i_file_acl);
294                 error = -EIO;
295                 goto cleanup;
296         }
297         /* find named attribute */
298         name_len = strlen(name);
299
300         error = -ERANGE;
301         if (name_len > 255)
302                 goto cleanup;
303         entry = FIRST_ENTRY(bh);
304         while (!IS_LAST_ENTRY(entry)) {
305                 struct ext2_xattr_entry *next =
306                         EXT2_XATTR_NEXT(entry);
307                 if ((char *)next >= end)
308                         goto bad_block;
309                 if (name_index == entry->e_name_index &&
310                     name_len == entry->e_name_len &&
311                     memcmp(name, entry->e_name, name_len) == 0)
312                         goto found;
313                 entry = next;
314         }
315         /* Check the remaining name entries */
316         while (!IS_LAST_ENTRY(entry)) {
317                 struct ext2_xattr_entry *next =
318                         EXT2_XATTR_NEXT(entry);
319                 if ((char *)next >= end)
320                         goto bad_block;
321                 entry = next;
322         }
323         if (ext2_xattr_cache_insert(bh))
324                 ea_idebug(inode, "cache insert failed");
325         error = -ENODATA;
326         goto cleanup;
327 found:
328         /* check the buffer size */
329         if (entry->e_value_block != 0)
330                 goto bad_block;
331         size = le32_to_cpu(entry->e_value_size);
332         if (size > inode->i_sb->s_blocksize ||
333             le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
334                 goto bad_block;
335
336         if (ext2_xattr_cache_insert(bh))
337                 ea_idebug(inode, "cache insert failed");
338         if (buffer) {
339                 error = -ERANGE;
340                 if (size > buffer_size)
341                         goto cleanup;
342                 /* return value of attribute */
343                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
344                         size);
345         }
346         error = size;
347
348 cleanup:
349         brelse(bh);
350         up_read(&EXT2_I(inode)->xattr_sem);
351
352         return error;
353 }
354
355 /*
356  * ext2_xattr_list()
357  *
358  * Copy a list of attribute names into the buffer
359  * provided, or compute the buffer size required.
360  * Buffer is NULL to compute the size of the buffer required.
361  *
362  * Returns a negative error number on failure, or the number of bytes
363  * used / required on success.
364  */
365 int
366 ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
367 {
368         struct buffer_head *bh = NULL;
369         struct ext2_xattr_entry *entry;
370         size_t size = 0;
371         char *buf, *end;
372         int error;
373
374         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
375                   buffer, (long)buffer_size);
376
377         down_read(&EXT2_I(inode)->xattr_sem);
378         error = 0;
379         if (!EXT2_I(inode)->i_file_acl)
380                 goto cleanup;
381         ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
382         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
383         error = -EIO;
384         if (!bh)
385                 goto cleanup;
386         ea_bdebug(bh, "b_count=%d, refcount=%d",
387                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
388         end = bh->b_data + bh->b_size;
389         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
390             HDR(bh)->h_blocks != cpu_to_le32(1)) {
391 bad_block:      ext2_error(inode->i_sb, "ext2_xattr_list",
392                         "inode %ld: bad block %d", inode->i_ino,
393                         EXT2_I(inode)->i_file_acl);
394                 error = -EIO;
395                 goto cleanup;
396         }
397         /* compute the size required for the list of attribute names */
398         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
399              entry = EXT2_XATTR_NEXT(entry)) {
400                 struct ext2_xattr_handler *handler;
401                 struct ext2_xattr_entry *next =
402                         EXT2_XATTR_NEXT(entry);
403                 if ((char *)next >= end)
404                         goto bad_block;
405
406                 handler = ext2_xattr_handler(entry->e_name_index);
407                 if (handler)
408                         size += handler->list(NULL, inode, entry->e_name,
409                                               entry->e_name_len);
410         }
411
412         if (ext2_xattr_cache_insert(bh))
413                 ea_idebug(inode, "cache insert failed");
414         if (!buffer) {
415                 error = size;
416                 goto cleanup;
417         } else {
418                 error = -ERANGE;
419                 if (size > buffer_size)
420                         goto cleanup;
421         }
422
423         /* list the attribute names */
424         buf = buffer;
425         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
426              entry = EXT2_XATTR_NEXT(entry)) {
427                 struct ext2_xattr_handler *handler;
428                 
429                 handler = ext2_xattr_handler(entry->e_name_index);
430                 if (handler)
431                         buf += handler->list(buf, inode, entry->e_name,
432                                              entry->e_name_len);
433         }
434         error = size;
435
436 cleanup:
437         brelse(bh);
438         up_read(&EXT2_I(inode)->xattr_sem);
439
440         return error;
441 }
442
443 /*
444  * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
445  * not set, set it.
446  */
447 static void ext2_xattr_update_super_block(struct super_block *sb)
448 {
449         if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
450                 return;
451
452         lock_super(sb);
453         EXT2_SB(sb)->s_es->s_feature_compat |=
454                 cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
455         sb->s_dirt = 1;
456         mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
457         unlock_super(sb);
458 }
459
460 /*
461  * ext2_xattr_set()
462  *
463  * Create, replace or remove an extended attribute for this inode. Buffer
464  * is NULL to remove an existing extended attribute, and non-NULL to
465  * either replace an existing extended attribute, or create a new extended
466  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
467  * specify that an extended attribute must exist and must not exist
468  * previous to the call, respectively.
469  *
470  * Returns 0, or a negative error number on failure.
     *
     * Error codes produced directly by this function: -EROFS, -EPERM,
     * -EINVAL (NULL name), -ERANGE (name longer than 255 bytes or value
     * larger than a block), -EIO (unreadable or malformed EA block),
     * -ENODATA (XATTR_REPLACE on a missing attribute), -EEXIST
     * (XATTR_CREATE on an existing attribute), -ENOSPC and -ENOMEM.
     * Otherwise the result of ext2_xattr_set2() is returned.
     *
     * xattr_sem is held for writing from the first look at i_file_acl
     * until the function returns.
471  */
472 int
473 ext2_xattr_set(struct inode *inode, int name_index, const char *name,
474                const void *value, size_t value_len, int flags)
475 {
476         struct super_block *sb = inode->i_sb;
477         struct buffer_head *bh = NULL;
478         struct ext2_xattr_header *header = NULL;
479         struct ext2_xattr_entry *here, *last;
480         size_t name_len, free, min_offs = sb->s_blocksize;
481         int not_found = 1, error;
482         char *end;
483         
484         /*
485          * header -- Points either into bh, or to a temporarily
486          *           allocated buffer.
487          * here -- The named entry found, or the place for inserting, within
488          *         the block pointed to by header.
489          * last -- Points right after the last named entry within the block
490          *         pointed to by header.
491          * min_offs -- The offset of the first value (values are aligned
492          *             towards the end of the block).
493          * end -- Points right after the block pointed to by header.
494          */
495         
496         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
497                   name_index, name, value, (long)value_len);
498
499         if (IS_RDONLY(inode))
500                 return -EROFS;
501         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
502                 return -EPERM;
            /* A NULL value requests removal; any length passed with it
               is ignored. */
503         if (value == NULL)
504                 value_len = 0;
505         if (name == NULL)
506                 return -EINVAL;
507         name_len = strlen(name);
508         if (name_len > 255 || value_len > sb->s_blocksize)
509                 return -ERANGE;
510         down_write(&EXT2_I(inode)->xattr_sem);
511         if (EXT2_I(inode)->i_file_acl) {
512                 /* The inode already has an extended attribute block. */
513                 bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
514                 error = -EIO;
515                 if (!bh)
516                         goto cleanup;
517                 ea_bdebug(bh, "b_count=%d, refcount=%d",
518                         atomic_read(&(bh->b_count)),
519                         le32_to_cpu(HDR(bh)->h_refcount));
520                 header = HDR(bh);
521                 end = bh->b_data + bh->b_size;
522                 if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
523                     header->h_blocks != cpu_to_le32(1)) {
524 bad_block:              ext2_error(sb, "ext2_xattr_set",
525                                 "inode %ld: bad block %d", inode->i_ino, 
526                                    EXT2_I(inode)->i_file_acl);
527                         error = -EIO;
528                         goto cleanup;
529                 }
530                 /* Find the named attribute. */
531                 here = FIRST_ENTRY(bh);
532                 while (!IS_LAST_ENTRY(here)) {
533                         struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
534                         if ((char *)next >= end)
535                                 goto bad_block;
536                         if (!here->e_value_block && here->e_value_size) {
537                                 size_t offs = le16_to_cpu(here->e_value_offs);
538                                 if (offs < min_offs)
539                                         min_offs = offs;
540                         }
                            /* Compare by name index, then name length, then
                               name bytes. A result <= 0 means this is the
                               entry (0) or the slot where the new entry
                               has to be inserted (< 0). */
541                         not_found = name_index - here->e_name_index;
542                         if (!not_found)
543                                 not_found = name_len - here->e_name_len;
544                         if (!not_found)
545                                 not_found = memcmp(name, here->e_name,name_len);
546                         if (not_found <= 0)
547                                 break;
548                         here = next;
549                 }
550                 last = here;
551                 /* We still need to compute min_offs and last. */
552                 while (!IS_LAST_ENTRY(last)) {
553                         struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
554                         if ((char *)next >= end)
555                                 goto bad_block;
556                         if (!last->e_value_block && last->e_value_size) {
557                                 size_t offs = le16_to_cpu(last->e_value_offs);
558                                 if (offs < min_offs)
559                                         min_offs = offs;
560                         }
561                         last = next;
562                 }
563
564                 /* Check whether we have enough space left. */
                    /* Free space is the gap between the end of the entry
                       table (including its four-byte terminator) and the
                       lowest value offset. */
565                 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
566         } else {
567                 /* We will use a new extended attribute block. */
568                 free = sb->s_blocksize -
569                         sizeof(struct ext2_xattr_header) - sizeof(__u32);
570                 here = last = NULL;  /* avoid gcc uninitialized warning. */
571         }
572
573         if (not_found) {
574                 /* Request to remove a nonexistent attribute? */
575                 error = -ENODATA;
576                 if (flags & XATTR_REPLACE)
577                         goto cleanup;
578                 error = 0;
579                 if (value == NULL)
580                         goto cleanup;
581         } else {
582                 /* Request to create an existing attribute? */
583                 error = -EEXIST;
584                 if (flags & XATTR_CREATE)
585                         goto cleanup;
                    /* The space taken by the existing value and name
                       descriptor counts as free for the check below. */
586                 if (!here->e_value_block && here->e_value_size) {
587                         size_t size = le32_to_cpu(here->e_value_size);
588
589                         if (le16_to_cpu(here->e_value_offs) + size > 
590                             sb->s_blocksize || size > sb->s_blocksize)
591                                 goto bad_block;
592                         free += EXT2_XATTR_SIZE(size);
593                 }
594                 free += EXT2_XATTR_LEN(name_len);
595         }
596         error = -ENOSPC;
597         if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
598                 goto cleanup;
599
600         /* Here we know that we can set the new attribute. */
601
602         if (header) {
603                 /* assert(header == HDR(bh)); */
604                 lock_buffer(bh);
605                 if (header->h_refcount == cpu_to_le32(1)) {
606                         ea_bdebug(bh, "modifying in-place");
607                         ext2_xattr_cache_remove(bh);
608                         /* keep the buffer locked while modifying it. */
609                 } else {
610                         int offset;
611
612                         unlock_buffer(bh);
613                         ea_bdebug(bh, "cloning");
614                         header = kmalloc(bh->b_size, GFP_KERNEL);
615                         error = -ENOMEM;
616                         if (header == NULL)
617                                 goto cleanup;
618                         memcpy(header, HDR(bh), bh->b_size);
619                         header->h_refcount = cpu_to_le32(1);
620
                            /* Re-base here and last from the buffer onto
                               the private copy. */
621                         offset = (char *)here - bh->b_data;
622                         here = ENTRY((char *)header + offset);
623                         offset = (char *)last - bh->b_data;
624                         last = ENTRY((char *)header + offset);
625                 }
626         } else {
627                 /* Allocate a buffer where we construct the new block. */
628                 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
629                 error = -ENOMEM;
630                 if (header == NULL)
631                         goto cleanup;
632                 memset(header, 0, sb->s_blocksize);
633                 end = (char *)header + sb->s_blocksize;
634                 header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
635                 header->h_blocks = header->h_refcount = cpu_to_le32(1);
636                 last = here = ENTRY(header+1);
637         }
638
639         /* Iff we are modifying the block in-place, bh is locked here. */
640
641         if (not_found) {
642                 /* Insert the new name. */
643                 size_t size = EXT2_XATTR_LEN(name_len);
644                 size_t rest = (char *)last - (char *)here;
645                 memmove((char *)here + size, here, rest);
646                 memset(here, 0, size);
647                 here->e_name_index = name_index;
648                 here->e_name_len = name_len;
649                 memcpy(here->e_name, name, name_len);
650         } else {
651                 if (!here->e_value_block && here->e_value_size) {
652                         char *first_val = (char *)header + min_offs;
653                         size_t offs = le16_to_cpu(here->e_value_offs);
654                         char *val = (char *)header + offs;
655                         size_t size = EXT2_XATTR_SIZE(
656                                 le32_to_cpu(here->e_value_size));
657
658                         if (size == EXT2_XATTR_SIZE(value_len)) {
659                                 /* The old and the new value have the same
660                                    size. Just replace. */
661                                 here->e_value_size = cpu_to_le32(value_len);
662                                 memset(val + size - EXT2_XATTR_PAD, 0,
663                                        EXT2_XATTR_PAD); /* Clear pad bytes. */
664                                 memcpy(val, value, value_len);
665                                 goto skip_replace;
666                         }
667
668                         /* Remove the old value. */
669                         memmove(first_val + size, first_val, val - first_val);
670                         memset(first_val, 0, size);
671                         here->e_value_offs = 0;
672                         min_offs += size;
673
674                         /* Adjust all value offsets. */
675                         last = ENTRY(header+1);
676                         while (!IS_LAST_ENTRY(last)) {
677                                 size_t o = le16_to_cpu(last->e_value_offs);
678                                 if (!last->e_value_block && o < offs)
679                                         last->e_value_offs =
680                                                 cpu_to_le16(o + size);
681                                 last = EXT2_XATTR_NEXT(last);
682                         }
683                 }
684                 if (value == NULL) {
685                         /* Remove the old name. */
686                         size_t size = EXT2_XATTR_LEN(name_len);
687                         last = ENTRY((char *)last - size);
688                         memmove(here, (char*)here + size,
689                                 (char*)last - (char*)here);
690                         memset(last, 0, size);
691                 }
692         }
693
694         if (value != NULL) {
695                 /* Insert the new value. */
696                 here->e_value_size = cpu_to_le32(value_len);
697                 if (value_len) {
698                         size_t size = EXT2_XATTR_SIZE(value_len);
699                         char *val = (char *)header + min_offs - size;
700                         here->e_value_offs =
701                                 cpu_to_le16((char *)val - (char *)header);
702                         memset(val + size - EXT2_XATTR_PAD, 0,
703                                EXT2_XATTR_PAD); /* Clear the pad bytes. */
704                         memcpy(val, value, value_len);
705                 }
706         }
707
708 skip_replace:
709         if (IS_LAST_ENTRY(ENTRY(header+1))) {
710                 /* This block is now empty. */
711                 if (bh && header == HDR(bh))
712                         unlock_buffer(bh);  /* we were modifying in-place. */
713                 error = ext2_xattr_set2(inode, bh, NULL);
714         } else {
715                 ext2_xattr_rehash(header, here);
716                 if (bh && header == HDR(bh))
717                         unlock_buffer(bh);  /* we were modifying in-place. */
718                 error = ext2_xattr_set2(inode, bh, header);
719         }
720
721 cleanup:
722         brelse(bh);
            /* header is a kmalloc'ed copy (or NULL) unless it points into
               the buffer itself, in which case there is nothing to free. */
723         if (!(bh && header == HDR(bh)))
724                 kfree(header);
725         up_write(&EXT2_I(inode)->xattr_sem);
726
727         return error;
728 }
729
730 /*
731  * Second half of ext2_xattr_set(): Update the file system.
732  */
733 static int
734 ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
735                 struct ext2_xattr_header *header)
736 {
737         struct super_block *sb = inode->i_sb;
738         struct buffer_head *new_bh = NULL;
739         int error;
740
741         if (header) {
742                 new_bh = ext2_xattr_cache_find(inode, header);
743                 if (new_bh) {
744                         /* We found an identical block in the cache. */
745                         if (new_bh == old_bh) {
746                                 ea_bdebug(new_bh, "keeping this block");
747                         } else {
748                                 /* The old block is released after updating
749                                    the inode.  */
750                                 ea_bdebug(new_bh, "reusing block");
751
752                                 error = -EDQUOT;
753                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
754                                         unlock_buffer(new_bh);
755                                         goto cleanup;
756                                 }
757                                 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
758                                         le32_to_cpu(HDR(new_bh)->h_refcount));
759                                 ea_bdebug(new_bh, "refcount now=%d",
760                                         le32_to_cpu(HDR(new_bh)->h_refcount));
761                         }
762                         unlock_buffer(new_bh);
763                 } else if (old_bh && header == HDR(old_bh)) {
764                         /* Keep this block. No need to lock the block as we
765                            don't need to change the reference count. */
766                         new_bh = old_bh;
767                         get_bh(new_bh);
768                         ext2_xattr_cache_insert(new_bh);
769                 } else {
770                         /* We need to allocate a new block */
771                         int goal = le32_to_cpu(EXT2_SB(sb)->s_es->
772                                                            s_first_data_block) +
773                                    EXT2_I(inode)->i_block_group *
774                                    EXT2_BLOCKS_PER_GROUP(sb);
775                         int block = ext2_new_block(inode, goal, 0, 0, &error);
776                         if (error)
777                                 goto cleanup;
778                         ea_idebug(inode, "creating block %d", block);
779
780                         new_bh = sb_getblk(sb, block);
781                         if (!new_bh) {
782                                 ext2_free_blocks(inode, block, 1);
783                                 error = -EIO;
784                                 goto cleanup;
785                         }
786                         lock_buffer(new_bh);
787                         memcpy(new_bh->b_data, header, new_bh->b_size);
788                         set_buffer_uptodate(new_bh);
789                         unlock_buffer(new_bh);
790                         ext2_xattr_cache_insert(new_bh);
791                         
792                         ext2_xattr_update_super_block(sb);
793                 }
794                 mark_buffer_dirty(new_bh);
795                 if (IS_SYNC(inode)) {
796                         sync_dirty_buffer(new_bh);
797                         error = -EIO;
798                         if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
799                                 goto cleanup;
800                 }
801         }
802
803         /* Update the inode. */
804         EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
805         inode->i_ctime = CURRENT_TIME;
806         if (IS_SYNC(inode)) {
807                 error = ext2_sync_inode (inode);
808                 if (error)
809                         goto cleanup;
810         } else
811                 mark_inode_dirty(inode);
812
813         error = 0;
814         if (old_bh && old_bh != new_bh) {
815                 /*
816                  * If there was an old block and we are no longer using it,
817                  * release the old block.
818                  */
819                 lock_buffer(old_bh);
820                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
821                         /* Free the old block. */
822                         ea_bdebug(old_bh, "freeing");
823                         ext2_free_blocks(inode, old_bh->b_blocknr, 1);
824                         /* We let our caller release old_bh, so we
825                          * need to duplicate the buffer before. */
826                         get_bh(old_bh);
827                         bforget(old_bh);
828                 } else {
829                         /* Decrement the refcount only. */
830                         HDR(old_bh)->h_refcount = cpu_to_le32(
831                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
832                         DQUOT_FREE_BLOCK(inode, 1);
833                         mark_buffer_dirty(old_bh);
834                         ea_bdebug(old_bh, "refcount now=%d",
835                                 le32_to_cpu(HDR(old_bh)->h_refcount));
836                 }
837                 unlock_buffer(old_bh);
838         }
839
840 cleanup:
841         brelse(new_bh);
842
843         return error;
844 }
845
846 /*
847  * ext2_xattr_delete_inode()
848  *
849  * Free extended attribute resources associated with this inode. This
850  * is called immediately before an inode is freed.
851  */
852 void
853 ext2_xattr_delete_inode(struct inode *inode)
854 {
855         struct buffer_head *bh = NULL;
856
857         down_write(&EXT2_I(inode)->xattr_sem);
858         if (!EXT2_I(inode)->i_file_acl)
859                 goto cleanup;
860         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
861         if (!bh) {
862                 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
863                         "inode %ld: block %d read error", inode->i_ino,
864                         EXT2_I(inode)->i_file_acl);
865                 goto cleanup;
866         }
867         ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
868         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
869             HDR(bh)->h_blocks != cpu_to_le32(1)) {
870                 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
871                         "inode %ld: bad block %d", inode->i_ino,
872                         EXT2_I(inode)->i_file_acl);
873                 goto cleanup;
874         }
875         lock_buffer(bh);
876         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
877                 ext2_xattr_cache_remove(bh);
878                 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
879                 get_bh(bh);
880                 bforget(bh);
881         } else {
882                 HDR(bh)->h_refcount = cpu_to_le32(
883                         le32_to_cpu(HDR(bh)->h_refcount) - 1);
884                 mark_buffer_dirty(bh);
885                 if (IS_SYNC(inode))
886                         sync_dirty_buffer(bh);
887                 DQUOT_FREE_BLOCK(inode, 1);
888         }
889         ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
890         unlock_buffer(bh);
891         EXT2_I(inode)->i_file_acl = 0;
892
893 cleanup:
894         brelse(bh);
895         up_write(&EXT2_I(inode)->xattr_sem);
896 }
897
898 /*
899  * ext2_xattr_put_super()
900  *
901  * This is called when a file system is unmounted.
902  */
903 void
904 ext2_xattr_put_super(struct super_block *sb)
905 {
906         mb_cache_shrink(ext2_xattr_cache, sb->s_bdev);
907 }
908
909
910 /*
911  * ext2_xattr_cache_insert()
912  *
913  * Create a new entry in the extended attribute cache, and insert
914  * it unless such an entry is already in the cache.
915  *
916  * Returns 0, or a negative error number on failure.
917  */
918 static int
919 ext2_xattr_cache_insert(struct buffer_head *bh)
920 {
921         __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
922         struct mb_cache_entry *ce;
923         int error;
924
925         ce = mb_cache_entry_alloc(ext2_xattr_cache);
926         if (!ce)
927                 return -ENOMEM;
928         error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
929         if (error) {
930                 mb_cache_entry_free(ce);
931                 if (error == -EBUSY) {
932                         ea_bdebug(bh, "already in cache (%d cache entries)",
933                                 atomic_read(&ext2_xattr_cache->c_entry_count));
934                         error = 0;
935                 }
936         } else {
937                 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
938                           atomic_read(&ext2_xattr_cache->c_entry_count));
939                 mb_cache_entry_release(ce);
940         }
941         return error;
942 }
943
944 /*
945  * ext2_xattr_cmp()
946  *
947  * Compare two extended attribute blocks for equality.
948  *
949  * Returns 0 if the blocks are equal, 1 if they differ, and
950  * a negative error number on errors.
951  */
952 static int
953 ext2_xattr_cmp(struct ext2_xattr_header *header1,
954                struct ext2_xattr_header *header2)
955 {
956         struct ext2_xattr_entry *entry1, *entry2;
957
958         entry1 = ENTRY(header1+1);
959         entry2 = ENTRY(header2+1);
960         while (!IS_LAST_ENTRY(entry1)) {
961                 if (IS_LAST_ENTRY(entry2))
962                         return 1;
963                 if (entry1->e_hash != entry2->e_hash ||
964                     entry1->e_name_len != entry2->e_name_len ||
965                     entry1->e_value_size != entry2->e_value_size ||
966                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
967                         return 1;
968                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
969                         return -EIO;
970                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
971                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
972                            le32_to_cpu(entry1->e_value_size)))
973                         return 1;
974
975                 entry1 = EXT2_XATTR_NEXT(entry1);
976                 entry2 = EXT2_XATTR_NEXT(entry2);
977         }
978         if (!IS_LAST_ENTRY(entry2))
979                 return 1;
980         return 0;
981 }
982
983 /*
984  * ext2_xattr_cache_find()
985  *
986  * Find an identical extended attribute block.
987  *
988  * Returns a locked buffer head to the block found, or NULL if such
989  * a block was not found or an error occurred.
990  */
991 static struct buffer_head *
992 ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
993 {
994         __u32 hash = le32_to_cpu(header->h_hash);
995         struct mb_cache_entry *ce;
996
997         if (!header->h_hash)
998                 return NULL;  /* never share */
999         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1000         ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
1001                                        inode->i_sb->s_bdev, hash);
1002         while (ce) {
1003                 struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
1004
1005                 if (!bh) {
1006                         ext2_error(inode->i_sb, "ext2_xattr_cache_find",
1007                                 "inode %ld: block %ld read error",
1008                                 inode->i_ino, (unsigned long) ce->e_block);
1009                 } else {
1010                         lock_buffer(bh);
1011                         if (le32_to_cpu(HDR(bh)->h_refcount) >
1012                                    EXT2_XATTR_REFCOUNT_MAX) {
1013                                 ea_idebug(inode, "block %ld refcount %d>%d",
1014                                           (unsigned long) ce->e_block,
1015                                           le32_to_cpu(HDR(bh)->h_refcount),
1016                                           EXT2_XATTR_REFCOUNT_MAX);
1017                         } else if (!ext2_xattr_cmp(header, HDR(bh))) {
1018                                 ea_bdebug(bh, "b_count=%d",
1019                                           atomic_read(&(bh->b_count)));
1020                                 mb_cache_entry_release(ce);
1021                                 return bh;
1022                         }
1023                         unlock_buffer(bh);
1024                         brelse(bh);
1025                 }
1026                 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
1027         }
1028         return NULL;
1029 }
1030
1031 /*
1032  * ext2_xattr_cache_remove()
1033  *
1034  * Remove the cache entry of a block from the cache. Called when a
1035  * block becomes invalid.
1036  */
1037 static void
1038 ext2_xattr_cache_remove(struct buffer_head *bh)
1039 {
1040         struct mb_cache_entry *ce;
1041
1042         ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
1043         if (ce) {
1044                 ea_bdebug(bh, "removing (%d cache entries remaining)",
1045                           atomic_read(&ext2_xattr_cache->c_entry_count)-1);
1046                 mb_cache_entry_free(ce);
1047         } else 
1048                 ea_bdebug(bh, "no cache entry");
1049 }
1050
1051 #define NAME_HASH_SHIFT 5
1052 #define VALUE_HASH_SHIFT 16
1053
1054 /*
1055  * ext2_xattr_hash_entry()
1056  *
1057  * Compute the hash of an extended attribute.
1058  */
1059 static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
1060                                          struct ext2_xattr_entry *entry)
1061 {
1062         __u32 hash = 0;
1063         char *name = entry->e_name;
1064         int n;
1065
1066         for (n=0; n < entry->e_name_len; n++) {
1067                 hash = (hash << NAME_HASH_SHIFT) ^
1068                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1069                        *name++;
1070         }
1071
1072         if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1073                 __u32 *value = (__u32 *)((char *)header +
1074                         le16_to_cpu(entry->e_value_offs));
1075                 for (n = (le32_to_cpu(entry->e_value_size) +
1076                      EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
1077                         hash = (hash << VALUE_HASH_SHIFT) ^
1078                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1079                                le32_to_cpu(*value++);
1080                 }
1081         }
1082         entry->e_hash = cpu_to_le32(hash);
1083 }
1084
1085 #undef NAME_HASH_SHIFT
1086 #undef VALUE_HASH_SHIFT
1087
1088 #define BLOCK_HASH_SHIFT 16
1089
1090 /*
1091  * ext2_xattr_rehash()
1092  *
1093  * Re-compute the extended attribute hash value after an entry has changed.
1094  */
1095 static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1096                               struct ext2_xattr_entry *entry)
1097 {
1098         struct ext2_xattr_entry *here;
1099         __u32 hash = 0;
1100         
1101         ext2_xattr_hash_entry(header, entry);
1102         here = ENTRY(header+1);
1103         while (!IS_LAST_ENTRY(here)) {
1104                 if (!here->e_hash) {
1105                         /* Block is not shared if an entry's hash value == 0 */
1106                         hash = 0;
1107                         break;
1108                 }
1109                 hash = (hash << BLOCK_HASH_SHIFT) ^
1110                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1111                        le32_to_cpu(here->e_hash);
1112                 here = EXT2_XATTR_NEXT(here);
1113         }
1114         header->h_hash = cpu_to_le32(hash);
1115 }
1116
1117 #undef BLOCK_HASH_SHIFT
1118
1119 int __init
1120 init_ext2_xattr(void)
1121 {
1122         int     err;
1123         
1124         err = ext2_xattr_register(EXT2_XATTR_INDEX_USER,
1125                                   &ext2_xattr_user_handler);
1126         if (err)
1127                 return err;
1128         err = ext2_xattr_register(EXT2_XATTR_INDEX_TRUSTED,
1129                                   &ext2_xattr_trusted_handler);
1130         if (err)
1131                 goto out;
1132 #ifdef CONFIG_EXT2_FS_SECURITY
1133         err = ext2_xattr_register(EXT2_XATTR_INDEX_SECURITY,
1134                                   &ext2_xattr_security_handler);
1135         if (err)
1136                 goto out1;
1137 #endif
1138 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1139         err = init_ext2_acl();
1140         if (err)
1141                 goto out2;
1142 #endif
1143         ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
1144                 sizeof(struct mb_cache_entry) +
1145                 sizeof(struct mb_cache_entry_index), 1, 6);
1146         if (!ext2_xattr_cache) {
1147                 err = -ENOMEM;
1148                 goto out3;
1149         }
1150         return 0;
1151 out3:
1152 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1153         exit_ext2_acl();
1154 out2:
1155 #endif
1156 #ifdef CONFIG_EXT2_FS_SECURITY
1157         ext2_xattr_unregister(EXT2_XATTR_INDEX_SECURITY,
1158                               &ext2_xattr_security_handler);
1159 out1:
1160 #endif
1161         ext2_xattr_unregister(EXT2_XATTR_INDEX_TRUSTED,
1162                               &ext2_xattr_trusted_handler);
1163 out:
1164         ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
1165                               &ext2_xattr_user_handler);
1166         return err;
1167 }
1168
1169 void
1170 exit_ext2_xattr(void)
1171 {
1172         mb_cache_destroy(ext2_xattr_cache);
1173 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1174         exit_ext2_acl();
1175 #endif
1176 #ifdef CONFIG_EXT2_FS_SECURITY
1177         ext2_xattr_unregister(EXT2_XATTR_INDEX_SECURITY,
1178                               &ext2_xattr_security_handler);
1179 #endif
1180         ext2_xattr_unregister(EXT2_XATTR_INDEX_TRUSTED,
1181                               &ext2_xattr_trusted_handler);
1182         ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
1183                               &ext2_xattr_user_handler);
1184 }