patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / fs / ext2 / xattr.c
1 /*
2  * linux/fs/ext2/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Extended attributes for symlinks and special files added per
8  *  suggestion of Luka Renko <luka.renko@hermes.si>.
9  */
10
11 /*
12  * Extended attributes are stored on disk blocks allocated outside of
13  * any inode. The i_file_acl field is then made to point to this allocated
14  * block. If several inodes have identical sets of extended attributes,
15  * these inodes may share the same extended attribute block. Such situations
16  * are automatically detected by keeping a cache of recent attribute block
17  * numbers and hashes over the block's contents in memory.
18  *
19  *
20  * Extended attribute block layout:
21  *
22  *   +------------------+
23  *   | header           |
24  *   | entry 1          | |
25  *   | entry 2          | | growing downwards
26  *   | entry 3          | v
27  *   | four null bytes  |
28  *   | . . .            |
29  *   | value 1          | ^
30  *   | value 3          | | growing upwards
31  *   | value 2          | |
32  *   +------------------+
33  *
34  * The block header is followed by multiple entry descriptors. These entry
35  * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
36  * byte boundaries. The entry descriptors are sorted by attribute name,
37  * so that two extended attribute blocks can be compared efficiently.
38  *
39  * Attribute values are aligned to the end of the block, stored in
40  * no specific order. They are also padded to EXT2_XATTR_PAD byte
41  * boundaries. No additional gaps are left between them.
42  *
43  * Locking strategy
44  * ----------------
45  * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
46  * EA blocks are only changed if they are exclusive to an inode, so
47  * holding xattr_sem also means that nothing but the EA block's reference
48  * count will change. Multiple writers to an EA block are synchronized
49  * by the bh lock. No more than a single bh lock is held at any time
50  * to avoid deadlocks.
51  */
52
53 #include <linux/buffer_head.h>
54 #include <linux/module.h>
55 #include <linux/init.h>
56 #include <linux/slab.h>
57 #include <linux/mbcache.h>
58 #include <linux/quotaops.h>
59 #include <linux/rwsem.h>
60 #include <linux/vs_dlimit.h>
61 #include "ext2.h"
62 #include "xattr.h"
63 #include "acl.h"
64
65 /*
 * The extended attribute entry points below are exported because they may
 * be needed by a module: handler registration/unregistration plus the
 * get/list/set primitives.
 */
66 EXPORT_SYMBOL(ext2_xattr_register);
67 EXPORT_SYMBOL(ext2_xattr_unregister);
68 EXPORT_SYMBOL(ext2_xattr_get);
69 EXPORT_SYMBOL(ext2_xattr_list);
70 EXPORT_SYMBOL(ext2_xattr_set);
71
/* View the data of buffer head bh as an extended attribute block header. */
72 #define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
/* View an arbitrary pointer as an extended attribute entry descriptor. */
73 #define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
/* The first entry descriptor starts immediately after the block header. */
74 #define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
/* The entry list ends where the first 32 bits of a descriptor are zero. */
75 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
76
/*
 * Debug helpers. With EXT2_XATTR_DEBUG defined, ea_idebug() prefixes the
 * message with the inode's device id and inode number, and ea_bdebug()
 * prefixes it with the block device name and block number. Without
 * EXT2_XATTR_DEBUG both macros expand to nothing.
 */
77 #ifdef EXT2_XATTR_DEBUG
78 # define ea_idebug(inode, f...) do { \
79                 printk(KERN_DEBUG "inode %s:%ld: ", \
80                         inode->i_sb->s_id, inode->i_ino); \
81                 printk(f); \
82                 printk("\n"); \
83         } while (0)
84 # define ea_bdebug(bh, f...) do { \
85                 char b[BDEVNAME_SIZE]; \
86                 printk(KERN_DEBUG "block %s:%lu: ", \
87                         bdevname(bh->b_bdev, b), \
88                         (unsigned long) bh->b_blocknr); \
89                 printk(f); \
90                 printk("\n"); \
91         } while (0)
92 #else
93 # define ea_idebug(f...)
94 # define ea_bdebug(f...)
95 #endif
96
/* Second half of ext2_xattr_set(); defined further down in this file. */
97 static int ext2_xattr_set2(struct inode *, struct buffer_head *,
98                            struct ext2_xattr_header *);
99
/* Helpers for the attribute block cache and for entry/block hashing. */
100 static int ext2_xattr_cache_insert(struct buffer_head *);
101 static struct buffer_head *ext2_xattr_cache_find(struct inode *,
102                                                  struct ext2_xattr_header *);
103 static void ext2_xattr_cache_remove(struct buffer_head *);
104 static void ext2_xattr_rehash(struct ext2_xattr_header *,
105                               struct ext2_xattr_entry *);
106
/* NOTE(review): presumably the cache of shareable EA blocks -- confirm. */
107 static struct mb_cache *ext2_xattr_cache;
/* Registered handlers; the handler for name_index N lives in slot N-1. */
108 static struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX];
/* Taken for reading/writing around every access to ext2_xattr_handlers[]. */
109 static rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED;
110
111 int
112 ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler)
113 {
114         int error = -EINVAL;
115
116         if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
117                 write_lock(&ext2_handler_lock);
118                 if (!ext2_xattr_handlers[name_index-1]) {
119                         ext2_xattr_handlers[name_index-1] = handler;
120                         error = 0;
121                 }
122                 write_unlock(&ext2_handler_lock);
123         }
124         return error;
125 }
126
127 void
128 ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler)
129 {
130         if (name_index > 0 || name_index <= EXT2_XATTR_INDEX_MAX) {
131                 write_lock(&ext2_handler_lock);
132                 ext2_xattr_handlers[name_index-1] = NULL;
133                 write_unlock(&ext2_handler_lock);
134         }
135 }
136
/*
 * If string a starts with a_prefix, return a pointer to the first character
 * of a following the prefix; otherwise return NULL. An empty prefix matches
 * any string and yields a itself.
 */
static inline const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
        for (; *a_prefix; a++, a_prefix++) {
                if (*a != *a_prefix)
                        return NULL;
        }
        return a;
}
146
147 /*
148  * Decode the extended attribute name, and translate it into
149  * the name_index and name suffix.
150  */
151 static struct ext2_xattr_handler *
152 ext2_xattr_resolve_name(const char **name)
153 {
154         struct ext2_xattr_handler *handler = NULL;
155         int i;
156
157         if (!*name)
158                 return NULL;
159         read_lock(&ext2_handler_lock);
160         for (i=0; i<EXT2_XATTR_INDEX_MAX; i++) {
161                 if (ext2_xattr_handlers[i]) {
162                         const char *n = strcmp_prefix(*name,
163                                 ext2_xattr_handlers[i]->prefix);
164                         if (n) {
165                                 handler = ext2_xattr_handlers[i];
166                                 *name = n;
167                                 break;
168                         }
169                 }
170         }
171         read_unlock(&ext2_handler_lock);
172         return handler;
173 }
174
175 static inline struct ext2_xattr_handler *
176 ext2_xattr_handler(int name_index)
177 {
178         struct ext2_xattr_handler *handler = NULL;
179         if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
180                 read_lock(&ext2_handler_lock);
181                 handler = ext2_xattr_handlers[name_index-1];
182                 read_unlock(&ext2_handler_lock);
183         }
184         return handler;
185 }
186
187 /*
188  * Inode operation getxattr()
189  *
190  * dentry->d_inode->i_sem: don't care
191  */
192 ssize_t
193 ext2_getxattr(struct dentry *dentry, const char *name,
194               void *buffer, size_t size)
195 {
196         struct ext2_xattr_handler *handler;
197         struct inode *inode = dentry->d_inode;
198
199         handler = ext2_xattr_resolve_name(&name);
200         if (!handler)
201                 return -EOPNOTSUPP;
202         return handler->get(inode, name, buffer, size);
203 }
204
205 /*
206  * Inode operation listxattr()
207  *
208  * dentry->d_inode->i_sem: don't care
209  */
210 ssize_t
211 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
212 {
213         return ext2_xattr_list(dentry->d_inode, buffer, size);
214 }
215
216 /*
217  * Inode operation setxattr()
218  *
219  * dentry->d_inode->i_sem: down
220  */
221 int
222 ext2_setxattr(struct dentry *dentry, const char *name,
223               const void *value, size_t size, int flags)
224 {
225         struct ext2_xattr_handler *handler;
226         struct inode *inode = dentry->d_inode;
227
228         if (size == 0)
229                 value = "";  /* empty EA, do not remove */
230         handler = ext2_xattr_resolve_name(&name);
231         if (!handler)
232                 return -EOPNOTSUPP;
233         return handler->set(inode, name, value, size, flags);
234 }
235
236 /*
237  * Inode operation removexattr()
238  *
239  * dentry->d_inode->i_sem: down
240  */
241 int
242 ext2_removexattr(struct dentry *dentry, const char *name)
243 {
244         struct ext2_xattr_handler *handler;
245         struct inode *inode = dentry->d_inode;
246
247         handler = ext2_xattr_resolve_name(&name);
248         if (!handler)
249                 return -EOPNOTSUPP;
250         return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
251 }
252
253 /*
254  * ext2_xattr_get()
255  *
256  * Copy an extended attribute into the buffer
257  * provided, or compute the buffer size required.
258  * Buffer is NULL to compute the size of the buffer required.
259  *
260  * Returns a negative error number on failure, or the number of bytes
261  * used / required on success.
262  */
263 int
264 ext2_xattr_get(struct inode *inode, int name_index, const char *name,
265                void *buffer, size_t buffer_size)
266 {
267         struct buffer_head *bh = NULL;
268         struct ext2_xattr_entry *entry;
269         size_t name_len, size;
270         char *end;
271         int error;
272
273         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
274                   name_index, name, buffer, (long)buffer_size);
275
276         if (name == NULL)
277                 return -EINVAL;
278         down_read(&EXT2_I(inode)->xattr_sem);
279         error = -ENODATA;
280         if (!EXT2_I(inode)->i_file_acl)
281                 goto cleanup;
282         ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
283         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
284         error = -EIO;
285         if (!bh)
286                 goto cleanup;
287         ea_bdebug(bh, "b_count=%d, refcount=%d",
288                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
289         end = bh->b_data + bh->b_size;
290         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
291             HDR(bh)->h_blocks != cpu_to_le32(1)) {
292 bad_block:      ext2_error(inode->i_sb, "ext2_xattr_get",
293                         "inode %ld: bad block %d", inode->i_ino,
294                         EXT2_I(inode)->i_file_acl);
295                 error = -EIO;
296                 goto cleanup;
297         }
298         /* find named attribute */
299         name_len = strlen(name);
300
301         error = -ERANGE;
302         if (name_len > 255)
303                 goto cleanup;
304         entry = FIRST_ENTRY(bh);
305         while (!IS_LAST_ENTRY(entry)) {
306                 struct ext2_xattr_entry *next =
307                         EXT2_XATTR_NEXT(entry);
308                 if ((char *)next >= end)
309                         goto bad_block;
310                 if (name_index == entry->e_name_index &&
311                     name_len == entry->e_name_len &&
312                     memcmp(name, entry->e_name, name_len) == 0)
313                         goto found;
314                 entry = next;
315         }
316         /* Check the remaining name entries */
317         while (!IS_LAST_ENTRY(entry)) {
318                 struct ext2_xattr_entry *next =
319                         EXT2_XATTR_NEXT(entry);
320                 if ((char *)next >= end)
321                         goto bad_block;
322                 entry = next;
323         }
324         if (ext2_xattr_cache_insert(bh))
325                 ea_idebug(inode, "cache insert failed");
326         error = -ENODATA;
327         goto cleanup;
328 found:
329         /* check the buffer size */
330         if (entry->e_value_block != 0)
331                 goto bad_block;
332         size = le32_to_cpu(entry->e_value_size);
333         if (size > inode->i_sb->s_blocksize ||
334             le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
335                 goto bad_block;
336
337         if (ext2_xattr_cache_insert(bh))
338                 ea_idebug(inode, "cache insert failed");
339         if (buffer) {
340                 error = -ERANGE;
341                 if (size > buffer_size)
342                         goto cleanup;
343                 /* return value of attribute */
344                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
345                         size);
346         }
347         error = size;
348
349 cleanup:
350         brelse(bh);
351         up_read(&EXT2_I(inode)->xattr_sem);
352
353         return error;
354 }
355
356 /*
357  * ext2_xattr_list()
358  *
359  * Copy a list of attribute names into the buffer
360  * provided, or compute the buffer size required.
361  * Buffer is NULL to compute the size of the buffer required.
362  *
363  * Returns a negative error number on failure, or the number of bytes
364  * used / required on success.
365  */
366 int
367 ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
368 {
369         struct buffer_head *bh = NULL;
370         struct ext2_xattr_entry *entry;
371         size_t size = 0;
372         char *buf, *end;
373         int error;
374
375         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
376                   buffer, (long)buffer_size);
377
378         down_read(&EXT2_I(inode)->xattr_sem);
379         error = 0;
380         if (!EXT2_I(inode)->i_file_acl)
381                 goto cleanup;
382         ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
383         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
384         error = -EIO;
385         if (!bh)
386                 goto cleanup;
387         ea_bdebug(bh, "b_count=%d, refcount=%d",
388                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
389         end = bh->b_data + bh->b_size;
390         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
391             HDR(bh)->h_blocks != cpu_to_le32(1)) {
392 bad_block:      ext2_error(inode->i_sb, "ext2_xattr_list",
393                         "inode %ld: bad block %d", inode->i_ino,
394                         EXT2_I(inode)->i_file_acl);
395                 error = -EIO;
396                 goto cleanup;
397         }
398         /* compute the size required for the list of attribute names */
399         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
400              entry = EXT2_XATTR_NEXT(entry)) {
401                 struct ext2_xattr_handler *handler;
402                 struct ext2_xattr_entry *next =
403                         EXT2_XATTR_NEXT(entry);
404                 if ((char *)next >= end)
405                         goto bad_block;
406
407                 handler = ext2_xattr_handler(entry->e_name_index);
408                 if (handler)
409                         size += handler->list(NULL, inode, entry->e_name,
410                                               entry->e_name_len);
411         }
412
413         if (ext2_xattr_cache_insert(bh))
414                 ea_idebug(inode, "cache insert failed");
415         if (!buffer) {
416                 error = size;
417                 goto cleanup;
418         } else {
419                 error = -ERANGE;
420                 if (size > buffer_size)
421                         goto cleanup;
422         }
423
424         /* list the attribute names */
425         buf = buffer;
426         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
427              entry = EXT2_XATTR_NEXT(entry)) {
428                 struct ext2_xattr_handler *handler;
429                 
430                 handler = ext2_xattr_handler(entry->e_name_index);
431                 if (handler)
432                         buf += handler->list(buf, inode, entry->e_name,
433                                              entry->e_name_len);
434         }
435         error = size;
436
437 cleanup:
438         brelse(bh);
439         up_read(&EXT2_I(inode)->xattr_sem);
440
441         return error;
442 }
443
444 /*
445  * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
446  * not set, set it.
447  */
448 static void ext2_xattr_update_super_block(struct super_block *sb)
449 {
450         if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
451                 return;
452
453         lock_super(sb);
454         EXT2_SB(sb)->s_es->s_feature_compat |=
455                 cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
456         sb->s_dirt = 1;
457         mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
458         unlock_super(sb);
459 }
460
461 /*
462  * ext2_xattr_set()
463  *
464  * Create, replace or remove an extended attribute for this inode. Buffer
465  * is NULL to remove an existing extended attribute, and non-NULL to
466  * either replace an existing extended attribute, or create a new extended
467  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
468  * specify that an extended attribute must exist and must not exist
469  * previous to the call, respectively.
470  *
471  * Returns 0, or a negative error number on failure.
 *
 * Error numbers produced directly by this function:
 *   -EROFS    IS_RDONLY(inode)
 *   -EPERM    the inode is immutable or append-only
 *   -EINVAL   name is NULL
 *   -ERANGE   name longer than 255 bytes, or value larger than a block
 *   -EIO      the existing attribute block is unreadable or malformed
 *   -ENODATA  XATTR_REPLACE given but the attribute does not exist
 *   -EEXIST   XATTR_CREATE given but the attribute already exists
 *   -ENOSPC   the new entry and value do not fit into the block
 *   -ENOMEM   no memory for the temporary block copy
 * Otherwise the result of ext2_xattr_set2() is returned.
 *
 * The inode's xattr_sem is held for writing for the whole operation.
472  */
473 int
474 ext2_xattr_set(struct inode *inode, int name_index, const char *name,
475                const void *value, size_t value_len, int flags)
476 {
477         struct super_block *sb = inode->i_sb;
478         struct buffer_head *bh = NULL;
479         struct ext2_xattr_header *header = NULL;
480         struct ext2_xattr_entry *here, *last;
481         size_t name_len, free, min_offs = sb->s_blocksize;
482         int not_found = 1, error;
483         char *end;
484         
485         /*
486          * header -- Points either into bh, or to a temporarily
487          *           allocated buffer.
488          * here -- The named entry found, or the place for inserting, within
489          *         the block pointed to by header.
490          * last -- Points right after the last named entry within the block
491          *         pointed to by header.
492          * min_offs -- The offset of the first value (values are aligned
493          *             towards the end of the block).
494          * end -- Points right after the block pointed to by header.
495          */
496         
497         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
498                   name_index, name, value, (long)value_len);
499
500         if (IS_RDONLY(inode))
501                 return -EROFS;
502         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
503                 return -EPERM;
504         if (value == NULL)
505                 value_len = 0;
506         if (name == NULL)
507                 return -EINVAL;
508         name_len = strlen(name);
509         if (name_len > 255 || value_len > sb->s_blocksize)
510                 return -ERANGE;
511         down_write(&EXT2_I(inode)->xattr_sem);
512         if (EXT2_I(inode)->i_file_acl) {
513                 /* The inode already has an extended attribute block. */
514                 bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
515                 error = -EIO;
516                 if (!bh)
517                         goto cleanup;
518                 ea_bdebug(bh, "b_count=%d, refcount=%d",
519                         atomic_read(&(bh->b_count)),
520                         le32_to_cpu(HDR(bh)->h_refcount));
521                 header = HDR(bh);
522                 end = bh->b_data + bh->b_size;
523                 if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
524                     header->h_blocks != cpu_to_le32(1)) {
525 bad_block:              ext2_error(sb, "ext2_xattr_set",
526                                 "inode %ld: bad block %d", inode->i_ino, 
527                                    EXT2_I(inode)->i_file_acl);
528                         error = -EIO;
529                         goto cleanup;
530                 }
531                 /* Find the named attribute. */
532                 here = FIRST_ENTRY(bh);
533                 while (!IS_LAST_ENTRY(here)) {
534                         struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
535                         if ((char *)next >= end)
536                                 goto bad_block;
537                         if (!here->e_value_block && here->e_value_size) {
538                                 size_t offs = le16_to_cpu(here->e_value_offs);
539                                 if (offs < min_offs)
540                                         min_offs = offs;
541                         }
                        /*
                         * Three-way compare on name index, then name length,
                         * then name bytes. Stop at the first entry that does
                         * not sort before the requested name: either the
                         * match (0) or the insertion point (< 0).
                         */
542                         not_found = name_index - here->e_name_index;
543                         if (!not_found)
544                                 not_found = name_len - here->e_name_len;
545                         if (!not_found)
546                                 not_found = memcmp(name, here->e_name,name_len);
547                         if (not_found <= 0)
548                                 break;
549                         here = next;
550                 }
551                 last = here;
552                 /* We still need to compute min_offs and last. */
553                 while (!IS_LAST_ENTRY(last)) {
554                         struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
555                         if ((char *)next >= end)
556                                 goto bad_block;
557                         if (!last->e_value_block && last->e_value_size) {
558                                 size_t offs = le16_to_cpu(last->e_value_offs);
559                                 if (offs < min_offs)
560                                         min_offs = offs;
561                         }
562                         last = next;
563                 }
564
565                 /* Check whether we have enough space left. */
566                 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
567         } else {
568                 /* We will use a new extended attribute block. */
569                 free = sb->s_blocksize -
570                         sizeof(struct ext2_xattr_header) - sizeof(__u32);
571                 here = last = NULL;  /* avoid gcc uninitialized warning. */
572         }
573
        /* not_found keeps its initial value 1 when there was no block. */
574         if (not_found) {
575                 /* Request to remove a nonexistent attribute? */
576                 error = -ENODATA;
577                 if (flags & XATTR_REPLACE)
578                         goto cleanup;
579                 error = 0;
580                 if (value == NULL)
581                         goto cleanup;
582         } else {
583                 /* Request to create an existing attribute? */
584                 error = -EEXIST;
585                 if (flags & XATTR_CREATE)
586                         goto cleanup;
                /*
                 * The space taken by the existing value and name entry is
                 * counted as free for the size check below.
                 */
587                 if (!here->e_value_block && here->e_value_size) {
588                         size_t size = le32_to_cpu(here->e_value_size);
589
590                         if (le16_to_cpu(here->e_value_offs) + size > 
591                             sb->s_blocksize || size > sb->s_blocksize)
592                                 goto bad_block;
593                         free += EXT2_XATTR_SIZE(size);
594                 }
595                 free += EXT2_XATTR_LEN(name_len);
596         }
597         error = -ENOSPC;
598         if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
599                 goto cleanup;
600
601         /* Here we know that we can set the new attribute. */
602
603         if (header) {
604                 /* assert(header == HDR(bh)); */
605                 lock_buffer(bh);
                /* A reference count of one means no other inode uses it. */
606                 if (header->h_refcount == cpu_to_le32(1)) {
607                         ea_bdebug(bh, "modifying in-place");
608                         ext2_xattr_cache_remove(bh);
609                         /* keep the buffer locked while modifying it. */
610                 } else {
611                         int offset;
612
613                         unlock_buffer(bh);
614                         ea_bdebug(bh, "cloning");
615                         header = kmalloc(bh->b_size, GFP_KERNEL);
616                         error = -ENOMEM;
617                         if (header == NULL)
618                                 goto cleanup;
619                         memcpy(header, HDR(bh), bh->b_size);
620                         header->h_refcount = cpu_to_le32(1);
621
                        /* Re-point here/last at the same offsets in the copy. */
622                         offset = (char *)here - bh->b_data;
623                         here = ENTRY((char *)header + offset);
624                         offset = (char *)last - bh->b_data;
625                         last = ENTRY((char *)header + offset);
626                 }
627         } else {
628                 /* Allocate a buffer where we construct the new block. */
629                 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
630                 error = -ENOMEM;
631                 if (header == NULL)
632                         goto cleanup;
633                 memset(header, 0, sb->s_blocksize);
634                 end = (char *)header + sb->s_blocksize;
635                 header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
636                 header->h_blocks = header->h_refcount = cpu_to_le32(1);
637                 last = here = ENTRY(header+1);
638         }
639
640         /* Iff we are modifying the block in-place, bh is locked here. */
641
642         if (not_found) {
643                 /* Insert the new name. */
644                 size_t size = EXT2_XATTR_LEN(name_len);
645                 size_t rest = (char *)last - (char *)here;
                /* Shift the following entries up to open a gap at here. */
646                 memmove((char *)here + size, here, rest);
647                 memset(here, 0, size);
648                 here->e_name_index = name_index;
649                 here->e_name_len = name_len;
650                 memcpy(here->e_name, name, name_len);
651         } else {
652                 if (!here->e_value_block && here->e_value_size) {
653                         char *first_val = (char *)header + min_offs;
654                         size_t offs = le16_to_cpu(here->e_value_offs);
655                         char *val = (char *)header + offs;
656                         size_t size = EXT2_XATTR_SIZE(
657                                 le32_to_cpu(here->e_value_size));
658
659                         if (size == EXT2_XATTR_SIZE(value_len)) {
660                                 /* The old and the new value have the same
661                                    size. Just replace. */
662                                 here->e_value_size = cpu_to_le32(value_len);
663                                 memset(val + size - EXT2_XATTR_PAD, 0,
664                                        EXT2_XATTR_PAD); /* Clear pad bytes. */
665                                 memcpy(val, value, value_len);
666                                 goto skip_replace;
667                         }
668
669                         /* Remove the old value. */
670                         memmove(first_val + size, first_val, val - first_val);
671                         memset(first_val, 0, size);
672                         here->e_value_offs = 0;
673                         min_offs += size;
674
675                         /* Adjust all value offsets. */
676                         last = ENTRY(header+1);
677                         while (!IS_LAST_ENTRY(last)) {
678                                 size_t o = le16_to_cpu(last->e_value_offs);
679                                 if (!last->e_value_block && o < offs)
680                                         last->e_value_offs =
681                                                 cpu_to_le16(o + size);
682                                 last = EXT2_XATTR_NEXT(last);
683                         }
684                 }
685                 if (value == NULL) {
686                         /* Remove the old name. */
687                         size_t size = EXT2_XATTR_LEN(name_len);
688                         last = ENTRY((char *)last - size);
689                         memmove(here, (char*)here + size,
690                                 (char*)last - (char*)here);
691                         memset(last, 0, size);
692                 }
693         }
694
695         if (value != NULL) {
696                 /* Insert the new value. */
697                 here->e_value_size = cpu_to_le32(value_len);
698                 if (value_len) {
699                         size_t size = EXT2_XATTR_SIZE(value_len);
                        /* New values go directly below the lowest one. */
700                         char *val = (char *)header + min_offs - size;
701                         here->e_value_offs =
702                                 cpu_to_le16((char *)val - (char *)header);
703                         memset(val + size - EXT2_XATTR_PAD, 0,
704                                EXT2_XATTR_PAD); /* Clear the pad bytes. */
705                         memcpy(val, value, value_len);
706                 }
707         }
708
709 skip_replace:
710         if (IS_LAST_ENTRY(ENTRY(header+1))) {
711                 /* This block is now empty. */
712                 if (bh && header == HDR(bh))
713                         unlock_buffer(bh);  /* we were modifying in-place. */
714                 error = ext2_xattr_set2(inode, bh, NULL);
715         } else {
716                 ext2_xattr_rehash(header, here);
717                 if (bh && header == HDR(bh))
718                         unlock_buffer(bh);  /* we were modifying in-place. */
719                 error = ext2_xattr_set2(inode, bh, header);
720         }
721
722 cleanup:
723         brelse(bh);
        /* header is a kmalloc'ed buffer (or NULL) unless it points into bh. */
724         if (!(bh && header == HDR(bh)))
725                 kfree(header);
726         up_write(&EXT2_I(inode)->xattr_sem);
727
728         return error;
729 }
730
731 /*
732  * Second half of ext2_xattr_set(): Update the file system.
733  */
734 static int
735 ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
736                 struct ext2_xattr_header *header)
737 {
738         struct super_block *sb = inode->i_sb;
739         struct buffer_head *new_bh = NULL;
740         int error;
741
742         if (header) {
743                 new_bh = ext2_xattr_cache_find(inode, header);
744                 if (new_bh) {
745                         /* We found an identical block in the cache. */
746                         if (new_bh == old_bh) {
747                                 ea_bdebug(new_bh, "keeping this block");
748                         } else {
749                                 /* The old block is released after updating
750                                    the inode.  */
751                                 ea_bdebug(new_bh, "reusing block");
752
753                                 error = -ENOSPC;
754                                 if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
755                                         goto cleanup;
756                                 error = -EDQUOT;
757                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
758                                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
759                                         unlock_buffer(new_bh);
760                                         goto cleanup;
761                                 }
762                                 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
763                                         le32_to_cpu(HDR(new_bh)->h_refcount));
764                                 ea_bdebug(new_bh, "refcount now=%d",
765                                         le32_to_cpu(HDR(new_bh)->h_refcount));
766                         }
767                         unlock_buffer(new_bh);
768                 } else if (old_bh && header == HDR(old_bh)) {
769                         /* Keep this block. No need to lock the block as we
770                            don't need to change the reference count. */
771                         new_bh = old_bh;
772                         get_bh(new_bh);
773                         ext2_xattr_cache_insert(new_bh);
774                 } else {
775                         /* We need to allocate a new block */
776                         int goal = le32_to_cpu(EXT2_SB(sb)->s_es->
777                                                            s_first_data_block) +
778                                    EXT2_I(inode)->i_block_group *
779                                    EXT2_BLOCKS_PER_GROUP(sb);
780                         int block = ext2_new_block(inode, goal, 0, 0, &error);
781                         if (error)
782                                 goto cleanup;
783                         ea_idebug(inode, "creating block %d", block);
784
785                         new_bh = sb_getblk(sb, block);
786                         if (!new_bh) {
787                                 ext2_free_blocks(inode, block, 1);
788                                 error = -EIO;
789                                 goto cleanup;
790                         }
791                         lock_buffer(new_bh);
792                         memcpy(new_bh->b_data, header, new_bh->b_size);
793                         set_buffer_uptodate(new_bh);
794                         unlock_buffer(new_bh);
795                         ext2_xattr_cache_insert(new_bh);
796                         
797                         ext2_xattr_update_super_block(sb);
798                 }
799                 mark_buffer_dirty(new_bh);
800                 if (IS_SYNC(inode)) {
801                         sync_dirty_buffer(new_bh);
802                         error = -EIO;
803                         if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
804                                 goto cleanup;
805                 }
806         }
807
808         /* Update the inode. */
809         EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
810         inode->i_ctime = CURRENT_TIME;
811         if (IS_SYNC(inode)) {
812                 error = ext2_sync_inode (inode);
813                 if (error)
814                         goto cleanup;
815         } else
816                 mark_inode_dirty(inode);
817
818         error = 0;
819         if (old_bh && old_bh != new_bh) {
820                 /*
821                  * If there was an old block and we are no longer using it,
822                  * release the old block.
823                  */
824                 lock_buffer(old_bh);
825                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
826                         /* Free the old block. */
827                         ea_bdebug(old_bh, "freeing");
828                         ext2_free_blocks(inode, old_bh->b_blocknr, 1);
829                         /* We let our caller release old_bh, so we
830                          * need to duplicate the buffer before. */
831                         get_bh(old_bh);
832                         bforget(old_bh);
833                 } else {
834                         /* Decrement the refcount only. */
835                         HDR(old_bh)->h_refcount = cpu_to_le32(
836                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
837                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
838                         DQUOT_FREE_BLOCK(inode, 1);
839                         mark_buffer_dirty(old_bh);
840                         ea_bdebug(old_bh, "refcount now=%d",
841                                 le32_to_cpu(HDR(old_bh)->h_refcount));
842                 }
843                 unlock_buffer(old_bh);
844         }
845
846 cleanup:
847         brelse(new_bh);
848
849         return error;
850 }
851
852 /*
853  * ext2_xattr_delete_inode()
854  *
855  * Free extended attribute resources associated with this inode. This
856  * is called immediately before an inode is freed.
857  */
858 void
859 ext2_xattr_delete_inode(struct inode *inode)
860 {
861         struct buffer_head *bh = NULL;
862
863         down_write(&EXT2_I(inode)->xattr_sem);
864         if (!EXT2_I(inode)->i_file_acl)
865                 goto cleanup;
866         bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
867         if (!bh) {
868                 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
869                         "inode %ld: block %d read error", inode->i_ino,
870                         EXT2_I(inode)->i_file_acl);
871                 goto cleanup;
872         }
873         ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
874         if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
875             HDR(bh)->h_blocks != cpu_to_le32(1)) {
876                 ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
877                         "inode %ld: bad block %d", inode->i_ino,
878                         EXT2_I(inode)->i_file_acl);
879                 goto cleanup;
880         }
881         lock_buffer(bh);
882         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
883                 ext2_xattr_cache_remove(bh);
884                 ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
885                 get_bh(bh);
886                 bforget(bh);
887         } else {
888                 HDR(bh)->h_refcount = cpu_to_le32(
889                         le32_to_cpu(HDR(bh)->h_refcount) - 1);
890                 mark_buffer_dirty(bh);
891                 if (IS_SYNC(inode))
892                         sync_dirty_buffer(bh);
893                 DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1);
894                 DQUOT_FREE_BLOCK(inode, 1);
895         }
896         ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
897         unlock_buffer(bh);
898         EXT2_I(inode)->i_file_acl = 0;
899
900 cleanup:
901         brelse(bh);
902         up_write(&EXT2_I(inode)->xattr_sem);
903 }
904
905 /*
906  * ext2_xattr_put_super()
907  *
908  * This is called when a file system is unmounted.
909  */
910 void
911 ext2_xattr_put_super(struct super_block *sb)
912 {
913         mb_cache_shrink(ext2_xattr_cache, sb->s_bdev);
914 }
915
916
917 /*
918  * ext2_xattr_cache_insert()
919  *
920  * Create a new entry in the extended attribute cache, and insert
921  * it unless such an entry is already in the cache.
922  *
923  * Returns 0, or a negative error number on failure.
924  */
925 static int
926 ext2_xattr_cache_insert(struct buffer_head *bh)
927 {
928         __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
929         struct mb_cache_entry *ce;
930         int error;
931
932         ce = mb_cache_entry_alloc(ext2_xattr_cache);
933         if (!ce)
934                 return -ENOMEM;
935         error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
936         if (error) {
937                 mb_cache_entry_free(ce);
938                 if (error == -EBUSY) {
939                         ea_bdebug(bh, "already in cache (%d cache entries)",
940                                 atomic_read(&ext2_xattr_cache->c_entry_count));
941                         error = 0;
942                 }
943         } else {
944                 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
945                           atomic_read(&ext2_xattr_cache->c_entry_count));
946                 mb_cache_entry_release(ce);
947         }
948         return error;
949 }
950
951 /*
952  * ext2_xattr_cmp()
953  *
954  * Compare two extended attribute blocks for equality.
955  *
956  * Returns 0 if the blocks are equal, 1 if they differ, and
957  * a negative error number on errors.
958  */
959 static int
960 ext2_xattr_cmp(struct ext2_xattr_header *header1,
961                struct ext2_xattr_header *header2)
962 {
963         struct ext2_xattr_entry *entry1, *entry2;
964
965         entry1 = ENTRY(header1+1);
966         entry2 = ENTRY(header2+1);
967         while (!IS_LAST_ENTRY(entry1)) {
968                 if (IS_LAST_ENTRY(entry2))
969                         return 1;
970                 if (entry1->e_hash != entry2->e_hash ||
971                     entry1->e_name_len != entry2->e_name_len ||
972                     entry1->e_value_size != entry2->e_value_size ||
973                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
974                         return 1;
975                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
976                         return -EIO;
977                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
978                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
979                            le32_to_cpu(entry1->e_value_size)))
980                         return 1;
981
982                 entry1 = EXT2_XATTR_NEXT(entry1);
983                 entry2 = EXT2_XATTR_NEXT(entry2);
984         }
985         if (!IS_LAST_ENTRY(entry2))
986                 return 1;
987         return 0;
988 }
989
990 /*
991  * ext2_xattr_cache_find()
992  *
993  * Find an identical extended attribute block.
994  *
995  * Returns a locked buffer head to the block found, or NULL if such
996  * a block was not found or an error occurred.
997  */
998 static struct buffer_head *
999 ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
1000 {
1001         __u32 hash = le32_to_cpu(header->h_hash);
1002         struct mb_cache_entry *ce;
1003
1004         if (!header->h_hash)
1005                 return NULL;  /* never share */
1006         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1007         ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
1008                                        inode->i_sb->s_bdev, hash);
1009         while (ce) {
1010                 struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
1011
1012                 if (!bh) {
1013                         ext2_error(inode->i_sb, "ext2_xattr_cache_find",
1014                                 "inode %ld: block %ld read error",
1015                                 inode->i_ino, (unsigned long) ce->e_block);
1016                 } else {
1017                         lock_buffer(bh);
1018                         if (le32_to_cpu(HDR(bh)->h_refcount) >
1019                                    EXT2_XATTR_REFCOUNT_MAX) {
1020                                 ea_idebug(inode, "block %ld refcount %d>%d",
1021                                           (unsigned long) ce->e_block,
1022                                           le32_to_cpu(HDR(bh)->h_refcount),
1023                                           EXT2_XATTR_REFCOUNT_MAX);
1024                         } else if (!ext2_xattr_cmp(header, HDR(bh))) {
1025                                 ea_bdebug(bh, "b_count=%d",
1026                                           atomic_read(&(bh->b_count)));
1027                                 mb_cache_entry_release(ce);
1028                                 return bh;
1029                         }
1030                         unlock_buffer(bh);
1031                         brelse(bh);
1032                 }
1033                 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
1034         }
1035         return NULL;
1036 }
1037
1038 /*
1039  * ext2_xattr_cache_remove()
1040  *
1041  * Remove the cache entry of a block from the cache. Called when a
1042  * block becomes invalid.
1043  */
1044 static void
1045 ext2_xattr_cache_remove(struct buffer_head *bh)
1046 {
1047         struct mb_cache_entry *ce;
1048
1049         ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
1050         if (ce) {
1051                 ea_bdebug(bh, "removing (%d cache entries remaining)",
1052                           atomic_read(&ext2_xattr_cache->c_entry_count)-1);
1053                 mb_cache_entry_free(ce);
1054         } else 
1055                 ea_bdebug(bh, "no cache entry");
1056 }
1057
1058 #define NAME_HASH_SHIFT 5
1059 #define VALUE_HASH_SHIFT 16
1060
1061 /*
1062  * ext2_xattr_hash_entry()
1063  *
1064  * Compute the hash of an extended attribute.
1065  */
1066 static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
1067                                          struct ext2_xattr_entry *entry)
1068 {
1069         __u32 hash = 0;
1070         char *name = entry->e_name;
1071         int n;
1072
1073         for (n=0; n < entry->e_name_len; n++) {
1074                 hash = (hash << NAME_HASH_SHIFT) ^
1075                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1076                        *name++;
1077         }
1078
1079         if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1080                 __u32 *value = (__u32 *)((char *)header +
1081                         le16_to_cpu(entry->e_value_offs));
1082                 for (n = (le32_to_cpu(entry->e_value_size) +
1083                      EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
1084                         hash = (hash << VALUE_HASH_SHIFT) ^
1085                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1086                                le32_to_cpu(*value++);
1087                 }
1088         }
1089         entry->e_hash = cpu_to_le32(hash);
1090 }
1091
1092 #undef NAME_HASH_SHIFT
1093 #undef VALUE_HASH_SHIFT
1094
1095 #define BLOCK_HASH_SHIFT 16
1096
1097 /*
1098  * ext2_xattr_rehash()
1099  *
1100  * Re-compute the extended attribute hash value after an entry has changed.
1101  */
1102 static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1103                               struct ext2_xattr_entry *entry)
1104 {
1105         struct ext2_xattr_entry *here;
1106         __u32 hash = 0;
1107         
1108         ext2_xattr_hash_entry(header, entry);
1109         here = ENTRY(header+1);
1110         while (!IS_LAST_ENTRY(here)) {
1111                 if (!here->e_hash) {
1112                         /* Block is not shared if an entry's hash value == 0 */
1113                         hash = 0;
1114                         break;
1115                 }
1116                 hash = (hash << BLOCK_HASH_SHIFT) ^
1117                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1118                        le32_to_cpu(here->e_hash);
1119                 here = EXT2_XATTR_NEXT(here);
1120         }
1121         header->h_hash = cpu_to_le32(hash);
1122 }
1123
1124 #undef BLOCK_HASH_SHIFT
1125
1126 int __init
1127 init_ext2_xattr(void)
1128 {
1129         int     err;
1130         
1131         err = ext2_xattr_register(EXT2_XATTR_INDEX_USER,
1132                                   &ext2_xattr_user_handler);
1133         if (err)
1134                 return err;
1135         err = ext2_xattr_register(EXT2_XATTR_INDEX_TRUSTED,
1136                                   &ext2_xattr_trusted_handler);
1137         if (err)
1138                 goto out;
1139 #ifdef CONFIG_EXT2_FS_SECURITY
1140         err = ext2_xattr_register(EXT2_XATTR_INDEX_SECURITY,
1141                                   &ext2_xattr_security_handler);
1142         if (err)
1143                 goto out1;
1144 #endif
1145 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1146         err = init_ext2_acl();
1147         if (err)
1148                 goto out2;
1149 #endif
1150         ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
1151                 sizeof(struct mb_cache_entry) +
1152                 sizeof(struct mb_cache_entry_index), 1, 6);
1153         if (!ext2_xattr_cache) {
1154                 err = -ENOMEM;
1155                 goto out3;
1156         }
1157         return 0;
1158 out3:
1159 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1160         exit_ext2_acl();
1161 out2:
1162 #endif
1163 #ifdef CONFIG_EXT2_FS_SECURITY
1164         ext2_xattr_unregister(EXT2_XATTR_INDEX_SECURITY,
1165                               &ext2_xattr_security_handler);
1166 out1:
1167 #endif
1168         ext2_xattr_unregister(EXT2_XATTR_INDEX_TRUSTED,
1169                               &ext2_xattr_trusted_handler);
1170 out:
1171         ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
1172                               &ext2_xattr_user_handler);
1173         return err;
1174 }
1175
1176 void
1177 exit_ext2_xattr(void)
1178 {
1179         mb_cache_destroy(ext2_xattr_cache);
1180 #ifdef CONFIG_EXT2_FS_POSIX_ACL
1181         exit_ext2_acl();
1182 #endif
1183 #ifdef CONFIG_EXT2_FS_SECURITY
1184         ext2_xattr_unregister(EXT2_XATTR_INDEX_SECURITY,
1185                               &ext2_xattr_security_handler);
1186 #endif
1187         ext2_xattr_unregister(EXT2_XATTR_INDEX_TRUSTED,
1188                               &ext2_xattr_trusted_handler);
1189         ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
1190                               &ext2_xattr_user_handler);
1191 }