patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / fs / ext3 / xattr.c
1 /*
2  * linux/fs/ext3/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8  * Extended attributes for symlinks and special files added per
9  *  suggestion of Luka Renko <luka.renko@hermes.si>.
10  */
11
12 /*
13  * Extended attributes are stored on disk blocks allocated outside of
14  * any inode. The i_file_acl field is then made to point to this allocated
15  * block. If all extended attributes of an inode are identical, these
16  * inodes may share the same extended attribute block. Such situations
17  * are automatically detected by keeping a cache of recent attribute block
18  * numbers and hashes over the block's contents in memory.
19  *
20  *
21  * Extended attribute block layout:
22  *
23  *   +------------------+
24  *   | header           |
25  *   | entry 1          | |
26  *   | entry 2          | | growing downwards
27  *   | entry 3          | v
28  *   | four null bytes  |
29  *   | . . .            |
30  *   | value 1          | ^
31  *   | value 3          | | growing upwards
32  *   | value 2          | |
33  *   +------------------+
34  *
35  * The block header is followed by multiple entry descriptors. These entry
36  * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
37  * byte boundaries. The entry descriptors are sorted by attribute name,
38  * so that two extended attribute blocks can be compared efficiently.
39  *
40  * Attribute values are aligned to the end of the block, stored in
41  * no specific order. They are also padded to EXT3_XATTR_PAD byte
42  * boundaries. No additional gaps are left between them.
43  *
44  * Locking strategy
45  * ----------------
46  * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
47  * EA blocks are only changed if they are exclusive to an inode, so
48  * holding xattr_sem also means that nothing but the EA block's reference
49  * count will change. Multiple writers to an EA block are synchronized
50  * by the bh lock. No more than a single bh lock is held at any time
51  * to avoid deadlocks.
52  */
53
54 #include <linux/init.h>
55 #include <linux/fs.h>
56 #include <linux/slab.h>
57 #include <linux/ext3_jbd.h>
58 #include <linux/ext3_fs.h>
59 #include <linux/mbcache.h>
60 #include <linux/quotaops.h>
61 #include <linux/rwsem.h>
62 #include <linux/vs_dlimit.h>
63 #include "xattr.h"
64 #include "acl.h"
65
/*
 * Accessors for an extended attribute block held in a buffer_head:
 *   HDR(bh)              - the block's data viewed as an ext3_xattr_header
 *   ENTRY(ptr)           - ptr viewed as an ext3_xattr_entry
 *   FIRST_ENTRY(bh)      - the entry that directly follows the header
 *   IS_LAST_ENTRY(entry) - true if the __u32 at entry is zero, i.e. entry
 *                          is the terminator of the entry table
 */
#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)

/*
 * Debug output helpers.  With EXT3_XATTR_DEBUG defined they print a
 * prefix identifying the inode / block, then the caller's printk-style
 * message, then a newline.  Without it they expand to nothing, so their
 * arguments are not evaluated at all.
 *
 * The inode and bh parameters are parenthesized so that any pointer
 * expression may be passed, not just a plain identifier.
 */
#ifdef EXT3_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
		printk(KERN_DEBUG "inode %s:%ld: ", \
			(inode)->i_sb->s_id, (inode)->i_ino); \
		printk(f); \
		printk("\n"); \
	} while (0)
# define ea_bdebug(bh, f...) do { \
		char b[BDEVNAME_SIZE]; \
		printk(KERN_DEBUG "block %s:%lu: ", \
			bdevname((bh)->b_bdev, b), \
			(unsigned long) (bh)->b_blocknr); \
		printk(f); \
		printk("\n"); \
	} while (0)
#else
# define ea_idebug(f...)
# define ea_bdebug(f...)
#endif
90
91 static int ext3_xattr_set_handle2(handle_t *, struct inode *,
92                                   struct buffer_head *,
93                                   struct ext3_xattr_header *);
94
95 static int ext3_xattr_cache_insert(struct buffer_head *);
96 static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
97                                                  struct ext3_xattr_header *,
98                                                  int *);
99 static void ext3_xattr_cache_remove(struct buffer_head *);
100 static void ext3_xattr_rehash(struct ext3_xattr_header *,
101                               struct ext3_xattr_entry *);
102
103 static struct mb_cache *ext3_xattr_cache;
104 static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
105 static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
106
107 int
108 ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
109 {
110         int error = -EINVAL;
111
112         if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
113                 write_lock(&ext3_handler_lock);
114                 if (!ext3_xattr_handlers[name_index-1]) {
115                         ext3_xattr_handlers[name_index-1] = handler;
116                         error = 0;
117                 }
118                 write_unlock(&ext3_handler_lock);
119         }
120         return error;
121 }
122
123 void
124 ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
125 {
126         if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) {
127                 write_lock(&ext3_handler_lock);
128                 ext3_xattr_handlers[name_index-1] = NULL;
129                 write_unlock(&ext3_handler_lock);
130         }
131 }
132
/*
 * Test whether string @a starts with @a_prefix.
 *
 * Returns a pointer to the first character of @a after the prefix if it
 * does (which is @a itself for an empty prefix), or NULL if it does not.
 */
static inline const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		/* Also catches @a ending before the prefix does. */
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
142
143 /*
144  * Decode the extended attribute name, and translate it into
145  * the name_index and name suffix.
146  */
147 static inline struct ext3_xattr_handler *
148 ext3_xattr_resolve_name(const char **name)
149 {
150         struct ext3_xattr_handler *handler = NULL;
151         int i;
152
153         if (!*name)
154                 return NULL;
155         read_lock(&ext3_handler_lock);
156         for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
157                 if (ext3_xattr_handlers[i]) {
158                         const char *n = strcmp_prefix(*name,
159                                 ext3_xattr_handlers[i]->prefix);
160                         if (n) {
161                                 handler = ext3_xattr_handlers[i];
162                                 *name = n;
163                                 break;
164                         }
165                 }
166         }
167         read_unlock(&ext3_handler_lock);
168         return handler;
169 }
170
171 static inline struct ext3_xattr_handler *
172 ext3_xattr_handler(int name_index)
173 {
174         struct ext3_xattr_handler *handler = NULL;
175         if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
176                 read_lock(&ext3_handler_lock);
177                 handler = ext3_xattr_handlers[name_index-1];
178                 read_unlock(&ext3_handler_lock);
179         }
180         return handler;
181 }
182
183 /*
184  * Inode operation getxattr()
185  *
186  * dentry->d_inode->i_sem: don't care
187  */
188 ssize_t
189 ext3_getxattr(struct dentry *dentry, const char *name,
190               void *buffer, size_t size)
191 {
192         struct ext3_xattr_handler *handler;
193         struct inode *inode = dentry->d_inode;
194
195         handler = ext3_xattr_resolve_name(&name);
196         if (!handler)
197                 return -EOPNOTSUPP;
198         return handler->get(inode, name, buffer, size);
199 }
200
201 /*
202  * Inode operation listxattr()
203  *
204  * dentry->d_inode->i_sem: don't care
205  */
206 ssize_t
207 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
208 {
209         return ext3_xattr_list(dentry->d_inode, buffer, size);
210 }
211
212 /*
213  * Inode operation setxattr()
214  *
215  * dentry->d_inode->i_sem: down
216  */
217 int
218 ext3_setxattr(struct dentry *dentry, const char *name,
219               const void *value, size_t size, int flags)
220 {
221         struct ext3_xattr_handler *handler;
222         struct inode *inode = dentry->d_inode;
223
224         if (size == 0)
225                 value = "";  /* empty EA, do not remove */
226         handler = ext3_xattr_resolve_name(&name);
227         if (!handler)
228                 return -EOPNOTSUPP;
229         return handler->set(inode, name, value, size, flags);
230 }
231
232 /*
233  * Inode operation removexattr()
234  *
235  * dentry->d_inode->i_sem: down
236  */
237 int
238 ext3_removexattr(struct dentry *dentry, const char *name)
239 {
240         struct ext3_xattr_handler *handler;
241         struct inode *inode = dentry->d_inode;
242
243         handler = ext3_xattr_resolve_name(&name);
244         if (!handler)
245                 return -EOPNOTSUPP;
246         return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
247 }
248
249 /*
250  * ext3_xattr_get()
251  *
252  * Copy an extended attribute into the buffer
253  * provided, or compute the buffer size required.
254  * Buffer is NULL to compute the size of the buffer required.
255  *
256  * Returns a negative error number on failure, or the number of bytes
257  * used / required on success.
258  */
259 int
260 ext3_xattr_get(struct inode *inode, int name_index, const char *name,
261                void *buffer, size_t buffer_size)
262 {
263         struct buffer_head *bh = NULL;
264         struct ext3_xattr_entry *entry;
265         size_t name_len, size;
266         char *end;
267         int error;
268
269         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
270                   name_index, name, buffer, (long)buffer_size);
271
272         if (name == NULL)
273                 return -EINVAL;
274         down_read(&EXT3_I(inode)->xattr_sem);
275         error = -ENODATA;
276         if (!EXT3_I(inode)->i_file_acl)
277                 goto cleanup;
278         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
279         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
280         error = -EIO;
281         if (!bh)
282                 goto cleanup;
283         ea_bdebug(bh, "b_count=%d, refcount=%d",
284                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
285         end = bh->b_data + bh->b_size;
286         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
287             HDR(bh)->h_blocks != cpu_to_le32(1)) {
288 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_get",
289                         "inode %ld: bad block %d", inode->i_ino,
290                         EXT3_I(inode)->i_file_acl);
291                 error = -EIO;
292                 goto cleanup;
293         }
294         /* find named attribute */
295         name_len = strlen(name);
296
297         error = -ERANGE;
298         if (name_len > 255)
299                 goto cleanup;
300         entry = FIRST_ENTRY(bh);
301         while (!IS_LAST_ENTRY(entry)) {
302                 struct ext3_xattr_entry *next =
303                         EXT3_XATTR_NEXT(entry);
304                 if ((char *)next >= end)
305                         goto bad_block;
306                 if (name_index == entry->e_name_index &&
307                     name_len == entry->e_name_len &&
308                     memcmp(name, entry->e_name, name_len) == 0)
309                         goto found;
310                 entry = next;
311         }
312         /* Check the remaining name entries */
313         while (!IS_LAST_ENTRY(entry)) {
314                 struct ext3_xattr_entry *next =
315                         EXT3_XATTR_NEXT(entry);
316                 if ((char *)next >= end)
317                         goto bad_block;
318                 entry = next;
319         }
320         if (ext3_xattr_cache_insert(bh))
321                 ea_idebug(inode, "cache insert failed");
322         error = -ENODATA;
323         goto cleanup;
324 found:
325         /* check the buffer size */
326         if (entry->e_value_block != 0)
327                 goto bad_block;
328         size = le32_to_cpu(entry->e_value_size);
329         if (size > inode->i_sb->s_blocksize ||
330             le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
331                 goto bad_block;
332
333         if (ext3_xattr_cache_insert(bh))
334                 ea_idebug(inode, "cache insert failed");
335         if (buffer) {
336                 error = -ERANGE;
337                 if (size > buffer_size)
338                         goto cleanup;
339                 /* return value of attribute */
340                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
341                         size);
342         }
343         error = size;
344
345 cleanup:
346         brelse(bh);
347         up_read(&EXT3_I(inode)->xattr_sem);
348
349         return error;
350 }
351
352 /*
353  * ext3_xattr_list()
354  *
355  * Copy a list of attribute names into the buffer
356  * provided, or compute the buffer size required.
357  * Buffer is NULL to compute the size of the buffer required.
358  *
359  * Returns a negative error number on failure, or the number of bytes
360  * used / required on success.
361  */
362 int
363 ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
364 {
365         struct buffer_head *bh = NULL;
366         struct ext3_xattr_entry *entry;
367         size_t size = 0;
368         char *buf, *end;
369         int error;
370
371         ea_idebug(inode, "buffer=%p, buffer_size=%ld",
372                   buffer, (long)buffer_size);
373
374         down_read(&EXT3_I(inode)->xattr_sem);
375         error = 0;
376         if (!EXT3_I(inode)->i_file_acl)
377                 goto cleanup;
378         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
379         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
380         error = -EIO;
381         if (!bh)
382                 goto cleanup;
383         ea_bdebug(bh, "b_count=%d, refcount=%d",
384                 atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
385         end = bh->b_data + bh->b_size;
386         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
387             HDR(bh)->h_blocks != cpu_to_le32(1)) {
388 bad_block:      ext3_error(inode->i_sb, "ext3_xattr_list",
389                         "inode %ld: bad block %d", inode->i_ino,
390                         EXT3_I(inode)->i_file_acl);
391                 error = -EIO;
392                 goto cleanup;
393         }
394         /* compute the size required for the list of attribute names */
395         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
396              entry = EXT3_XATTR_NEXT(entry)) {
397                 struct ext3_xattr_handler *handler;
398                 struct ext3_xattr_entry *next =
399                         EXT3_XATTR_NEXT(entry);
400                 if ((char *)next >= end)
401                         goto bad_block;
402
403                 handler = ext3_xattr_handler(entry->e_name_index);
404                 if (handler)
405                         size += handler->list(NULL, inode, entry->e_name,
406                                               entry->e_name_len);
407         }
408
409         if (ext3_xattr_cache_insert(bh))
410                 ea_idebug(inode, "cache insert failed");
411         if (!buffer) {
412                 error = size;
413                 goto cleanup;
414         } else {
415                 error = -ERANGE;
416                 if (size > buffer_size)
417                         goto cleanup;
418         }
419
420         /* list the attribute names */
421         buf = buffer;
422         for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
423              entry = EXT3_XATTR_NEXT(entry)) {
424                 struct ext3_xattr_handler *handler;
425
426                 handler = ext3_xattr_handler(entry->e_name_index);
427                 if (handler)
428                         buf += handler->list(buf, inode, entry->e_name,
429                                              entry->e_name_len);
430         }
431         error = size;
432
433 cleanup:
434         brelse(bh);
435         up_read(&EXT3_I(inode)->xattr_sem);
436
437         return error;
438 }
439
440 /*
441  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
442  * not set, set it.
443  */
444 static void ext3_xattr_update_super_block(handle_t *handle,
445                                           struct super_block *sb)
446 {
447         if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
448                 return;
449
450         lock_super(sb);
451         if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
452                 EXT3_SB(sb)->s_es->s_feature_compat |=
453                         cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
454                 sb->s_dirt = 1;
455                 ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
456         }
457         unlock_super(sb);
458 }
459
460 /*
461  * ext3_xattr_set_handle()
462  *
463  * Create, replace or remove an extended attribute for this inode. Buffer
464  * is NULL to remove an existing extended attribute, and non-NULL to
465  * either replace an existing extended attribute, or create a new extended
466  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
467  * specify that an extended attribute must exist and must not exist
468  * previous to the call, respectively.
469  *
470  * Returns 0, or a negative error number on failure.
471  */
472 int
473 ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
474                       const char *name, const void *value, size_t value_len,
475                       int flags)
476 {
477         struct super_block *sb = inode->i_sb;
478         struct buffer_head *bh = NULL;
479         struct ext3_xattr_header *header = NULL;
480         struct ext3_xattr_entry *here, *last;
481         size_t name_len, free, min_offs = sb->s_blocksize;
482         int not_found = 1, error;
483         char *end;
484
485         /*
486          * header -- Points either into bh, or to a temporarily
487          *           allocated buffer.
488          * here -- The named entry found, or the place for inserting, within
489          *         the block pointed to by header.
490          * last -- Points right after the last named entry within the block
491          *         pointed to by header.
492          * min_offs -- The offset of the first value (values are aligned
493          *             towards the end of the block).
494          * end -- Points right after the block pointed to by header.
495          */
496
497         ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
498                   name_index, name, value, (long)value_len);
499
500         if (IS_RDONLY(inode))
501                 return -EROFS;
502         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
503                 return -EPERM;
504         if (value == NULL)
505                 value_len = 0;
506         if (name == NULL)
507                 return -EINVAL;
508         name_len = strlen(name);
509         if (name_len > 255 || value_len > sb->s_blocksize)
510                 return -ERANGE;
511         down_write(&EXT3_I(inode)->xattr_sem);
512         if (EXT3_I(inode)->i_file_acl) {
513                 /* The inode already has an extended attribute block. */
514                 bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
515                 error = -EIO;
516                 if (!bh)
517                         goto cleanup;
518                 ea_bdebug(bh, "b_count=%d, refcount=%d",
519                         atomic_read(&(bh->b_count)),
520                         le32_to_cpu(HDR(bh)->h_refcount));
521                 header = HDR(bh);
522                 end = bh->b_data + bh->b_size;
523                 if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
524                     header->h_blocks != cpu_to_le32(1)) {
525 bad_block:              ext3_error(sb, "ext3_xattr_set",
526                                 "inode %ld: bad block %d", inode->i_ino,
527                                 EXT3_I(inode)->i_file_acl);
528                         error = -EIO;
529                         goto cleanup;
530                 }
531                 /* Find the named attribute. */
532                 here = FIRST_ENTRY(bh);
533                 while (!IS_LAST_ENTRY(here)) {
534                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
535                         if ((char *)next >= end)
536                                 goto bad_block;
537                         if (!here->e_value_block && here->e_value_size) {
538                                 size_t offs = le16_to_cpu(here->e_value_offs);
539                                 if (offs < min_offs)
540                                         min_offs = offs;
541                         }
542                         not_found = name_index - here->e_name_index;
543                         if (!not_found)
544                                 not_found = name_len - here->e_name_len;
545                         if (!not_found)
546                                 not_found = memcmp(name, here->e_name,name_len);
547                         if (not_found <= 0)
548                                 break;
549                         here = next;
550                 }
551                 last = here;
552                 /* We still need to compute min_offs and last. */
553                 while (!IS_LAST_ENTRY(last)) {
554                         struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
555                         if ((char *)next >= end)
556                                 goto bad_block;
557                         if (!last->e_value_block && last->e_value_size) {
558                                 size_t offs = le16_to_cpu(last->e_value_offs);
559                                 if (offs < min_offs)
560                                         min_offs = offs;
561                         }
562                         last = next;
563                 }
564
565                 /* Check whether we have enough space left. */
566                 free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
567         } else {
568                 /* We will use a new extended attribute block. */
569                 free = sb->s_blocksize -
570                         sizeof(struct ext3_xattr_header) - sizeof(__u32);
571                 here = last = NULL;  /* avoid gcc uninitialized warning. */
572         }
573
574         if (not_found) {
575                 /* Request to remove a nonexistent attribute? */
576                 error = -ENODATA;
577                 if (flags & XATTR_REPLACE)
578                         goto cleanup;
579                 error = 0;
580                 if (value == NULL)
581                         goto cleanup;
582         } else {
583                 /* Request to create an existing attribute? */
584                 error = -EEXIST;
585                 if (flags & XATTR_CREATE)
586                         goto cleanup;
587                 if (!here->e_value_block && here->e_value_size) {
588                         size_t size = le32_to_cpu(here->e_value_size);
589
590                         if (le16_to_cpu(here->e_value_offs) + size > 
591                             sb->s_blocksize || size > sb->s_blocksize)
592                                 goto bad_block;
593                         free += EXT3_XATTR_SIZE(size);
594                 }
595                 free += EXT3_XATTR_LEN(name_len);
596         }
597         error = -ENOSPC;
598         if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
599                 goto cleanup;
600
601         /* Here we know that we can set the new attribute. */
602
603         if (header) {
604                 int credits = 0;
605
606                 /* assert(header == HDR(bh)); */
607                 if (header->h_refcount != cpu_to_le32(1))
608                         goto skip_get_write_access;
609                 /* ext3_journal_get_write_access() requires an unlocked bh,
610                    which complicates things here. */
611                 error = ext3_journal_get_write_access_credits(handle, bh,
612                                                               &credits);
613                 if (error)
614                         goto cleanup;
615                 lock_buffer(bh);
616                 if (header->h_refcount == cpu_to_le32(1)) {
617                         ea_bdebug(bh, "modifying in-place");
618                         ext3_xattr_cache_remove(bh);
619                         /* keep the buffer locked while modifying it. */
620                 } else {
621                         int offset;
622
623                         unlock_buffer(bh);
624                         journal_release_buffer(handle, bh, credits);
625                 skip_get_write_access:
626                         ea_bdebug(bh, "cloning");
627                         header = kmalloc(bh->b_size, GFP_KERNEL);
628                         error = -ENOMEM;
629                         if (header == NULL)
630                                 goto cleanup;
631                         memcpy(header, HDR(bh), bh->b_size);
632                         header->h_refcount = cpu_to_le32(1);
633                         offset = (char *)here - bh->b_data;
634                         here = ENTRY((char *)header + offset);
635                         offset = (char *)last - bh->b_data;
636                         last = ENTRY((char *)header + offset);
637                 }
638         } else {
639                 /* Allocate a buffer where we construct the new block. */
640                 header = kmalloc(sb->s_blocksize, GFP_KERNEL);
641                 error = -ENOMEM;
642                 if (header == NULL)
643                         goto cleanup;
644                 memset(header, 0, sb->s_blocksize);
645                 end = (char *)header + sb->s_blocksize;
646                 header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
647                 header->h_blocks = header->h_refcount = cpu_to_le32(1);
648                 last = here = ENTRY(header+1);
649         }
650
651         /* Iff we are modifying the block in-place, bh is locked here. */
652
653         if (not_found) {
654                 /* Insert the new name. */
655                 size_t size = EXT3_XATTR_LEN(name_len);
656                 size_t rest = (char *)last - (char *)here;
657                 memmove((char *)here + size, here, rest);
658                 memset(here, 0, size);
659                 here->e_name_index = name_index;
660                 here->e_name_len = name_len;
661                 memcpy(here->e_name, name, name_len);
662         } else {
663                 if (!here->e_value_block && here->e_value_size) {
664                         char *first_val = (char *)header + min_offs;
665                         size_t offs = le16_to_cpu(here->e_value_offs);
666                         char *val = (char *)header + offs;
667                         size_t size = EXT3_XATTR_SIZE(
668                                 le32_to_cpu(here->e_value_size));
669
670                         if (size == EXT3_XATTR_SIZE(value_len)) {
671                                 /* The old and the new value have the same
672                                    size. Just replace. */
673                                 here->e_value_size = cpu_to_le32(value_len);
674                                 memset(val + size - EXT3_XATTR_PAD, 0,
675                                        EXT3_XATTR_PAD); /* Clear pad bytes. */
676                                 memcpy(val, value, value_len);
677                                 goto skip_replace;
678                         }
679
680                         /* Remove the old value. */
681                         memmove(first_val + size, first_val, val - first_val);
682                         memset(first_val, 0, size);
683                         here->e_value_offs = 0;
684                         min_offs += size;
685
686                         /* Adjust all value offsets. */
687                         last = ENTRY(header+1);
688                         while (!IS_LAST_ENTRY(last)) {
689                                 size_t o = le16_to_cpu(last->e_value_offs);
690                                 if (!last->e_value_block && o < offs)
691                                         last->e_value_offs =
692                                                 cpu_to_le16(o + size);
693                                 last = EXT3_XATTR_NEXT(last);
694                         }
695                 }
696                 if (value == NULL) {
697                         /* Remove the old name. */
698                         size_t size = EXT3_XATTR_LEN(name_len);
699                         last = ENTRY((char *)last - size);
700                         memmove(here, (char*)here + size,
701                                 (char*)last - (char*)here);
702                         memset(last, 0, size);
703                 }
704         }
705
706         if (value != NULL) {
707                 /* Insert the new value. */
708                 here->e_value_size = cpu_to_le32(value_len);
709                 if (value_len) {
710                         size_t size = EXT3_XATTR_SIZE(value_len);
711                         char *val = (char *)header + min_offs - size;
712                         here->e_value_offs =
713                                 cpu_to_le16((char *)val - (char *)header);
714                         memset(val + size - EXT3_XATTR_PAD, 0,
715                                EXT3_XATTR_PAD); /* Clear the pad bytes. */
716                         memcpy(val, value, value_len);
717                 }
718         }
719
720 skip_replace:
721         if (IS_LAST_ENTRY(ENTRY(header+1))) {
722                 /* This block is now empty. */
723                 if (bh && header == HDR(bh))
724                         unlock_buffer(bh);  /* we were modifying in-place. */
725                 error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
726         } else {
727                 ext3_xattr_rehash(header, here);
728                 if (bh && header == HDR(bh))
729                         unlock_buffer(bh);  /* we were modifying in-place. */
730                 error = ext3_xattr_set_handle2(handle, inode, bh, header);
731         }
732
733 cleanup:
734         brelse(bh);
735         if (!(bh && header == HDR(bh)))
736                 kfree(header);
737         up_write(&EXT3_I(inode)->xattr_sem);
738
739         return error;
740 }
741
742 /*
743  * Second half of ext3_xattr_set_handle(): Update the file system.
744  */
745 static int
746 ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
747                        struct buffer_head *old_bh,
748                        struct ext3_xattr_header *header)
749 {
750         struct super_block *sb = inode->i_sb;
751         struct buffer_head *new_bh = NULL;
752         int credits = 0, error;
753
754         if (header) {
755                 new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
756                 if (new_bh) {
757                         /* We found an identical block in the cache. */
758                         if (new_bh == old_bh)
759                                 ea_bdebug(new_bh, "keeping this block");
760                         else {
761                                 /* The old block is released after updating
762                                    the inode. */
763                                 ea_bdebug(new_bh, "reusing block");
764
765                                 error = -ENOSPC;
766                                 if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
767                                         goto cleanup;
768                                 error = -EDQUOT;
769                                 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
770                                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
771                                         unlock_buffer(new_bh);
772                                         journal_release_buffer(handle, new_bh,
773                                                                credits);
774                                         goto cleanup;
775                                 }
776                                 HDR(new_bh)->h_refcount = cpu_to_le32(1 +
777                                         le32_to_cpu(HDR(new_bh)->h_refcount));
778                                 ea_bdebug(new_bh, "refcount now=%d",
779                                         le32_to_cpu(HDR(new_bh)->h_refcount));
780                         }
781                         unlock_buffer(new_bh);
782                 } else if (old_bh && header == HDR(old_bh)) {
783                         /* Keep this block. No need to lock the block as we
784                          * don't need to change the reference count. */
785                         new_bh = old_bh;
786                         get_bh(new_bh);
787                         ext3_xattr_cache_insert(new_bh);
788                 } else {
789                         /* We need to allocate a new block */
790                         int goal = le32_to_cpu(
791                                         EXT3_SB(sb)->s_es->s_first_data_block) +
792                                 EXT3_I(inode)->i_block_group *
793                                 EXT3_BLOCKS_PER_GROUP(sb);
794                         int block = ext3_new_block(handle,
795                                 inode, goal, 0, 0, &error);
796                         if (error)
797                                 goto cleanup;
798                         ea_idebug(inode, "creating block %d", block);
799
800                         new_bh = sb_getblk(sb, block);
801                         if (!new_bh) {
802 getblk_failed:
803                                 ext3_free_blocks(handle, inode, block, 1);
804                                 error = -EIO;
805                                 goto cleanup;
806                         }
807                         lock_buffer(new_bh);
808                         error = ext3_journal_get_create_access(handle, new_bh);
809                         if (error) {
810                                 unlock_buffer(new_bh);
811                                 goto getblk_failed;
812                         }
813                         memcpy(new_bh->b_data, header, new_bh->b_size);
814                         set_buffer_uptodate(new_bh);
815                         unlock_buffer(new_bh);
816                         ext3_xattr_cache_insert(new_bh);
817
818                         ext3_xattr_update_super_block(handle, sb);
819                 }
820                 error = ext3_journal_dirty_metadata(handle, new_bh);
821                 if (error)
822                         goto cleanup;
823         }
824
825         /* Update the inode. */
826         EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
827         inode->i_ctime = CURRENT_TIME;
828         ext3_mark_inode_dirty(handle, inode);
829         if (IS_SYNC(inode))
830                 handle->h_sync = 1;
831
832         error = 0;
833         if (old_bh && old_bh != new_bh) {
834                 /*
835                  * If there was an old block, and we are no longer using it,
836                  * release the old block.
837                 */
838                 error = ext3_journal_get_write_access(handle, old_bh);
839                 if (error)
840                         goto cleanup;
841                 lock_buffer(old_bh);
842                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
843                         /* Free the old block. */
844                         ea_bdebug(old_bh, "freeing");
845                         ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
846
847                         /* ext3_forget() calls bforget() for us, but we
848                            let our caller release old_bh, so we need to
849                            duplicate the handle before. */
850                         get_bh(old_bh);
851                         ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
852                 } else {
853                         /* Decrement the refcount only. */
854                         HDR(old_bh)->h_refcount = cpu_to_le32(
855                                 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
856                         DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
857                         DQUOT_FREE_BLOCK(inode, 1);
858                         ext3_journal_dirty_metadata(handle, old_bh);
859                         ea_bdebug(old_bh, "refcount now=%d",
860                                 le32_to_cpu(HDR(old_bh)->h_refcount));
861                 }
862                 unlock_buffer(old_bh);
863         }
864
865 cleanup:
866         brelse(new_bh);
867
868         return error;
869 }
870
871 /*
872  * ext3_xattr_set()
873  *
874  * Like ext3_xattr_set_handle, but start from an inode. This extended
875  * attribute modification is a filesystem transaction by itself.
876  *
877  * Returns 0, or a negative error number on failure.
878  */
879 int
880 ext3_xattr_set(struct inode *inode, int name_index, const char *name,
881                const void *value, size_t value_len, int flags)
882 {
883         handle_t *handle;
884         int error;
885
886         handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
887         if (IS_ERR(handle)) {
888                 error = PTR_ERR(handle);
889         } else {
890                 int error2;
891
892                 error = ext3_xattr_set_handle(handle, inode, name_index, name,
893                                               value, value_len, flags);
894                 error2 = ext3_journal_stop(handle);
895                 if (error == 0)
896                         error = error2;
897         }
898
899         return error;
900 }
901
902 /*
903  * ext3_xattr_delete_inode()
904  *
905  * Free extended attribute resources associated with this inode. This
906  * is called immediately before an inode is freed.
907  */
908 void
909 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
910 {
911         struct buffer_head *bh = NULL;
912
913         down_write(&EXT3_I(inode)->xattr_sem);
914         if (!EXT3_I(inode)->i_file_acl)
915                 goto cleanup;
916         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
917         if (!bh) {
918                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
919                         "inode %ld: block %d read error", inode->i_ino,
920                         EXT3_I(inode)->i_file_acl);
921                 goto cleanup;
922         }
923         if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
924             HDR(bh)->h_blocks != cpu_to_le32(1)) {
925                 ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
926                         "inode %ld: bad block %d", inode->i_ino,
927                         EXT3_I(inode)->i_file_acl);
928                 goto cleanup;
929         }
930         if (ext3_journal_get_write_access(handle, bh) != 0)
931                 goto cleanup;
932         lock_buffer(bh);
933         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
934                 ext3_xattr_cache_remove(bh);
935                 ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
936                 get_bh(bh);
937                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
938         } else {
939                 HDR(bh)->h_refcount = cpu_to_le32(
940                         le32_to_cpu(HDR(bh)->h_refcount) - 1);
941                 ext3_journal_dirty_metadata(handle, bh);
942                 if (IS_SYNC(inode))
943                         handle->h_sync = 1;
944                 DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1);
945                 DQUOT_FREE_BLOCK(inode, 1);
946         }
947         ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
948         unlock_buffer(bh);
949         EXT3_I(inode)->i_file_acl = 0;
950
951 cleanup:
952         brelse(bh);
953         up_write(&EXT3_I(inode)->xattr_sem);
954 }
955
956 /*
957  * ext3_xattr_put_super()
958  *
959  * This is called when a file system is unmounted.
960  */
961 void
962 ext3_xattr_put_super(struct super_block *sb)
963 {
964         mb_cache_shrink(ext3_xattr_cache, sb->s_bdev);
965 }
966
967 /*
968  * ext3_xattr_cache_insert()
969  *
970  * Create a new entry in the extended attribute cache, and insert
971  * it unless such an entry is already in the cache.
972  *
973  * Returns 0, or a negative error number on failure.
974  */
975 static int
976 ext3_xattr_cache_insert(struct buffer_head *bh)
977 {
978         __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
979         struct mb_cache_entry *ce;
980         int error;
981
982         ce = mb_cache_entry_alloc(ext3_xattr_cache);
983         if (!ce)
984                 return -ENOMEM;
985         error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
986         if (error) {
987                 mb_cache_entry_free(ce);
988                 if (error == -EBUSY) {
989                         ea_bdebug(bh, "already in cache (%d cache entries)",
990                                 atomic_read(&ext3_xattr_cache->c_entry_count));
991                         error = 0;
992                 }
993         } else {
994                 ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
995                           atomic_read(&ext3_xattr_cache->c_entry_count));
996                 mb_cache_entry_release(ce);
997         }
998         return error;
999 }
1000
1001 /*
1002  * ext3_xattr_cmp()
1003  *
1004  * Compare two extended attribute blocks for equality.
1005  *
1006  * Returns 0 if the blocks are equal, 1 if they differ, and
1007  * a negative error number on errors.
1008  */
1009 static int
1010 ext3_xattr_cmp(struct ext3_xattr_header *header1,
1011                struct ext3_xattr_header *header2)
1012 {
1013         struct ext3_xattr_entry *entry1, *entry2;
1014
1015         entry1 = ENTRY(header1+1);
1016         entry2 = ENTRY(header2+1);
1017         while (!IS_LAST_ENTRY(entry1)) {
1018                 if (IS_LAST_ENTRY(entry2))
1019                         return 1;
1020                 if (entry1->e_hash != entry2->e_hash ||
1021                     entry1->e_name_len != entry2->e_name_len ||
1022                     entry1->e_value_size != entry2->e_value_size ||
1023                     memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1024                         return 1;
1025                 if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1026                         return -EIO;
1027                 if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1028                            (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1029                            le32_to_cpu(entry1->e_value_size)))
1030                         return 1;
1031
1032                 entry1 = EXT3_XATTR_NEXT(entry1);
1033                 entry2 = EXT3_XATTR_NEXT(entry2);
1034         }
1035         if (!IS_LAST_ENTRY(entry2))
1036                 return 1;
1037         return 0;
1038 }
1039
1040 /*
1041  * ext3_xattr_cache_find()
1042  *
1043  * Find an identical extended attribute block.
1044  *
1045  * Returns a pointer to the block found, or NULL if such a block was
1046  * not found or an error occurred.
1047  */
1048 static struct buffer_head *
1049 ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
1050                       struct ext3_xattr_header *header, int *credits)
1051 {
1052         __u32 hash = le32_to_cpu(header->h_hash);
1053         struct mb_cache_entry *ce;
1054
1055         if (!header->h_hash)
1056                 return NULL;  /* never share */
1057         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1058         ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
1059                                        inode->i_sb->s_bdev, hash);
1060         while (ce) {
1061                 struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
1062
1063                 if (!bh) {
1064                         ext3_error(inode->i_sb, "ext3_xattr_cache_find",
1065                                 "inode %ld: block %ld read error",
1066                                 inode->i_ino, (unsigned long) ce->e_block);
1067                 } else if (ext3_journal_get_write_access_credits(
1068                                 handle, bh, credits) == 0) {
1069                         /* ext3_journal_get_write_access() requires an unlocked
1070                          * bh, which complicates things here. */
1071                         lock_buffer(bh);
1072                         if (le32_to_cpu(HDR(bh)->h_refcount) >
1073                                    EXT3_XATTR_REFCOUNT_MAX) {
1074                                 ea_idebug(inode, "block %ld refcount %d>%d",
1075                                           (unsigned long) ce->e_block,
1076                                           le32_to_cpu(HDR(bh)->h_refcount),
1077                                           EXT3_XATTR_REFCOUNT_MAX);
1078                         } else if (!ext3_xattr_cmp(header, HDR(bh))) {
1079                                 mb_cache_entry_release(ce);
1080                                 /* buffer will be unlocked by caller */
1081                                 return bh;
1082                         }
1083                         unlock_buffer(bh);
1084                         journal_release_buffer(handle, bh, *credits);
1085                         *credits = 0;
1086                         brelse(bh);
1087                 }
1088                 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
1089         }
1090         return NULL;
1091 }
1092
1093 /*
1094  * ext3_xattr_cache_remove()
1095  *
1096  * Remove the cache entry of a block from the cache. Called when a
1097  * block becomes invalid.
1098  */
1099 static void
1100 ext3_xattr_cache_remove(struct buffer_head *bh)
1101 {
1102         struct mb_cache_entry *ce;
1103
1104         ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
1105                                 bh->b_blocknr);
1106         if (ce) {
1107                 ea_bdebug(bh, "removing (%d cache entries remaining)",
1108                           atomic_read(&ext3_xattr_cache->c_entry_count)-1);
1109                 mb_cache_entry_free(ce);
1110         } else 
1111                 ea_bdebug(bh, "no cache entry");
1112 }
1113
1114 #define NAME_HASH_SHIFT 5
1115 #define VALUE_HASH_SHIFT 16
1116
1117 /*
1118  * ext3_xattr_hash_entry()
1119  *
1120  * Compute the hash of an extended attribute.
1121  */
1122 static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
1123                                          struct ext3_xattr_entry *entry)
1124 {
1125         __u32 hash = 0;
1126         char *name = entry->e_name;
1127         int n;
1128
1129         for (n=0; n < entry->e_name_len; n++) {
1130                 hash = (hash << NAME_HASH_SHIFT) ^
1131                        (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1132                        *name++;
1133         }
1134
1135         if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1136                 __u32 *value = (__u32 *)((char *)header +
1137                         le16_to_cpu(entry->e_value_offs));
1138                 for (n = (le32_to_cpu(entry->e_value_size) +
1139                      EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
1140                         hash = (hash << VALUE_HASH_SHIFT) ^
1141                                (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1142                                le32_to_cpu(*value++);
1143                 }
1144         }
1145         entry->e_hash = cpu_to_le32(hash);
1146 }
1147
1148 #undef NAME_HASH_SHIFT
1149 #undef VALUE_HASH_SHIFT
1150
1151 #define BLOCK_HASH_SHIFT 16
1152
1153 /*
1154  * ext3_xattr_rehash()
1155  *
1156  * Re-compute the extended attribute hash value after an entry has changed.
1157  */
1158 static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1159                               struct ext3_xattr_entry *entry)
1160 {
1161         struct ext3_xattr_entry *here;
1162         __u32 hash = 0;
1163
1164         ext3_xattr_hash_entry(header, entry);
1165         here = ENTRY(header+1);
1166         while (!IS_LAST_ENTRY(here)) {
1167                 if (!here->e_hash) {
1168                         /* Block is not shared if an entry's hash value == 0 */
1169                         hash = 0;
1170                         break;
1171                 }
1172                 hash = (hash << BLOCK_HASH_SHIFT) ^
1173                        (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1174                        le32_to_cpu(here->e_hash);
1175                 here = EXT3_XATTR_NEXT(here);
1176         }
1177         header->h_hash = cpu_to_le32(hash);
1178 }
1179
1180 #undef BLOCK_HASH_SHIFT
1181
1182 int __init
1183 init_ext3_xattr(void)
1184 {
1185         int     err;
1186
1187         err = ext3_xattr_register(EXT3_XATTR_INDEX_USER,
1188                                   &ext3_xattr_user_handler);
1189         if (err)
1190                 return err;
1191         err = ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
1192                                   &ext3_xattr_trusted_handler);
1193         if (err)
1194                 goto out;
1195 #ifdef CONFIG_EXT3_FS_SECURITY
1196         err = ext3_xattr_register(EXT3_XATTR_INDEX_SECURITY,
1197                                   &ext3_xattr_security_handler);
1198         if (err)
1199                 goto out1;
1200 #endif
1201 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1202         err = init_ext3_acl();
1203         if (err)
1204                 goto out2;
1205 #endif
1206         ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
1207                 sizeof(struct mb_cache_entry) +
1208                 sizeof(struct mb_cache_entry_index), 1, 6);
1209         if (!ext3_xattr_cache) {
1210                 err = -ENOMEM;
1211                 goto out3;
1212         }
1213         return 0;
1214 out3:
1215 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1216         exit_ext3_acl();
1217 out2:
1218 #endif
1219 #ifdef CONFIG_EXT3_FS_SECURITY
1220         ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
1221                               &ext3_xattr_security_handler);
1222 out1:
1223 #endif
1224         ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
1225                               &ext3_xattr_trusted_handler);
1226 out:
1227         ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
1228                               &ext3_xattr_user_handler);
1229         return err;
1230 }
1231
1232 void
1233 exit_ext3_xattr(void)
1234 {
1235         if (ext3_xattr_cache)
1236                 mb_cache_destroy(ext3_xattr_cache);
1237         ext3_xattr_cache = NULL;
1238 #ifdef CONFIG_EXT3_FS_POSIX_ACL
1239         exit_ext3_acl();
1240 #endif
1241 #ifdef CONFIG_EXT3_FS_SECURITY
1242         ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
1243                               &ext3_xattr_security_handler);
1244 #endif
1245         ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
1246                               &ext3_xattr_trusted_handler);
1247         ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
1248                               &ext3_xattr_user_handler);
1249 }