vserver 1.9.5.x5
[linux-2.6.git] / fs / ext3 / xattr.c
index cedbcde..061852c 100644 (file)
@@ -7,22 +7,27 @@
  * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
  * Extended attributes for symlinks and special files added per
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
+ * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
+ *  Red Hat Inc.
+ * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
+ *  and Andreas Gruenbacher <agruen@suse.de>.
  */
 
 /*
- * Extended attributes are stored on disk blocks allocated outside of
- * any inode. The i_file_acl field is then made to point to this allocated
- * block. If all extended attributes of an inode are identical, these
- * inodes may share the same extended attribute block. Such situations
- * are automatically detected by keeping a cache of recent attribute block
- * numbers and hashes over the block's contents in memory.
+ * Extended attributes are stored directly in inodes (on file systems with
+ * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
+ * field contains the block number if an inode uses an additional block. All
+ * attributes must fit in the inode and one additional block. Blocks that
+ * contain the identical set of attributes may be shared among several inodes.
+ * Identical blocks are detected by keeping a cache of blocks that have
+ * recently been accessed.
  *
- *
- * Extended attribute block layout:
+ * The attributes in inodes and on blocks have a different header; the entries
+ * are stored in the same format:
  *
  *   +------------------+
  *   | header           |
- *   ¦ entry 1          | |
+ *   | entry 1          | |
  *   | entry 2          | | growing downwards
  *   | entry 3          | v
  *   | four null bytes  |
  *   | value 2          | |
  *   +------------------+
  *
- * The block header is followed by multiple entry descriptors. These entry
- * descriptors are variable in size, and alligned to EXT3_XATTR_PAD
- * byte boundaries. The entry descriptors are sorted by attribute name,
- * so that two extended attribute blocks can be compared efficiently.
- *
- * Attribute values are aligned to the end of the block, stored in
- * no specific order. They are also padded to EXT3_XATTR_PAD byte
- * boundaries. No additional gaps are left between them.
+ * The header is followed by multiple entry descriptors. In disk blocks, the
+ * entry descriptors are kept sorted. In inodes, they are unsorted. The
+ * attribute values are aligned to the end of the block in no specific order.
  *
  * Locking strategy
  * ----------------
  * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem.
  * EA blocks are only changed if they are exclusive to an inode, so
  * holding xattr_sem also means that nothing but the EA block's reference
- * count will change. Multiple writers to an EA block are synchronized
- * by the bh lock. No more than a single bh lock is held at any time
- * to avoid deadlocks.
+ * count can change. Multiple writers to the same block are synchronized
+ * by the buffer lock.
  */
 
 #include <linux/init.h>
 #include "xattr.h"
 #include "acl.h"
 
-#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
+#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
 #define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
-#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
+#define BFIRST(bh) ENTRY(BHDR(bh)+1)
 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
 
+/*
+ * IHDR: the in-inode xattr header, located EXT3_GOOD_OLD_INODE_SIZE plus
+ * the inode's i_extra_isize bytes past the start of the raw inode.
+ * IFIRST: the first entry descriptor, which directly follows that header.
+ */
+#define IHDR(inode, raw_inode) \
+       ((struct ext3_xattr_ibody_header *) \
+               ((void *)raw_inode + \
+                EXT3_GOOD_OLD_INODE_SIZE + \
+                EXT3_I(inode)->i_extra_isize))
+#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1))
+
 #ifdef EXT3_XATTR_DEBUG
 # define ea_idebug(inode, f...) do { \
                printk(KERN_DEBUG "inode %s:%ld: ", \
 # define ea_bdebug(f...)
 #endif
 
-static int ext3_xattr_set_handle2(handle_t *, struct inode *,
-                                 struct buffer_head *,
-                                 struct ext3_xattr_header *);
-
-static int ext3_xattr_cache_insert(struct buffer_head *);
-static struct buffer_head *ext3_xattr_cache_find(handle_t *, struct inode *,
+static void ext3_xattr_cache_insert(struct buffer_head *);
+static struct buffer_head *ext3_xattr_cache_find(struct inode *,
                                                 struct ext3_xattr_header *,
-                                                int *);
-static void ext3_xattr_cache_remove(struct buffer_head *);
+                                                struct mb_cache_entry **);
 static void ext3_xattr_rehash(struct ext3_xattr_header *,
                              struct ext3_xattr_entry *);
 
 static struct mb_cache *ext3_xattr_cache;
-static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
-static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
 
-int
-ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
-{
-       int error = -EINVAL;
+/*
+ * Maps a name index (EXT3_XATTR_INDEX_*) to its handler. Slots without an
+ * initializer, including handlers that are compiled out, are NULL.
+ */
+static struct xattr_handler *ext3_xattr_handler_map[] = {
+       [EXT3_XATTR_INDEX_USER]              = &ext3_xattr_user_handler,
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+       [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext3_xattr_acl_access_handler,
+       [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
+#endif
+       [EXT3_XATTR_INDEX_TRUSTED]           = &ext3_xattr_trusted_handler,
+#ifdef CONFIG_EXT3_FS_SECURITY
+       [EXT3_XATTR_INDEX_SECURITY]          = &ext3_xattr_security_handler,
+#endif
+};
 
-       if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
-               write_lock(&ext3_handler_lock);
-               if (!ext3_xattr_handlers[name_index-1]) {
-                       ext3_xattr_handlers[name_index-1] = handler;
-                       error = 0;
-               }
-               write_unlock(&ext3_handler_lock);
-       }
-       return error;
-}
+/*
+ * NULL-terminated list of every compiled-in handler.
+ * NOTE(review): presumably consumed by the generic xattr code outside this
+ * file (the symbol is not static); the consumer is not visible here.
+ */
+struct xattr_handler *ext3_xattr_handlers[] = {
+       &ext3_xattr_user_handler,
+       &ext3_xattr_trusted_handler,
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+       &ext3_xattr_acl_access_handler,
+       &ext3_xattr_acl_default_handler,
+#endif
+#ifdef CONFIG_EXT3_FS_SECURITY
+       &ext3_xattr_security_handler,
+#endif
+       NULL
+};
 
-void
-ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
+/*
+ * Look up the handler for a name index. Returns NULL when the index is
+ * not positive, lies beyond the end of ext3_xattr_handler_map, or selects
+ * a slot that holds no handler.
+ */
+static inline struct xattr_handler *
+ext3_xattr_handler(int name_index)
 {
-       if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) {
-               write_lock(&ext3_handler_lock);
-               ext3_xattr_handlers[name_index-1] = NULL;
-               write_unlock(&ext3_handler_lock);
-       }
-}
+       struct xattr_handler *handler = NULL;
 
-static inline const char *
-strcmp_prefix(const char *a, const char *a_prefix)
-{
-       while (*a_prefix && *a == *a_prefix) {
-               a++;
-               a_prefix++;
-       }
-       return *a_prefix ? NULL : a;
+       if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map))
+               handler = ext3_xattr_handler_map[name_index];
+       return handler;
 }
 
 /*
- * Decode the extended attribute name, and translate it into
- * the name_index and name suffix.
+ * Inode operation listxattr()
+ *
+ * dentry->d_inode->i_sem: don't care
  */
-static inline struct ext3_xattr_handler *
-ext3_xattr_resolve_name(const char **name)
+ssize_t
+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct ext3_xattr_handler *handler = NULL;
-       int i;
-
-       if (!*name)
-               return NULL;
-       read_lock(&ext3_handler_lock);
-       for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
-               if (ext3_xattr_handlers[i]) {
-                       const char *n = strcmp_prefix(*name,
-                               ext3_xattr_handlers[i]->prefix);
-                       if (n) {
-                               handler = ext3_xattr_handlers[i];
-                               *name = n;
-                               break;
-                       }
-               }
-       }
-       read_unlock(&ext3_handler_lock);
-       return handler;
+       /* Thin wrapper: the listing is done by ext3_xattr_list() on the inode. */
+       return ext3_xattr_list(dentry->d_inode, buffer, size);
 }
 
-static inline struct ext3_xattr_handler *
-ext3_xattr_handler(int name_index)
+/*
+ * Walk the entry descriptors starting at ENTRY until the terminating
+ * descriptor whose first 32-bit word is zero. Returns -EIO if the
+ * successor of any descriptor would start at or beyond END, 0 otherwise.
+ */
+static int
+ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end)
 {
-       struct ext3_xattr_handler *handler = NULL;
-       if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
-               read_lock(&ext3_handler_lock);
-               handler = ext3_xattr_handlers[name_index-1];
-               read_unlock(&ext3_handler_lock);
+       while (!IS_LAST_ENTRY(entry)) {
+               struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry);
+               if ((void *)next >= end)
+                       return -EIO;
+               entry = next;
         }
-       return handler;
+       return 0;
 }
 
-/*
- * Inode operation getxattr()
- *
- * dentry->d_inode->i_sem: don't care
- */
-ssize_t
-ext3_getxattr(struct dentry *dentry, const char *name,
-             void *buffer, size_t size)
+/*
+ * Sanity-check an xattr block: the header must carry EXT3_XATTR_MAGIC and
+ * an h_blocks count of 1, and the descriptor list must stay inside the
+ * buffer. Returns 0 if the block looks valid, -EIO otherwise.
+ */
+static inline int
+ext3_xattr_check_block(struct buffer_head *bh)
 {
-       struct ext3_xattr_handler *handler;
-       struct inode *inode = dentry->d_inode;
+       int error;
 
-       handler = ext3_xattr_resolve_name(&name);
-       if (!handler)
-               return -EOPNOTSUPP;
-       return handler->get(inode, name, buffer, size);
+       if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
+           BHDR(bh)->h_blocks != cpu_to_le32(1))
+               return -EIO;
+       error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+       return error;
 }
 
-/*
- * Inode operation listxattr()
- *
- * dentry->d_inode->i_sem: don't care
- */
-ssize_t
-ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
+/*
+ * Validate where ENTRY says its value lives. The value must not be in a
+ * separate block (e_value_block must be 0), and neither the value size
+ * nor value offset plus size may exceed SIZE. Returns 0 or -EIO.
+ */
+static inline int
+ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size)
 {
-       return ext3_xattr_list(dentry->d_inode, buffer, size);
-}
+       size_t value_size = le32_to_cpu(entry->e_value_size);
 
-/*
- * Inode operation setxattr()
- *
- * dentry->d_inode->i_sem: down
- */
-int
-ext3_setxattr(struct dentry *dentry, const char *name,
-             const void *value, size_t size, int flags)
-{
-       struct ext3_xattr_handler *handler;
-       struct inode *inode = dentry->d_inode;
-
-       if (size == 0)
-               value = "";  /* empty EA, do not remove */
-       handler = ext3_xattr_resolve_name(&name);
-       if (!handler)
-               return -EOPNOTSUPP;
-       return handler->set(inode, name, value, size, flags);
+       if (entry->e_value_block != 0 || value_size > size ||
+           le16_to_cpu(entry->e_value_offs) + value_size > size)
+               return -EIO;
+       return 0;
 }
 
-/*
- * Inode operation removexattr()
- *
- * dentry->d_inode->i_sem: down
- */
-int
-ext3_removexattr(struct dentry *dentry, const char *name)
+/*
+ * Search the descriptor list at *PENTRY for (NAME_INDEX, NAME). Entries
+ * are compared by name index, then name length, then name bytes. If
+ * SORTED is set the scan also stops at the first entry that compares
+ * greater than the key; otherwise it stops only on an exact match.
+ *
+ * On return *PENTRY points at the entry where the scan stopped (the
+ * list terminator if it ran to the end). Returns 0 on a match whose value
+ * location passes ext3_xattr_check_entry() against SIZE, -EIO on a match
+ * that fails that check, -ENODATA if there is no match, and -EINVAL if
+ * NAME is NULL.
+ */
+static int
+ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index,
+                     const char *name, size_t size, int sorted)
 {
-       struct ext3_xattr_handler *handler;
-       struct inode *inode = dentry->d_inode;
+       struct ext3_xattr_entry *entry;
+       size_t name_len;
+       int cmp = 1;
 
-       handler = ext3_xattr_resolve_name(&name);
-       if (!handler)
-               return -EOPNOTSUPP;
-       return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+       if (name == NULL)
+               return -EINVAL;
+       name_len = strlen(name);
+       entry = *pentry;
+       for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+               cmp = name_index - entry->e_name_index;
+               if (!cmp)
+                       cmp = name_len - entry->e_name_len;
+               if (!cmp)
+                       cmp = memcmp(name, entry->e_name, name_len);
+               if (cmp <= 0 && (sorted || cmp == 0))
+                       break;
+       }
+       *pentry = entry;
+       if (!cmp && ext3_xattr_check_entry(entry, size))
+                       return -EIO;
+       return cmp ? -ENODATA : 0;
 }
 
-/*
- * ext3_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
+/*
+ * Look up an attribute in the inode's external xattr block (i_file_acl).
+ * Copies the value into BUFFER when BUFFER is non-NULL. Returns the value
+ * size on success, -ENODATA if the inode has no xattr block or the name
+ * is not found, -ERANGE if the value does not fit in BUFFER_SIZE, and
+ * -EIO if the block fails validation. Does not take xattr_sem itself.
+ */
 int
-ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-              void *buffer, size_t buffer_size)
+ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
+                    void *buffer, size_t buffer_size)
 {
         struct buffer_head *bh = NULL;
         struct ext3_xattr_entry *entry;
-       size_t name_len, size;
-       char *end;
+       size_t size;
         int error;
 
         ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
                   name_index, name, buffer, (long)buffer_size);
 
-       if (name == NULL)
-               return -EINVAL;
-       down_read(&EXT3_I(inode)->xattr_sem);
         error = -ENODATA;
         if (!EXT3_I(inode)->i_file_acl)
                 goto cleanup;
         ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
         bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
-       error = -EIO;
+       /*
+        * NOTE(review): error is still -ENODATA here, so a failed
+        * sb_bread() is reported as a missing attribute rather than an
+        * I/O error -- confirm this is intended.
+        */
         if (!bh)
                 goto cleanup;
         ea_bdebug(bh, "b_count=%d, refcount=%d",
-               atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-       end = bh->b_data + bh->b_size;
-       if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-           HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block:     ext3_error(inode->i_sb, "ext3_xattr_get",
-                       "inode %ld: bad block %d", inode->i_ino,
-                       EXT3_I(inode)->i_file_acl);
+               atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+       if (ext3_xattr_check_block(bh)) {
+bad_block:     ext3_error(inode->i_sb, __FUNCTION__,
+                          "inode %ld: bad block %d", inode->i_ino,
+                          EXT3_I(inode)->i_file_acl);
                 error = -EIO;
                 goto cleanup;
         }
-       /* find named attribute */
-       name_len = strlen(name);
-
-       error = -ERANGE;
-       if (name_len > 255)
-               goto cleanup;
-       entry = FIRST_ENTRY(bh);
-       while (!IS_LAST_ENTRY(entry)) {
-               struct ext3_xattr_entry *next =
-                       EXT3_XATTR_NEXT(entry);
-               if ((char *)next >= end)
-                       goto bad_block;
-               if (name_index == entry->e_name_index &&
-                   name_len == entry->e_name_len &&
-                   memcmp(name, entry->e_name, name_len) == 0)
-                       goto found;
-               entry = next;
-       }
-       /* Check the remaining name entries */
-       while (!IS_LAST_ENTRY(entry)) {
-               struct ext3_xattr_entry *next =
-                       EXT3_XATTR_NEXT(entry);
-               if ((char *)next >= end)
-                       goto bad_block;
-               entry = next;
-       }
-       if (ext3_xattr_cache_insert(bh))
-               ea_idebug(inode, "cache insert failed");
-       error = -ENODATA;
-       goto cleanup;
-found:
-       /* check the buffer size */
-       if (entry->e_value_block != 0)
+       ext3_xattr_cache_insert(bh);
+       entry = BFIRST(bh);
+       error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+       if (error == -EIO)
                 goto bad_block;
+       if (error)
+               goto cleanup;
         size = le32_to_cpu(entry->e_value_size);
-       if (size > inode->i_sb->s_blocksize ||
-           le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
-               goto bad_block;
-
-       if (ext3_xattr_cache_insert(bh))
-               ea_idebug(inode, "cache insert failed");
         if (buffer) {
                 error = -ERANGE;
                 if (size > buffer_size)
                         goto cleanup;
-               /* return value of attribute */
                 memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-                       size);
+                      size);
         }
         error = size;
 
 cleanup:
         brelse(bh);
-       up_read(&EXT3_I(inode)->xattr_sem);
+       return error;
+}
 
+/*
+ * Look up an attribute stored in the inode body. Returns -ENODATA at once
+ * if the inode does not have EXT3_STATE_XATTR set. Otherwise the raw inode
+ * is loaded, the in-inode descriptor list is bounds-checked against the
+ * end of the on-disk inode (s_inode_size), and the unsorted list is
+ * searched. Value offsets are relative to the first entry descriptor.
+ *
+ * Returns the value size on success (copying it to BUFFER when BUFFER is
+ * non-NULL), -ERANGE if it does not fit in BUFFER_SIZE, or another
+ * negative error from the helpers it calls.
+ */
+static int
+ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
+                    void *buffer, size_t buffer_size)
+{
+       struct ext3_xattr_ibody_header *header;
+       struct ext3_xattr_entry *entry;
+       struct ext3_inode *raw_inode;
+       struct ext3_iloc iloc;
+       size_t size;
+       void *end;
+       int error;
+
+       if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+               return -ENODATA;
+       error = ext3_get_inode_loc(inode, &iloc);
+       if (error)
+               return error;
+       raw_inode = ext3_raw_inode(&iloc);
+       header = IHDR(inode, raw_inode);
+       entry = IFIRST(header);
+       end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+       error = ext3_xattr_check_names(entry, end);
+       if (error)
+               goto cleanup;
+       error = ext3_xattr_find_entry(&entry, name_index, name,
+                                     end - (void *)entry, 0);
+       if (error)
+               goto cleanup;
+       size = le32_to_cpu(entry->e_value_size);
+       if (buffer) {
+               error = -ERANGE;
+               if (size > buffer_size)
+                       goto cleanup;
+               memcpy(buffer, (void *)IFIRST(header) +
+                      le16_to_cpu(entry->e_value_offs), size);
+       }
+       error = size;
+
+cleanup:
+       brelse(iloc.bh);
         return error;
 }
 
 /*
- * ext3_xattr_list()
+ * ext3_xattr_get()
  *
- * Copy a list of attribute names into the buffer
+ * Copy an extended attribute into the buffer
  * provided, or compute the buffer size required.
  * Buffer is NULL to compute the size of the buffer required.
  *
@@ -360,18 +315,55 @@ cleanup:
  * used / required on success.
  */
 int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
+              void *buffer, size_t buffer_size)
+{
+       int error;
+
+       /*
+        * Try the inode body first; consult the external block only when
+        * the in-inode lookup reports -ENODATA. Any other result, including
+        * other errors, is returned as is. xattr_sem is held for reading
+        * across both lookups.
+        */
+       down_read(&EXT3_I(inode)->xattr_sem);
+       error = ext3_xattr_ibody_get(inode, name_index, name, buffer,
+                                    buffer_size);
+       if (error == -ENODATA)
+               error = ext3_xattr_block_get(inode, name_index, name, buffer,
+                                            buffer_size);
+       up_read(&EXT3_I(inode)->xattr_sem);
+       return error;
+}
+
+/*
+ * Pass every entry in the list at ENTRY to the list() method of the
+ * handler for its name index; entries with no handler are skipped.
+ *
+ * With a non-NULL BUFFER, returns -ERANGE as soon as a handler reports a
+ * size larger than the space left. With a NULL BUFFER nothing is bounds
+ * checked and only the sizes are summed; "rest" may then wrap below zero,
+ * but it is unsigned, so buffer_size - rest still yields the total.
+ *
+ * Returns the total of the sizes reported by the handlers, or -ERANGE.
+ */
+static int
+ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+                       char *buffer, size_t buffer_size)
+{
+       size_t rest = buffer_size;
+
+       for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) {
+               struct xattr_handler *handler =
+                       ext3_xattr_handler(entry->e_name_index);
+
+               if (handler) {
+                       size_t size = handler->list(inode, buffer, rest,
+                                                   entry->e_name,
+                                                   entry->e_name_len);
+                       if (buffer) {
+                               if (size > rest)
+                                       return -ERANGE;
+                               buffer += size;
+                       }
+                       rest -= size;
+               }
+       }
+       return buffer_size - rest;
+}
+
+int
+ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
 {
        struct buffer_head *bh = NULL;
-       struct ext3_xattr_entry *entry;
-       size_t size = 0;
-       char *buf, *end;
        int error;
 
        ea_idebug(inode, "buffer=%p, buffer_size=%ld",
                  buffer, (long)buffer_size);
 
-       down_read(&EXT3_I(inode)->xattr_sem);
        error = 0;
        if (!EXT3_I(inode)->i_file_acl)
                goto cleanup;
@@ -381,62 +373,83 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
        if (!bh)
                goto cleanup;
        ea_bdebug(bh, "b_count=%d, refcount=%d",
-               atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-       end = bh->b_data + bh->b_size;
-       if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-           HDR(bh)->h_blocks != cpu_to_le32(1)) {
-bad_block:     ext3_error(inode->i_sb, "ext3_xattr_list",
-                       "inode %ld: bad block %d", inode->i_ino,
-                       EXT3_I(inode)->i_file_acl);
+               atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
+       if (ext3_xattr_check_block(bh)) {
+               ext3_error(inode->i_sb, __FUNCTION__,
+                          "inode %ld: bad block %d", inode->i_ino,
+                          EXT3_I(inode)->i_file_acl);
                error = -EIO;
                goto cleanup;
        }
-       /* compute the size required for the list of attribute names */
-       for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-            entry = EXT3_XATTR_NEXT(entry)) {
-               struct ext3_xattr_handler *handler;
-               struct ext3_xattr_entry *next =
-                       EXT3_XATTR_NEXT(entry);
-               if ((char *)next >= end)
-                       goto bad_block;
-
-               handler = ext3_xattr_handler(entry->e_name_index);
-               if (handler)
-                       size += handler->list(NULL, inode, entry->e_name,
-                                             entry->e_name_len);
-       }
+       ext3_xattr_cache_insert(bh);
+       error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
 
-       if (ext3_xattr_cache_insert(bh))
-               ea_idebug(inode, "cache insert failed");
-       if (!buffer) {
-               error = size;
-               goto cleanup;
-       } else {
-               error = -ERANGE;
-               if (size > buffer_size)
-                       goto cleanup;
-       }
+cleanup:
+       brelse(bh);
+
+       return error;
+}
 
-       /* list the attribute names */
-       buf = buffer;
-       for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-            entry = EXT3_XATTR_NEXT(entry)) {
-               struct ext3_xattr_handler *handler;
+/*
+ * List the attribute names stored in the inode body. Returns 0 if the
+ * inode does not have EXT3_STATE_XATTR set. Otherwise the in-inode
+ * descriptor list is bounds-checked against the end of the on-disk inode
+ * and handed to ext3_xattr_list_entries(), whose result is returned.
+ */
+static int
+ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+       struct ext3_xattr_ibody_header *header;
+       struct ext3_inode *raw_inode;
+       struct ext3_iloc iloc;
+       void *end;
+       int error;
 
-               handler = ext3_xattr_handler(entry->e_name_index);
-               if (handler)
-                       buf += handler->list(buf, inode, entry->e_name,
-                                            entry->e_name_len);
-       }
-       error = size;
+       if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR))
+               return 0;
+       error = ext3_get_inode_loc(inode, &iloc);
+       if (error)
+               return error;
+       raw_inode = ext3_raw_inode(&iloc);
+       header = IHDR(inode, raw_inode);
+       end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;
+       error = ext3_xattr_check_names(IFIRST(header), end);
+       if (error)
+               goto cleanup;
+       error = ext3_xattr_list_entries(inode, IFIRST(header),
+                                       buffer, buffer_size);
 
 cleanup:
-       brelse(bh);
-       up_read(&EXT3_I(inode)->xattr_sem);
-
+       brelse(iloc.bh);
         return error;
 }
 
+/*
+ * ext3_xattr_list()
+ *
+ * Copy a list of attribute names into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+int
+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+{
+       int i_error, b_error;
+
+       /*
+        * i_error and b_error hold either a byte count or a negative error
+        * for the in-inode and block listings. Whenever one of them is an
+        * error the other is forced to 0, so the sum returned below is
+        * either that error or the combined length of both listings.
+        */
+       down_read(&EXT3_I(inode)->xattr_sem);
+       i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+       if (i_error < 0) {
+               b_error = 0;
+       } else {
+               /* Block names are appended after the in-inode names. */
+               if (buffer) {
+                       buffer += i_error;
+                       buffer_size -= i_error;
+               }
+               b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+               if (b_error < 0)
+                       i_error = 0;
+       }
+       up_read(&EXT3_I(inode)->xattr_sem);
+       return i_error + b_error;
+}
+
 /*
  * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
  * not set, set it.
@@ -458,341 +471,337 @@ static void ext3_xattr_update_super_block(handle_t *handle,
 }
 
 /*
- * ext3_xattr_set_handle()
- *
- * Create, replace or remove an extended attribute for this inode. Buffer
- * is NULL to remove an existing extended attribute, and non-NULL to
- * either replace an existing extended attribute, or create a new extended
- * attribute. The flags XATTR_REPLACE and XATTR_CREATE
- * specify that an extended attribute must exist and must not exist
- * previous to the call, respectively.
- *
- * Returns 0, or a negative error number on failure.
+ * Release the xattr block BH: If the reference count is > 1, decrement
+ * it; otherwise free the block.
  */
-int
-ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-                     const char *name, const void *value, size_t value_len,
-                     int flags)
+static void
+ext3_xattr_release_block(handle_t *handle, struct inode *inode,
+                        struct buffer_head *bh)
 {
-       struct super_block *sb = inode->i_sb;
-       struct buffer_head *bh = NULL;
-       struct ext3_xattr_header *header = NULL;
-       struct ext3_xattr_entry *here, *last;
-       size_t name_len, free, min_offs = sb->s_blocksize;
-       int not_found = 1, error;
-       char *end;
-
-       /*
-        * header -- Points either into bh, or to a temporarily
-        *           allocated buffer.
-        * here -- The named entry found, or the place for inserting, within
-        *         the block pointed to by header.
-        * last -- Points right after the last named entry within the block
-        *         pointed to by header.
-        * min_offs -- The offset of the first value (values are aligned
-        *             towards the end of the block).
-        * end -- Points right after the block pointed to by header.
-        */
-
-       ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
-                 name_index, name, value, (long)value_len);
-
-       if (IS_RDONLY(inode))
-               return -EROFS;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return -EPERM;
-       if (value == NULL)
-               value_len = 0;
-       if (name == NULL)
-               return -EINVAL;
-       name_len = strlen(name);
-       if (name_len > 255 || value_len > sb->s_blocksize)
-               return -ERANGE;
-       down_write(&EXT3_I(inode)->xattr_sem);
-       if (EXT3_I(inode)->i_file_acl) {
-               /* The inode already has an extended attribute block. */
-               bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
-               error = -EIO;
-               if (!bh)
-                       goto cleanup;
-               ea_bdebug(bh, "b_count=%d, refcount=%d",
-                       atomic_read(&(bh->b_count)),
-                       le32_to_cpu(HDR(bh)->h_refcount));
-               header = HDR(bh);
-               end = bh->b_data + bh->b_size;
-               if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-                   header->h_blocks != cpu_to_le32(1)) {
-bad_block:             ext3_error(sb, "ext3_xattr_set",
-                               "inode %ld: bad block %d", inode->i_ino,
-                               EXT3_I(inode)->i_file_acl);
-                       error = -EIO;
-                       goto cleanup;
-               }
-               /* Find the named attribute. */
-               here = FIRST_ENTRY(bh);
-               while (!IS_LAST_ENTRY(here)) {
-                       struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
-                       if ((char *)next >= end)
-                               goto bad_block;
-                       if (!here->e_value_block && here->e_value_size) {
-                               size_t offs = le16_to_cpu(here->e_value_offs);
-                               if (offs < min_offs)
-                                       min_offs = offs;
-                       }
-                       not_found = name_index - here->e_name_index;
-                       if (!not_found)
-                               not_found = name_len - here->e_name_len;
-                       if (!not_found)
-                               not_found = memcmp(name, here->e_name,name_len);
-                       if (not_found <= 0)
-                               break;
-                       here = next;
-               }
-               last = here;
-               /* We still need to compute min_offs and last. */
-               while (!IS_LAST_ENTRY(last)) {
-                       struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-                       if ((char *)next >= end)
-                               goto bad_block;
-                       if (!last->e_value_block && last->e_value_size) {
-                               size_t offs = le16_to_cpu(last->e_value_offs);
-                               if (offs < min_offs)
-                                       min_offs = offs;
-                       }
-                       last = next;
-               }
-
-               /* Check whether we have enough space left. */
-               free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
+       struct mb_cache_entry *ce = NULL; /* cache entry for bh, if any */
+
+       ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr);
+       if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { /* last reference */
+               ea_bdebug(bh, "refcount now=0; freeing");
+               if (ce)
+                       mb_cache_entry_free(ce); /* presumably uncaches bh */
+               ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
+               get_bh(bh); /* presumably ext3_forget() drops a ref - confirm */
+               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
        } else {
-               /* We will use a new extended attribute block. */
-               free = sb->s_blocksize -
-                       sizeof(struct ext3_xattr_header) - sizeof(__u32);
-               here = last = NULL;  /* avoid gcc uninitialized warning. */
+               /* NOTE(review): a failure below is ignored; refcount stays. */
+               if (ext3_journal_get_write_access(handle, bh) == 0) {
+                       lock_buffer(bh);
+                       BHDR(bh)->h_refcount = cpu_to_le32(
+                               le32_to_cpu(BHDR(bh)->h_refcount) - 1);
+                       ext3_journal_dirty_metadata(handle, bh);
+                       if (IS_SYNC(inode))
+                               handle->h_sync = 1;
+                       /* Counterpart of the *_ALLOC_BLOCK done on reuse. */
+                       DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1);
+                       DQUOT_FREE_BLOCK(inode, 1);
+                       unlock_buffer(bh);
+                       ea_bdebug(bh, "refcount now=%d; releasing",
+                                 le32_to_cpu(BHDR(bh)->h_refcount));
+               }
+               if (ce)
+                       mb_cache_entry_release(ce); /* block is still in use */
        }
+}
 
-       if (not_found) {
-               /* Request to remove a nonexistent attribute? */
-               error = -ENODATA;
-               if (flags & XATTR_REPLACE)
-                       goto cleanup;
-               error = 0;
-               if (value == NULL)
-                       goto cleanup;
-       } else {
-               /* Request to create an existing attribute? */
-               error = -EEXIST;
-               if (flags & XATTR_CREATE)
-                       goto cleanup;
-               if (!here->e_value_block && here->e_value_size) {
-                       size_t size = le32_to_cpu(here->e_value_size);
+struct ext3_xattr_info {
+       int name_index;         /* stored in the entry as e_name_index */
+       const char *name;       /* NUL-terminated; measured with strlen() */
+       const void *value;      /* NULL requests removal of the attribute */
+       size_t value_len;       /* number of bytes copied from value */
+};
+
+struct ext3_xattr_search {
+       struct ext3_xattr_entry *first; /* first entry of the entry list */
+       void *base;                     /* e_value_offs is relative to this */
+       void *end;                      /* first byte past the region */
+       struct ext3_xattr_entry *here;  /* entry found, or insertion point */
+       int not_found;                  /* 0 if here matches, else nonzero */
+};
 
-                       if (le16_to_cpu(here->e_value_offs) + size > 
-                           sb->s_blocksize || size > sb->s_blocksize)
-                               goto bad_block;
+static int /* Set, replace or (i->value == NULL) remove one entry in s. */
+ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
+{
+       struct ext3_xattr_entry *last;
+       size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
+
+       /* Compute min_offs and last. */
+       last = s->first;
+       for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) {
+               if (!last->e_value_block && last->e_value_size) {
+                       size_t offs = le16_to_cpu(last->e_value_offs);
+                       if (offs < min_offs)
+                               min_offs = offs;
+               }
+       }
+       /* Gap between entries and values, minus the 4-byte terminator. */
+       free = min_offs - ((void *)last - s->base) - sizeof(__u32);
+       if (!s->not_found) { /* replacing: old name/value space is reusable */
+               if (!s->here->e_value_block && s->here->e_value_size) {
+                       size_t size = le32_to_cpu(s->here->e_value_size);
                        free += EXT3_XATTR_SIZE(size);
                }
                free += EXT3_XATTR_LEN(name_len);
        }
-       error = -ENOSPC;
-       if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(value_len))
-               goto cleanup;
-
-       /* Here we know that we can set the new attribute. */
-
-       if (header) {
-               int credits = 0;
-
-               /* assert(header == HDR(bh)); */
-               if (header->h_refcount != cpu_to_le32(1))
-                       goto skip_get_write_access;
-               /* ext3_journal_get_write_access() requires an unlocked bh,
-                  which complicates things here. */
-               error = ext3_journal_get_write_access_credits(handle, bh,
-                                                             &credits);
-               if (error)
-                       goto cleanup;
-               lock_buffer(bh);
-               if (header->h_refcount == cpu_to_le32(1)) {
-                       ea_bdebug(bh, "modifying in-place");
-                       ext3_xattr_cache_remove(bh);
-                       /* keep the buffer locked while modifying it. */
-               } else {
-                       int offset;
-
-                       unlock_buffer(bh);
-                       journal_release_buffer(handle, bh, credits);
-               skip_get_write_access:
-                       ea_bdebug(bh, "cloning");
-                       header = kmalloc(bh->b_size, GFP_KERNEL);
-                       error = -ENOMEM;
-                       if (header == NULL)
-                               goto cleanup;
-                       memcpy(header, HDR(bh), bh->b_size);
-                       header->h_refcount = cpu_to_le32(1);
-                       offset = (char *)here - bh->b_data;
-                       here = ENTRY((char *)header + offset);
-                       offset = (char *)last - bh->b_data;
-                       last = ENTRY((char *)header + offset);
-               }
-       } else {
-               /* Allocate a buffer where we construct the new block. */
-               header = kmalloc(sb->s_blocksize, GFP_KERNEL);
-               error = -ENOMEM;
-               if (header == NULL)
-                       goto cleanup;
-               memset(header, 0, sb->s_blocksize);
-               end = (char *)header + sb->s_blocksize;
-               header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-               header->h_blocks = header->h_refcount = cpu_to_le32(1);
-               last = here = ENTRY(header+1);
+       if (i->value) {
+               /* NOTE(review): first test presumably guards the sum's wrap. */
+               if (free < EXT3_XATTR_SIZE(i->value_len) ||
+                   free < EXT3_XATTR_LEN(name_len) +
+                          EXT3_XATTR_SIZE(i->value_len))
+                       return -ENOSPC; /* the only error returned here */
        }
 
-       /* Iff we are modifying the block in-place, bh is locked here. */
-
-       if (not_found) {
+       if (i->value && s->not_found) {
                /* Insert the new name. */
                size_t size = EXT3_XATTR_LEN(name_len);
-               size_t rest = (char *)last - (char *)here;
-               memmove((char *)here + size, here, rest);
-               memset(here, 0, size);
-               here->e_name_index = name_index;
-               here->e_name_len = name_len;
-               memcpy(here->e_name, name, name_len);
+               /* rest also covers the 4-byte terminator after the entries. */
+               size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
+               memmove((void *)s->here + size, s->here, rest);
+               memset(s->here, 0, size);
+               s->here->e_name_index = i->name_index;
+               s->here->e_name_len = name_len;
+               memcpy(s->here->e_name, i->name, name_len);
        } else {
-               if (!here->e_value_block && here->e_value_size) {
-                       char *first_val = (char *)header + min_offs;
-                       size_t offs = le16_to_cpu(here->e_value_offs);
-                       char *val = (char *)header + offs;
+               if (!s->here->e_value_block && s->here->e_value_size) {
+                       void *first_val = s->base + min_offs;
+                       size_t offs = le16_to_cpu(s->here->e_value_offs);
+                       void *val = s->base + offs;
                        size_t size = EXT3_XATTR_SIZE(
-                               le32_to_cpu(here->e_value_size));
+                               le32_to_cpu(s->here->e_value_size));
 
-                       if (size == EXT3_XATTR_SIZE(value_len)) {
+                       if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) {
                                /* The old and the new value have the same
                                   size. Just replace. */
-                               here->e_value_size = cpu_to_le32(value_len);
+                               s->here->e_value_size =
+                                       cpu_to_le32(i->value_len);
                                memset(val + size - EXT3_XATTR_PAD, 0,
                                       EXT3_XATTR_PAD); /* Clear pad bytes. */
-                               memcpy(val, value, value_len);
-                               goto skip_replace;
+                               memcpy(val, i->value, i->value_len);
+                               return 0; /* replaced in place; nothing moved */
                        }
 
                        /* Remove the old value. */
                        memmove(first_val + size, first_val, val - first_val);
                        memset(first_val, 0, size);
-                       here->e_value_offs = 0;
+                       s->here->e_value_size = 0; /* entry has no value now */
+                       s->here->e_value_offs = 0;
                        min_offs += size;
 
                        /* Adjust all value offsets. */
-                       last = ENTRY(header+1);
+                       last = s->first;
                        while (!IS_LAST_ENTRY(last)) {
                                size_t o = le16_to_cpu(last->e_value_offs);
-                               if (!last->e_value_block && o < offs)
+                               if (!last->e_value_block &&
+                                   last->e_value_size && o < offs) /* has value */
                                        last->e_value_offs =
                                                cpu_to_le16(o + size);
                                last = EXT3_XATTR_NEXT(last);
                        }
                }
-               if (value == NULL) {
+               if (!i->value) {
                        /* Remove the old name. */
                        size_t size = EXT3_XATTR_LEN(name_len);
-                       last = ENTRY((char *)last - size);
-                       memmove(here, (char*)here + size,
-                               (char*)last - (char*)here);
+                       last = ENTRY((void *)last - size);
+                       /* The move length includes the 4-byte terminator. */
+                       memmove(s->here, (void *)s->here + size,
+                               (void *)last - (void *)s->here + sizeof(__u32));
                        memset(last, 0, size);
                }
        }
 
-       if (value != NULL) {
+       if (i->value) {
                /* Insert the new value. */
-               here->e_value_size = cpu_to_le32(value_len);
-               if (value_len) {
-                       size_t size = EXT3_XATTR_SIZE(value_len);
-                       char *val = (char *)header + min_offs - size;
-                       here->e_value_offs =
-                               cpu_to_le16((char *)val - (char *)header);
+               s->here->e_value_size = cpu_to_le32(i->value_len);
+               if (i->value_len) {
+                       size_t size = EXT3_XATTR_SIZE(i->value_len);
+                       void *val = s->base + min_offs - size;
+                       /* The stored offset is relative to s->base. */
+                       s->here->e_value_offs = cpu_to_le16(min_offs - size);
                        memset(val + size - EXT3_XATTR_PAD, 0,
                               EXT3_XATTR_PAD); /* Clear the pad bytes. */
-                       memcpy(val, value, value_len);
+                       memcpy(val, i->value, i->value_len);
                }
        }
+       return 0;
+}
 
-skip_replace:
-       if (IS_LAST_ENTRY(ENTRY(header+1))) {
-               /* This block is now empty. */
-               if (bh && header == HDR(bh))
-                       unlock_buffer(bh);  /* we were modifying in-place. */
-               error = ext3_xattr_set_handle2(handle, inode, bh, NULL);
-       } else {
-               ext3_xattr_rehash(header, here);
-               if (bh && header == HDR(bh))
-                       unlock_buffer(bh);  /* we were modifying in-place. */
-               error = ext3_xattr_set_handle2(handle, inode, bh, header);
+struct ext3_xattr_block_find {
+       struct ext3_xattr_search s;     /* search state within the block */
+       struct buffer_head *bh;         /* the xattr block, from sb_bread() */
+};
+
+int /* NOTE(review): external linkage; confirm whether static was meant. */
+ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
+                     struct ext3_xattr_block_find *bs)
+{
+       struct super_block *sb = inode->i_sb;
+       int error;
+
+       ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
+                 i->name_index, i->name, i->value, (long)i->value_len);
+
+       /* With no xattr block, bs is left exactly as the caller set it. */
+       if (EXT3_I(inode)->i_file_acl) {
+               /* The inode already has an extended attribute block. */
+               bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
+               error = -EIO;
+               if (!bs->bh)
+                       goto cleanup;
+               ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
+                       atomic_read(&(bs->bh->b_count)),
+                       le32_to_cpu(BHDR(bs->bh)->h_refcount));
+               if (ext3_xattr_check_block(bs->bh)) {
+                       ext3_error(sb, __FUNCTION__,
+                               "inode %ld: bad block %d", inode->i_ino,
+                               EXT3_I(inode)->i_file_acl);
+                       error = -EIO;
+                       goto cleanup; /* bs->bh is not released here */
+               }
+               /* Find the named attribute. */
+               bs->s.base = BHDR(bs->bh);
+               bs->s.first = BFIRST(bs->bh);
+               bs->s.end = bs->bh->b_data + bs->bh->b_size;
+               bs->s.here = bs->s.first;
+               error = ext3_xattr_find_entry(&bs->s.here, i->name_index,
+                                             i->name, bs->bh->b_size, 1);
+               if (error && error != -ENODATA) /* -ENODATA is not a failure */
+                       goto cleanup;
+               bs->s.not_found = error; /* 0 if found, else -ENODATA */
        }
+       error = 0;
 
 cleanup:
-       brelse(bh);
-       if (!(bh && header == HDR(bh)))
-               kfree(header);
-       up_write(&EXT3_I(inode)->xattr_sem);
-
        return error;
 }
 
-/*
- * Second half of ext3_xattr_set_handle(): Update the file system.
- */
 static int
-ext3_xattr_set_handle2(handle_t *handle, struct inode *inode,
-                      struct buffer_head *old_bh,
-                      struct ext3_xattr_header *header)
+ext3_xattr_block_set(handle_t *handle, struct inode *inode,
+                    struct ext3_xattr_info *i,
+                    struct ext3_xattr_block_find *bs)
 {
        struct super_block *sb = inode->i_sb;
        struct buffer_head *new_bh = NULL;
-       int credits = 0, error;
+       struct ext3_xattr_search *s = &bs->s;
+       struct mb_cache_entry *ce = NULL;
+       int error;
+
+#define header(x) ((struct ext3_xattr_header *)(x))
+
+       if (i->value && i->value_len > sb->s_blocksize)
+               return -ENOSPC;
+       if (s->base) {
+               ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev,
+                                       bs->bh->b_blocknr);
+               if (header(s->base)->h_refcount == cpu_to_le32(1)) {
+                       if (ce) {
+                               mb_cache_entry_free(ce);
+                               ce = NULL;
+                       }
+                       ea_bdebug(bs->bh, "modifying in-place");
+                       error = ext3_journal_get_write_access(handle, bs->bh);
+                       if (error)
+                               goto cleanup;
+                       lock_buffer(bs->bh);
+                       error = ext3_xattr_set_entry(i, s);
+                       if (!error) {
+                               if (!IS_LAST_ENTRY(s->first))
+                                       ext3_xattr_rehash(header(s->base),
+                                                         s->here);
+                               ext3_xattr_cache_insert(bs->bh);
+                       }
+                       unlock_buffer(bs->bh);
+                       if (error == -EIO)
+                               goto bad_block;
+                       if (!error)
+                               error = ext3_journal_dirty_metadata(handle,
+                                                                   bs->bh);
+                       if (error)
+                               goto cleanup;
+                       goto inserted;
+               } else {
+                       int offset = (char *)s->here - bs->bh->b_data;
+
+                       if (ce) {
+                               mb_cache_entry_release(ce);
+                               ce = NULL;
+                       }
+                       ea_bdebug(bs->bh, "cloning");
+                       s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
+                       error = -ENOMEM;
+                       if (s->base == NULL)
+                               goto cleanup;
+                       memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+                       s->first = ENTRY(header(s->base)+1);
+                       header(s->base)->h_refcount = cpu_to_le32(1);
+                       s->here = ENTRY(s->base + offset);
+                       s->end = s->base + bs->bh->b_size;
+               }
+       } else {
+               /* Allocate a buffer where we construct the new block. */
+               s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+               /* assert(header == s->base) */
+               error = -ENOMEM;
+               if (s->base == NULL)
+                       goto cleanup;
+               memset(s->base, 0, sb->s_blocksize);
+               header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+               header(s->base)->h_blocks = cpu_to_le32(1);
+               header(s->base)->h_refcount = cpu_to_le32(1);
+               s->first = ENTRY(header(s->base)+1);
+               s->here = ENTRY(header(s->base)+1);
+               s->end = s->base + sb->s_blocksize;
+       }
 
-       if (header) {
-               new_bh = ext3_xattr_cache_find(handle, inode, header, &credits);
+       error = ext3_xattr_set_entry(i, s);
+       if (error == -EIO)
+               goto bad_block;
+       if (error)
+               goto cleanup;
+       if (!IS_LAST_ENTRY(s->first))
+               ext3_xattr_rehash(header(s->base), s->here);
+
+inserted:
+       if (!IS_LAST_ENTRY(s->first)) {
+               new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce);
                if (new_bh) {
                        /* We found an identical block in the cache. */
-                       if (new_bh == old_bh)
-                               ea_bdebug(new_bh, "keeping this block");
+                       if (new_bh == bs->bh)
+                               ea_bdebug(new_bh, "keeping");
                        else {
-                               /* The old block is released after updating
-                                  the inode. */
-                               ea_bdebug(new_bh, "reusing block");
-
                                error = -ENOSPC;
                                if (DLIMIT_ALLOC_BLOCK(sb, inode->i_xid, 1))
                                        goto cleanup;
+                               /* The old block is released after updating
+                                  the inode. */
                                error = -EDQUOT;
-                               if (DQUOT_ALLOC_BLOCK(inode, 1)) {
-                                       DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
-                                       unlock_buffer(new_bh);
-                                       journal_release_buffer(handle, new_bh,
-                                                              credits);
-                                       goto cleanup;
-                               }
-                               HDR(new_bh)->h_refcount = cpu_to_le32(1 +
-                                       le32_to_cpu(HDR(new_bh)->h_refcount));
-                               ea_bdebug(new_bh, "refcount now=%d",
-                                       le32_to_cpu(HDR(new_bh)->h_refcount));
+                               if (DQUOT_ALLOC_BLOCK(inode, 1))
+                                       goto cleanup_dlimit;
+                               error = ext3_journal_get_write_access(handle,
+                                                                     new_bh);
+                               if (error)
+                                       goto cleanup_dquot;
+                               lock_buffer(new_bh);
+                               BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
+                                       le32_to_cpu(BHDR(new_bh)->h_refcount));
+                               ea_bdebug(new_bh, "reusing; refcount now=%d",
+                                       le32_to_cpu(BHDR(new_bh)->h_refcount));
+                               unlock_buffer(new_bh);
+                               error = ext3_journal_dirty_metadata(handle,
+                                                                   new_bh);
+                               if (error)
+                                       goto cleanup_dquot;
                        }
-                       unlock_buffer(new_bh);
-               } else if (old_bh && header == HDR(old_bh)) {
-                       /* Keep this block. No need to lock the block as we
-                        * don't need to change the reference count. */
-                       new_bh = old_bh;
+                       mb_cache_entry_release(ce);
+                       ce = NULL;
+               } else if (bs->bh && s->base == bs->bh->b_data) {
+                       /* We were modifying this block in-place. */
+                       ea_bdebug(bs->bh, "keeping this block");
+                       new_bh = bs->bh;
                        get_bh(new_bh);
-                       ext3_xattr_cache_insert(new_bh);
                } else {
                        /* We need to allocate a new block */
                        int goal = le32_to_cpu(
                                        EXT3_SB(sb)->s_es->s_first_data_block) +
                                EXT3_I(inode)->i_block_group *
                                EXT3_BLOCKS_PER_GROUP(sb);
-                       int block = ext3_new_block(handle, inode, goal,
-                                                  NULL, NULL, &error);
+                       int block = ext3_new_block(handle, inode, goal, &error);
                        if (error)
                                goto cleanup;
                        ea_idebug(inode, "creating block %d", block);
@@ -810,61 +819,219 @@ getblk_failed:
                                unlock_buffer(new_bh);
                                goto getblk_failed;
                        }
-                       memcpy(new_bh->b_data, header, new_bh->b_size);
+                       memcpy(new_bh->b_data, s->base, new_bh->b_size);
                        set_buffer_uptodate(new_bh);
                        unlock_buffer(new_bh);
                        ext3_xattr_cache_insert(new_bh);
-
-                       ext3_xattr_update_super_block(handle, sb);
+                       error = ext3_journal_dirty_metadata(handle, new_bh);
+                       if (error)
+                               goto cleanup;
                }
-               error = ext3_journal_dirty_metadata(handle, new_bh);
-               if (error)
-                       goto cleanup;
        }
 
        /* Update the inode. */
        EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-       inode->i_ctime = CURRENT_TIME;
-       ext3_mark_inode_dirty(handle, inode);
-       if (IS_SYNC(inode))
-               handle->h_sync = 1;
 
+       /* Drop the previous xattr block. */
+       if (bs->bh && bs->bh != new_bh)
+               ext3_xattr_release_block(handle, inode, bs->bh);
        error = 0;
-       if (old_bh && old_bh != new_bh) {
-               /*
-                * If there was an old block, and we are no longer using it,
-                * release the old block.
-               */
-               error = ext3_journal_get_write_access(handle, old_bh);
+
+cleanup:
+       if (ce)
+               mb_cache_entry_release(ce);
+       brelse(new_bh);
+       if (!(bs->bh && s->base == bs->bh->b_data))
+               kfree(s->base);
+
+       return error;
+
+cleanup_dquot:
+       DQUOT_FREE_BLOCK(inode, 1);
+cleanup_dlimit:
+       DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
+       goto cleanup;
+
+bad_block:
+       ext3_error(inode->i_sb, __FUNCTION__,
+                  "inode %ld: bad block %d", inode->i_ino,
+                  EXT3_I(inode)->i_file_acl);
+       goto cleanup;
+
+#undef header
+}
+
+struct ext3_xattr_ibody_find { /* state for one lookup in the in-inode xattr area */
+       struct ext3_xattr_search s;     /* entry cursor: base/first/here/end/not_found */
+       struct ext3_iloc iloc;          /* on-disk inode location, filled by ext3_get_inode_loc() */
+};
+
+int
+ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i,
+                     struct ext3_xattr_ibody_find *is)
+{      /* Look up attribute i in the in-inode xattr area; the result is left in is->s. */
+       struct ext3_xattr_ibody_header *header;
+       struct ext3_inode *raw_inode;
+       int error;
+
+       if (EXT3_I(inode)->i_extra_isize == 0)
+               return 0;       /* no extra inode space: is->s stays as the caller initialised it */
+       raw_inode = ext3_raw_inode(&is->iloc);  /* is->iloc must already be filled in by the caller */
+       header = IHDR(inode, raw_inode);
+       is->s.base = is->s.first = IFIRST(header);      /* value offsets are relative to the first entry here */
+       is->s.here = is->s.first;
+       is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size;     /* end of the on-disk inode */
+       if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) {        /* only search when the in-memory state says xattrs are present */
+               error = ext3_xattr_check_names(IFIRST(header), is->s.end);
                 if (error)
+                       return error;
+               /* Find the named attribute. */
+               error = ext3_xattr_find_entry(&is->s.here, i->name_index,
+                                             i->name, is->s.end -
+                                             (void *)is->s.base, 0);
+               if (error && error != -ENODATA)
+                       return error;   /* real failure; -ENODATA only means "not in the inode" */
+               is->s.not_found = error;        /* 0 if found, -ENODATA otherwise */
+       }
+       return 0;
+}
+
+static int
+ext3_xattr_ibody_set(handle_t *handle, struct inode *inode,
+                    struct ext3_xattr_info *i,
+                    struct ext3_xattr_ibody_find *is)
+{      /* Apply change i (set, or remove when i->value is NULL) to the in-inode xattr area; handle is unused here. */
+       struct ext3_xattr_ibody_header *header;
+       struct ext3_xattr_search *s = &is->s;
+       int error;
+
+       if (EXT3_I(inode)->i_extra_isize == 0)
+               return -ENOSPC; /* no in-inode area; ext3_xattr_set_handle() falls back to the block on -ENOSPC */
+       error = ext3_xattr_set_entry(i, s);     /* NOTE(review): helper not visible here; presumably edits the entry list described by s */
+       if (error)
+               return error;
+       header = IHDR(inode, ext3_raw_inode(&is->iloc));
+       if (!IS_LAST_ENTRY(s->first)) { /* at least one entry remains: mark the area as holding xattrs */
+               header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
+               EXT3_I(inode)->i_state |= EXT3_STATE_XATTR;
+       } else {        /* area is empty now: clear both the on-disk magic and the in-memory flag */
+               header->h_magic = cpu_to_le32(0);
+               EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR;
+       }
+       return 0;
+}
+
+/*
+ * ext3_xattr_set_handle()
+ *
+ * Create, replace or remove an extended attribute for this inode. A NULL
+ * value removes an existing extended attribute; a non-NULL value either
+ * replaces an existing extended attribute or creates a new one. The flags
+ * XATTR_REPLACE and XATTR_CREATE specify that the attribute must exist and
+ * must not exist prior to the call, respectively. When storing a value the
+ * in-inode area is tried first; the block is used if that gives -ENOSPC.
+ *
+ * Returns 0, or a negative error number on failure.
+ */
+int
+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+                     const char *name, const void *value, size_t value_len,
+                     int flags)
+{
+       struct ext3_xattr_info i = {
+               .name_index = name_index,
+               .name = name,
+               .value = value,
+               .value_len = value_len,
+
+       };
+       struct ext3_xattr_ibody_find is = {
+               .s = { .not_found = -ENODATA, },        /* "not found" until a find routine says otherwise */
+       };
+       struct ext3_xattr_block_find bs = {
+               .s = { .not_found = -ENODATA, },        /* stays -ENODATA when the block is never searched */
+       };
+       int error;
+
+       if (IS_RDONLY(inode))
+               return -EROFS;
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return -EPERM;
+       if (!name)
+               return -EINVAL;
+       if (strlen(name) > 255)
+               return -ERANGE;
+       down_write(&EXT3_I(inode)->xattr_sem);  /* released at the cleanup label */
+       error = ext3_get_inode_loc(inode, &is.iloc);
+       if (error)
+               goto cleanup;
+
+       if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {  /* inode still flagged new: zero the whole raw inode once */
+               struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
+               memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
+               EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
+       }
+
+       error = ext3_xattr_ibody_find(inode, &i, &is);
+       if (error)
+               goto cleanup;
+       if (is.s.not_found)     /* not in the inode body: look in the external block */
+               error = ext3_xattr_block_find(inode, &i, &bs);
+       if (error)
+               goto cleanup;
+       if (is.s.not_found && bs.s.not_found) {
+               error = -ENODATA;
+               if (flags & XATTR_REPLACE)
                         goto cleanup;
-               lock_buffer(old_bh);
-               if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
-                       /* Free the old block. */
-                       ea_bdebug(old_bh, "freeing");
-                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
-
-                       /* ext3_forget() calls bforget() for us, but we
-                          let our caller release old_bh, so we need to
-                          duplicate the handle before. */
-                       get_bh(old_bh);
-                       ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
-               } else {
-                       /* Decrement the refcount only. */
-                       HDR(old_bh)->h_refcount = cpu_to_le32(
-                               le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
-                       DLIMIT_FREE_BLOCK(sb, inode->i_xid, 1);
-                       DQUOT_FREE_BLOCK(inode, 1);
-                       ext3_journal_dirty_metadata(handle, old_bh);
-                       ea_bdebug(old_bh, "refcount now=%d",
-                               le32_to_cpu(HDR(old_bh)->h_refcount));
+               error = 0;
+               if (!value)     /* removing an attribute that does not exist is a successful no-op */
+                       goto cleanup;
+       } else {
+               error = -EEXIST;
+               if (flags & XATTR_CREATE)
+                       goto cleanup;
+       }
+       error = ext3_journal_get_write_access(handle, is.iloc.bh);
+       if (error)
+               goto cleanup;
+       if (!value) {   /* removal: delete from wherever the attribute was found */
+               if (!is.s.not_found)
+                       error = ext3_xattr_ibody_set(handle, inode, &i, &is);
+               else if (!bs.s.not_found)
+                       error = ext3_xattr_block_set(handle, inode, &i, &bs);
+       } else {
+               error = ext3_xattr_ibody_set(handle, inode, &i, &is);   /* prefer the inode body */
+               if (!error && !bs.s.not_found) {        /* now in the inode: remove the old copy from the block */
+                       i.value = NULL;
+                       error = ext3_xattr_block_set(handle, inode, &i, &bs);
+               } else if (error == -ENOSPC) {  /* no room in the inode: store in the block instead */
+                       error = ext3_xattr_block_set(handle, inode, &i, &bs);
+                       if (error)
+                               goto cleanup;
+                       if (!is.s.not_found) {  /* and remove the old in-inode copy */
+                               i.value = NULL;
+                               error = ext3_xattr_ibody_set(handle, inode, &i,
+                                                            &is);
+                       }
                 }
-               unlock_buffer(old_bh);
+       }
+       if (!error) {
+               ext3_xattr_update_super_block(handle, inode->i_sb);
+               inode->i_ctime = CURRENT_TIME_SEC;
+               error = ext3_mark_iloc_dirty(handle, inode, &is.iloc);
+               /*
+                * The bh is consumed by ext3_mark_iloc_dirty, even with
+                * error != 0.
+                */
+               is.iloc.bh = NULL;      /* so the brelse() at cleanup does not drop it a second time */
+               if (IS_SYNC(inode))
+                       handle->h_sync = 1;
         }
 
 cleanup:
-       brelse(new_bh);
-
+       brelse(is.iloc.bh);
+       brelse(bs.bh);
+       up_write(&EXT3_I(inode)->xattr_sem);
         return error;
 }
 
@@ -907,54 +1074,35 @@ retry:
  * ext3_xattr_delete_inode()
  *
  * Free extended attribute resources associated with this inode. This
- * is called immediately before an inode is freed.
+ * is called immediately before an inode is freed. We have exclusive
+ * access to the inode.
  */
 void
 ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
 {
        struct buffer_head *bh = NULL;
 
-       down_write(&EXT3_I(inode)->xattr_sem);
        if (!EXT3_I(inode)->i_file_acl)
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
        if (!bh) {
-               ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+               ext3_error(inode->i_sb, __FUNCTION__,
                        "inode %ld: block %d read error", inode->i_ino,
                        EXT3_I(inode)->i_file_acl);
                goto cleanup;
        }
-       if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-           HDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
+       if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||       /* reject anything that is not a one-block xattr block */
+           BHDR(bh)->h_blocks != cpu_to_le32(1)) {
+               ext3_error(inode->i_sb, __FUNCTION__,
                        "inode %ld: bad block %d", inode->i_ino,
                        EXT3_I(inode)->i_file_acl);
                goto cleanup;
        }
-       if (ext3_journal_get_write_access(handle, bh) != 0)
-               goto cleanup;
-       lock_buffer(bh);
-       if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-               ext3_xattr_cache_remove(bh);
-               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
-               get_bh(bh);
-               ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
-       } else {
-               HDR(bh)->h_refcount = cpu_to_le32(
-                       le32_to_cpu(HDR(bh)->h_refcount) - 1);
-               ext3_journal_dirty_metadata(handle, bh);
-               if (IS_SYNC(inode))
-                       handle->h_sync = 1;
-               DLIMIT_FREE_BLOCK(inode->i_sb, inode->i_xid, 1);
-               DQUOT_FREE_BLOCK(inode, 1);
-       }
-       ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-       unlock_buffer(bh);
+       ext3_xattr_release_block(handle, inode, bh);    /* NOTE(review): helper not visible here; presumably frees the block or drops one reference -- confirm */
        EXT3_I(inode)->i_file_acl = 0;
 
 cleanup:
        brelse(bh);
-       up_write(&EXT3_I(inode)->xattr_sem);
 }
 
 /*
@@ -976,30 +1124,29 @@ ext3_xattr_put_super(struct super_block *sb)
  *
  * Returns 0, or a negative error number on failure.
  */
-static int
+static void    /* NOTE(review): the header comment's "Returns 0, or a negative error number" line is stale; nothing is returned */
 ext3_xattr_cache_insert(struct buffer_head *bh)
 {
-       __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+       __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);     /* the block header's stored hash is the cache key */
        struct mb_cache_entry *ce;
        int error;
 
        ce = mb_cache_entry_alloc(ext3_xattr_cache);
-       if (!ce)
-               return -ENOMEM;
+       if (!ce) {      /* best effort: an allocation failure is only logged */
+               ea_bdebug(bh, "out of memory");
+               return;
+       }
        error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
        if (error) {
                mb_cache_entry_free(ce);
                if (error == -EBUSY) {
-                       ea_bdebug(bh, "already in cache (%d cache entries)",
-                               atomic_read(&ext3_xattr_cache->c_entry_count));
+                       ea_bdebug(bh, "already in cache");      /* NOTE(review): the error = 0 that follows is a dead store in a void function */
                        error = 0;
                }
        } else {
-               ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
-                         atomic_read(&ext3_xattr_cache->c_entry_count));
+               ea_bdebug(bh, "inserting [%x]", (int)hash);
                mb_cache_entry_release(ce);
        }
-       return error;
 }
 
 /*
@@ -1022,6 +1169,7 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
                if (IS_LAST_ENTRY(entry2))
                        return 1;
                if (entry1->e_hash != entry2->e_hash ||
+                   entry1->e_name_index != entry2->e_name_index ||
                    entry1->e_name_len != entry2->e_name_len ||
                    entry1->e_value_size != entry2->e_value_size ||
                    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
@@ -1050,8 +1198,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1,
  * not found or an error occurred.
  */
 static struct buffer_head *
-ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
-                     struct ext3_xattr_header *header, int *credits)
+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
+                     struct mb_cache_entry **pce)      /* *pce is written only when a matching block is returned */
 {
        __u32 hash = le32_to_cpu(header->h_hash);
        struct mb_cache_entry *ce;
@@ -1059,62 +1207,38 @@ ext3_xattr_cache_find(handle_t *handle, struct inode *inode,
        if (!header->h_hash)
                return NULL;  /* never share */
        ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+again: /* the whole lookup restarts from here on -EAGAIN */
        ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
                                       inode->i_sb->s_bdev, hash);
        while (ce) {
-               struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
+               struct buffer_head *bh;
 
+               if (IS_ERR(ce)) {       /* the find calls can hand back an error pointer instead of an entry */
+                       if (PTR_ERR(ce) == -EAGAIN)
+                               goto again;
+                       break;  /* any other error: give up and report "no match" */
+               }
+               bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext3_error(inode->i_sb, "ext3_xattr_cache_find",
+                       ext3_error(inode->i_sb, __FUNCTION__,
                                "inode %ld: block %ld read error",
                                inode->i_ino, (unsigned long) ce->e_block);
-               } else if (ext3_journal_get_write_access_credits(
-                               handle, bh, credits) == 0) {
-                       /* ext3_journal_get_write_access() requires an unlocked
-                        * bh, which complicates things here. */
-                       lock_buffer(bh);
-                       if (le32_to_cpu(HDR(bh)->h_refcount) >
-                                  EXT3_XATTR_REFCOUNT_MAX) {
-                               ea_idebug(inode, "block %ld refcount %d>%d",
-                                         (unsigned long) ce->e_block,
-                                         le32_to_cpu(HDR(bh)->h_refcount),
+               } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= /* block is already at the sharing limit: skip it */
+                               EXT3_XATTR_REFCOUNT_MAX) {
+                       ea_idebug(inode, "block %ld refcount %d>=%d",
+                                 (unsigned long) ce->e_block,
+                                 le32_to_cpu(BHDR(bh)->h_refcount),
                                          EXT3_XATTR_REFCOUNT_MAX);
-                       } else if (!ext3_xattr_cmp(header, HDR(bh))) {
-                               mb_cache_entry_release(ce);
-                               /* buffer will be unlocked by caller */
-                               return bh;
-                       }
-                       unlock_buffer(bh);
-                       journal_release_buffer(handle, bh, *credits);
-                       *credits = 0;
-                       brelse(bh);
+               } else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) {
+                       *pce = ce;      /* entry is not released here: the caller gets it together with bh */
+                       return bh;
                }
+               brelse(bh);     /* no match; bh is NULL here after a read error */
                ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
        }
        return NULL;
 }
 
-/*
- * ext3_xattr_cache_remove()
- *
- * Remove the cache entry of a block from the cache. Called when a
- * block becomes invalid.
- */
-static void
-ext3_xattr_cache_remove(struct buffer_head *bh)
-{
-       struct mb_cache_entry *ce;
-
-       ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev,
-                               bh->b_blocknr);
-       if (ce) {
-               ea_bdebug(bh, "removing (%d cache entries remaining)",
-                         atomic_read(&ext3_xattr_cache->c_entry_count)-1);
-               mb_cache_entry_free(ce);
-       } else 
-               ea_bdebug(bh, "no cache entry");
-}
-
 #define NAME_HASH_SHIFT 5
 #define VALUE_HASH_SHIFT 16
 
@@ -1186,51 +1310,12 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
 int __init
 init_ext3_xattr(void)
 {
-       int     err;
-
-       err = ext3_xattr_register(EXT3_XATTR_INDEX_USER,
-                                 &ext3_xattr_user_handler);
-       if (err)
-               return err;
-       err = ext3_xattr_register(EXT3_XATTR_INDEX_TRUSTED,
-                                 &ext3_xattr_trusted_handler);
-       if (err)
-               goto out;
-#ifdef CONFIG_EXT3_FS_SECURITY
-       err = ext3_xattr_register(EXT3_XATTR_INDEX_SECURITY,
-                                 &ext3_xattr_security_handler);
-       if (err)
-               goto out1;
-#endif
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       err = init_ext3_acl();
-       if (err)
-               goto out2;
-#endif
        ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
                sizeof(struct mb_cache_entry) +
-               sizeof(struct mb_cache_entry_index), 1, 6);
-       if (!ext3_xattr_cache) {
-               err = -ENOMEM;
-               goto out3;
-       }
+               sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);     /* entry size plus one e_indexes[] slot, taken from the member itself */
+       if (!ext3_xattr_cache)
+               return -ENOMEM; /* creating the cache is the only step, so there is nothing to unwind */
        return 0;
-out3:
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       exit_ext3_acl();
-out2:
-#endif
-#ifdef CONFIG_EXT3_FS_SECURITY
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
-                             &ext3_xattr_security_handler);
-out1:
-#endif
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
-                             &ext3_xattr_trusted_handler);
-out:
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
-                             &ext3_xattr_user_handler);
-       return err;
 }
 
 void
@@ -1239,15 +1324,4 @@ exit_ext3_xattr(void)
        if (ext3_xattr_cache)
                mb_cache_destroy(ext3_xattr_cache);
        ext3_xattr_cache = NULL;
-#ifdef CONFIG_EXT3_FS_POSIX_ACL
-       exit_ext3_acl();
-#endif
-#ifdef CONFIG_EXT3_FS_SECURITY
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_SECURITY,
-                             &ext3_xattr_security_handler);
-#endif
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_TRUSTED,
-                             &ext3_xattr_trusted_handler);
-       ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
-                             &ext3_xattr_user_handler);
 }