X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=fs%2Fxfs%2Flinux-2.6%2Fxfs_buf.c;h=690cc7799a7c5b045384f276ca381c97a292719e;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=4c0a72c305c531a8ab59dee51e88f3ef52eb40df;hpb=87fc8d1bb10cd459024a742c6a10961fefcef18f;p=linux-2.6.git

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4c0a72c30..690cc7799 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,15 +51,12 @@
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/workqueue.h>
-#include <linux/suspend.h>
 #include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
 
 #include "xfs_linux.h"
 
-#ifndef GFP_READAHEAD
-#define GFP_READAHEAD	(__GFP_NOWARN|__GFP_NORETRY)
-#endif
-
 /*
  * File wide globals
  */
@@ -118,8 +115,8 @@ ktrace_t *pagebuf_trace_buf;
  */
 
 #define pb_to_gfp(flags) \
-	(((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \
-	 ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL)
+	((((flags) & PBF_READ_AHEAD) ? __GFP_NORETRY : \
+	  ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
 
 #define pb_to_km(flags) \
 	 (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
@@ -131,34 +128,71 @@ ktrace_t *pagebuf_trace_buf;
 	kmem_zone_free(pagebuf_cache, (pb));
 
 /*
- * Pagebuf hashing
+ * Page Region interfaces.
+ *
+ * For pages in filesystems where the blocksize is smaller than the
+ * pagesize, we use the page->private field (long) to hold a bitmap
+ * of uptodate regions within the page.
+ *
+ * Each such region is "bytes per page / bits per long" bytes long.
+ *
+ * NBPPR == number-of-bytes-per-page-region
+ * BTOPR == bytes-to-page-region (rounded up)
+ * BTOPRT == bytes-to-page-region-truncated (rounded down)
  */
+#if (BITS_PER_LONG == 32)
+#define PRSHIFT		(PAGE_CACHE_SHIFT - 5)	/* (32 == 1<<5) */
+#elif (BITS_PER_LONG == 64)
+#define PRSHIFT		(PAGE_CACHE_SHIFT - 6)	/* (64 == 1<<6) */
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+#define NBPPR		(PAGE_CACHE_SIZE/BITS_PER_LONG)
+#define BTOPR(b)	(((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
+#define BTOPRT(b)	(((unsigned int)(b) >> PRSHIFT))
+
+STATIC unsigned long
+page_region_mask(
+	size_t		offset,
+	size_t		length)
+{
+	unsigned long	mask;
+	int		first, final;
 
-#define NBITS	8
-#define NHASH	(1<<NBITS)
+	first = BTOPR(offset);
+	final = BTOPRT(offset + length - 1);
+	first = min(first, final);
 
-typedef struct {
-	struct list_head	pb_hash;
-	spinlock_t		pb_hash_lock;
-} pb_hash_t;
+	mask = ~0UL;
+	mask <<= BITS_PER_LONG - (final - first);
+	mask >>= BITS_PER_LONG - (final);
 
-STATIC pb_hash_t	pbhash[NHASH];
-#define pb_hash(pb)	&pbhash[pb->pb_hash_index]
+	ASSERT(offset + length <= PAGE_CACHE_SIZE);
+	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
 
-STATIC int
-_bhash(
-	struct block_device *bdev,
-	loff_t		base)
+	return mask;
+}
+
+STATIC inline void
+set_page_region(
+	struct page	*page,
+	size_t		offset,
+	size_t		length)
 {
-	int		bit, hval;
+	page->private |= page_region_mask(offset, length);
+	if (page->private == ~0UL)
+		SetPageUptodate(page);
+}
 
-	base >>= 9;
-	base ^= (unsigned long)bdev / L1_CACHE_BYTES;
-	for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) {
-		hval ^= (int)base & (NHASH-1);
-		base >>= NBITS;
-	}
-	return hval;
+STATIC inline int
+test_page_region(
+	struct page	*page,
+	size_t		offset,
+	size_t		length)
+{
+	unsigned long	mask = page_region_mask(offset, length);
+
+	return (mask && (page->private & mask) == mask);
 }
 
 /*
@@ -172,7 +206,7 @@ typedef struct a_list {
 
 STATIC a_list_t		*as_free_head;
 STATIC int		as_list_len;
-STATIC spinlock_t	as_lock = SPIN_LOCK_UNLOCKED;
+STATIC DEFINE_SPINLOCK(as_lock);
 
 /*
  * Try to batch vunmaps because they are costly.
@@ -183,8 +217,8 @@ free_address(
 {
 	a_list_t	*aentry;
 
-	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC);
-	if (aentry) {
+	aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC & ~__GFP_HIGH);
+	if (likely(aentry)) {
 		spin_lock(&as_lock);
 		aentry->next = as_free_head;
 		aentry->vm_addr = addr;
@@ -344,7 +378,6 @@ _pagebuf_lookup_pages(
 	uint			flags)
 {
 	struct address_space	*mapping = bp->pb_target->pbr_mapping;
-	unsigned int		sectorshift = bp->pb_target->pbr_sshift;
 	size_t			blocksize = bp->pb_target->pbr_bsize;
 	size_t			size = bp->pb_count_desired;
 	size_t			nbytes, offset;
@@ -387,13 +420,13 @@ _pagebuf_lookup_pages(
 			 */
 			if (!(++retries % 100))
 				printk(KERN_ERR
-					"possible deadlock in %s (mode:0x%x)\n",
+					"XFS: possible memory allocation "
+					"deadlock in %s (mode:0x%x)\n",
 					__FUNCTION__, gfp_mask);
 
 			XFS_STATS_INC(pb_page_retries);
 			pagebuf_daemon_wakeup(0, gfp_mask);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			schedule_timeout(10);
+			blk_congestion_wait(WRITE, HZ/50);
 			goto retry;
 		}
 
@@ -404,22 +437,11 @@ _pagebuf_lookup_pages(
 
 		if (!PageUptodate(page)) {
 			page_count--;
-			if (blocksize == PAGE_CACHE_SIZE) {
+			if (blocksize >= PAGE_CACHE_SIZE) {
 				if (flags & PBF_READ)
 					bp->pb_locked = 1;
 			} else if (!PagePrivate(page)) {
-				unsigned long	j, range;
-
-				/*
-				 * In this case page->private holds a bitmap
-				 * of uptodate sectors within the page
-				 */
-				ASSERT(blocksize < PAGE_CACHE_SIZE);
-				range = (offset + nbytes) >> sectorshift;
-				for (j = offset >> sectorshift; j < range; j++)
-					if (!test_bit(j, &page->private))
-						break;
-				if (j == range)
+				if (test_page_region(page, offset, nbytes))
 					page_count++;
 			}
 		}
@@ -486,9 +508,9 @@ _pagebuf_map_pages(
  *	which may imply that this call will block until those buffers
  *	are unlocked.  No I/O is implied by this call.
  */
-STATIC xfs_buf_t *
-_pagebuf_find(				/* find buffer for block	*/
-	xfs_buftarg_t		*target,/* target for block		*/
+xfs_buf_t *
+_pagebuf_find(
+	xfs_buftarg_t		*btp,	/* block device target		*/
 	loff_t			ioff,	/* starting offset of range	*/
 	size_t			isize,	/* length of range		*/
 	page_buf_flags_t	flags,	/* PBF_TRYLOCK			*/
@@ -496,59 +518,55 @@ _pagebuf_find(				/* find buffer for block	*/
 {
 	loff_t			range_base;
 	size_t			range_length;
-	int			hval;
-	pb_hash_t		*h;
+	xfs_bufhash_t		*hash;
 	xfs_buf_t		*pb, *n;
-	int			not_locked;
 
 	range_base = (ioff << BBSHIFT);
 	range_length = (isize << BBSHIFT);
 
-	/* Ensure we never do IOs smaller than the sector size */
-	BUG_ON(range_length < (1 << target->pbr_sshift));
+	/* Check for IOs smaller than the sector size / not sector aligned */
+	ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+	ASSERT(!(range_base & (loff_t)btp->pbr_smask));
 
-	/* Ensure we never do IOs that are not sector aligned */
-	BUG_ON(range_base & (loff_t)target->pbr_smask);
+	hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
 
-	hval = _bhash(target->pbr_bdev, range_base);
-	h = &pbhash[hval];
+	spin_lock(&hash->bh_lock);
 
-	spin_lock(&h->pb_hash_lock);
-	list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
-		if (pb->pb_target == target &&
-		    pb->pb_file_offset == range_base &&
+	list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+		ASSERT(btp == pb->pb_target);
+		if (pb->pb_file_offset == range_base &&
 		    pb->pb_buffer_length == range_length) {
-			/* If we look at something bring it to the
-			 * front of the list for next time
+			/*
+			 * If we look at something bring it to the
+			 * front of the list for next time.
 			 */
 			atomic_inc(&pb->pb_hold);
-			list_move(&pb->pb_hash_list, &h->pb_hash);
+			list_move(&pb->pb_hash_list, &hash->bh_list);
 			goto found;
 		}
 	}
 
 	/* No match found */
 	if (new_pb) {
-		_pagebuf_initialize(new_pb, target, range_base,
+		_pagebuf_initialize(new_pb, btp, range_base,
 				range_length, flags);
-		new_pb->pb_hash_index = hval;
-		list_add(&new_pb->pb_hash_list, &h->pb_hash);
+		new_pb->pb_hash = hash;
+		list_add(&new_pb->pb_hash_list, &hash->bh_list);
 	} else {
 		XFS_STATS_INC(pb_miss_locked);
 	}
 
-	spin_unlock(&h->pb_hash_lock);
-	return (new_pb);
+	spin_unlock(&hash->bh_lock);
+	return new_pb;
 
 found:
-	spin_unlock(&h->pb_hash_lock);
+	spin_unlock(&hash->bh_lock);
 
 	/* Attempt to get the semaphore without sleeping,
 	 * if this does not work then we need to drop the
 	 * spinlock and do a hard attempt on the semaphore.
 	 */
-	not_locked = down_trylock(&pb->pb_sema);
-	if (not_locked) {
+	if (down_trylock(&pb->pb_sema)) {
 		if (!(flags & PBF_TRYLOCK)) {
 			/* wait for buffer ownership */
 			PB_TRACE(pb, "get_lock", 0);
@@ -578,39 +596,14 @@ found:
 	return (pb);
 }
 
-
-/*
- *	pagebuf_find
- *
- *	pagebuf_find returns a buffer matching the specified range of
- *	data for the specified target, if any of the relevant blocks
- *	are in memory.  The buffer may have unallocated holes, if
- *	some, but not all, of the blocks are in memory.  Even where
- *	pages are present in the buffer, not all of every page may be
- *	valid.
- */
-xfs_buf_t *
-pagebuf_find(				/* find buffer for block	*/
-					/* if the block is in memory	*/
-	xfs_buftarg_t		*target,/* target for block		*/
-	loff_t			ioff,	/* starting offset of range	*/
-	size_t			isize,	/* length of range		*/
-	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
-{
-	return _pagebuf_find(target, ioff, isize, flags, NULL);
-}
-
 /*
- *	pagebuf_get
+ *	xfs_buf_get_flags assembles a buffer covering the specified range.
  *
- *	pagebuf_get assembles a buffer covering the specified range.
- *	Some or all of the blocks in the range may be valid.  Storage
- *	in memory for all portions of the buffer will be allocated,
- *	although backing storage may not be.  If PBF_READ is set in
- *	flags, pagebuf_iostart is called also.
+ *	Storage in memory for all portions of the buffer will be allocated,
+ *	although backing storage may not be.
  */
 xfs_buf_t *
-pagebuf_get(				/* allocate a buffer		*/
+xfs_buf_get_flags(			/* allocate a buffer		*/
 	xfs_buftarg_t		*target,/* target for buffer		*/
 	loff_t			ioff,	/* starting offset of range	*/
 	size_t			isize,	/* length of range		*/
@@ -640,8 +633,8 @@ pagebuf_get(				/* allocate a buffer		*/
 	if (!(pb->pb_flags & PBF_MAPPED)) {
 		error = _pagebuf_map_pages(pb, flags);
 		if (unlikely(error)) {
-			printk(KERN_WARNING
-			       "pagebuf_get: failed to map pages\n");
+			printk(KERN_WARNING "%s: failed to map pages\n",
+					__FUNCTION__);
 			goto no_buffer;
 		}
 	}
@@ -655,30 +648,50 @@ pagebuf_get(				/* allocate a buffer		*/
 	pb->pb_bn = ioff;
 	pb->pb_count_desired = pb->pb_buffer_length;
 
-	if (flags & PBF_READ) {
+	PB_TRACE(pb, "get", (unsigned long)flags);
+	return pb;
+
+ no_buffer:
+	if (flags & (PBF_LOCK | PBF_TRYLOCK))
+		pagebuf_unlock(pb);
+	pagebuf_rele(pb);
+	return NULL;
+}
+
+xfs_buf_t *
+xfs_buf_read_flags(
+	xfs_buftarg_t		*target,
+	loff_t			ioff,
+	size_t			isize,
+	page_buf_flags_t	flags)
+{
+	xfs_buf_t		*pb;
+
+	flags |= PBF_READ;
+
+	pb = xfs_buf_get_flags(target, ioff, isize, flags);
+	if (pb) {
 		if (PBF_NOT_DONE(pb)) {
-			PB_TRACE(pb, "get_read", (unsigned long)flags);
+			PB_TRACE(pb, "read", (unsigned long)flags);
 			XFS_STATS_INC(pb_get_read);
 			pagebuf_iostart(pb, flags);
 		} else if (flags & PBF_ASYNC) {
-			PB_TRACE(pb, "get_read_async", (unsigned long)flags);
+			PB_TRACE(pb, "read_async", (unsigned long)flags);
 			/*
 			 * Read ahead call which is already satisfied,
 			 * drop the buffer
 			 */
 			goto no_buffer;
 		} else {
-			PB_TRACE(pb, "get_read_done", (unsigned long)flags);
+			PB_TRACE(pb, "read_done", (unsigned long)flags);
 			/* We do not want read in the flags */
 			pb->pb_flags &= ~PBF_READ;
 		}
-	} else {
-		PB_TRACE(pb, "get_write", (unsigned long)flags);
 	}
 
 	return pb;
 
-no_buffer:
+ no_buffer:
 	if (flags & (PBF_LOCK | PBF_TRYLOCK))
 		pagebuf_unlock(pb);
 	pagebuf_rele(pb);
@@ -720,11 +733,9 @@ pagebuf_readahead(
 	bdi = target->pbr_mapping->backing_dev_info;
 	if (bdi_read_congested(bdi))
 		return;
-	if (bdi_write_congested(bdi))
-		return;
 
-	flags |= (PBF_TRYLOCK|PBF_READ|PBF_ASYNC|PBF_READ_AHEAD);
-	pagebuf_get(target, ioff, isize, flags);
+	flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
+	xfs_buf_read_flags(target, ioff, isize, flags);
 }
 
 xfs_buf_t *
@@ -875,18 +886,29 @@ void
 pagebuf_rele(
 	xfs_buf_t		*pb)
 {
-	pb_hash_t		*hash = pb_hash(pb);
+	xfs_bufhash_t		*hash = pb->pb_hash;
 
 	PB_TRACE(pb, "rele", pb->pb_relse);
 
-	if (atomic_dec_and_lock(&pb->pb_hold, &hash->pb_hash_lock)) {
+	/*
+	 * pagebuf_lookup buffers are not hashed, not delayed write,
+	 * and don't have their own release routines.  Special case.
+	 */
+	if (unlikely(!hash)) {
+		ASSERT(!pb->pb_relse);
+		if (atomic_dec_and_test(&pb->pb_hold))
+			xfs_buf_free(pb);
+		return;
+	}
+
+	if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
 		int		do_free = 1;
 
 		if (pb->pb_relse) {
 			atomic_inc(&pb->pb_hold);
-			spin_unlock(&hash->pb_hash_lock);
+			spin_unlock(&hash->bh_lock);
 			(*(pb->pb_relse)) (pb);
-			spin_lock(&hash->pb_hash_lock);
+			spin_lock(&hash->bh_lock);
 			do_free = 0;
 		}
 
@@ -901,10 +923,10 @@ pagebuf_rele(
 
 		if (do_free) {
 			list_del_init(&pb->pb_hash_list);
-			spin_unlock(&hash->pb_hash_lock);
+			spin_unlock(&hash->bh_lock);
 			pagebuf_free(pb);
 		} else {
-			spin_unlock(&hash->pb_hash_lock);
+			spin_unlock(&hash->bh_lock);
 		}
 	}
 }
@@ -944,6 +966,7 @@ pagebuf_cond_lock(			/* lock buffer, if not locked	*/
 	return(locked ? 0 : -EBUSY);
 }
 
+#ifdef DEBUG
 /*
  *	pagebuf_lock_value
  *
@@ -955,6 +978,7 @@ pagebuf_lock_value(
 {
 	return(atomic_read(&pb->pb_sema.count));
 }
+#endif
 
 /*
  *	pagebuf_lock
@@ -1084,7 +1108,7 @@ _pagebuf_wait_unpin(
  *	done with respect to that I/O.	The pb_iodone routine, if
  *	present, will be called as a side-effect.
  */
-void
+STATIC void
 pagebuf_iodone_work(
 	void			*v)
 {
@@ -1225,7 +1249,6 @@ bio_end_io_pagebuf(
 {
 	xfs_buf_t		*pb = (xfs_buf_t *)bio->bi_private;
 	unsigned int		i, blocksize = pb->pb_target->pbr_bsize;
-	unsigned int		sectorshift = pb->pb_target->pbr_sshift;
 	struct bio_vec		*bvec = bio->bi_io_vec;
 
 	if (bio->bi_size)
@@ -1243,14 +1266,7 @@ bio_end_io_pagebuf(
 			SetPageUptodate(page);
 		} else if (!PagePrivate(page) &&
 				(pb->pb_flags & _PBF_PAGE_CACHE)) {
-			unsigned long	j, range;
-
-			ASSERT(blocksize < PAGE_CACHE_SIZE);
-			range = (bvec->bv_offset + bvec->bv_len) >> sectorshift;
-			for (j = bvec->bv_offset >> sectorshift; j < range; j++)
-				set_bit(j, &page->private);
-			if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1))
-				SetPageUptodate(page);
+			set_page_region(page, bvec->bv_offset, bvec->bv_len);
 		}
 
 		if (_pagebuf_iolocked(pb)) {
@@ -1263,7 +1279,7 @@ bio_end_io_pagebuf(
 	return 0;
 }
 
-void
+STATIC void
 _pagebuf_ioapply(
 	xfs_buf_t		*pb)
 {
@@ -1473,6 +1489,65 @@ pagebuf_iomove(
  *	Handling of buftargs.
  */
 
+/*
+ * Wait for any bufs with callbacks that have been submitted but
+ * have not yet returned... walk the hash list for the target.
+ */
+void
+xfs_wait_buftarg(
+	xfs_buftarg_t	*btp)
+{
+	xfs_buf_t	*bp, *n;
+	xfs_bufhash_t	*hash;
+	uint		i;
+
+	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+		hash = &btp->bt_hash[i];
+again:
+		spin_lock(&hash->bh_lock);
+		list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
+			ASSERT(btp == bp->pb_target);
+			if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+				spin_unlock(&hash->bh_lock);
+				delay(100);
+				goto again;
+			}
+		}
+		spin_unlock(&hash->bh_lock);
+	}
+}
+
+/*
+ * Allocate buffer hash table for a given target.
+ * For devices containing metadata (i.e. not the log/realtime devices)
+ * we need to allocate a much larger hash table.
+ */
+STATIC void
+xfs_alloc_bufhash(
+	xfs_buftarg_t		*btp,
+	int			external)
+{
+	unsigned int		i;
+
+	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
+	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
+					sizeof(xfs_bufhash_t), KM_SLEEP);
+	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+		spin_lock_init(&btp->bt_hash[i].bh_lock);
+		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+	}
+}
+
+STATIC void
+xfs_free_bufhash(
+	xfs_buftarg_t		*btp)
+{
+	kmem_free(btp->bt_hash,
+			(1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+	btp->bt_hash = NULL;
+}
+
 void
 xfs_free_buftarg(
 	xfs_buftarg_t		*btp,
@@ -1481,6 +1556,7 @@ xfs_free_buftarg(
 	xfs_flush_buftarg(btp, 1);
 	if (external)
 		xfs_blkdev_put(btp->pbr_bdev);
+	xfs_free_bufhash(btp);
 	iput(btp->pbr_mapping->host);
 	kmem_free(btp, sizeof(*btp));
 }
@@ -1495,11 +1571,12 @@ xfs_incore_relse(
 	truncate_inode_pages(btp->pbr_mapping, 0LL);
 }
 
-int
-xfs_setsize_buftarg(
+STATIC int
+xfs_setsize_buftarg_flags(
 	xfs_buftarg_t		*btp,
 	unsigned int		blocksize,
-	unsigned int		sectorsize)
+	unsigned int		sectorsize,
+	int			verbose)
 {
 	btp->pbr_bsize = blocksize;
 	btp->pbr_sshift = ffs(sectorsize) - 1;
@@ -1511,9 +1588,42 @@ xfs_setsize_buftarg(
 			sectorsize, XFS_BUFTARG_NAME(btp));
 		return EINVAL;
 	}
+
+	if (verbose &&
+	    (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
+		printk(KERN_WARNING
+			"XFS: %u byte sectors in use on device %s.  "
+			"This is suboptimal; %u or greater is ideal.\n",
+			sectorsize, XFS_BUFTARG_NAME(btp),
+			(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
+	}
+
 	return 0;
 }
 
+/*
+* When allocating the initial buffer target we have not yet
+* read in the superblock, so don't know what sized sectors
+* are being used is at this early stage.  Play safe.
+*/
+STATIC int
+xfs_setsize_buftarg_early(
+	xfs_buftarg_t		*btp,
+	struct block_device	*bdev)
+{
+	return xfs_setsize_buftarg_flags(btp,
+			PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+	xfs_buftarg_t		*btp,
+	unsigned int		blocksize,
+	unsigned int		sectorsize)
+{
+	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
 STATIC int
 xfs_mapping_buftarg(
 	xfs_buftarg_t		*btp,
@@ -1542,14 +1652,15 @@ xfs_mapping_buftarg(
 	mapping = &inode->i_data;
 	mapping->a_ops = &mapping_aops;
 	mapping->backing_dev_info = bdi;
-	mapping_set_gfp_mask(mapping, GFP_KERNEL);
+	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	btp->pbr_mapping = mapping;
 	return 0;
 }
 
 xfs_buftarg_t *
 xfs_alloc_buftarg(
-	struct block_device	*bdev)
+	struct block_device	*bdev,
+	int			external)
 {
 	xfs_buftarg_t		*btp;
 
@@ -1557,10 +1668,11 @@ xfs_alloc_buftarg(
 
 	btp->pbr_dev =  bdev->bd_dev;
 	btp->pbr_bdev = bdev;
-	if (xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev)))
+	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
 		goto error;
+	xfs_alloc_bufhash(btp, external);
 	return btp;
 
 error:
@@ -1574,7 +1686,7 @@ error:
  */
 
 STATIC LIST_HEAD(pbd_delwrite_queue);
-STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED;
+STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
 
 STATIC void
 pagebuf_delwri_queue(
@@ -1665,9 +1777,7 @@ pagebuf_daemon(
 
 	INIT_LIST_HEAD(&tmp);
 	do {
-		/* swsusp */
-		if (current->flags & PF_FREEZE)
-			refrigerator(PF_FREEZE);
+		try_to_freeze(PF_FREEZE);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
@@ -1826,8 +1936,6 @@ pagebuf_daemon_stop(void)
 int __init
 pagebuf_init(void)
 {
-	int			i;
-
 	pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
 			SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (pagebuf_cache == NULL) {
@@ -1848,11 +1956,6 @@ pagebuf_init(void)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < NHASH; i++) {
-		spin_lock_init(&pbhash[i].pb_hash_lock);
-		INIT_LIST_HEAD(&pbhash[i].pb_hash);
-	}
-
 	return 0;
 }