Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / fs / xfs / linux-2.6 / xfs_super.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_bit.h"
20 #include "xfs_log.h"
21 #include "xfs_clnt.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir.h"
27 #include "xfs_dir2.h"
28 #include "xfs_alloc.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_quota.h"
31 #include "xfs_mount.h"
32 #include "xfs_bmap_btree.h"
33 #include "xfs_alloc_btree.h"
34 #include "xfs_ialloc_btree.h"
35 #include "xfs_dir_sf.h"
36 #include "xfs_dir2_sf.h"
37 #include "xfs_attr_sf.h"
38 #include "xfs_dinode.h"
39 #include "xfs_inode.h"
40 #include "xfs_btree.h"
41 #include "xfs_ialloc.h"
42 #include "xfs_bmap.h"
43 #include "xfs_rtalloc.h"
44 #include "xfs_error.h"
45 #include "xfs_itable.h"
46 #include "xfs_rw.h"
47 #include "xfs_acl.h"
48 #include "xfs_cap.h"
49 #include "xfs_mac.h"
50 #include "xfs_attr.h"
51 #include "xfs_buf_item.h"
52 #include "xfs_utils.h"
53 #include "xfs_version.h"
54
55 #include <linux/namei.h>
56 #include <linux/init.h>
57 #include <linux/mount.h>
58 #include <linux/mempool.h>
59 #include <linux/writeback.h>
60 #include <linux/kthread.h>
61
62 STATIC struct quotactl_ops xfs_quotactl_operations;
63 STATIC struct super_operations xfs_super_operations;
64 STATIC kmem_zone_t *xfs_vnode_zone;
65 STATIC kmem_zone_t *xfs_ioend_zone;
66 mempool_t *xfs_ioend_pool;
67
68 STATIC struct xfs_mount_args *
69 xfs_args_allocate(
70         struct super_block      *sb,
71         int                     silent)
72 {
73         struct xfs_mount_args   *args;
74
75         args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP);
76         args->logbufs = args->logbufsize = -1;
77         strncpy(args->fsname, sb->s_id, MAXNAMELEN);
78
79         /* Copy the already-parsed mount(2) flags we're interested in */
80         if (sb->s_flags & MS_DIRSYNC)
81                 args->flags |= XFSMNT_DIRSYNC;
82         if (sb->s_flags & MS_SYNCHRONOUS)
83                 args->flags |= XFSMNT_WSYNC;
84         if (silent)
85                 args->flags |= XFSMNT_QUIET;
86         args->flags |= XFSMNT_32BITINODES;
87
88         return args;
89 }
90
91 __uint64_t
92 xfs_max_file_offset(
93         unsigned int            blockshift)
94 {
95         unsigned int            pagefactor = 1;
96         unsigned int            bitshift = BITS_PER_LONG - 1;
97
98         /* Figure out maximum filesize, on Linux this can depend on
99          * the filesystem blocksize (on 32 bit platforms).
100          * __block_prepare_write does this in an [unsigned] long...
101          *      page->index << (PAGE_CACHE_SHIFT - bbits)
102          * So, for page sized blocks (4K on 32 bit platforms),
103          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
104          *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
105          * but for smaller blocksizes it is less (bbits = log2 bsize).
106          * Note1: get_block_t takes a long (implicit cast from above)
107          * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
108          * can optionally convert the [unsigned] long from above into
109          * an [unsigned] long long.
110          */
111
112 #if BITS_PER_LONG == 32
113 # if defined(CONFIG_LBD)
114         ASSERT(sizeof(sector_t) == 8);
115         pagefactor = PAGE_CACHE_SIZE;
116         bitshift = BITS_PER_LONG;
117 # else
118         pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
119 # endif
120 #endif
121
122         return (((__uint64_t)pagefactor) << bitshift) - 1;
123 }
124
125 STATIC __inline__ void
126 xfs_set_inodeops(
127         struct inode            *inode)
128 {
129         switch (inode->i_mode & S_IFMT) {
130         case S_IFREG:
131                 inode->i_op = &xfs_inode_operations;
132                 inode->i_fop = &xfs_file_operations;
133                 inode->i_mapping->a_ops = &xfs_address_space_operations;
134                 break;
135         case S_IFDIR:
136                 inode->i_op = &xfs_dir_inode_operations;
137                 inode->i_fop = &xfs_dir_file_operations;
138                 break;
139         case S_IFLNK:
140                 inode->i_op = &xfs_symlink_inode_operations;
141                 if (inode->i_blocks)
142                         inode->i_mapping->a_ops = &xfs_address_space_operations;
143                 break;
144         default:
145                 inode->i_op = &xfs_inode_operations;
146                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
147                 break;
148         }
149 }
150
/*
 * Refresh the Linux inode from the XFS on-disk inode core (ip->i_d),
 * copying ownership, size, timestamps and per-inode flags, then clear
 * VMODIFIED on the vnode to mark the two views as in sync.
 */
STATIC __inline__ void
xfs_revalidate_inode(
        xfs_mount_t             *mp,
        vnode_t                 *vp,
        xfs_inode_t             *ip)
{
        struct inode            *inode = vn_to_inode(vp);

        inode->i_mode   = ip->i_d.di_mode;
        inode->i_nlink  = ip->i_d.di_nlink;
        inode->i_uid    = ip->i_d.di_uid;
        inode->i_gid    = ip->i_d.di_gid;
        inode->i_xid    = ip->i_d.di_xid;       /* vserver context tag */

        /* device numbers are stored sysv-encoded in the data fork */
        switch (inode->i_mode & S_IFMT) {
        case S_IFBLK:
        case S_IFCHR:
                inode->i_rdev =
                        MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
                              sysv_minor(ip->i_df.if_u2.if_rdev));
                break;
        default:
                inode->i_rdev = 0;
                break;
        }

        inode->i_blksize = xfs_preferred_iosize(mp);
        inode->i_generation = ip->i_d.di_gen;
        i_size_write(inode, ip->i_d.di_size);
        /* include delayed-allocation blocks in the reported block count */
        inode->i_blocks =
                XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
        inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
        inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
        inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
        inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
        inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
        inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
        /*
         * Mirror each XFS_DIFLAG_* into the corresponding S_* bit.
         * Each flag is set or cleared individually (rather than
         * clearing the lot and re-setting) so unrelated i_flags bits
         * are never disturbed.
         */
        if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
                inode->i_flags |= S_IMMUTABLE;
        else
                inode->i_flags &= ~S_IMMUTABLE;
        if (ip->i_d.di_flags & XFS_DIFLAG_IUNLINK)
                inode->i_flags |= S_IUNLINK;
        else
                inode->i_flags &= ~S_IUNLINK;
        if (ip->i_d.di_flags & XFS_DIFLAG_BARRIER)
                inode->i_flags |= S_BARRIER;
        else
                inode->i_flags &= ~S_BARRIER;
        if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
                inode->i_flags |= S_APPEND;
        else
                inode->i_flags &= ~S_APPEND;
        if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
                inode->i_flags |= S_SYNC;
        else
                inode->i_flags &= ~S_SYNC;
        if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
                inode->i_flags |= S_NOATIME;
        else
                inode->i_flags &= ~S_NOATIME;
        vp->v_flag &= ~VMODIFIED;
}
214
/*
 * Bind an XFS inode behavior to its vnode and, once the inode core is
 * valid, finish Linux-side setup (ops vectors, revalidation) and
 * unlock the new inode.
 *
 * Called twice during inode creation: the first call (di_mode == 0)
 * only wires up the behavior chain; the second completes the inode.
 */
void
xfs_initialize_vnode(
        bhv_desc_t              *bdp,
        vnode_t                 *vp,
        bhv_desc_t              *inode_bhv,
        int                     unlock)
{
        xfs_inode_t             *ip = XFS_BHVTOI(inode_bhv);
        struct inode            *inode = vn_to_inode(vp);

        /* first time through: insert the inode behavior on the vnode */
        if (!inode_bhv->bd_vobj) {
                vp->v_vfsp = bhvtovfs(bdp);
                bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
                bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
        }

        /*
         * We need to set the ops vectors, and unlock the inode, but if
         * we have been called during the new inode create process, it is
         * too early to fill in the Linux inode.  We will get called a
         * second time once the inode is properly set up, and then we can
         * finish our work.
         */
        if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
                xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
                xfs_set_inodeops(inode);

                /*
                 * Clear XFS_INEW before unlock_new_inode(); the
                 * barrier() keeps the compiler from reordering the
                 * flag clear past the unlock.
                 */
                ip->i_flags &= ~XFS_INEW;
                barrier();

                unlock_new_inode(inode);
        }
}
248
249 int
250 xfs_blkdev_get(
251         xfs_mount_t             *mp,
252         const char              *name,
253         struct block_device     **bdevp)
254 {
255         int                     error = 0;
256
257         *bdevp = open_bdev_excl(name, 0, mp);
258         if (IS_ERR(*bdevp)) {
259                 error = PTR_ERR(*bdevp);
260                 printk("XFS: Invalid device [%s], error=%d\n", name, error);
261         }
262
263         return -error;
264 }
265
/*
 * Release a block device taken with xfs_blkdev_get().  A NULL bdev is
 * tolerated (nothing to release).
 */
void
xfs_blkdev_put(
        struct block_device     *bdev)
{
        if (!bdev)
                return;
        close_bdev_excl(bdev);
}
273
/*
 * Try to write out the superblock using barriers.
 */
STATIC int
xfs_barrier_test(
        xfs_mount_t     *mp)
{
        xfs_buf_t       *sbp = xfs_getsb(mp, 0);
        int             error;

        /*
         * Turn the superblock buffer into a synchronous, ORDERED
         * (barrier) write request.  The flag-clearing order here
         * mirrors how the buffer cache expects a write to be staged.
         */
        XFS_BUF_UNDONE(sbp);
        XFS_BUF_UNREAD(sbp);
        XFS_BUF_UNDELAYWRITE(sbp);
        XFS_BUF_WRITE(sbp);
        XFS_BUF_UNASYNC(sbp);
        XFS_BUF_ORDERED(sbp);

        xfsbdstrat(mp, sbp);
        error = xfs_iowait(sbp);

        /*
         * Clear all the flags we set and possible error state in the
         * buffer.  We only did the write to try out whether barriers
         * worked and shouldn't leave any traces in the superblock
         * buffer.
         */
        XFS_BUF_DONE(sbp);
        XFS_BUF_ERROR(sbp, 0);
        XFS_BUF_UNORDERED(sbp);

        xfs_buf_relse(sbp);
        return error;
}
307
308 void
309 xfs_mountfs_check_barriers(xfs_mount_t *mp)
310 {
311         int error;
312
313         if (mp->m_logdev_targp != mp->m_ddev_targp) {
314                 xfs_fs_cmn_err(CE_NOTE, mp,
315                   "Disabling barriers, not supported with external log device");
316                 mp->m_flags &= ~XFS_MOUNT_BARRIER;
317                 return;
318         }
319
320         if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
321                                         QUEUE_ORDERED_NONE) {
322                 xfs_fs_cmn_err(CE_NOTE, mp,
323                   "Disabling barriers, not supported by the underlying device");
324                 mp->m_flags &= ~XFS_MOUNT_BARRIER;
325                 return;
326         }
327
328         error = xfs_barrier_test(mp);
329         if (error) {
330                 xfs_fs_cmn_err(CE_NOTE, mp,
331                   "Disabling barriers, trial barrier write failed");
332                 mp->m_flags &= ~XFS_MOUNT_BARRIER;
333                 return;
334         }
335 }
336
/*
 * Flush the block device's volatile write cache for the given buftarg.
 * Thin wrapper over blkdev_issue_flush(); the NULL argument means we
 * do not ask for the error sector back.
 */
void
xfs_blkdev_issue_flush(
        xfs_buftarg_t           *buftarg)
{
        blkdev_issue_flush(buftarg->bt_bdev, NULL);
}
343
344 STATIC struct inode *
345 xfs_fs_alloc_inode(
346         struct super_block      *sb)
347 {
348         vnode_t                 *vp;
349
350         vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP);
351         if (unlikely(!vp))
352                 return NULL;
353         return vn_to_inode(vp);
354 }
355
/*
 * super_operations.destroy_inode: return the vnode that wraps this
 * inode to the xfs_vnode_zone slab.  Counterpart of xfs_fs_alloc_inode.
 */
STATIC void
xfs_fs_destroy_inode(
        struct inode            *inode)
{
        kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode));
}
362
/*
 * Slab constructor for xfs_vnode_zone objects.  Only performs the
 * one-time inode initialization on a genuine construction pass (the
 * SLAB_CTOR_* flag check distinguishes it from debug-verify passes).
 */
STATIC void
xfs_fs_inode_init_once(
        void                    *vnode,
        kmem_zone_t             *zonep,
        unsigned long           flags)
{
        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
                      SLAB_CTOR_CONSTRUCTOR)
                inode_init_once(vn_to_inode((vnode_t *)vnode));
}
373
/*
 * Create the global slab zones (vnodes, ioends) and the ioend mempool
 * used by all XFS mounts.  Returns 0 or -ENOMEM, unwinding any zones
 * already created via the goto chain.
 */
STATIC int
xfs_init_zones(void)
{
        xfs_vnode_zone = kmem_zone_init_flags(sizeof(vnode_t), "xfs_vnode_t",
                                        KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
                                        KM_ZONE_SPREAD,
                                        xfs_fs_inode_init_once);
        if (!xfs_vnode_zone)
                goto out;

        xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
        if (!xfs_ioend_zone)
                goto out_destroy_vnode_zone;

        /* mempool keeps a reserve of ioends so writeback can make progress */
        xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
                                                  xfs_ioend_zone);
        if (!xfs_ioend_pool)
                goto out_free_ioend_zone;
        return 0;

 out_free_ioend_zone:
        kmem_zone_destroy(xfs_ioend_zone);
 out_destroy_vnode_zone:
        kmem_zone_destroy(xfs_vnode_zone);
 out:
        return -ENOMEM;
}
401
/*
 * Tear down what xfs_init_zones() created.  The mempool is destroyed
 * before the ioend zone it draws from.
 */
STATIC void
xfs_destroy_zones(void)
{
        mempool_destroy(xfs_ioend_pool);
        kmem_zone_destroy(xfs_vnode_zone);
        kmem_zone_destroy(xfs_ioend_zone);
}
409
410 /*
411  * Attempt to flush the inode, this will actually fail
412  * if the inode is pinned, but we dirty the inode again
413  * at the point when it is unpinned after a log write,
414  * since this is when the inode itself becomes flushable.
415  */
416 STATIC int
417 xfs_fs_write_inode(
418         struct inode            *inode,
419         int                     sync)
420 {
421         vnode_t                 *vp = vn_from_inode(inode);
422         int                     error = 0, flags = FLUSH_INODE;
423
424         if (vp) {
425                 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
426                 if (sync)
427                         flags |= FLUSH_SYNC;
428                 VOP_IFLUSH(vp, flags, error);
429                 if (error == EAGAIN) {
430                         if (sync)
431                                 VOP_IFLUSH(vp, flags | FLUSH_LOG, error);
432                         else
433                                 error = 0;
434                 }
435         }
436
437         return -error;
438 }
439
/*
 * super_operations.clear_inode: final teardown of the XFS side of an
 * inode being evicted — run inactive processing, then reclaim the
 * behavior chain.  Failure to reclaim is fatal (panic), as the vnode
 * would otherwise be freed with live behaviors attached.
 */
STATIC void
xfs_fs_clear_inode(
        struct inode            *inode)
{
        vnode_t                 *vp = vn_from_inode(inode);
        int                     error, cache;   /* written by the VOP_* macros below */

        vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

        XFS_STATS_INC(vn_rele);
        XFS_STATS_INC(vn_remove);
        XFS_STATS_INC(vn_reclaim);
        XFS_STATS_DEC(vn_active);

        /*
         * This can happen because xfs_iget_core calls xfs_idestroy if we
         * find an inode with di_mode == 0 but without IGET_CREATE set.
         */
        if (vp->v_fbhv)
                VOP_INACTIVE(vp, NULL, cache);

        VN_LOCK(vp);
        vp->v_flag &= ~VMODIFIED;
        VN_UNLOCK(vp, 0);

        if (vp->v_fbhv) {
                VOP_RECLAIM(vp, error);
                if (error)
                        /* NOTE(review): message names vn_purge, but we
                         * are in clear_inode — text predates this code */
                        panic("vn_purge: cannot reclaim");
        }

        /* reclaim must have emptied the behavior chain */
        ASSERT(vp->v_fbhv == NULL);

#ifdef XFS_VNODE_TRACE
        ktrace_free(vp->v_trace);
#endif
}
477
478 /*
479  * Enqueue a work item to be picked up by the vfs xfssyncd thread.
480  * Doing this has two advantages:
481  * - It saves on stack space, which is tight in certain situations
482  * - It can be used (with care) as a mechanism to avoid deadlocks.
483  * Flushing while allocating in a full filesystem requires both.
484  */
485 STATIC void
486 xfs_syncd_queue_work(
487         struct vfs      *vfs,
488         void            *data,
489         void            (*syncer)(vfs_t *, void *))
490 {
491         vfs_sync_work_t *work;
492
493         work = kmem_alloc(sizeof(struct vfs_sync_work), KM_SLEEP);
494         INIT_LIST_HEAD(&work->w_list);
495         work->w_syncer = syncer;
496         work->w_data = data;
497         work->w_vfs = vfs;
498         spin_lock(&vfs->vfs_sync_lock);
499         list_add_tail(&work->w_list, &vfs->vfs_sync_list);
500         spin_unlock(&vfs->vfs_sync_lock);
501         wake_up_process(vfs->vfs_sync_task);
502 }
503
504 /*
505  * Flush delayed allocate data, attempting to free up reserved space
506  * from existing allocations.  At this point a new allocation attempt
507  * has failed with ENOSPC and we are in the process of scratching our
508  * heads, looking about for more room...
509  */
510 STATIC void
511 xfs_flush_inode_work(
512         vfs_t           *vfs,
513         void            *inode)
514 {
515         filemap_flush(((struct inode *)inode)->i_mapping);
516         iput((struct inode *)inode);
517 }
518
/*
 * Queue an async flush of one inode's delalloc data on xfssyncd (the
 * igrab reference is dropped by the work function), then back off for
 * 500ms to let writeback make progress before the caller retries its
 * allocation.
 */
void
xfs_flush_inode(
        xfs_inode_t     *ip)
{
        struct inode    *inode = vn_to_inode(XFS_ITOV(ip));
        struct vfs      *vfs = XFS_MTOVFS(ip->i_mount);

        igrab(inode);
        xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
        delay(msecs_to_jiffies(500));
}
530
531 /*
532  * This is the "bigger hammer" version of xfs_flush_inode_work...
533  * (IOW, "If at first you don't succeed, use a Bigger Hammer").
534  */
535 STATIC void
536 xfs_flush_device_work(
537         vfs_t           *vfs,
538         void            *inode)
539 {
540         sync_blockdev(vfs->vfs_super->s_bdev);
541         iput((struct inode *)inode);
542 }
543
/*
 * Queue a whole-device flush on xfssyncd (reference dropped by the
 * work function), wait 500ms, then force the log synchronously —
 * the heavyweight ENOSPC recovery path.
 */
void
xfs_flush_device(
        xfs_inode_t     *ip)
{
        struct inode    *inode = vn_to_inode(XFS_ITOV(ip));
        struct vfs      *vfs = XFS_MTOVFS(ip->i_mount);

        igrab(inode);
        xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
        delay(msecs_to_jiffies(500));
        xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
556
/* work done on every periodic xfssyncd pass */
#define SYNCD_FLAGS     (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR|SYNC_REFCACHE)

/*
 * The periodic sync performed by xfssyncd.  Skips the actual sync on
 * read-only mounts, then bumps vfs_sync_seq and wakes any waiter
 * (xfs_fs_sync_super's laptop-mode path); the wmb() orders the
 * sequence update before the wakeup.
 */
STATIC void
vfs_sync_worker(
        vfs_t           *vfsp,
        void            *unused)
{
        int             error;  /* written by the VFS_SYNC macro */

        if (!(vfsp->vfs_flag & VFS_RDONLY))
                VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error);
        vfsp->vfs_sync_seq++;
        wmb();
        wake_up(&vfsp->vfs_wait_single_sync_task);
}
571
/*
 * Main loop of the per-mount xfssyncd kernel thread.  Sleeps for the
 * configured interval (xfs_syncd_centisecs) or until woken, then runs
 * every queued vfs_sync_work item.  On a timer expiry (or an empty
 * queue, e.g. a laptop-mode wakeup) the built-in periodic work item
 * (vfsp->vfs_sync_work -> vfs_sync_worker) is queued as well.
 * Exits when kthread_stop() is called and the queue has drained.
 */
STATIC int
xfssyncd(
        void                    *arg)
{
        long                    timeleft;
        vfs_t                   *vfsp = (vfs_t *) arg;
        struct vfs_sync_work    *work, *n;
        LIST_HEAD               (tmp);

        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
        for (;;) {
                timeleft = schedule_timeout_interruptible(timeleft);
                /* swsusp */
                try_to_freeze();
                if (kthread_should_stop() && list_empty(&vfsp->vfs_sync_list))
                        break;

                spin_lock(&vfsp->vfs_sync_lock);
                /*
                 * We can get woken by laptop mode, to do a sync -
                 * that's the (only!) case where the list would be
                 * empty with time remaining.
                 */
                if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
                        if (!timeleft)
                                timeleft = xfs_syncd_centisecs *
                                                        msecs_to_jiffies(10);
                        INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
                        list_add_tail(&vfsp->vfs_sync_work.w_list,
                                        &vfsp->vfs_sync_list);
                }
                /* splice the queue onto a private list, then run it unlocked */
                list_for_each_entry_safe(work, n, &vfsp->vfs_sync_list, w_list)
                        list_move(&work->w_list, &tmp);
                spin_unlock(&vfsp->vfs_sync_lock);

                list_for_each_entry_safe(work, n, &tmp, w_list) {
                        (*work->w_syncer)(vfsp, work->w_data);
                        list_del(&work->w_list);
                        /* the embedded periodic item is never freed */
                        if (work == &vfsp->vfs_sync_work)
                                continue;
                        kmem_free(work, sizeof(struct vfs_sync_work));
                }
        }

        return 0;
}
618
/*
 * Initialize the embedded periodic work item and start the xfssyncd
 * thread for this mount.  Returns 0 or a negative errno from
 * kthread_run().
 */
STATIC int
xfs_fs_start_syncd(
        vfs_t                   *vfsp)
{
        vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
        vfsp->vfs_sync_work.w_vfs = vfsp;
        vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
        if (IS_ERR(vfsp->vfs_sync_task))
                return -PTR_ERR(vfsp->vfs_sync_task);
        return 0;
}
630
/* Stop the per-mount xfssyncd thread; blocks until the thread exits. */
STATIC void
xfs_fs_stop_syncd(
        vfs_t                   *vfsp)
{
        kthread_stop(vfsp->vfs_sync_task);
}
637
/*
 * super_operations.put_super: stop the sync thread, flush everything,
 * and unmount.  If either the sync or the unmount fails the vfs is
 * deliberately leaked (left "dangling") rather than freed with state
 * still attached.
 */
STATIC void
xfs_fs_put_super(
        struct super_block      *sb)
{
        vfs_t                   *vfsp = vfs_from_sb(sb);
        int                     error;  /* written by the VFS_* macros */

        xfs_fs_stop_syncd(vfsp);
        VFS_SYNC(vfsp, SYNC_ATTR|SYNC_DELWRI, NULL, error);
        if (!error)
                VFS_UNMOUNT(vfsp, 0, NULL, error);
        if (error) {
                printk("XFS unmount got error %d\n", error);
                printk("%s: vfsp/0x%p left dangling!\n", __FUNCTION__, vfsp);
                return;
        }

        vfs_deallocate(vfsp);
}
657
658 STATIC void
659 xfs_fs_write_super(
660         struct super_block      *sb)
661 {
662         vfs_t                   *vfsp = vfs_from_sb(sb);
663         int                     error;
664
665         if (sb->s_flags & MS_RDONLY) {
666                 sb->s_dirt = 0; /* paranoia */
667                 return;
668         }
669         /* Push the log and superblock a little */
670         VFS_SYNC(vfsp, SYNC_FSDATA, NULL, error);
671         sb->s_dirt = 0;
672 }
673
674 STATIC int
675 xfs_fs_sync_super(
676         struct super_block      *sb,
677         int                     wait)
678 {
679         vfs_t           *vfsp = vfs_from_sb(sb);
680         int             error;
681         int             flags = SYNC_FSDATA;
682
683         if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
684                 flags = SYNC_QUIESCE;
685         else
686                 flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
687
688         VFS_SYNC(vfsp, flags, NULL, error);
689         sb->s_dirt = 0;
690
691         if (unlikely(laptop_mode)) {
692                 int     prev_sync_seq = vfsp->vfs_sync_seq;
693
694                 /*
695                  * The disk must be active because we're syncing.
696                  * We schedule xfssyncd now (now that the disk is
697                  * active) instead of later (when it might not be).
698                  */
699                 wake_up_process(vfsp->vfs_sync_task);
700                 /*
701                  * We have to wait for the sync iteration to complete.
702                  * If we don't, the disk activity caused by the sync
703                  * will come after the sync is completed, and that
704                  * triggers another sync from laptop mode.
705                  */
706                 wait_event(vfsp->vfs_wait_single_sync_task,
707                                 vfsp->vfs_sync_seq != prev_sync_seq);
708         }
709
710         return -error;
711 }
712
713 STATIC int
714 xfs_fs_statfs(
715         struct super_block      *sb,
716         struct kstatfs          *statp)
717 {
718         vfs_t                   *vfsp = vfs_from_sb(sb);
719         int                     error;
720
721         VFS_STATVFS(vfsp, statp, NULL, error);
722         return -error;
723 }
724
/*
 * super_operations.remount_fs: re-parse the option string and apply
 * the changes.  The vserver 'tagxid' option may not be enabled by a
 * remount if the superblock was not mounted with MS_TAGXID.
 * Returns 0 or a negative errno.
 */
STATIC int
xfs_fs_remount(
        struct super_block      *sb,
        int                     *flags,
        char                    *options)
{
        vfs_t                   *vfsp = vfs_from_sb(sb);
        struct xfs_mount_args   *args = xfs_args_allocate(sb, 0);
        int                     error;  /* written by the VFS_* macros */

        VFS_PARSEARGS(vfsp, options, args, 1, error);
        if ((args->flags2 & XFSMNT2_TAGXID) &&
                !(sb->s_flags & MS_TAGXID)) {
                printk("XFS: %s: tagxid not permitted on remount.\n",
                        sb->s_id);
                error = EINVAL;
        }
        if (!error)
                VFS_MNTUPDATE(vfsp, flags, args, error);
        kmem_free(args, sizeof(*args));
        return -error;
}
747
/* super_operations.write_super_lockfs: freeze the filesystem. */
STATIC void
xfs_fs_lockfs(
        struct super_block      *sb)
{
        VFS_FREEZE(vfs_from_sb(sb));
}
754
/*
 * super_operations.show_options: emit mount options into /proc seq file.
 *
 * NOTE(review): this returns 'error' un-negated, unlike the other
 * super_operations wrappers in this file which return -error; verify
 * the sign convention VFS_SHOWARGS uses before changing either side.
 */
STATIC int
xfs_fs_show_options(
        struct seq_file         *m,
        struct vfsmount         *mnt)
{
        struct vfs              *vfsp = vfs_from_sb(mnt->mnt_sb);
        int                     error;  /* written by VFS_SHOWARGS */

        VFS_SHOWARGS(vfsp, m, error);
        return error;
}
766
767 STATIC int
768 xfs_fs_quotasync(
769         struct super_block      *sb,
770         int                     type)
771 {
772         struct vfs              *vfsp = vfs_from_sb(sb);
773         int                     error;
774
775         VFS_QUOTACTL(vfsp, Q_XQUOTASYNC, 0, (caddr_t)NULL, error);
776         return -error;
777 }
778
779 STATIC int
780 xfs_fs_getxstate(
781         struct super_block      *sb,
782         struct fs_quota_stat    *fqs)
783 {
784         struct vfs              *vfsp = vfs_from_sb(sb);
785         int                     error;
786
787         VFS_QUOTACTL(vfsp, Q_XGETQSTAT, 0, (caddr_t)fqs, error);
788         return -error;
789 }
790
/*
 * quotactl_ops.set_xstate: enable/disable quota enforcement.  Note the
 * flags word is passed by address (cast to caddr_t), as the quotactl
 * behavior reads it through the pointer.
 */
STATIC int
xfs_fs_setxstate(
        struct super_block      *sb,
        unsigned int            flags,
        int                     op)
{
        struct vfs              *vfsp = vfs_from_sb(sb);
        int                     error;  /* written by VFS_QUOTACTL */

        VFS_QUOTACTL(vfsp, op, 0, (caddr_t)&flags, error);
        return -error;
}
803
804 STATIC int
805 xfs_fs_getxquota(
806         struct super_block      *sb,
807         int                     type,
808         qid_t                   id,
809         struct fs_disk_quota    *fdq)
810 {
811         struct vfs              *vfsp = vfs_from_sb(sb);
812         int                     error, getmode;
813
814         getmode = (type == USRQUOTA) ? Q_XGETQUOTA :
815                  ((type == GRPQUOTA) ? Q_XGETGQUOTA : Q_XGETPQUOTA);
816         VFS_QUOTACTL(vfsp, getmode, id, (caddr_t)fdq, error);
817         return -error;
818 }
819
820 STATIC int
821 xfs_fs_setxquota(
822         struct super_block      *sb,
823         int                     type,
824         qid_t                   id,
825         struct fs_disk_quota    *fdq)
826 {
827         struct vfs              *vfsp = vfs_from_sb(sb);
828         int                     error, setmode;
829
830         setmode = (type == USRQUOTA) ? Q_XSETQLIM :
831                  ((type == GRPQUOTA) ? Q_XSETGQLIM : Q_XSETPQLIM);
832         VFS_QUOTACTL(vfsp, setmode, id, (caddr_t)fdq, error);
833         return -error;
834 }
835
/*
 * Fill in a super_block at mount time: allocate the vfs and mount
 * args, parse options, mount, stat the fs to size the superblock
 * fields, instantiate the root dentry, and start xfssyncd.
 *
 * Error handling unwinds in stages (fail_vnrele -> fail_unmount ->
 * fail_vfsop); note that VFS_PARSEARGS/VFS_MOUNT failures remove the
 * behavior chain themselves before jumping straight to fail_vfsop.
 * Returns 0 or a negative errno.
 */
STATIC int
xfs_fs_fill_super(
        struct super_block      *sb,
        void                    *data,
        int                     silent)
{
        vnode_t                 *rootvp;
        struct vfs              *vfsp = vfs_allocate(sb);
        struct xfs_mount_args   *args = xfs_args_allocate(sb, silent);
        struct kstatfs          statvfs;
        int                     error, error2;  /* written by the VFS_* macros */

        bhv_insert_all_vfsops(vfsp);

        VFS_PARSEARGS(vfsp, (char *)data, args, 0, error);
        if (error) {
                bhv_remove_all_vfsops(vfsp, 1);
                goto fail_vfsop;
        }

        sb_min_blocksize(sb, BBSIZE);
#ifdef CONFIG_XFS_EXPORT
        sb->s_export_op = &xfs_export_operations;
#endif
        sb->s_qcop = &xfs_quotactl_operations;
        sb->s_op = &xfs_super_operations;

        VFS_MOUNT(vfsp, args, NULL, error);
        if (error) {
                bhv_remove_all_vfsops(vfsp, 1);
                goto fail_vfsop;
        }

        /* use statvfs results to size the generic superblock fields */
        VFS_STATVFS(vfsp, &statvfs, NULL, error);
        if (error)
                goto fail_unmount;

        sb->s_dirt = 1;
        sb->s_magic = statvfs.f_type;
        sb->s_blocksize = statvfs.f_bsize;
        sb->s_blocksize_bits = ffs(statvfs.f_bsize) - 1;
        sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
        sb->s_time_gran = 1;    /* XFS keeps nanosecond timestamps */
        set_posix_acl_flag(sb);

        VFS_ROOT(vfsp, &rootvp, error);
        if (error)
                goto fail_unmount;

        sb->s_root = d_alloc_root(vn_to_inode(rootvp));
        if (!sb->s_root) {
                error = ENOMEM;
                goto fail_vnrele;
        }
        if (is_bad_inode(sb->s_root->d_inode)) {
                error = EINVAL;
                goto fail_vnrele;
        }
        if ((error = xfs_fs_start_syncd(vfsp)))
                goto fail_vnrele;
        vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address);

        kmem_free(args, sizeof(*args));
        return 0;

fail_vnrele:
        /* dput releases the root's inode ref; otherwise drop rootvp directly */
        if (sb->s_root) {
                dput(sb->s_root);
                sb->s_root = NULL;
        } else {
                VN_RELE(rootvp);
        }

fail_unmount:
        /* error2 so the original failure code in 'error' is preserved */
        VFS_UNMOUNT(vfsp, 0, NULL, error2);

fail_vfsop:
        vfs_deallocate(vfsp);
        kmem_free(args, sizeof(*args));
        return -error;
}
917
/*
 * file_system_type.get_sb: XFS always mounts a block device, so simply
 * delegate to get_sb_bdev with our fill_super callback.
 */
STATIC struct super_block *
xfs_fs_get_sb(
        struct file_system_type *fs_type,
        int                     flags,
        const char              *dev_name,
        void                    *data)
{
        return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
927
/* VFS superblock operations implemented by this file. */
STATIC struct super_operations xfs_super_operations = {
        .alloc_inode            = xfs_fs_alloc_inode,
        .destroy_inode          = xfs_fs_destroy_inode,
        .write_inode            = xfs_fs_write_inode,
        .clear_inode            = xfs_fs_clear_inode,
        .put_super              = xfs_fs_put_super,
        .write_super            = xfs_fs_write_super,
        .sync_fs                = xfs_fs_sync_super,
        .write_super_lockfs     = xfs_fs_lockfs,
        .statfs                 = xfs_fs_statfs,
        .remount_fs             = xfs_fs_remount,
        .show_options           = xfs_fs_show_options,
};
941
/* Quota control entry points (XFS-style quotactl commands). */
STATIC struct quotactl_ops xfs_quotactl_operations = {
        .quota_sync             = xfs_fs_quotasync,
        .get_xstate             = xfs_fs_getxstate,
        .set_xstate             = xfs_fs_setxstate,
        .get_xquota             = xfs_fs_getxquota,
        .set_xquota             = xfs_fs_setxquota,
};
949
/* Registration record for the "xfs" filesystem (block-device backed). */
STATIC struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
        .get_sb                 = xfs_fs_get_sb,
        .kill_sb                = kill_block_super,
        .fs_flags               = FS_REQUIRES_DEV,
};
957
958
/*
 * Module init: set up global state (zones, buffer cache, tracing,
 * subsystem init) and register the filesystem type.  Each undo_*
 * label tears down exactly what succeeded before the failure point.
 */
STATIC int __init
init_xfs_fs( void )
{
        int                     error;
        struct sysinfo          si;
        static char             message[] __initdata = KERN_INFO \
                XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n";

        printk(message);

        /* record total RAM; used elsewhere for sizing decisions */
        si_meminfo(&si);
        xfs_physmem = si.totalram;

        ktrace_init(64);

        error = xfs_init_zones();
        if (error < 0)
                goto undo_zones;

        error = xfs_buf_init();
        if (error < 0)
                goto undo_buffers;

        vn_init();
        xfs_init();
        uuid_init();
        vfs_initquota();

        error = register_filesystem(&xfs_fs_type);
        if (error)
                goto undo_register;
        return 0;

undo_register:
        xfs_buf_terminate();

undo_buffers:
        xfs_destroy_zones();

undo_zones:
        return error;
}
1001
/*
 * Module exit: unregister and tear down global state in reverse order
 * of init_xfs_fs().
 */
STATIC void __exit
exit_xfs_fs( void )
{
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
        xfs_cleanup();
        xfs_buf_terminate();
        xfs_destroy_zones();
        ktrace_uninit();
}
1012
/* Module entry/exit hooks and metadata. */
module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");