ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / fs / jfs / inode.c
1 /*
2  *   Copyright (c) International Business Machines Corp., 2000-2002
3  *   Portions Copyright (c) Christoph Hellwig, 2001-2002
4  *
5  *   This program is free software;  you can redistribute it and/or modify
6  *   it under the terms of the GNU General Public License as published by
7  *   the Free Software Foundation; either version 2 of the License, or 
8  *   (at your option) any later version.
9  * 
10  *   This program is distributed in the hope that it will be useful,
11  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
13  *   the GNU General Public License for more details.
14  *
15  *   You should have received a copy of the GNU General Public License
16  *   along with this program;  if not, write to the Free Software 
17  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19
20 #include <linux/fs.h>
21 #include <linux/mpage.h>
22 #include <linux/buffer_head.h>
23 #include <linux/pagemap.h>
24 #include "jfs_incore.h"
25 #include "jfs_filsys.h"
26 #include "jfs_imap.h"
27 #include "jfs_extent.h"
28 #include "jfs_unicode.h"
29 #include "jfs_debug.h"
30
31
32 extern struct inode_operations jfs_dir_inode_operations;
33 extern struct inode_operations jfs_file_inode_operations;
34 extern struct inode_operations jfs_symlink_inode_operations;
35 extern struct file_operations jfs_dir_operations;
36 extern struct file_operations jfs_file_operations;
37 struct address_space_operations jfs_aops;
38 extern int freeZeroLink(struct inode *);
39
40 void jfs_read_inode(struct inode *inode)
41 {
42         if (diRead(inode)) { 
43                 make_bad_inode(inode);
44                 return;
45         }
46
47         if (S_ISREG(inode->i_mode)) {
48                 inode->i_op = &jfs_file_inode_operations;
49                 inode->i_fop = &jfs_file_operations;
50                 inode->i_mapping->a_ops = &jfs_aops;
51         } else if (S_ISDIR(inode->i_mode)) {
52                 inode->i_op = &jfs_dir_inode_operations;
53                 inode->i_fop = &jfs_dir_operations;
54                 inode->i_mapping->a_ops = &jfs_aops;
55                 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
56         } else if (S_ISLNK(inode->i_mode)) {
57                 if (inode->i_size >= IDATASIZE) {
58                         inode->i_op = &page_symlink_inode_operations;
59                         inode->i_mapping->a_ops = &jfs_aops;
60                 } else
61                         inode->i_op = &jfs_symlink_inode_operations;
62         } else {
63                 inode->i_op = &jfs_file_inode_operations;
64                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
65         }
66 }
67
68 /*
69  * Workhorse of both fsync & write_inode
70  */
71 int jfs_commit_inode(struct inode *inode, int wait)
72 {
73         int rc = 0;
74         tid_t tid;
75         static int noisy = 5;
76
77         jfs_info("In jfs_commit_inode, inode = 0x%p", inode);
78
79         /*
80          * Don't commit if inode has been committed since last being
81          * marked dirty, or if it has been deleted.
82          */
83         if (test_cflag(COMMIT_Nolink, inode) ||
84             !test_cflag(COMMIT_Dirty, inode))
85                 return 0;
86
87         if (isReadOnly(inode)) {
88                 /* kernel allows writes to devices on read-only
89                  * partitions and may think inode is dirty
90                  */
91                 if (!special_file(inode->i_mode) && noisy) {
92                         jfs_err("jfs_commit_inode(0x%p) called on "
93                                    "read-only volume", inode);
94                         jfs_err("Is remount racy?");
95                         noisy--;
96                 }
97                 return 0;
98         }
99
100         tid = txBegin(inode->i_sb, COMMIT_INODE);
101         down(&JFS_IP(inode)->commit_sem);
102         rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
103         txEnd(tid);
104         up(&JFS_IP(inode)->commit_sem);
105         return rc;
106 }
107
108 void jfs_write_inode(struct inode *inode, int wait)
109 {
110         if (test_cflag(COMMIT_Nolink, inode))
111                 return;
112         /*
113          * If COMMIT_DIRTY is not set, the inode isn't really dirty.
114          * It has been committed since the last change, but was still
115          * on the dirty inode list.
116          */
117          if (!test_cflag(COMMIT_Dirty, inode)) {
118                 /* Make sure committed changes hit the disk */
119                 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait);
120                 return;
121          }
122
123         if (jfs_commit_inode(inode, wait)) {
124                 jfs_err("jfs_write_inode: jfs_commit_inode failed!");
125         }
126 }
127
128 void jfs_delete_inode(struct inode *inode)
129 {
130         jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
131
132         if (test_cflag(COMMIT_Freewmap, inode))
133                 freeZeroLink(inode);
134
135         diFree(inode);
136
137         clear_inode(inode);
138 }
139
140 void jfs_dirty_inode(struct inode *inode)
141 {
142         static int noisy = 5;
143
144         if (isReadOnly(inode)) {
145                 if (!special_file(inode->i_mode) && noisy) {
146                         /* kernel allows writes to devices on read-only
147                          * partitions and may try to mark inode dirty
148                          */
149                         jfs_err("jfs_dirty_inode called on read-only volume");
150                         jfs_err("Is remount racy?");
151                         noisy--;
152                 }
153                 return;
154         }
155
156         set_cflag(COMMIT_Dirty, inode);
157 }
158
159 static int
160 jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
161                         struct buffer_head *bh_result, int create)
162 {
163         s64 lblock64 = lblock;
164         int no_size_check = 0;
165         int rc = 0;
166         int take_locks;
167         xad_t xad;
168         s64 xaddr;
169         int xflag;
170         s32 xlen;
171
172         /*
173          * If this is a special inode (imap, dmap) or directory,
174          * the lock should already be taken
175          */
176         take_locks = ((JFS_IP(ip)->fileset != AGGREGATE_I) &&
177                       !S_ISDIR(ip->i_mode));
178         /*
179          * Take appropriate lock on inode
180          */
181         if (take_locks) {
182                 if (create)
183                         IWRITE_LOCK(ip);
184                 else
185                         IREAD_LOCK(ip);
186         }
187
188         /*
189          * A directory's "data" is the inode index table, but i_size is the
190          * size of the d-tree, so don't check the offset against i_size
191          */
192         if (S_ISDIR(ip->i_mode))
193                 no_size_check = 1;
194
195         if ((no_size_check ||
196              ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) &&
197             (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, no_size_check)
198              == 0) && xlen) {
199                 if (xflag & XAD_NOTRECORDED) {
200                         if (!create)
201                                 /*
202                                  * Allocated but not recorded, read treats
203                                  * this as a hole
204                                  */
205                                 goto unlock;
206 #ifdef _JFS_4K
207                         XADoffset(&xad, lblock64);
208                         XADlength(&xad, xlen);
209                         XADaddress(&xad, xaddr);
210 #else                           /* _JFS_4K */
211                         /*
212                          * As long as block size = 4K, this isn't a problem.
213                          * We should mark the whole page not ABNR, but how
214                          * will we know to mark the other blocks BH_New?
215                          */
216                         BUG();
217 #endif                          /* _JFS_4K */
218                         rc = extRecord(ip, &xad);
219                         if (rc)
220                                 goto unlock;
221                         set_buffer_new(bh_result);
222                 }
223
224                 map_bh(bh_result, ip->i_sb, xaddr);
225                 bh_result->b_size = xlen << ip->i_blkbits;
226                 goto unlock;
227         }
228         if (!create)
229                 goto unlock;
230
231         /*
232          * Allocate a new block
233          */
234 #ifdef _JFS_4K
235         if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
236                 goto unlock;
237         rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE);
238         if (rc)
239                 goto unlock;
240
241         set_buffer_new(bh_result);
242         map_bh(bh_result, ip->i_sb, addressXAD(&xad));
243         bh_result->b_size = lengthXAD(&xad) << ip->i_blkbits;
244
245 #else                           /* _JFS_4K */
246         /*
247          * We need to do whatever it takes to keep all but the last buffers
248          * in 4K pages - see jfs_write.c
249          */
250         BUG();
251 #endif                          /* _JFS_4K */
252
253       unlock:
254         /*
255          * Release lock on inode
256          */
257         if (take_locks) {
258                 if (create)
259                         IWRITE_UNLOCK(ip);
260                 else
261                         IREAD_UNLOCK(ip);
262         }
263         return rc;
264 }
265
266 static int jfs_get_block(struct inode *ip, sector_t lblock,
267                          struct buffer_head *bh_result, int create)
268 {
269         return jfs_get_blocks(ip, lblock, 1, bh_result, create);
270 }
271
272 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
273 {
274         return block_write_full_page(page, jfs_get_block, wbc);
275 }
276
277 static int jfs_writepages(struct address_space *mapping,
278                         struct writeback_control *wbc)
279 {
280         return mpage_writepages(mapping, wbc, jfs_get_block);
281 }
282
283 static int jfs_readpage(struct file *file, struct page *page)
284 {
285         return mpage_readpage(page, jfs_get_block);
286 }
287
288 static int jfs_readpages(struct file *file, struct address_space *mapping,
289                 struct list_head *pages, unsigned nr_pages)
290 {
291         return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
292 }
293
294 static int jfs_prepare_write(struct file *file,
295                              struct page *page, unsigned from, unsigned to)
296 {
297         return nobh_prepare_write(page, from, to, jfs_get_block);
298 }
299
300 static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
301 {
302         return generic_block_bmap(mapping, block, jfs_get_block);
303 }
304
305 static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
306         const struct iovec *iov, loff_t offset, unsigned long nr_segs)
307 {
308         struct file *file = iocb->ki_filp;
309         struct inode *inode = file->f_mapping->host;
310
311         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
312                                 offset, nr_segs, jfs_get_blocks, NULL);
313 }
314
315 struct address_space_operations jfs_aops = {
316         .readpage       = jfs_readpage,
317         .readpages      = jfs_readpages,
318         .writepage      = jfs_writepage,
319         .writepages     = jfs_writepages,
320         .sync_page      = block_sync_page,
321         .prepare_write  = jfs_prepare_write,
322         .commit_write   = nobh_commit_write,
323         .bmap           = jfs_bmap,
324         .direct_IO      = jfs_direct_IO,
325 };
326
327 /*
328  * Guts of jfs_truncate.  Called with locks already held.  Can be called
329  * with directory for truncating directory index table.
330  */
331 void jfs_truncate_nolock(struct inode *ip, loff_t length)
332 {
333         loff_t newsize;
334         tid_t tid;
335
336         ASSERT(length >= 0);
337
338         if (test_cflag(COMMIT_Nolink, ip)) {
339                 xtTruncate(0, ip, length, COMMIT_WMAP);
340                 return;
341         }
342
343         do {
344                 tid = txBegin(ip->i_sb, 0);
345
346                 /*
347                  * The commit_sem cannot be taken before txBegin.
348                  * txBegin may block and there is a chance the inode
349                  * could be marked dirty and need to be committed
350                  * before txBegin unblocks
351                  */
352                 down(&JFS_IP(ip)->commit_sem);
353
354                 newsize = xtTruncate(tid, ip, length,
355                                      COMMIT_TRUNCATE | COMMIT_PWMAP);
356                 if (newsize < 0) {
357                         txEnd(tid);
358                         up(&JFS_IP(ip)->commit_sem);
359                         break;
360                 }
361
362                 ip->i_mtime = ip->i_ctime = CURRENT_TIME;
363                 mark_inode_dirty(ip);
364
365                 txCommit(tid, 1, &ip, 0);
366                 txEnd(tid);
367                 up(&JFS_IP(ip)->commit_sem);
368         } while (newsize > length);     /* Truncate isn't always atomic */
369 }
370
371 void jfs_truncate(struct inode *ip)
372 {
373         jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
374
375         nobh_truncate_page(ip->i_mapping, ip->i_size);
376
377         IWRITE_LOCK(ip);
378         jfs_truncate_nolock(ip, ip->i_size);
379         IWRITE_UNLOCK(ip);
380 }