This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / fs / intermezzo / journal_ext3.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (C) 1998 Peter J. Braam <braam@clusterfs.com>
5  *  Copyright (C) 2000 Red Hat, Inc.
6  *  Copyright (C) 2000 Los Alamos National Laboratory
7  *  Copyright (C) 2000 TurboLinux, Inc.
8  *  Copyright (C) 2001 Mountain View Data, Inc.
9  *  Copyright (C) 2001 Tacit Networks, Inc. <phil@off.net>
10  *
11  *   This file is part of InterMezzo, http://www.inter-mezzo.org.
12  *
13  *   InterMezzo is free software; you can redistribute it and/or
14  *   modify it under the terms of version 2 of the GNU General Public
15  *   License as published by the Free Software Foundation.
16  *
17  *   InterMezzo is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  *   GNU General Public License for more details.
21  *
22  *   You should have received a copy of the GNU General Public License
23  *   along with InterMezzo; if not, write to the Free Software
24  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25  */
26
27 #include <linux/types.h>
28 #include <linux/param.h>
29 #include <linux/kernel.h>
30 #include <linux/sched.h>
31 #include <linux/fs.h>
32 #include <linux/slab.h>
33 #include <linux/vmalloc.h>
34 #include <linux/stat.h>
35 #include <linux/errno.h>
36 #include <asm/segment.h>
37 #include <asm/uaccess.h>
38 #include <linux/string.h>
39 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
40 #include <linux/jbd.h>
41 #include <linux/ext3_fs.h>
42 #include <linux/ext3_jbd.h>
43 #endif
44
45 #include "intermezzo_fs.h"
46 #include "intermezzo_psdev.h"
47
48 #if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE)
49
/*
 * Number of whole filesystem blocks covered by PATH_MAX (resp. NAME_MAX)
 * bytes on the filesystem that owns @inode.  The right shift rounds down,
 * so the result is 0 whenever the limit is smaller than one block.
 */
#define MAX_PATH_BLOCKS(inode) \
        (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
#define MAX_NAME_BLOCKS(inode) \
        (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb))
52
53 /* space requirements: 
54    presto_do_truncate: 
55         used to truncate the KML forward to next fset->chunksize boundary
56           - zero partial block
57           - update inode
58    presto_write_record: 
59         write header (< one block) 
60         write one path (< MAX_PATHLEN) 
61         possibly write another path (< MAX_PATHLEN)
62         write suffix (< one block) 
63    presto_update_last_rcvd
64         write one block
65 */
66
67 static loff_t presto_e3_freespace(struct presto_cache *cache,
68                                          struct super_block *sb)
69 {
70         loff_t freebl = le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
71         loff_t avail =   freebl - 
72                 le32_to_cpu(EXT3_SB(sb)->s_es->s_r_blocks_count);
73         return (avail <<  EXT3_BLOCK_SIZE_BITS(sb));
74 }
75
76 /* start the filesystem journal operations */
77 static void *presto_e3_trans_start(struct presto_file_set *fset, 
78                                    struct inode *inode, 
79                                    int op)
80 {
81         int jblocks;
82         int trunc_blks, one_path_blks, extra_path_blks, 
83                 extra_name_blks, lml_blks; 
84         __u32 avail_kmlblocks;
85         handle_t *handle;
86
87         if ( presto_no_journal(fset) ||
88              strcmp(fset->fset_cache->cache_type, "ext3"))
89           {
90             CDEBUG(D_JOURNAL, "got cache_type \"%s\"\n",
91                    fset->fset_cache->cache_type);
92             return NULL;
93           }
94
95         avail_kmlblocks = EXT3_SB(inode->i_sb)->s_es->s_free_blocks_count;
96         
97         if ( avail_kmlblocks < 3 ) {
98                 return ERR_PTR(-ENOSPC);
99         }
100         
101         if (  (op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR)
102               && avail_kmlblocks < 6 ) {
103                 return ERR_PTR(-ENOSPC);
104         }            
105
106         /* Need journal space for:
107              at least three writes to KML (two one block writes, one a path) 
108              possibly a second name (unlink, rmdir)
109              possibly a second path (symlink, rename)
110              a one block write to the last rcvd file 
111         */
112
113         trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; 
114         one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3;
115         lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2;
116         extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); 
117         extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); 
118
119         /* additional blocks appear for "two pathname" operations
120            and operations involving the LML records 
121         */
122         switch (op) {
123         case KML_OPCODE_TRUNC:
124                 jblocks = one_path_blks + extra_name_blks + trunc_blks
125                         + EXT3_DELETE_TRANS_BLOCKS; 
126                 break;
127         case KML_OPCODE_KML_TRUNC:
128                 /* Hopefully this is a little better, but I'm still mostly
129                  * guessing here. */
130                 /* unlink 1 */
131                 jblocks = extra_name_blks + trunc_blks +
132                         EXT3_DELETE_TRANS_BLOCKS + 2; 
133
134                 /* unlink 2 */
135                 jblocks += extra_name_blks + trunc_blks +
136                         EXT3_DELETE_TRANS_BLOCKS + 2; 
137
138                 /* rename 1 */
139                 jblocks += 2 * extra_path_blks + trunc_blks + 
140                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
141
142                 /* rename 2 */
143                 jblocks += 2 * extra_path_blks + trunc_blks + 
144                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
145                 break;
146         case KML_OPCODE_RELEASE:
147                 /* 
148                 jblocks = one_path_blks + lml_blks + 2*trunc_blks; 
149                 */
150                 jblocks = one_path_blks; 
151                 break;
152         case KML_OPCODE_SETATTR:
153                 jblocks = one_path_blks + trunc_blks + 1 ; 
154                 break;
155         case KML_OPCODE_CREATE:
156                 jblocks = one_path_blks + trunc_blks 
157                         + EXT3_DATA_TRANS_BLOCKS + 3 + 2; 
158                 break;
159         case KML_OPCODE_LINK:
160                 jblocks = one_path_blks + trunc_blks 
161                         + EXT3_DATA_TRANS_BLOCKS + 2; 
162                 break;
163         case KML_OPCODE_UNLINK:
164                 jblocks = one_path_blks + extra_name_blks + trunc_blks
165                         + EXT3_DELETE_TRANS_BLOCKS + 2; 
166                 break;
167         case KML_OPCODE_SYMLINK:
168                 jblocks = one_path_blks + extra_path_blks + trunc_blks
169                         + EXT3_DATA_TRANS_BLOCKS + 5; 
170                 break;
171         case KML_OPCODE_MKDIR:
172                 jblocks = one_path_blks + trunc_blks
173                         + EXT3_DATA_TRANS_BLOCKS + 4 + 2;
174                 break;
175         case KML_OPCODE_RMDIR:
176                 jblocks = one_path_blks + extra_name_blks + trunc_blks
177                         + EXT3_DELETE_TRANS_BLOCKS + 1; 
178                 break;
179         case KML_OPCODE_MKNOD:
180                 jblocks = one_path_blks + trunc_blks + 
181                         EXT3_DATA_TRANS_BLOCKS + 3 + 2;
182                 break;
183         case KML_OPCODE_RENAME:
184                 jblocks = one_path_blks + extra_path_blks + trunc_blks + 
185                         2 * EXT3_DATA_TRANS_BLOCKS + 2 + 3;
186                 break;
187         case KML_OPCODE_WRITE:
188                 jblocks = one_path_blks; 
189                 /*  add this when we can wrap our transaction with 
190                     that of ext3_file_write (ordered writes)
191                     +  EXT3_DATA_TRANS_BLOCKS;
192                 */
193                 break;
194         default:
195                 CDEBUG(D_JOURNAL, "invalid operation %d for journal\n", op);
196                 return NULL;
197         }
198
199         CDEBUG(D_JOURNAL, "creating journal handle (%d blocks) for op %d\n",
200                jblocks, op);
201         /* journal_start/stop does not do its own locking while updating
202          * the handle/transaction information. Hence we create our own
203          * critical section to protect these calls. -SHP
204          */
205         lock_kernel();
206         handle = journal_start(EXT3_JOURNAL(inode), jblocks);
207         unlock_kernel();
208         return handle;
209 }
210
/*
 * Stop the journal handle @handle.
 *
 * Does nothing when the fileset does not journal or when @handle is NULL.
 */
static void presto_e3_trans_commit(struct presto_file_set *fset, void *handle)
{
        if (!presto_no_journal(fset) && handle) {
                /* journal_start/stop is not relied on to do its own locking,
                 * so the call is wrapped in the same kernel-lock critical
                 * section that is used around journal_start(). -SHP
                 */
                lock_kernel();
                journal_stop(handle);
                unlock_kernel();
        }
}
221
/*
 * Set EXT3_JOURNAL_DATA_FL in the ext3 inode flags of @inode.
 *
 * When the ext3 headers do not define that flag the function body is empty
 * and the build emits a warning instead.
 */
static void presto_e3_journal_file_data(struct inode *inode)
{
#ifndef EXT3_JOURNAL_DATA_FL
#warning You must have a facility to enable journaled writes for recovery!
#else
        EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
#endif
}
230
231 /* The logic here is a slightly modified version of ext3/inode.c:block_to_path
232  */
233 static int presto_e3_has_all_data(struct inode *inode)
234 {
235         int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
236         int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
237         const long direct_blocks = EXT3_NDIR_BLOCKS,
238                 indirect_blocks = ptrs,
239                 double_blocks = (1 << (ptrs_bits * 2));
240         long block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
241                 inode->i_sb->s_blocksize_bits;
242
243         ENTRY;
244
245         if (inode->i_size == 0) {
246                 EXIT;
247                 return 1;
248         }
249
250         if (block < direct_blocks) {
251                 /* No indirect blocks, no problem. */
252         } else if (block < indirect_blocks + direct_blocks) {
253                 block++;
254         } else if (block < double_blocks + indirect_blocks + direct_blocks) {
255                 block += 2;
256         } else if (((block - double_blocks - indirect_blocks - direct_blocks)
257                     >> (ptrs_bits * 2)) < ptrs) {
258                 block += 3;
259         }
260
261         block *= (inode->i_sb->s_blocksize / 512);
262
263         CDEBUG(D_CACHE, "Need %ld blocks, have %ld.\n", block, inode->i_blocks);
264
265         if (block > inode->i_blocks) {
266                 EXIT;
267                 return 0;
268         }
269
270         EXIT;
271         return 1;
272 }
273
274 struct journal_ops presto_ext3_journal_ops = {
275         .tr_all_data     = presto_e3_has_all_data,
276         .tr_avail        = presto_e3_freespace,
277         .tr_start        =  presto_e3_trans_start,
278         .tr_commit       = presto_e3_trans_commit,
279         .tr_journal_data = presto_e3_journal_file_data,
280         .tr_ilookup      = presto_iget_ilookup
281 };
282
283 #endif /* CONFIG_EXT3_FS */