1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2000 Stelias Computing, Inc.
5 * Copyright (C) 2000 Red Hat, Inc.
6 * Copyright (C) 2000 TurboLinux, Inc.
7 * Copyright (C) 2000 Los Alamos National Laboratory.
8 * Copyright (C) 2000, 2001 Tacit Networks, Inc.
9 * Copyright (C) 2000 Peter J. Braam
10 * Copyright (C) 2001 Mountain View Data, Inc.
11 * Copyright (C) 2001 Cluster File Systems, Inc.
13 * This file is part of InterMezzo, http://www.inter-mezzo.org.
15 * InterMezzo is free software; you can redistribute it and/or
16 * modify it under the terms of version 2 of the GNU General Public
17 * License as published by the Free Software Foundation.
19 * InterMezzo is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with InterMezzo; if not, write to the Free Software
26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 * This file manages file I/O */
32 #include <asm/bitops.h>
33 #include <asm/uaccess.h>
34 #include <asm/system.h>
36 #include <linux/errno.h>
38 #include <linux/ext2_fs.h>
39 #include <linux/slab.h>
40 #include <linux/vmalloc.h>
41 #include <linux/sched.h>
42 #include <linux/stat.h>
43 #include <linux/string.h>
44 #include <linux/blkdev.h>
45 #include <linux/init.h>
46 #include <linux/module.h>
48 #include <linux/fsfilter.h>
49 #include "intermezzo_fs.h"
50 #include "intermezzo_psdev.h"
52 /* these are initialized in super.c */
/* Permission hook. This file only references it, as the .permission entry of
 * presto_file_iops; its definition is not visible here. */
54 extern int presto_permission(struct inode *inode, int mask, struct nameidata *nd);
/*
 * presto_open_upcall - issue an open upcall for the file behind a dentry.
 *
 * @minor: device minor passed through to presto_get_fileid() and
 *         izo_upc_open().
 * @de:    dentry of the file being opened.
 *
 * A PAGE_SIZE scratch buffer is allocated and presto_path() builds the path
 * of @de into it, using the fileset's fset_dentry. If no remote inode number
 * is recorded in the dentry data (remote_ino == 0), presto_get_fileid() is
 * called first. The upcall context `info` starts zeroed and carries the
 * remote inode number and generation only when remote_ino is positive; the
 * "fetching by name" message covers the case where it is not. The scratch
 * buffer is freed after izo_upc_open() has been called.
 *
 * NOTE(review): the declarations of rc, path, buffer and pathlen, the
 * allocation-failure guard around the "out of memory" message, the else
 * branch layout and the return statement are not visible here. Presumably
 * the result of izo_upc_open() is what gets returned - confirm.
 */
57 static int presto_open_upcall(int minor, struct dentry *de)
61 struct presto_file_set *fset;
63 struct lento_vfs_context info;
64 struct presto_dentry_data *dd = presto_d2d(de);
66 PRESTO_ALLOC(buffer, PAGE_SIZE);
68 CERROR("PRESTO: out of memory!\n");
71 fset = presto_fset(de);
72 path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
73 pathlen = MYPATHLEN(buffer, path);
75 CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
/* No remote inode number recorded yet; presumably presto_get_fileid()
 * fills in dd->remote_ino on success - confirm. */
76 if (dd->remote_ino == 0) {
77 rc = presto_get_fileid(minor, fset, de);
/* Start from an all-zero context so the upcall sees no remote id unless
 * one is copied in below. */
79 memset (&info, 0, sizeof(info));
80 if (dd->remote_ino > 0) {
81 info.remote_ino = dd->remote_ino;
82 info.remote_generation = dd->remote_generation;
84 CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
85 (unsigned long long) dd->remote_ino);
87 rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
88 PRESTO_FREE(buffer, PAGE_SIZE);
/*
 * open_check_dod - decide whether opening @file should trigger a
 * data-on-demand (DOD) fetch.
 *
 * @file: file being opened.
 * @fset: fileset the file belongs to; its fset_cache supplies the minor and
 *        the cache filter.
 *
 * Each visible check logs why DOD is skipped:
 *   - ISLENTO(minor) holds for the cache's minor;
 *   - izo_dentry_is_ilookup() reports an open-by-inode ("iopen") dentry;
 *   - the fileset does not have FSET_DATA_ON_DEMAND set;
 *   - the open carries O_TRUNC;
 *   - the dentry is flagged PRESTO_DONT_JOURNAL (logged as "file under
 *     .intermezzo");
 *   - the dentry is flagged PRESTO_DATA;
 *   - the cache filter's tr_all_data() reports the inode as not sparse.
 *
 * NOTE(review): the return statements and the declaration of `inum` are not
 * visible here. presto_file_open() performs the open upcall when this
 * function returns non-zero, so each skip case presumably returns 0 and the
 * fall-through returns non-zero - confirm.
 */
92 static inline int open_check_dod(struct file *file,
93 struct presto_file_set *fset)
95 int gen, is_iopen = 0, minor;
96 struct presto_cache *cache = fset->fset_cache;
99 minor = presto_c2m(cache);
101 if ( ISLENTO(minor) ) {
102 CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
106 /* Files are only ever opened by inode during backfetches, when by
107 * definition we have the authoritative copy of the data. No DOD. */
108 is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
111 CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
115 if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
116 CDEBUG(D_CACHE, "fileset not on demand.\n");
120 if (file->f_flags & O_TRUNC) {
121 CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
125 if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
126 CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
130 if (presto_chk(file->f_dentry, PRESTO_DATA)) {
131 CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
135 if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
136 CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
/*
 * presto_file_open - open method installed in presto_file_fops.
 *
 * @inode: inode being opened.
 * @file:  file structure for this open; file->private_data is set here.
 *
 * Visible sequence:
 *   1. presto_prep() resolves the cache and fileset for the dentry.
 *   2. For a non-lento minor and an O_RDWR/O_WRONLY open, a permit is
 *      obtained with presto_get_permit() and then released with
 *      presto_put_permit().
 *   3. If open_check_dod() returns non-zero, the dentry is flagged
 *      PRESTO_ATTR | PRESTO_DATA and presto_open_upcall() is issued.
 *   4. For an O_TRUNC open the dentry is flagged PRESTO_ATTR | PRESTO_DATA.
 *   5. The open method of the cache filter's file operations is called.
 *   6. A presto_file_data record is allocated and filled from the current
 *      task (fsuid, fsgid, supplementary groups) and from the inode (mode,
 *      uid, gid, version), then stored in file->private_data; on the other
 *      branch private_data is set to NULL.
 *
 * NOTE(review): error returns, the declarations of rc, minor and i, and the
 * if/else structure around steps 5-6 are not visible here. Presumably the
 * record is only allocated when `writable` is non-zero - confirm.
 * NOTE(review): the condition in step 2 repeats the test already captured
 * in `writable`.
 * NOTE(review): PRESTO_DATA is set before the result of the upcall is known;
 * confirm the error path does not leave a stale flag.
 * NOTE(review): the group copy loop is bounded only by
 * current->group_info->ngroups; the capacity of fd_groups is not visible
 * here - confirm it cannot be exceeded.
 */
143 static int presto_file_open(struct inode *inode, struct file *file)
146 struct file_operations *fops;
147 struct presto_cache *cache;
148 struct presto_file_set *fset;
149 struct presto_file_data *fdata;
150 int writable = (file->f_flags & (O_RDWR | O_WRONLY));
155 if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
160 minor = presto_c2m(cache);
162 CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
163 presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
166 if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
167 file->f_flags & O_WRONLY)) {
168 CDEBUG(D_CACHE, "calling presto_get_permit\n");
169 if ( presto_get_permit(inode) < 0 ) {
173 presto_put_permit(inode);
176 if (open_check_dod(file, fset)) {
177 CDEBUG(D_CACHE, "presto_open_upcall\n");
178 CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
179 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
180 rc = presto_open_upcall(minor, file->f_dentry);
183 CERROR("%s: returning error %d\n", __FUNCTION__, rc);
189 /* file was truncated upon open: do not refetch */
190 if (file->f_flags & O_TRUNC) {
191 CDEBUG(D_CACHE, "setting DATA, ATTR\n");
192 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
/* Delegate the actual open to the file operations of the cache filter. */
195 fops = filter_c2cffops(cache->cache_filter);
197 CDEBUG(D_CACHE, "calling fs open\n");
198 rc = fops->open(inode, file);
207 PRESTO_ALLOC(fdata, sizeof(*fdata));
212 /* LOCK: XXX check that the kernel lock protects this alloc */
213 fdata->fd_do_lml = 0;
214 fdata->fd_bytes_written = 0;
/* Snapshot the opener's credentials and the inode's ownership and mode. */
215 fdata->fd_fsuid = current->fsuid;
216 fdata->fd_fsgid = current->fsgid;
217 fdata->fd_mode = file->f_dentry->d_inode->i_mode;
218 fdata->fd_uid = file->f_dentry->d_inode->i_uid;
219 fdata->fd_gid = file->f_dentry->d_inode->i_gid;
220 fdata->fd_ngroups = current->group_info->ngroups;
221 for (i=0 ; i < current->group_info->ngroups ; i++)
222 fdata->fd_groups[i] = GROUP_AT(current->group_info,i);
224 fdata->fd_info.flags = LENTO_FL_KML;
226 /* this is for the case of DOD,
227 reint_close will adjust flags if needed */
228 fdata->fd_info.flags = 0;
231 presto_getversion(&fdata->fd_version, inode);
232 file->private_data = fdata;
234 file->private_data = NULL;
/*
 * presto_adjust_lml - replace the lento context stored with an open file.
 *
 * @file: open file whose private_data holds a struct presto_file_data.
 * @info: context copied wholesale into fdata->fd_info.
 *
 * NOTE(review): most of the body (any NULL check on fdata and the return
 * value) is not visible here; only the memcpy is.
 */
241 int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
243 struct presto_file_data *fdata =
244 (struct presto_file_data *) file->private_data;
251 memcpy(&fdata->fd_info, info, sizeof(*info));
/*
 * presto_file_release - release method installed in presto_file_fops.
 *
 * @inode: inode of the file being released.
 * @file:  file being released; its private_data (struct presto_file_data,
 *         possibly NULL) is freed and cleared here.
 *
 * The release method of the cache filter's file operations is called when
 * one exists. If the per-open record says an LML record was started
 * (fd_do_lml), a permit is taken, the inode's mtime is stored as
 * fd_info.updated_time, presto_do_close() is called and the permit is
 * dropped. In that case rc from the underlying release is overwritten by
 * the result of presto_do_close(), matching the "ignore close errors"
 * comment in the body.
 *
 * NOTE(review): the error handling after presto_prep() and after a failed
 * presto_get_permit(), any guard around PRESTO_FREE(), and the return
 * statement are not visible here.
 */
257 static int presto_file_release(struct inode *inode, struct file *file)
260 struct file_operations *fops;
261 struct presto_cache *cache;
262 struct presto_file_set *fset;
263 struct presto_file_data *fdata =
264 (struct presto_file_data *)file->private_data;
267 rc = presto_prep(file->f_dentry, &cache, &fset);
273 fops = filter_c2cffops(cache->cache_filter);
274 if (fops && fops->release)
275 rc = fops->release(inode, file);
277 CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
278 ISLENTO(cache->cache_psdev->uc_minor),
279 cache->cache_psdev->uc_minor, rc, fdata);
281 /* this file was modified: ignore close errors, write KML */
282 if (fdata && fdata->fd_do_lml) {
283 /* XXX: remove when lento gets file granularity cd */
284 if ( presto_get_permit(inode) < 0 ) {
289 fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
290 rc = presto_do_close(fset, file);
291 presto_put_permit(inode);
295 PRESTO_FREE(fdata, sizeof(*fdata));
296 file->private_data = NULL;
/*
 * presto_apply_write_policy - journal a close once enough bytes are written.
 *
 * @file: file that was just written; its private_data is the per-open
 *        struct presto_file_data.
 * @fset: fileset of the file; supplies the flags, the cache and the
 *        fset_file_maxio threshold.
 * @res:  number of bytes the write reported.
 *
 * When the fileset has FSET_JCLOSE_ON_WRITE set and the cache's psdev minor
 * is not a lento, @res is added to fdata->fd_bytes_written. Once that total
 * reaches fset->fset_file_maxio the inode version is sampled, a permit is
 * taken on the inode, presto_journal_close() is called and the permit is
 * dropped again. fd_bytes_written is then set back to 0.
 *
 * fdata is dereferenced without a NULL check here; presto_file_write() only
 * calls this function when fdata is non-NULL.
 *
 * NOTE(review): the declarations of `rec` and `error`, the remaining
 * presto_journal_close() arguments, the condition guarding the
 * "cannot journal close" message and the handling of a failed
 * presto_get_permit() are not visible here.
 */
303 static void presto_apply_write_policy(struct file *file,
304 struct presto_file_set *fset, loff_t res)
306 struct presto_file_data *fdata =
307 (struct presto_file_data *)file->private_data;
308 struct presto_cache *cache = fset->fset_cache;
309 struct presto_version new_file_ver;
313 /* Here we do a journal close after a fixed or a specified
314 amount of KBytes, currently a global parameter set with
315 sysctl. If files are open for a long time, this gives added
316 protection. (XXX todo: per cache, add ioctl, handle
317 journaling in a thread, add more options etc.)
320 if ((fset->fset_flags & FSET_JCLOSE_ON_WRITE) &&
321 (!ISLENTO(cache->cache_psdev->uc_minor))) {
322 fdata->fd_bytes_written += res;
324 if (fdata->fd_bytes_written >= fset->fset_file_maxio) {
325 presto_getversion(&new_file_ver,
326 file->f_dentry->d_inode);
327 /* This is really heavy weight and should be fixed
328 ASAP. At most we should be recording the number
329 of bytes written and not locking the kernel,
330 wait for permits, etc, on the write path. SHP
333 if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
335 /* we must be disconnected, not to worry */
339 error = presto_journal_close(&rec, fset, fdata,
343 presto_put_permit(file->f_dentry->d_inode);
346 CERROR("presto_close: cannot journal close\n");
347 /* XXX these errors are really bad */
351 fdata->fd_bytes_written = 0;
/*
 * presto_file_write - write method installed in presto_file_fops.
 *
 * @file: file being written.
 * @buf:  source buffer, passed through unchanged to the underlying write.
 * @size: number of bytes requested.
 * @off:  file position pointer, passed through to the underlying write.
 *
 * Visible sequence:
 *   - presto_prep() resolves the cache and fileset for the dentry.
 *   - Cache space is reserved: 2 * PRESTO_REQHIGH plus (blocks + 3)
 *     filesystem blocks, where blocks = (size >> s_blocksize_bits) + 1.
 *   - With fset->fset_lml.fd_lock read-held, do_lml_here is computed: it is
 *     true for a non-empty write to a file whose fd_do_lml is still 0 and
 *     whose dentry is not flagged PRESTO_DONT_JOURNAL. fd_do_lml is set
 *     to 1.
 *   - A transaction is started, presto_write_lml_close() writes the LML
 *     record, the record offset is saved in fdata->fd_lml_offset and the
 *     transaction is committed. If the transaction cannot be started the
 *     reservation is released.
 *   - The write method of the cache filter's file operations performs the
 *     write. For a positive result with non-NULL fdata,
 *     presto_apply_write_policy() runs.
 *   - The reservation is released.
 *
 * NOTE(review): fd_do_lml is modified while only the read side of fd_lock
 * is held - confirm this is safe against concurrent writers.
 * NOTE(review): fdata is dereferenced when computing do_lml_here, before
 * the later "res > 0 && fdata" NULL check - confirm private_data cannot be
 * NULL on this path.
 * NOTE(review): several declarations (rec, handle, res, res_size, error,
 * do_lml_here), the guards around the LML transaction and around the
 * fd_do_lml assignment, and the return statements are not visible here.
 */
356 static ssize_t presto_file_write(struct file *file, const char *buf,
357 size_t size, loff_t *off)
361 struct presto_cache *cache;
362 struct presto_file_set *fset;
363 struct file_operations *fops;
367 unsigned long blocks;
368 struct presto_file_data *fdata;
371 error = presto_prep(file->f_dentry, &cache, &fset);
377 blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
378 /* XXX 3 is for ext2 indirect blocks ... */
379 res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
380 << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
382 error = presto_reserve_space(fset->fset_cache, res_size);
383 CDEBUG(D_INODE, "Reserved %Ld for %Zd\n", res_size, size);
389 CDEBUG(D_INODE, "islento %d, minor: %d\n",
390 ISLENTO(cache->cache_psdev->uc_minor),
391 cache->cache_psdev->uc_minor);
394 * XXX this lock should become a per inode lock when
395 * Vinny's changes are in; we could just use i_sem.
397 read_lock(&fset->fset_lml.fd_lock);
398 fdata = (struct presto_file_data *)file->private_data;
399 do_lml_here = size && (fdata->fd_do_lml == 0) &&
400 !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
403 fdata->fd_do_lml = 1;
404 read_unlock(&fset->fset_lml.fd_lock);
407 There might be a bug here. We need to make
408 absolutely sure that the ext3_file_write commits
409 after our transaction that writes the LML record.
410 Nesting the file write helps if new blocks are allocated.
414 struct presto_version file_version;
415 /* handle different space reqs from file system below! */
416 handle = presto_trans_start(fset, file->f_dentry->d_inode,
418 if ( IS_ERR(handle) ) {
419 presto_release_space(fset->fset_cache, res_size);
420 CERROR("presto_write: no space for transaction\n");
424 presto_getversion(&file_version, file->f_dentry->d_inode);
425 res = presto_write_lml_close(&rec, fset, file,
426 fdata->fd_info.remote_ino,
427 fdata->fd_info.remote_generation,
428 &fdata->fd_info.remote_version,
430 fdata->fd_lml_offset = rec.offset;
432 CERROR("intermezzo: PANIC failed to write LML\n");
437 presto_trans_commit(fset, handle);
440 fops = filter_c2cffops(cache->cache_filter);
441 res = fops->write(file, buf, size, off);
443 CDEBUG(D_FILE, "file write returns short write: size %Zd, res %Zd\n", size, res);
446 if ( (res > 0) && fdata )
447 presto_apply_write_policy(file, fset, res);
450 presto_release_space(fset->fset_cache, res_size);
/*
 * File operations table for regular files: write, open and release go to the
 * presto_file_* wrappers defined in this file, ioctl goes to presto_ioctl.
 * No other methods are set in the visible initializer.
 */
454 struct file_operations presto_file_fops = {
455 .write = presto_file_write,
456 .open = presto_file_open,
457 .release = presto_file_release,
458 .ioctl = presto_ioctl
/*
 * Inode operations table for regular files: permission and setattr are
 * routed to presto_permission and presto_setattr. The set_ext_attr hook is
 * only compiled in when CONFIG_FS_EXT_ATTR is defined.
 */
461 struct inode_operations presto_file_iops = {
462 .permission = presto_permission,
463 .setattr = presto_setattr,
464 #ifdef CONFIG_FS_EXT_ATTR
465 .set_ext_attr = presto_set_ext_attr,
469 /* FIXME: I bet we want to add a lock here and in presto_file_open. */
/*
 * izo_purge_file - truncate a cached file and restore its original size.
 *
 * @fset: fileset containing the file; its cache's cache_mtpt is used as the
 *        path prefix.
 * @file: name appended to cache_mtpt with a '/' separator.
 *
 * The path "<cache_mtpt>/<file>" is built in a freshly allocated buffer and
 * resolved with izo_lookup_file(). If the inode's atime is within the last
 * 5 seconds the purge is abandoned (the body calls this "lost the race").
 * Otherwise a KML_OPCODE_TRUNC transaction is started, the current i_size
 * is remembered, and izo_do_truncate() is called twice: first with
 * (0, oldsize), then with (oldsize, 0). A valid handle is committed and the
 * path buffer is freed.
 *
 * NOTE(review): presumably the two izo_do_truncate() arguments are
 * (new length, old length), so the file is cut to zero and then extended
 * back to its old size - confirm against izo_do_truncate().
 * NOTE(review): the declarations of rc, len, path, nd, handle and oldsize,
 * the assignment of `dentry`, the error checks, the cleanup labels and the
 * return statement are not visible here. The FIXMEs in the body record that
 * locking, the open-file check and LML bookkeeping are still missing.
 */
470 int izo_purge_file(struct presto_file_set *fset, char *file)
476 struct dentry *dentry;
480 /* FIXME: not mtpt it's gone */
/* len covers both strings plus the '/' separator; the allocation adds one
 * more byte for the terminating NUL written by sprintf(). */
481 len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
482 PRESTO_ALLOC(path, len + 1);
486 sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
487 rc = izo_lookup_file(fset, path, &nd);
492 /* FIXME: take a lock here */
494 if (dentry->d_inode->i_atime.tv_sec > get_seconds() - 5) {
495 /* We lost the race; this file was accessed while we were doing
496 * ioctls and lookups and whatnot. */
501 /* FIXME: Check if this file is open. */
503 handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
504 if (IS_ERR(handle)) {
509 /* FIXME: Write LML record */
511 oldsize = dentry->d_inode->i_size;
512 rc = izo_do_truncate(fset, dentry, 0, oldsize);
515 rc = izo_do_truncate(fset, dentry, oldsize, 0);
520 /* FIXME: clear LML record */
523 /* FIXME: release the lock here */
526 if (handle != NULL && !IS_ERR(handle))
527 presto_trans_commit(fset, handle);
529 PRESTO_FREE(path, len + 1);