1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Author: Peter J. Braam <braam@clusterfs.com>
5 * Copyright (C) 1998 Stelias Computing Inc
6 * Copyright (C) 1999 Red Hat Inc.
8 * This file is part of InterMezzo, http://www.inter-mezzo.org.
10 * InterMezzo is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * InterMezzo is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with InterMezzo; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * This file implements basic routines supporting the semantics
25 #include <linux/types.h>
26 #include <linux/kernel.h>
27 #include <linux/sched.h>
29 #include <linux/namei.h>
30 #include <linux/stat.h>
31 #include <linux/errno.h>
32 #include <linux/vmalloc.h>
33 #include <linux/slab.h>
34 #include <asm/segment.h>
35 #include <asm/uaccess.h>
36 #include <linux/string.h>
38 #include "intermezzo_fs.h"
39 #include "intermezzo_psdev.h"
// presto_walk: look up the path string in name and fill in nd by calling
// path_lookup() with flags == 0 (LOOKUP_POSITIVE is commented out), so
// LOOKUP_FOLLOW is never requested; the original comment below explains why.
// The result of path_lookup() is stored in err.
// NOTE(review): this listing has its original line numbers fused onto each
// line and skips original lines 42-43, 50, 52-53 and 55-58.  The braces, the
// declaration of err, the end of the comment below and the return statement
// are therefore not visible; presumably err is returned -- confirm against a
// pristine copy of the file.
41 int presto_walk(const char *name, struct nameidata *nd)
44 /* we do not follow symlinks to support symlink operations
45 correctly. The vfs should always hand us resolved dentries
46 so we should not be required to use LOOKUP_FOLLOW. At the
47 reintegrating end, lento again should be working with the
48 resolved pathname and not the symlink. SHP
49 XXX: This code implies that direct symlinks do not work. SHP
51 unsigned int flags = 0; //LOOKUP_POSITIVE;
54 err = path_lookup(name, flags, nd);
59 /* find the presto minor device for this inode */
/*
 * Fetches the presto_cache for inode with presto_get_cache() and returns
 * cache->cache_psdev->uc_minor.  A CERROR naming the superblock id and the
 * inode number is emitted on the cannot-find-cache path.
 *
 * NOTE(review): original lines 61, 63, 66 and 69-72 are absent from this
 * listing, so the braces, the test that guards the CERROR and the value
 * returned on that path are not visible.  presto_permit_upcall() treats a
 * result below zero as failure, so presumably a negative value is returned
 * when no cache is found -- confirm.
 */
60 int presto_i2m(struct inode *inode)
62 struct presto_cache *cache;
64 cache = presto_get_cache(inode);
65 CDEBUG(D_PSDEV, "\n");
67 CERROR("PRESTO: BAD: cannot find cache for dev %s, ino %ld\n",
68 inode->i_sb->s_id, inode->i_ino);
73 return cache->cache_psdev->uc_minor;
76 inline int presto_f2m(struct presto_file_set *fset)
78 return fset->fset_cache->cache_psdev->uc_minor;
82 inline int presto_c2m(struct presto_cache *cache)
84 return cache->cache_psdev->uc_minor;
88 /* XXX check this out */
/*
 * Maps a path name to its fileset.  Visible steps: presto_walk(name, &nd),
 * do_revalidate(nd.dentry), then presto_fset(nd.dentry); on a failure path
 * the result is replaced by ERR_PTR(error).  presto_set_max_kml_size()
 * decodes a failed result with PTR_ERR().
 *
 * NOTE(review): original lines 90-91, 93-95, 97-98, 100-101, 103-105 and
 * 107-111 are absent from this listing.  The declarations of nd and error,
 * the tests between the steps, the release of nd (if any) and the return
 * statement cannot be seen here -- confirm against a pristine copy.
 */
89 struct presto_file_set *presto_path2fileset(const char *name)
92 struct presto_file_set *fileset;
96 error = presto_walk(name, &nd);
99 error = do_revalidate(nd.dentry);
102 fileset = presto_fset(nd.dentry);
106 fileset = ERR_PTR(error);
/*
 * presto_chk - test a presto flag on a dentry.
 * @dentry: dentry to test; presto_fset(), dentry->d_inode and presto_d2d()
 *          are all applied to it.
 * @flag:   flag bit(s) to test.
 *
 * Visible behaviour: the minor from presto_i2m() selects an izo_channels[]
 * entry whose uc_no_filter field is tested first; a PRESTO_ATTR or
 * PRESTO_DATA query is answered from FSET_INSYNC in fset->fset_flags; the
 * last visible statement returns presto_d2d(dentry)->dd_flags & flag.
 *
 * NOTE(review): the original comment below names PRESTO_FSETINSYNC, but the
 * code tests FSET_INSYNC.
 * NOTE(review): original lines 122 and 123 are adjacent, so minor indexes
 * izo_channels[] with no check in between.  presto_permit_upcall() tests
 * the same presto_i2m() result for < 0, so a negative index looks possible
 * here -- verify.
 * NOTE(review): this listing skips original lines 115, 117-118, 120-121,
 * 124-127, 129, 134-138 and 140-141, including the end of the comment
 * below, the start of the second condition and the early returns.
 */
112 /* check a flag on this dentry or fset root. Semantics:
113 - most flags: test if it is set
114 - PRESTO_ATTR, PRESTO_DATA return 1 if PRESTO_FSETINSYNC is set
116 int presto_chk(struct dentry *dentry, int flag)
119 struct presto_file_set *fset = presto_fset(dentry);
122 minor = presto_i2m(dentry->d_inode);
123 if ( izo_channels[minor].uc_no_filter ) {
128 /* if the fileset is in sync DATA and ATTR are OK */
130 (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
131 (fset->fset_flags & FSET_INSYNC) ) {
132 CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
133 fset->fset_dentry->d_inode->i_ino);
139 return (presto_d2d(dentry)->dd_flags & flag);
142 /* set a bit in the dentry flags */
/*
 * presto_set - OR flag into presto_d2d(dentry)->dd_flags.
 * @dentry: target dentry; when it has an inode the operation is traced with
 *          CDEBUG.
 * @flag:   bit(s) to set.
 *
 * A dentry whose presto_d2d() is NULL is reported with CERROR.
 *
 * NOTE(review): in that CERROR the %*s conversion takes d_name.len as a
 * minimum field width, not as an upper bound on the characters printed; a
 * precision (%.*s) would bound it.  The message also has no trailing
 * newline.
 * NOTE(review): original lines 144-145, 149, 153-154 and 156-158 are absent
 * from this listing, so the braces and whatever follows the CERROR (line
 * 153) are not visible; line 155 would dereference the NULL result unless
 * line 153 stops execution -- confirm.
 */
143 void presto_set(struct dentry *dentry, int flag)
146 if ( dentry->d_inode ) {
147 CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
148 dentry->d_inode->i_ino, flag);
150 if ( presto_d2d(dentry) == NULL) {
151 CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
152 dentry->d_name.len, dentry->d_name.name);
155 presto_d2d(dentry)->dd_flags |= flag;
159 /* given a path: complete the closes on the fset */
/*
 * lento_complete_closes - run presto_complete_lml() on the fileset that
 * path belongs to.
 * @path: path name, resolved with presto_walk().
 *
 * Visible steps: presto_walk(path, &nd); a presto_ispresto() test on the
 * dentry inode; presto_fset(dentry), with CERROR when no fileset is found;
 * then presto_complete_lml(fset), whose result is stored in error.
 *
 * NOTE(review): this listing skips original lines 161-162, 164, 166-167,
 * 169-176, 178-181, 183-184, 186-189 and 192-199.  The declarations of nd
 * and error, the assignment of dentry, the error exits and the return
 * statement are not visible here.
 */
160 int lento_complete_closes(char *path)
163 struct dentry *dentry;
165 struct presto_file_set *fset;
168 error = presto_walk(path, &nd);
177 if ( !presto_ispresto(dentry->d_inode) ) {
182 fset = presto_fset(dentry);
185 CERROR("No fileset!\n");
190 /* transactions and locking are internal to this function */
191 error = presto_complete_lml(fset);
/*
 * lento_cancel_lml - per-path LML and KML bookkeeping driven by info->flags.
 * @path:              path name, resolved with presto_walk().
 * @remote_generation,
 * @remote_version:    not referenced on any line visible in this listing.
 * @info:              info->flags selects the steps below; info is also
 *                     passed to presto_write_last_rcvd().
 *
 * Inside a transaction opened with presto_trans_start() for
 * PRESTO_OP_RELEASE:
 *   LENTO_FL_CANCEL_LML   -> presto_clear_lml_close(fset, lml_offset)
 *   LENTO_FL_WRITE_KML    -> presto_getversion() + presto_journal_close()
 *   LENTO_FL_WRITE_EXPECT -> presto_write_last_rcvd()
 * presto_trans_commit() follows each step and appears once more after the
 * three; presumably the first three sit on error exits -- confirm.  After
 * that, LENTO_FL_CANCEL_LML additionally triggers presto_truncate_lml().
 *
 * NOTE(review): this listing skips many original lines (for example 202-203,
 * 205-206, 210-212, 253, 255-260, 264-266 and 285-294).  The end of the
 * comment below, the parameter that declares lml_offset, the declarations
 * of nd, rec, handle and error, the remaining presto_journal_close()
 * arguments and every error exit and return are not visible here.
 */
200 /* given a path: write a close record and cancel an LML record, finally
201 call truncate LML. Lento is doing this so it goes in with uid/gid's
204 int lento_cancel_lml(char *path,
207 __u32 remote_generation,
208 __u32 remote_version,
209 struct lento_vfs_context *info)
213 struct dentry *dentry;
215 struct presto_file_set *fset;
217 struct presto_version new_ver;
221 error = presto_walk(path, &nd);
229 if ( !presto_ispresto(dentry->d_inode) ) {
234 fset = presto_fset(dentry);
238 CERROR("No fileset!\n");
243 /* this only requires a transaction below which is automatic */
244 handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE);
245 if ( IS_ERR(handle) ) {
251 if (info->flags & LENTO_FL_CANCEL_LML) {
252 error = presto_clear_lml_close(fset, lml_offset);
254 presto_trans_commit(fset, handle);
261 if (info->flags & LENTO_FL_WRITE_KML) {
262 presto_getversion(&new_ver, dentry->d_inode);
263 error = presto_journal_close(&rec, fset, NULL, dentry,
267 presto_trans_commit(fset, handle);
272 if (info->flags & LENTO_FL_WRITE_EXPECT) {
273 error = presto_write_last_rcvd(&rec, fset, info);
276 presto_trans_commit(fset, handle);
281 presto_trans_commit(fset, handle);
283 if (info->flags & LENTO_FL_CANCEL_LML) {
284 presto_truncate_lml(fset);
295 /* given a dentry, operate on the flags in its dentry. Used by downcalls */
/*
 * izo_mark_dentry - mask, then set, bits in presto_d2d(dentry)->dd_flags.
 * @dentry:   dentry whose dd_flags are modified.
 * @and_flag: dd_flags is first ANDed with this value.
 * @or_flag:  dd_flags is then ORed with this value.
 * @res:      receives the resulting dd_flags (declared on a line this
 *            listing skips).
 *
 * A dentry without presto_d2d() data is reported with CERROR.  The old flags
 * are traced with CDEBUG before they are changed.
 *
 * NOTE(review): original lines 297-300, 304-306, 310, 313 and 315-318 are
 * absent from this listing, so the rest of the parameter list, the error
 * return, any guard in front of the store through res (line 313) and the
 * final return are not visible.
 */
296 int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag,
301 if (presto_d2d(dentry) == NULL) {
302 CERROR("InterMezzo: no ddata for inode %ld in %s\n",
303 dentry->d_inode->i_ino, __FUNCTION__);
307 CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
308 dentry->d_inode->i_ino, and_flag, or_flag,
309 presto_d2d(dentry)->dd_flags);
311 presto_d2d(dentry)->dd_flags &= and_flag;
312 presto_d2d(dentry)->dd_flags |= or_flag;
314 *res = presto_d2d(dentry)->dd_flags;
319 /* given a path, operate on the flags in its cache. Used by mark_ioctl */
/*
 * izo_mark_cache - mask, then set, bits in the cache_flags of the
 * presto_cache found through presto_get_cache(dentry->d_inode).
 * @dentry:   dentry used to locate the cache.
 * @and_flag: cache_flags is first ANDed with this value.
 * @or_flag:  cache_flags is then ORed with this value.
 * @res:      receives the resulting cache_flags cast to int (declared on a
 *            line this listing skips).
 *
 * NOTE(review): the original comment above says the input is a path, but
 * the visible signature takes a dentry.
 * NOTE(review): the presto_d2d() check and the CDEBUG report dd_flags of the
 * dentry even though this function changes cache_flags; this looks carried
 * over from izo_mark_dentry() -- confirm it is intended.
 * NOTE(review): original lines 321-322, 324, 328-330, 334, 336, 338-340, 343
 * and 345-348 are absent from this listing, including the test that guards
 * the second CERROR, any guard in front of the store through res and the
 * returns.
 */
320 int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag,
323 struct presto_cache *cache;
325 if (presto_d2d(dentry) == NULL) {
326 CERROR("InterMezzo: no ddata for inode %ld in %s\n",
327 dentry->d_inode->i_ino, __FUNCTION__);
331 CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
332 dentry->d_inode->i_ino, and_flag, or_flag,
333 presto_d2d(dentry)->dd_flags);
335 cache = presto_get_cache(dentry->d_inode);
337 CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
341 cache->cache_flags &= and_flag;
342 cache->cache_flags |= or_flag;
344 *res = (int)cache->cache_flags;
/*
 * presto_set_max_kml_size - store max_size in the kml_truncate_size field
 * of the fileset that path belongs to.
 * @path:     path name, resolved with presto_path2fileset().
 * @max_size: new value for fset->kml_truncate_size; the trace message
 *            reports it as bytes.
 *
 * A failed lookup is propagated as PTR_ERR(fset).
 *
 * NOTE(review): original lines 350, 352-354, 356-357, 359-360 and 363-368
 * are absent from this listing, so the test in front of the PTR_ERR return,
 * the remaining CDEBUG arguments and the success return are not visible.
 */
349 int presto_set_max_kml_size(const char *path, unsigned long max_size)
351 struct presto_file_set *fset;
355 fset = presto_path2fileset(path);
358 return PTR_ERR(fset);
361 fset->kml_truncate_size = max_size;
362 CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
/*
 * izo_mark_fset - mask, then set, bits in the fset_flags of the fileset
 * returned by presto_fset(dentry).
 * @dentry:   dentry used to locate the fileset.
 * @and_flag: fset_flags is first ANDed with this value.
 * @or_flag:  fset_flags is then ORed with this value.
 * @res:      receives the resulting fset_flags cast to int (declared on a
 *            line this listing skips).
 *
 * On the failure path the inode is passed to make_bad_inode().
 *
 * NOTE(review): the CERROR text talks about a cache and names
 * izo_mark_cache, although this is izo_mark_fset and the failed lookup is
 * presto_fset(); it looks like a copy-and-paste leftover.
 * NOTE(review): izo_revoke_permit() passes NULL for res, so the store on
 * line 383 is only safe if the skipped line 382 guards it -- confirm.
 * NOTE(review): original lines 370-371, 373, 375, 378-379, 382 and 384-387
 * are absent from this listing, including the rest of the parameter list,
 * the test that guards the CERROR and the returns.
 */
369 int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag,
372 struct presto_file_set *fset;
374 fset = presto_fset(dentry);
376 CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
377 make_bad_inode(dentry->d_inode);
380 fset->fset_flags &= and_flag;
381 fset->fset_flags |= or_flag;
383 *res = (int)fset->fset_flags;
388 /* talk to Lento about the permit */
/*
 * presto_permit_upcall - issue the permit upcall for the fileset that
 * dentry belongs to.
 * @dentry: dentry whose inode selects the minor and whose fileset is used.
 *
 * Visible steps: obtain the minor with presto_i2m() (a negative value takes
 * a separate branch); look up the fileset with presto_fset(); test
 * presto_lento_up(minor) and, when that fails, FSET_STEAL_PERMIT; allocate
 * a PAGE_SIZE buffer with PRESTO_ALLOC; call presto_path(dentry,
 * fset->fset_dentry, buffer, PAGE_SIZE); call izo_upc_permit() and keep its
 * result in rc; free the buffer.
 *
 * NOTE(review): fsetnamelen is assigned on line 429 but is not passed to
 * izo_upc_permit() on line 430 nor used on line 431; it looks unused.
 * NOTE(review): original lines 390-395, 397-399, 401-404, 406-410, 413-420,
 * 422, 424-426 and 432-435 are absent from this listing, so the remaining
 * declarations, every early exit and the return statement are not visible.
 */
389 static int presto_permit_upcall(struct dentry *dentry)
396 struct presto_file_set *fset = NULL;
400 if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
405 fset = presto_fset(dentry);
411 if ( !presto_lento_up(minor) ) {
412 if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
421 PRESTO_ALLOC(buffer, PAGE_SIZE);
423 CERROR("PRESTO: out of memory!\n");
427 path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
428 pathlen = MYPATHLEN(buffer, path);
429 fsetnamelen = strlen(fset->fset_name);
430 rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);
431 PRESTO_FREE(buffer, PAGE_SIZE);
/*
 * presto_get_permit - take a reference on the write permit of the fileset
 * that inode belongs to, running the permit upcall when needed.
 * @inode: inode being written; its first i_dentry alias is used to find
 *         the fileset.
 *
 * Visible flow:
 *  - ISLENTO(minor), an empty alias list, PRESTO_DONT_JOURNAL, a missing
 *    fileset and FSET_HAS_BRANCHES are each tested before any locking.
 *  - Under fset_permit_lock: if FSET_HASPERMIT is already set, only
 *    fset_permit_count is incremented.
 *  - Otherwise fset_permit_upcall_count is incremented.  The caller that
 *    brings it to 1 drops the lock, runs presto_permit_upcall(), retakes the
 *    lock, decrements the counter, sets FSET_HASPERMIT through
 *    izo_mark_fset() on two of the visible branches and wakes
 *    fset_permit_queue.
 *  - Any other caller puts itself on fset_permit_queue in
 *    TASK_INTERRUPTIBLE state; the visible checks are
 *    fset_permit_upcall_count == 0 and signal_pending(current).
 *
 * NOTE(review): the branch on line 509 compares rc with positive ENOTCONN.
 * If presto_permit_upcall() reports errors as negative errno values this
 * branch cannot be taken -- verify the sign.
 * NOTE(review): no decrement of fset_permit_upcall_count is visible for the
 * sleeping caller, although line 499 increments it for every caller; check
 * the skipped lines.
 * NOTE(review): the No-alias CERROR prints i_ino through an (int) cast with
 * %d, while other messages in this file print i_ino with %ld.
 * NOTE(review): the sentence in the original comment below that begins with
 * Note is missing a verb (presumably: a permit will never be given).
 * NOTE(review): this listing skips many original lines (for example 441,
 * 444, 446-447, 450-457, 505, 507, 512, 515-516, 528, 530, 533, 535,
 * 538-542, 547-549 and 553-556), including the end of the comment below,
 * the declarations of de and rc, the loop around the sleep, every early
 * return and the final return.
 */
436 /* get a write permit for the fileset of this inode
437 * - if this returns a negative value there was an error
438 * - if 0 is returned the permit was already in the kernel -- or --
439 * Lento gave us the permit without reintegration
440 * - lento returns the number of records it reintegrated
442 * Note that if this fileset has branches, a permit will -never- to a normal
443 * process for writing in the data area (ie, outside of .intermezzo)
445 int presto_get_permit(struct inode * inode)
448 struct presto_file_set *fset;
449 int minor = presto_i2m(inode);
458 if ( ISLENTO(minor) ) {
463 if (list_empty(&inode->i_dentry)) {
464 CERROR("No alias for inode %d\n", (int) inode->i_ino);
469 de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
471 if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
476 fset = presto_fset(de);
478 CERROR("Presto: no fileset in presto_get_permit!\n");
483 if (fset->fset_flags & FSET_HAS_BRANCHES) {
488 spin_lock(&fset->fset_permit_lock);
489 if (fset->fset_flags & FSET_HASPERMIT) {
490 fset->fset_permit_count++;
491 CDEBUG(D_INODE, "permit count now %d, inode %lx\n",
492 fset->fset_permit_count, inode->i_ino);
493 spin_unlock(&fset->fset_permit_lock);
498 /* Allow reintegration to proceed without locks -SHP */
499 fset->fset_permit_upcall_count++;
500 if (fset->fset_permit_upcall_count == 1) {
501 spin_unlock(&fset->fset_permit_lock);
502 rc = presto_permit_upcall(fset->fset_dentry);
503 spin_lock(&fset->fset_permit_lock);
504 fset->fset_permit_upcall_count--;
506 izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
508 fset->fset_permit_count++;
509 } else if (rc == ENOTCONN) {
510 CERROR("InterMezzo: disconnected operation. stealing permit.\n");
511 izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
513 fset->fset_permit_count++;
514 /* set a disconnected flag here to stop upcalls */
517 CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
519 /* go to sleep here and try again? */
521 wake_up_interruptible(&fset->fset_permit_queue);
523 /* Someone is already doing an upcall; go to sleep. */
524 DECLARE_WAITQUEUE(wait, current);
526 spin_unlock(&fset->fset_permit_lock);
527 add_wait_queue(&fset->fset_permit_queue, &wait);
529 set_current_state(TASK_INTERRUPTIBLE);
531 spin_lock(&fset->fset_permit_lock);
532 if (fset->fset_permit_upcall_count == 0)
534 spin_unlock(&fset->fset_permit_lock);
536 if (signal_pending(current)) {
537 remove_wait_queue(&fset->fset_permit_queue,
543 remove_wait_queue(&fset->fset_permit_queue, &wait);
544 /* We've been woken up: do we have the permit? */
545 if (fset->fset_flags & FSET_HASPERMIT)
546 /* FIXME: Is this the right thing? */
550 CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
551 "rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
552 spin_unlock(&fset->fset_permit_lock);
/*
 * presto_put_permit - drop one reference on the write permit of the fileset
 * that inode belongs to.
 * @inode: inode whose first i_dentry alias is used to find the fileset.
 *
 * Visible flow:
 *  - ISLENTO(minor), an empty alias list, a missing fileset and
 *    PRESTO_DONT_JOURNAL are each tested before any locking.
 *  - Under fset_permit_lock: with FSET_HASPERMIT set, a positive
 *    fset_permit_count is decremented and a CERROR covers the count-is-0
 *    case; on the other visible branch the count is forced to 0 and a
 *    CERROR reports a put without a permit.
 *  - When FSET_PERMIT_WAITING is set and the count is 0, sleepers on
 *    fset_permit_queue are woken.  The condition on lines 610-611 parses as
 *    (flags & FSET_PERMIT_WAITING) && (count == 0).
 *
 * NOTE(review): the No-alias CERROR prints i_ino through an (int) cast with
 * %d, while the other messages here print it with %ld.
 * NOTE(review): this listing skips original lines 558-559, 562-568, 570-573,
 * 576-579, 581, 583, 585-588, 590-593, 598, 601, 605-606, 609, 613, 615 and
 * 617-621, including the declaration of de, the else keywords, the last
 * CDEBUG argument, every early return and the final return.
 */
557 int presto_put_permit(struct inode * inode)
560 struct presto_file_set *fset;
561 int minor = presto_i2m(inode);
569 if ( ISLENTO(minor) ) {
574 if (list_empty(&inode->i_dentry)) {
575 CERROR("No alias for inode %d\n", (int) inode->i_ino);
580 de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
582 fset = presto_fset(de);
584 CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
589 if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
594 spin_lock(&fset->fset_permit_lock);
595 if (fset->fset_flags & FSET_HASPERMIT) {
596 if (fset->fset_permit_count > 0)
597 fset->fset_permit_count--;
599 CERROR("Put permit while permit count is 0, "
600 "inode %ld!\n", inode->i_ino);
602 fset->fset_permit_count = 0;
603 CERROR("InterMezzo: put permit while no permit, inode %ld, "
604 "flags %x!\n", inode->i_ino, fset->fset_flags);
607 CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
608 fset->fset_permit_count, inode->i_ino);
610 if (fset->fset_flags & FSET_PERMIT_WAITING &&
611 fset->fset_permit_count == 0) {
612 CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
614 wake_up_interruptible(&fset->fset_permit_queue);
616 spin_unlock(&fset->fset_permit_lock);
622 void presto_getversion(struct presto_version * presto_version,
623 struct inode * inode)
625 presto_version->pv_mtime_sec = inode->i_mtime.tv_sec;
626 presto_version->pv_mtime_nsec = inode->i_mtime.tv_nsec;
627 presto_version->pv_ctime_sec = inode->i_ctime.tv_sec;
628 presto_version->pv_ctime_nsec = inode->i_ctime.tv_nsec;
629 presto_version->pv_size = (__u64)inode->i_size;
/*
 * izo_revoke_permit - give up the write permit of the fileset that dentry
 * belongs to, waiting until nobody uses it any more.
 * @dentry: dentry used to find the minor and the fileset.
 * @uuid:   passed on to izo_upc_revoke_permit(); see the original comment
 *          below for the null and non-null cases.
 *
 * Visible flow:
 *  - Under fset_permit_lock, an already set FSET_PERMIT_WAITING is reported
 *    with CERROR and the lock is released.
 *  - With a non-zero fset_permit_count, FSET_PERMIT_WAITING is set through
 *    izo_mark_fset(), the lock is dropped and the caller puts itself on
 *    fset_permit_queue in TASK_INTERRUPTIBLE state; the visible checks are
 *    fset_permit_count == 0 (taken under the lock) and
 *    signal_pending(current).
 *  - Afterwards izo_upc_revoke_permit() is called, FSET_PERMIT_WAITING and
 *    FSET_HASPERMIT are cleared through izo_mark_fset() and the lock is
 *    released.
 *
 * NOTE(review): line 710 issues the upcall before either of the unlocks on
 * lines 712 and 720, and the original comment on line 704 says the lock is
 * held at that point.  If izo_upc_revoke_permit() can sleep, calling it
 * with a spinlock held needs checking.
 * NOTE(review): line 674 marks the fileset found through dentry, while
 * lines 718-719 use fset->fset_dentry; confirm both reach the same fileset.
 * NOTE(review): this listing skips many original lines (for example
 * 636-637, 639-640, 642, 645-648, 650-654, 656-660, 664, 666-668, 670-671,
 * 676-680, 693-696, 698-701, 705, 708-709, 711, 713-717 and 721-724),
 * including the ends of two comments, the declarations of minor and rc, the
 * loop around the sleep, the tests around the upcall and every return.
 */
633 /* If uuid is non-null, it is the uuid of the peer that's making the revocation
634 * request. If it is null, this request was made locally, without external
635 * pressure to give up the permit. This most often occurs when a client
638 * FIXME: this function needs to be refactored slightly once we start handling
641 int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
643 struct presto_file_set *fset;
644 DECLARE_WAITQUEUE(wait, current);
649 minor = presto_i2m(dentry->d_inode);
655 fset = presto_fset(dentry);
661 spin_lock(&fset->fset_permit_lock);
662 if (fset->fset_flags & FSET_PERMIT_WAITING) {
663 CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported! Aborting this particular permit request...\n");
665 spin_unlock(&fset->fset_permit_lock);
669 if (fset->fset_permit_count == 0)
672 /* Something is still using this permit. Mark that we're waiting for it
673 * and go to sleep. */
674 rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
675 spin_unlock(&fset->fset_permit_lock);
681 add_wait_queue(&fset->fset_permit_queue, &wait);
683 set_current_state(TASK_INTERRUPTIBLE);
685 spin_lock(&fset->fset_permit_lock);
686 if (fset->fset_permit_count == 0)
688 spin_unlock(&fset->fset_permit_lock);
690 if (signal_pending(current)) {
691 /* FIXME: there must be a better thing to return... */
692 remove_wait_queue(&fset->fset_permit_queue, &wait);
697 /* FIXME: maybe there should be a timeout here. */
702 remove_wait_queue(&fset->fset_permit_queue, &wait);
704 /* By this point fset->fset_permit_count is zero and we're holding the
706 CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
707 dentry->d_inode->i_ino);
710 rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
712 spin_unlock(&fset->fset_permit_lock);
718 izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
719 izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
720 spin_unlock(&fset->fset_permit_lock);
// presto_is_read_only: report whether the fileset fset is read-only for the
// current kind of caller.
// Visible logic: minor is read from fset->fset_cache->cache_psdev->uc_minor.
// ISLENTO(minor) picks FSET_LENTO_RO or FSET_CLIENT_RO, which is tested
// against fset->fset_flags; it then picks CACHE_LENTO_RO or CACHE_CLIENT_RO,
// and the last visible statement returns 1 when that bit is set in
// cache->cache_flags and 0 otherwise.
// NOTE(review): original lines 726-727, 729, 733 and 736 are absent from
// this listing, so the braces, the declarations of minor and mask and the
// statement controlled by the if on line 732 are not visible; presumably
// that statement returns 1 -- confirm against a pristine copy.
725 inline int presto_is_read_only(struct presto_file_set * fset)
728 struct presto_cache *cache = fset->fset_cache;
730 minor= cache->cache_psdev->uc_minor;
731 mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO);
732 if ( fset->fset_flags & mask )
734 mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO);
735 return ((cache->cache_flags & mask)? 1 : 0);